Turn HARD_REGNO_MODE_OK into a target hook
[official-gcc.git] / gcc / config / arm / arm.c
blob: 3c6c56ccdb5ae5f50ec90b0a1d02bfe40385ead2
1 /* Output routines for GCC for ARM.
2 Copyright (C) 1991-2017 Free Software Foundation, Inc.
3 Contributed by Pieter `Tiggr' Schoenmakers (rcpieter@win.tue.nl)
4 and Martin Simmons (martin@harleqn.co.uk).
5 More major hacks by Richard Earnshaw (rearnsha@arm.com).
7 This file is part of GCC.
9 GCC is free software; you can redistribute it and/or modify it
10 under the terms of the GNU General Public License as published
11 by the Free Software Foundation; either version 3, or (at your
12 option) any later version.
14 GCC is distributed in the hope that it will be useful, but WITHOUT
15 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
16 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
17 License for more details.
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3. If not see
21 <http://www.gnu.org/licenses/>. */
23 #include "config.h"
24 #include "system.h"
25 #include "coretypes.h"
26 #include "backend.h"
27 #include "target.h"
28 #include "rtl.h"
29 #include "tree.h"
30 #include "memmodel.h"
31 #include "cfghooks.h"
32 #include "df.h"
33 #include "tm_p.h"
34 #include "stringpool.h"
35 #include "attribs.h"
36 #include "optabs.h"
37 #include "regs.h"
38 #include "emit-rtl.h"
39 #include "recog.h"
40 #include "cgraph.h"
41 #include "diagnostic-core.h"
42 #include "alias.h"
43 #include "fold-const.h"
44 #include "stor-layout.h"
45 #include "calls.h"
46 #include "varasm.h"
47 #include "output.h"
48 #include "insn-attr.h"
49 #include "flags.h"
50 #include "reload.h"
51 #include "explow.h"
52 #include "expr.h"
53 #include "cfgrtl.h"
54 #include "sched-int.h"
55 #include "common/common-target.h"
56 #include "langhooks.h"
57 #include "intl.h"
58 #include "libfuncs.h"
59 #include "params.h"
60 #include "opts.h"
61 #include "dumpfile.h"
62 #include "target-globals.h"
63 #include "builtins.h"
64 #include "tm-constrs.h"
65 #include "rtl-iter.h"
66 #include "optabs-libfuncs.h"
67 #include "gimplify.h"
68 #include "gimple.h"
69 #include "selftest.h"
71 /* This file should be included last. */
72 #include "target-def.h"
74 /* Forward definitions of types. */
75 typedef struct minipool_node Mnode;
76 typedef struct minipool_fixup Mfix;
78 void (*arm_lang_output_object_attributes_hook)(void);
80 struct four_ints
81 {
82   int i[4];
83 };
85 /* Forward function declarations. */
86 static bool arm_const_not_ok_for_debug_p (rtx);
87 static int arm_needs_doubleword_align (machine_mode, const_tree);
88 static int arm_compute_static_chain_stack_bytes (void);
89 static arm_stack_offsets *arm_get_frame_offsets (void);
90 static void arm_compute_frame_layout (void);
91 static void arm_add_gc_roots (void);
92 static int arm_gen_constant (enum rtx_code, machine_mode, rtx,
93 unsigned HOST_WIDE_INT, rtx, rtx, int, int);
94 static unsigned bit_count (unsigned long);
95 static unsigned bitmap_popcount (const sbitmap);
96 static int arm_address_register_rtx_p (rtx, int);
97 static int arm_legitimate_index_p (machine_mode, rtx, RTX_CODE, int);
98 static bool is_called_in_ARM_mode (tree);
99 static int thumb2_legitimate_index_p (machine_mode, rtx, int);
100 static int thumb1_base_register_rtx_p (rtx, machine_mode, int);
101 static rtx arm_legitimize_address (rtx, rtx, machine_mode);
102 static reg_class_t arm_preferred_reload_class (rtx, reg_class_t);
103 static rtx thumb_legitimize_address (rtx, rtx, machine_mode);
104 inline static int thumb1_index_register_rtx_p (rtx, int);
105 static int thumb_far_jump_used_p (void);
106 static bool thumb_force_lr_save (void);
107 static unsigned arm_size_return_regs (void);
108 static bool arm_assemble_integer (rtx, unsigned int, int);
109 static void arm_print_operand (FILE *, rtx, int);
110 static void arm_print_operand_address (FILE *, machine_mode, rtx);
111 static bool arm_print_operand_punct_valid_p (unsigned char code);
112 static const char *fp_const_from_val (REAL_VALUE_TYPE *);
113 static arm_cc get_arm_condition_code (rtx);
114 static bool arm_fixed_condition_code_regs (unsigned int *, unsigned int *);
115 static const char *output_multi_immediate (rtx *, const char *, const char *,
116 int, HOST_WIDE_INT);
117 static const char *shift_op (rtx, HOST_WIDE_INT *);
118 static struct machine_function *arm_init_machine_status (void);
119 static void thumb_exit (FILE *, int);
120 static HOST_WIDE_INT get_jump_table_size (rtx_jump_table_data *);
121 static Mnode *move_minipool_fix_forward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
122 static Mnode *add_minipool_forward_ref (Mfix *);
123 static Mnode *move_minipool_fix_backward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
124 static Mnode *add_minipool_backward_ref (Mfix *);
125 static void assign_minipool_offsets (Mfix *);
126 static void arm_print_value (FILE *, rtx);
127 static void dump_minipool (rtx_insn *);
128 static int arm_barrier_cost (rtx_insn *);
129 static Mfix *create_fix_barrier (Mfix *, HOST_WIDE_INT);
130 static void push_minipool_barrier (rtx_insn *, HOST_WIDE_INT);
131 static void push_minipool_fix (rtx_insn *, HOST_WIDE_INT, rtx *,
132 machine_mode, rtx);
133 static void arm_reorg (void);
134 static void note_invalid_constants (rtx_insn *, HOST_WIDE_INT, int);
135 static unsigned long arm_compute_save_reg0_reg12_mask (void);
136 static unsigned long arm_compute_save_core_reg_mask (void);
137 static unsigned long arm_isr_value (tree);
138 static unsigned long arm_compute_func_type (void);
139 static tree arm_handle_fndecl_attribute (tree *, tree, tree, int, bool *);
140 static tree arm_handle_pcs_attribute (tree *, tree, tree, int, bool *);
141 static tree arm_handle_isr_attribute (tree *, tree, tree, int, bool *);
142 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
143 static tree arm_handle_notshared_attribute (tree *, tree, tree, int, bool *);
144 #endif
145 static tree arm_handle_cmse_nonsecure_entry (tree *, tree, tree, int, bool *);
146 static tree arm_handle_cmse_nonsecure_call (tree *, tree, tree, int, bool *);
147 static void arm_output_function_epilogue (FILE *);
148 static void arm_output_function_prologue (FILE *);
149 static int arm_comp_type_attributes (const_tree, const_tree);
150 static void arm_set_default_type_attributes (tree);
151 static int arm_adjust_cost (rtx_insn *, int, rtx_insn *, int, unsigned int);
152 static int arm_sched_reorder (FILE *, int, rtx_insn **, int *, int);
153 static int optimal_immediate_sequence (enum rtx_code code,
154 unsigned HOST_WIDE_INT val,
155 struct four_ints *return_sequence);
156 static int optimal_immediate_sequence_1 (enum rtx_code code,
157 unsigned HOST_WIDE_INT val,
158 struct four_ints *return_sequence,
159 int i);
160 static int arm_get_strip_length (int);
161 static bool arm_function_ok_for_sibcall (tree, tree);
162 static machine_mode arm_promote_function_mode (const_tree,
163 machine_mode, int *,
164 const_tree, int);
165 static bool arm_return_in_memory (const_tree, const_tree);
166 static rtx arm_function_value (const_tree, const_tree, bool);
167 static rtx arm_libcall_value_1 (machine_mode);
168 static rtx arm_libcall_value (machine_mode, const_rtx);
169 static bool arm_function_value_regno_p (const unsigned int);
170 static void arm_internal_label (FILE *, const char *, unsigned long);
171 static void arm_output_mi_thunk (FILE *, tree, HOST_WIDE_INT, HOST_WIDE_INT,
172 tree);
173 static bool arm_have_conditional_execution (void);
174 static bool arm_cannot_force_const_mem (machine_mode, rtx);
175 static bool arm_legitimate_constant_p (machine_mode, rtx);
176 static bool arm_rtx_costs (rtx, machine_mode, int, int, int *, bool);
177 static int arm_address_cost (rtx, machine_mode, addr_space_t, bool);
178 static int arm_register_move_cost (machine_mode, reg_class_t, reg_class_t);
179 static int arm_memory_move_cost (machine_mode, reg_class_t, bool);
180 static void emit_constant_insn (rtx cond, rtx pattern);
181 static rtx_insn *emit_set_insn (rtx, rtx);
182 static rtx emit_multi_reg_push (unsigned long, unsigned long);
183 static int arm_arg_partial_bytes (cumulative_args_t, machine_mode,
184 tree, bool);
185 static rtx arm_function_arg (cumulative_args_t, machine_mode,
186 const_tree, bool);
187 static void arm_function_arg_advance (cumulative_args_t, machine_mode,
188 const_tree, bool);
189 static unsigned int arm_function_arg_boundary (machine_mode, const_tree);
190 static rtx aapcs_allocate_return_reg (machine_mode, const_tree,
191 const_tree);
192 static rtx aapcs_libcall_value (machine_mode);
193 static int aapcs_select_return_coproc (const_tree, const_tree);
195 #ifdef OBJECT_FORMAT_ELF
196 static void arm_elf_asm_constructor (rtx, int) ATTRIBUTE_UNUSED;
197 static void arm_elf_asm_destructor (rtx, int) ATTRIBUTE_UNUSED;
198 #endif
199 #ifndef ARM_PE
200 static void arm_encode_section_info (tree, rtx, int);
201 #endif
203 static void arm_file_end (void);
204 static void arm_file_start (void);
205 static void arm_insert_attributes (tree, tree *);
207 static void arm_setup_incoming_varargs (cumulative_args_t, machine_mode,
208 tree, int *, int);
209 static bool arm_pass_by_reference (cumulative_args_t,
210 machine_mode, const_tree, bool);
211 static bool arm_promote_prototypes (const_tree);
212 static bool arm_default_short_enums (void);
213 static bool arm_align_anon_bitfield (void);
214 static bool arm_return_in_msb (const_tree);
215 static bool arm_must_pass_in_stack (machine_mode, const_tree);
216 static bool arm_return_in_memory (const_tree, const_tree);
217 #if ARM_UNWIND_INFO
218 static void arm_unwind_emit (FILE *, rtx_insn *);
219 static bool arm_output_ttype (rtx);
220 static void arm_asm_emit_except_personality (rtx);
221 #endif
222 static void arm_asm_init_sections (void);
223 static rtx arm_dwarf_register_span (rtx);
225 static tree arm_cxx_guard_type (void);
226 static bool arm_cxx_guard_mask_bit (void);
227 static tree arm_get_cookie_size (tree);
228 static bool arm_cookie_has_size (void);
229 static bool arm_cxx_cdtor_returns_this (void);
230 static bool arm_cxx_key_method_may_be_inline (void);
231 static void arm_cxx_determine_class_data_visibility (tree);
232 static bool arm_cxx_class_data_always_comdat (void);
233 static bool arm_cxx_use_aeabi_atexit (void);
234 static void arm_init_libfuncs (void);
235 static tree arm_build_builtin_va_list (void);
236 static void arm_expand_builtin_va_start (tree, rtx);
237 static tree arm_gimplify_va_arg_expr (tree, tree, gimple_seq *, gimple_seq *);
238 static void arm_option_override (void);
239 static void arm_option_save (struct cl_target_option *, struct gcc_options *);
240 static void arm_option_restore (struct gcc_options *,
241 struct cl_target_option *);
242 static void arm_override_options_after_change (void);
243 static void arm_option_print (FILE *, int, struct cl_target_option *);
244 static void arm_set_current_function (tree);
245 static bool arm_can_inline_p (tree, tree);
246 static void arm_relayout_function (tree);
247 static bool arm_valid_target_attribute_p (tree, tree, tree, int);
248 static unsigned HOST_WIDE_INT arm_shift_truncation_mask (machine_mode);
249 static bool arm_sched_can_speculate_insn (rtx_insn *);
250 static bool arm_macro_fusion_p (void);
251 static bool arm_cannot_copy_insn_p (rtx_insn *);
252 static int arm_issue_rate (void);
253 static int arm_first_cycle_multipass_dfa_lookahead (void);
254 static int arm_first_cycle_multipass_dfa_lookahead_guard (rtx_insn *, int);
255 static void arm_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
256 static bool arm_output_addr_const_extra (FILE *, rtx);
257 static bool arm_allocate_stack_slots_for_args (void);
258 static bool arm_warn_func_return (tree);
259 static tree arm_promoted_type (const_tree t);
260 static bool arm_scalar_mode_supported_p (scalar_mode);
261 static bool arm_frame_pointer_required (void);
262 static bool arm_can_eliminate (const int, const int);
263 static void arm_asm_trampoline_template (FILE *);
264 static void arm_trampoline_init (rtx, tree, rtx);
265 static rtx arm_trampoline_adjust_address (rtx);
266 static rtx_insn *arm_pic_static_addr (rtx orig, rtx reg);
267 static bool cortex_a9_sched_adjust_cost (rtx_insn *, int, rtx_insn *, int *);
268 static bool xscale_sched_adjust_cost (rtx_insn *, int, rtx_insn *, int *);
269 static bool fa726te_sched_adjust_cost (rtx_insn *, int, rtx_insn *, int *);
270 static bool arm_array_mode_supported_p (machine_mode,
271 unsigned HOST_WIDE_INT);
272 static machine_mode arm_preferred_simd_mode (scalar_mode);
273 static bool arm_class_likely_spilled_p (reg_class_t);
274 static HOST_WIDE_INT arm_vector_alignment (const_tree type);
275 static bool arm_vector_alignment_reachable (const_tree type, bool is_packed);
276 static bool arm_builtin_support_vector_misalignment (machine_mode mode,
277 const_tree type,
278 int misalignment,
279 bool is_packed);
280 static void arm_conditional_register_usage (void);
281 static enum flt_eval_method arm_excess_precision (enum excess_precision_type);
282 static reg_class_t arm_preferred_rename_class (reg_class_t rclass);
283 static unsigned int arm_autovectorize_vector_sizes (void);
284 static int arm_default_branch_cost (bool, bool);
285 static int arm_cortex_a5_branch_cost (bool, bool);
286 static int arm_cortex_m_branch_cost (bool, bool);
287 static int arm_cortex_m7_branch_cost (bool, bool);
289 static bool arm_vectorize_vec_perm_const_ok (machine_mode vmode,
290 const unsigned char *sel);
292 static bool aarch_macro_fusion_pair_p (rtx_insn*, rtx_insn*);
294 static int arm_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
295 tree vectype,
296 int misalign ATTRIBUTE_UNUSED);
297 static unsigned arm_add_stmt_cost (void *data, int count,
298 enum vect_cost_for_stmt kind,
299 struct _stmt_vec_info *stmt_info,
300 int misalign,
301 enum vect_cost_model_location where);
303 static void arm_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
304 bool op0_preserve_value);
305 static unsigned HOST_WIDE_INT arm_asan_shadow_offset (void);
307 static void arm_sched_fusion_priority (rtx_insn *, int, int *, int*);
308 static bool arm_can_output_mi_thunk (const_tree, HOST_WIDE_INT, HOST_WIDE_INT,
309 const_tree);
310 static section *arm_function_section (tree, enum node_frequency, bool, bool);
311 static bool arm_asm_elf_flags_numeric (unsigned int flags, unsigned int *num);
312 static unsigned int arm_elf_section_type_flags (tree decl, const char *name,
313 int reloc);
314 static void arm_expand_divmod_libfunc (rtx, machine_mode, rtx, rtx, rtx *, rtx *);
315 static opt_scalar_float_mode arm_floatn_mode (int, bool);
316 static bool arm_hard_regno_mode_ok (unsigned int, machine_mode);
318 /* Table of machine attributes. */
319 static const struct attribute_spec arm_attribute_table[] =
321 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
322 affects_type_identity } */
323 /* Function calls made to this symbol must be done indirectly, because
324 it may lie outside of the 26 bit addressing range of a normal function
325 call. */
326 { "long_call", 0, 0, false, true, true, NULL, false },
327 /* Whereas these functions are always known to reside within the 26 bit
328 addressing range. */
329 { "short_call", 0, 0, false, true, true, NULL, false },
330 /* Specify the procedure call conventions for a function. */
331 { "pcs", 1, 1, false, true, true, arm_handle_pcs_attribute,
332 false },
333 /* Interrupt Service Routines have special prologue and epilogue requirements. */
334 { "isr", 0, 1, false, false, false, arm_handle_isr_attribute,
335 false },
336 { "interrupt", 0, 1, false, false, false, arm_handle_isr_attribute,
337 false },
338 { "naked", 0, 0, true, false, false, arm_handle_fndecl_attribute,
339 false },
340 #ifdef ARM_PE
341 /* ARM/PE has three new attributes:
342 interfacearm - ?
343 dllexport - for exporting a function/variable that will live in a dll
344 dllimport - for importing a function/variable from a dll
346 Microsoft allows multiple declspecs in one __declspec, separating
347 them with spaces. We do NOT support this. Instead, use __declspec
348 multiple times.
350 { "dllimport", 0, 0, true, false, false, NULL, false },
351 { "dllexport", 0, 0, true, false, false, NULL, false },
352 { "interfacearm", 0, 0, true, false, false, arm_handle_fndecl_attribute,
353 false },
354 #elif TARGET_DLLIMPORT_DECL_ATTRIBUTES
355 { "dllimport", 0, 0, false, false, false, handle_dll_attribute, false },
356 { "dllexport", 0, 0, false, false, false, handle_dll_attribute, false },
357 { "notshared", 0, 0, false, true, false, arm_handle_notshared_attribute,
358 false },
359 #endif
360 /* ARMv8-M Security Extensions support. */
361 { "cmse_nonsecure_entry", 0, 0, true, false, false,
362 arm_handle_cmse_nonsecure_entry, false },
363 { "cmse_nonsecure_call", 0, 0, true, false, false,
364 arm_handle_cmse_nonsecure_call, true },
365 { NULL, 0, 0, false, false, false, NULL, false }
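/* For illustration only: a minimal sketch of how user code might request
   the attributes handled above, assuming an Arm target.  The declarations
   below are invented for this example and do not appear elsewhere in this
   file.

     void far_away_handler (void) __attribute__ ((long_call));
     void nearby_helper (void) __attribute__ ((short_call));
     void uart_rx_isr (void) __attribute__ ((isr ("IRQ")));
     double scale_sample (double x) __attribute__ ((pcs ("aapcs-vfp")));

   "isr" optionally names the interrupt kind and "pcs" selects the
   procedure call standard variant; the argument strings are validated by
   arm_handle_isr_attribute and arm_handle_pcs_attribute respectively.  */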
368 /* Initialize the GCC target structure. */
369 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
370 #undef TARGET_MERGE_DECL_ATTRIBUTES
371 #define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
372 #endif
374 #undef TARGET_LEGITIMIZE_ADDRESS
375 #define TARGET_LEGITIMIZE_ADDRESS arm_legitimize_address
377 #undef TARGET_ATTRIBUTE_TABLE
378 #define TARGET_ATTRIBUTE_TABLE arm_attribute_table
380 #undef TARGET_INSERT_ATTRIBUTES
381 #define TARGET_INSERT_ATTRIBUTES arm_insert_attributes
383 #undef TARGET_ASM_FILE_START
384 #define TARGET_ASM_FILE_START arm_file_start
385 #undef TARGET_ASM_FILE_END
386 #define TARGET_ASM_FILE_END arm_file_end
388 #undef TARGET_ASM_ALIGNED_SI_OP
389 #define TARGET_ASM_ALIGNED_SI_OP NULL
390 #undef TARGET_ASM_INTEGER
391 #define TARGET_ASM_INTEGER arm_assemble_integer
393 #undef TARGET_PRINT_OPERAND
394 #define TARGET_PRINT_OPERAND arm_print_operand
395 #undef TARGET_PRINT_OPERAND_ADDRESS
396 #define TARGET_PRINT_OPERAND_ADDRESS arm_print_operand_address
397 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
398 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P arm_print_operand_punct_valid_p
400 #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
401 #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA arm_output_addr_const_extra
403 #undef TARGET_ASM_FUNCTION_PROLOGUE
404 #define TARGET_ASM_FUNCTION_PROLOGUE arm_output_function_prologue
406 #undef TARGET_ASM_FUNCTION_EPILOGUE
407 #define TARGET_ASM_FUNCTION_EPILOGUE arm_output_function_epilogue
409 #undef TARGET_CAN_INLINE_P
410 #define TARGET_CAN_INLINE_P arm_can_inline_p
412 #undef TARGET_RELAYOUT_FUNCTION
413 #define TARGET_RELAYOUT_FUNCTION arm_relayout_function
415 #undef TARGET_OPTION_OVERRIDE
416 #define TARGET_OPTION_OVERRIDE arm_option_override
418 #undef TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE
419 #define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE arm_override_options_after_change
421 #undef TARGET_OPTION_SAVE
422 #define TARGET_OPTION_SAVE arm_option_save
424 #undef TARGET_OPTION_RESTORE
425 #define TARGET_OPTION_RESTORE arm_option_restore
427 #undef TARGET_OPTION_PRINT
428 #define TARGET_OPTION_PRINT arm_option_print
430 #undef TARGET_COMP_TYPE_ATTRIBUTES
431 #define TARGET_COMP_TYPE_ATTRIBUTES arm_comp_type_attributes
433 #undef TARGET_SCHED_CAN_SPECULATE_INSN
434 #define TARGET_SCHED_CAN_SPECULATE_INSN arm_sched_can_speculate_insn
436 #undef TARGET_SCHED_MACRO_FUSION_P
437 #define TARGET_SCHED_MACRO_FUSION_P arm_macro_fusion_p
439 #undef TARGET_SCHED_MACRO_FUSION_PAIR_P
440 #define TARGET_SCHED_MACRO_FUSION_PAIR_P aarch_macro_fusion_pair_p
442 #undef TARGET_SET_DEFAULT_TYPE_ATTRIBUTES
443 #define TARGET_SET_DEFAULT_TYPE_ATTRIBUTES arm_set_default_type_attributes
445 #undef TARGET_SCHED_ADJUST_COST
446 #define TARGET_SCHED_ADJUST_COST arm_adjust_cost
448 #undef TARGET_SET_CURRENT_FUNCTION
449 #define TARGET_SET_CURRENT_FUNCTION arm_set_current_function
451 #undef TARGET_OPTION_VALID_ATTRIBUTE_P
452 #define TARGET_OPTION_VALID_ATTRIBUTE_P arm_valid_target_attribute_p
454 #undef TARGET_SCHED_REORDER
455 #define TARGET_SCHED_REORDER arm_sched_reorder
457 #undef TARGET_REGISTER_MOVE_COST
458 #define TARGET_REGISTER_MOVE_COST arm_register_move_cost
460 #undef TARGET_MEMORY_MOVE_COST
461 #define TARGET_MEMORY_MOVE_COST arm_memory_move_cost
463 #undef TARGET_ENCODE_SECTION_INFO
464 #ifdef ARM_PE
465 #define TARGET_ENCODE_SECTION_INFO arm_pe_encode_section_info
466 #else
467 #define TARGET_ENCODE_SECTION_INFO arm_encode_section_info
468 #endif
470 #undef TARGET_STRIP_NAME_ENCODING
471 #define TARGET_STRIP_NAME_ENCODING arm_strip_name_encoding
473 #undef TARGET_ASM_INTERNAL_LABEL
474 #define TARGET_ASM_INTERNAL_LABEL arm_internal_label
476 #undef TARGET_FLOATN_MODE
477 #define TARGET_FLOATN_MODE arm_floatn_mode
479 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
480 #define TARGET_FUNCTION_OK_FOR_SIBCALL arm_function_ok_for_sibcall
482 #undef TARGET_FUNCTION_VALUE
483 #define TARGET_FUNCTION_VALUE arm_function_value
485 #undef TARGET_LIBCALL_VALUE
486 #define TARGET_LIBCALL_VALUE arm_libcall_value
488 #undef TARGET_FUNCTION_VALUE_REGNO_P
489 #define TARGET_FUNCTION_VALUE_REGNO_P arm_function_value_regno_p
491 #undef TARGET_ASM_OUTPUT_MI_THUNK
492 #define TARGET_ASM_OUTPUT_MI_THUNK arm_output_mi_thunk
493 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
494 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK arm_can_output_mi_thunk
496 #undef TARGET_RTX_COSTS
497 #define TARGET_RTX_COSTS arm_rtx_costs
498 #undef TARGET_ADDRESS_COST
499 #define TARGET_ADDRESS_COST arm_address_cost
501 #undef TARGET_SHIFT_TRUNCATION_MASK
502 #define TARGET_SHIFT_TRUNCATION_MASK arm_shift_truncation_mask
503 #undef TARGET_VECTOR_MODE_SUPPORTED_P
504 #define TARGET_VECTOR_MODE_SUPPORTED_P arm_vector_mode_supported_p
505 #undef TARGET_ARRAY_MODE_SUPPORTED_P
506 #define TARGET_ARRAY_MODE_SUPPORTED_P arm_array_mode_supported_p
507 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
508 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE arm_preferred_simd_mode
509 #undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
510 #define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
511 arm_autovectorize_vector_sizes
513 #undef TARGET_MACHINE_DEPENDENT_REORG
514 #define TARGET_MACHINE_DEPENDENT_REORG arm_reorg
516 #undef TARGET_INIT_BUILTINS
517 #define TARGET_INIT_BUILTINS arm_init_builtins
518 #undef TARGET_EXPAND_BUILTIN
519 #define TARGET_EXPAND_BUILTIN arm_expand_builtin
520 #undef TARGET_BUILTIN_DECL
521 #define TARGET_BUILTIN_DECL arm_builtin_decl
523 #undef TARGET_INIT_LIBFUNCS
524 #define TARGET_INIT_LIBFUNCS arm_init_libfuncs
526 #undef TARGET_PROMOTE_FUNCTION_MODE
527 #define TARGET_PROMOTE_FUNCTION_MODE arm_promote_function_mode
528 #undef TARGET_PROMOTE_PROTOTYPES
529 #define TARGET_PROMOTE_PROTOTYPES arm_promote_prototypes
530 #undef TARGET_PASS_BY_REFERENCE
531 #define TARGET_PASS_BY_REFERENCE arm_pass_by_reference
532 #undef TARGET_ARG_PARTIAL_BYTES
533 #define TARGET_ARG_PARTIAL_BYTES arm_arg_partial_bytes
534 #undef TARGET_FUNCTION_ARG
535 #define TARGET_FUNCTION_ARG arm_function_arg
536 #undef TARGET_FUNCTION_ARG_ADVANCE
537 #define TARGET_FUNCTION_ARG_ADVANCE arm_function_arg_advance
538 #undef TARGET_FUNCTION_ARG_BOUNDARY
539 #define TARGET_FUNCTION_ARG_BOUNDARY arm_function_arg_boundary
541 #undef TARGET_SETUP_INCOMING_VARARGS
542 #define TARGET_SETUP_INCOMING_VARARGS arm_setup_incoming_varargs
544 #undef TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS
545 #define TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS arm_allocate_stack_slots_for_args
547 #undef TARGET_ASM_TRAMPOLINE_TEMPLATE
548 #define TARGET_ASM_TRAMPOLINE_TEMPLATE arm_asm_trampoline_template
549 #undef TARGET_TRAMPOLINE_INIT
550 #define TARGET_TRAMPOLINE_INIT arm_trampoline_init
551 #undef TARGET_TRAMPOLINE_ADJUST_ADDRESS
552 #define TARGET_TRAMPOLINE_ADJUST_ADDRESS arm_trampoline_adjust_address
554 #undef TARGET_WARN_FUNC_RETURN
555 #define TARGET_WARN_FUNC_RETURN arm_warn_func_return
557 #undef TARGET_DEFAULT_SHORT_ENUMS
558 #define TARGET_DEFAULT_SHORT_ENUMS arm_default_short_enums
560 #undef TARGET_ALIGN_ANON_BITFIELD
561 #define TARGET_ALIGN_ANON_BITFIELD arm_align_anon_bitfield
563 #undef TARGET_NARROW_VOLATILE_BITFIELD
564 #define TARGET_NARROW_VOLATILE_BITFIELD hook_bool_void_false
566 #undef TARGET_CXX_GUARD_TYPE
567 #define TARGET_CXX_GUARD_TYPE arm_cxx_guard_type
569 #undef TARGET_CXX_GUARD_MASK_BIT
570 #define TARGET_CXX_GUARD_MASK_BIT arm_cxx_guard_mask_bit
572 #undef TARGET_CXX_GET_COOKIE_SIZE
573 #define TARGET_CXX_GET_COOKIE_SIZE arm_get_cookie_size
575 #undef TARGET_CXX_COOKIE_HAS_SIZE
576 #define TARGET_CXX_COOKIE_HAS_SIZE arm_cookie_has_size
578 #undef TARGET_CXX_CDTOR_RETURNS_THIS
579 #define TARGET_CXX_CDTOR_RETURNS_THIS arm_cxx_cdtor_returns_this
581 #undef TARGET_CXX_KEY_METHOD_MAY_BE_INLINE
582 #define TARGET_CXX_KEY_METHOD_MAY_BE_INLINE arm_cxx_key_method_may_be_inline
584 #undef TARGET_CXX_USE_AEABI_ATEXIT
585 #define TARGET_CXX_USE_AEABI_ATEXIT arm_cxx_use_aeabi_atexit
587 #undef TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY
588 #define TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY \
589 arm_cxx_determine_class_data_visibility
591 #undef TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT
592 #define TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT arm_cxx_class_data_always_comdat
594 #undef TARGET_RETURN_IN_MSB
595 #define TARGET_RETURN_IN_MSB arm_return_in_msb
597 #undef TARGET_RETURN_IN_MEMORY
598 #define TARGET_RETURN_IN_MEMORY arm_return_in_memory
600 #undef TARGET_MUST_PASS_IN_STACK
601 #define TARGET_MUST_PASS_IN_STACK arm_must_pass_in_stack
603 #if ARM_UNWIND_INFO
604 #undef TARGET_ASM_UNWIND_EMIT
605 #define TARGET_ASM_UNWIND_EMIT arm_unwind_emit
607 /* EABI unwinding tables use a different format for the typeinfo tables. */
608 #undef TARGET_ASM_TTYPE
609 #define TARGET_ASM_TTYPE arm_output_ttype
611 #undef TARGET_ARM_EABI_UNWINDER
612 #define TARGET_ARM_EABI_UNWINDER true
614 #undef TARGET_ASM_EMIT_EXCEPT_PERSONALITY
615 #define TARGET_ASM_EMIT_EXCEPT_PERSONALITY arm_asm_emit_except_personality
617 #endif /* ARM_UNWIND_INFO */
619 #undef TARGET_ASM_INIT_SECTIONS
620 #define TARGET_ASM_INIT_SECTIONS arm_asm_init_sections
622 #undef TARGET_DWARF_REGISTER_SPAN
623 #define TARGET_DWARF_REGISTER_SPAN arm_dwarf_register_span
625 #undef TARGET_CANNOT_COPY_INSN_P
626 #define TARGET_CANNOT_COPY_INSN_P arm_cannot_copy_insn_p
628 #ifdef HAVE_AS_TLS
629 #undef TARGET_HAVE_TLS
630 #define TARGET_HAVE_TLS true
631 #endif
633 #undef TARGET_HAVE_CONDITIONAL_EXECUTION
634 #define TARGET_HAVE_CONDITIONAL_EXECUTION arm_have_conditional_execution
636 #undef TARGET_LEGITIMATE_CONSTANT_P
637 #define TARGET_LEGITIMATE_CONSTANT_P arm_legitimate_constant_p
639 #undef TARGET_CANNOT_FORCE_CONST_MEM
640 #define TARGET_CANNOT_FORCE_CONST_MEM arm_cannot_force_const_mem
642 #undef TARGET_MAX_ANCHOR_OFFSET
643 #define TARGET_MAX_ANCHOR_OFFSET 4095
645 /* The minimum is set such that the total size of the block
646 for a particular anchor is -4088 + 1 + 4095 bytes, which is
647 divisible by eight, ensuring natural spacing of anchors. */
648 #undef TARGET_MIN_ANCHOR_OFFSET
649 #define TARGET_MIN_ANCHOR_OFFSET -4088
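/* Spelled out for clarity (this arithmetic is implicit in the comment
   above): the anchor range runs from -4088 to +4095 inclusive, which is
   4088 + 1 + 4095 = 8184 bytes, and 8184 = 8 * 1023, hence the block
   size is divisible by eight.  */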
651 #undef TARGET_SCHED_ISSUE_RATE
652 #define TARGET_SCHED_ISSUE_RATE arm_issue_rate
654 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
655 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
656 arm_first_cycle_multipass_dfa_lookahead
658 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD
659 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD \
660 arm_first_cycle_multipass_dfa_lookahead_guard
662 #undef TARGET_MANGLE_TYPE
663 #define TARGET_MANGLE_TYPE arm_mangle_type
665 #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
666 #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV arm_atomic_assign_expand_fenv
668 #undef TARGET_BUILD_BUILTIN_VA_LIST
669 #define TARGET_BUILD_BUILTIN_VA_LIST arm_build_builtin_va_list
670 #undef TARGET_EXPAND_BUILTIN_VA_START
671 #define TARGET_EXPAND_BUILTIN_VA_START arm_expand_builtin_va_start
672 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
673 #define TARGET_GIMPLIFY_VA_ARG_EXPR arm_gimplify_va_arg_expr
675 #ifdef HAVE_AS_TLS
676 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
677 #define TARGET_ASM_OUTPUT_DWARF_DTPREL arm_output_dwarf_dtprel
678 #endif
680 #undef TARGET_LEGITIMATE_ADDRESS_P
681 #define TARGET_LEGITIMATE_ADDRESS_P arm_legitimate_address_p
683 #undef TARGET_PREFERRED_RELOAD_CLASS
684 #define TARGET_PREFERRED_RELOAD_CLASS arm_preferred_reload_class
686 #undef TARGET_PROMOTED_TYPE
687 #define TARGET_PROMOTED_TYPE arm_promoted_type
689 #undef TARGET_SCALAR_MODE_SUPPORTED_P
690 #define TARGET_SCALAR_MODE_SUPPORTED_P arm_scalar_mode_supported_p
692 #undef TARGET_COMPUTE_FRAME_LAYOUT
693 #define TARGET_COMPUTE_FRAME_LAYOUT arm_compute_frame_layout
695 #undef TARGET_FRAME_POINTER_REQUIRED
696 #define TARGET_FRAME_POINTER_REQUIRED arm_frame_pointer_required
698 #undef TARGET_CAN_ELIMINATE
699 #define TARGET_CAN_ELIMINATE arm_can_eliminate
701 #undef TARGET_CONDITIONAL_REGISTER_USAGE
702 #define TARGET_CONDITIONAL_REGISTER_USAGE arm_conditional_register_usage
704 #undef TARGET_CLASS_LIKELY_SPILLED_P
705 #define TARGET_CLASS_LIKELY_SPILLED_P arm_class_likely_spilled_p
707 #undef TARGET_VECTORIZE_BUILTINS
708 #define TARGET_VECTORIZE_BUILTINS
710 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
711 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
712 arm_builtin_vectorized_function
714 #undef TARGET_VECTOR_ALIGNMENT
715 #define TARGET_VECTOR_ALIGNMENT arm_vector_alignment
717 #undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
718 #define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE \
719 arm_vector_alignment_reachable
721 #undef TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT
722 #define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT \
723 arm_builtin_support_vector_misalignment
725 #undef TARGET_PREFERRED_RENAME_CLASS
726 #define TARGET_PREFERRED_RENAME_CLASS \
727 arm_preferred_rename_class
729 #undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
730 #define TARGET_VECTORIZE_VEC_PERM_CONST_OK \
731 arm_vectorize_vec_perm_const_ok
733 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
734 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
735 arm_builtin_vectorization_cost
736 #undef TARGET_VECTORIZE_ADD_STMT_COST
737 #define TARGET_VECTORIZE_ADD_STMT_COST arm_add_stmt_cost
739 #undef TARGET_CANONICALIZE_COMPARISON
740 #define TARGET_CANONICALIZE_COMPARISON \
741 arm_canonicalize_comparison
743 #undef TARGET_ASAN_SHADOW_OFFSET
744 #define TARGET_ASAN_SHADOW_OFFSET arm_asan_shadow_offset
746 #undef MAX_INSN_PER_IT_BLOCK
747 #define MAX_INSN_PER_IT_BLOCK (arm_restrict_it ? 1 : 4)
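/* As a concrete illustration (not from the original source): with
   -mrestrict-it (multi-instruction IT blocks are deprecated on ARMv8),
   an IT block covers a single conditional instruction, e.g.
   "it eq; moveq r0, #1", whereas otherwise one IT instruction may
   predicate up to four following instructions ("itttt eq; ...").  */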
749 #undef TARGET_CAN_USE_DOLOOP_P
750 #define TARGET_CAN_USE_DOLOOP_P can_use_doloop_if_innermost
752 #undef TARGET_CONST_NOT_OK_FOR_DEBUG_P
753 #define TARGET_CONST_NOT_OK_FOR_DEBUG_P arm_const_not_ok_for_debug_p
755 #undef TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS
756 #define TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS true
758 #undef TARGET_SCHED_FUSION_PRIORITY
759 #define TARGET_SCHED_FUSION_PRIORITY arm_sched_fusion_priority
761 #undef TARGET_ASM_FUNCTION_SECTION
762 #define TARGET_ASM_FUNCTION_SECTION arm_function_section
764 #undef TARGET_ASM_ELF_FLAGS_NUMERIC
765 #define TARGET_ASM_ELF_FLAGS_NUMERIC arm_asm_elf_flags_numeric
767 #undef TARGET_SECTION_TYPE_FLAGS
768 #define TARGET_SECTION_TYPE_FLAGS arm_elf_section_type_flags
770 #undef TARGET_EXPAND_DIVMOD_LIBFUNC
771 #define TARGET_EXPAND_DIVMOD_LIBFUNC arm_expand_divmod_libfunc
773 #undef TARGET_C_EXCESS_PRECISION
774 #define TARGET_C_EXCESS_PRECISION arm_excess_precision
776 /* Although the architecture reserves bits 0 and 1, only the former is
777 used for ARM/Thumb ISA selection in v7 and earlier versions. */
778 #undef TARGET_CUSTOM_FUNCTION_DESCRIPTORS
779 #define TARGET_CUSTOM_FUNCTION_DESCRIPTORS 2
781 #undef TARGET_FIXED_CONDITION_CODE_REGS
782 #define TARGET_FIXED_CONDITION_CODE_REGS arm_fixed_condition_code_regs
784 #undef TARGET_HARD_REGNO_MODE_OK
785 #define TARGET_HARD_REGNO_MODE_OK arm_hard_regno_mode_ok
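/* Illustrative sketch only -- arm_hard_regno_mode_ok, the real
   implementation, appears later in this file.  A hook of this shape
   answers whether hard register REGNO can hold a value of mode MODE; a
   hypothetical minimal version might only insist that multi-word values
   start in an even-numbered register:

     static bool
     example_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
     {
       if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
         return (regno % 2) == 0;
       return true;
     }
*/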
787 /* Obstack for minipool constant handling. */
788 static struct obstack minipool_obstack;
789 static char * minipool_startobj;
791 /* The maximum number of insns skipped which
792 will be conditionalised if possible. */
793 static int max_insns_skipped = 5;
795 extern FILE * asm_out_file;
797 /* True if we are currently building a constant table. */
798 int making_const_table;
800 /* The processor for which instructions should be scheduled. */
801 enum processor_type arm_tune = TARGET_CPU_arm_none;
803 /* The current tuning set. */
804 const struct tune_params *current_tune;
806 /* Which floating point hardware to schedule for. */
807 int arm_fpu_attr;
809 /* Used for Thumb call_via trampolines. */
810 rtx thumb_call_via_label[14];
811 static int thumb_call_reg_needed;
813 /* The bits in this mask specify which instruction scheduling options should
814 be used. */
815 unsigned int tune_flags = 0;
817 /* The highest ARM architecture version supported by the
818 target. */
819 enum base_architecture arm_base_arch = BASE_ARCH_0;
821 /* Active target architecture and tuning. */
823 struct arm_build_target arm_active_target;
825 /* The following are used in the arm.md file as equivalents to bits
826 in the above two flag variables. */
828 /* Nonzero if this chip supports the ARM Architecture 3M extensions. */
829 int arm_arch3m = 0;
831 /* Nonzero if this chip supports the ARM Architecture 4 extensions. */
832 int arm_arch4 = 0;
834 /* Nonzero if this chip supports the ARM Architecture 4t extensions. */
835 int arm_arch4t = 0;
837 /* Nonzero if this chip supports the ARM Architecture 5 extensions. */
838 int arm_arch5 = 0;
840 /* Nonzero if this chip supports the ARM Architecture 5E extensions. */
841 int arm_arch5e = 0;
843 /* Nonzero if this chip supports the ARM Architecture 5TE extensions. */
844 int arm_arch5te = 0;
846 /* Nonzero if this chip supports the ARM Architecture 6 extensions. */
847 int arm_arch6 = 0;
849 /* Nonzero if this chip supports the ARM 6K extensions. */
850 int arm_arch6k = 0;
852 /* Nonzero if this chip supports the ARM 6KZ extensions. */
853 int arm_arch6kz = 0;
855 /* Nonzero if instructions present in ARMv6-M can be used. */
856 int arm_arch6m = 0;
858 /* Nonzero if this chip supports the ARM 7 extensions. */
859 int arm_arch7 = 0;
861 /* Nonzero if this chip supports the Large Physical Address Extension. */
862 int arm_arch_lpae = 0;
864 /* Nonzero if instructions not present in the 'M' profile can be used. */
865 int arm_arch_notm = 0;
867 /* Nonzero if instructions present in ARMv7E-M can be used. */
868 int arm_arch7em = 0;
870 /* Nonzero if instructions present in ARMv8 can be used. */
871 int arm_arch8 = 0;
873 /* Nonzero if this chip supports the ARMv8.1 extensions. */
874 int arm_arch8_1 = 0;
876 /* Nonzero if this chip supports the ARM Architecture 8.2 extensions. */
877 int arm_arch8_2 = 0;
879 /* Nonzero if this chip supports the FP16 instructions extension of ARM
880 Architecture 8.2. */
881 int arm_fp16_inst = 0;
883 /* Nonzero if this chip can benefit from load scheduling. */
884 int arm_ld_sched = 0;
886 /* Nonzero if this chip is a StrongARM. */
887 int arm_tune_strongarm = 0;
889 /* Nonzero if this chip supports Intel Wireless MMX technology. */
890 int arm_arch_iwmmxt = 0;
892 /* Nonzero if this chip supports Intel Wireless MMX2 technology. */
893 int arm_arch_iwmmxt2 = 0;
895 /* Nonzero if this chip is an XScale. */
896 int arm_arch_xscale = 0;
898 /* Nonzero if tuning for XScale */
899 int arm_tune_xscale = 0;
901 /* Nonzero if we want to tune for stores that access the write-buffer.
902 This typically means an ARM6 or ARM7 with MMU or MPU. */
903 int arm_tune_wbuf = 0;
905 /* Nonzero if tuning for Cortex-A9. */
906 int arm_tune_cortex_a9 = 0;
908 /* Nonzero if we should define __THUMB_INTERWORK__ in the
909 preprocessor.
910 XXX This is a bit of a hack, it's intended to help work around
911 problems in GLD which doesn't understand that armv5t code is
912 interworking clean. */
913 int arm_cpp_interwork = 0;
915 /* Nonzero if chip supports Thumb 1. */
916 int arm_arch_thumb1;
918 /* Nonzero if chip supports Thumb 2. */
919 int arm_arch_thumb2;
921 /* Nonzero if chip supports integer division instruction. */
922 int arm_arch_arm_hwdiv;
923 int arm_arch_thumb_hwdiv;
925 /* Nonzero if chip disallows volatile memory access in IT block. */
926 int arm_arch_no_volatile_ce;
928 /* Nonzero if we should use Neon to handle 64-bits operations rather
929 than core registers. */
930 int prefer_neon_for_64bits = 0;
932 /* Nonzero if we shouldn't use literal pools. */
933 bool arm_disable_literal_pool = false;
935 /* The register number to be used for the PIC offset register. */
936 unsigned arm_pic_register = INVALID_REGNUM;
938 enum arm_pcs arm_pcs_default;
940 /* For an explanation of these variables, see final_prescan_insn below. */
941 int arm_ccfsm_state;
942 /* arm_current_cc is also used for Thumb-2 cond_exec blocks. */
943 enum arm_cond_code arm_current_cc;
945 rtx arm_target_insn;
946 int arm_target_label;
947 /* The number of conditionally executed insns, including the current insn. */
948 int arm_condexec_count = 0;
949 /* A bitmask specifying the patterns for the IT block.
950 Zero means do not output an IT block before this insn. */
951 int arm_condexec_mask = 0;
952 /* The number of bits used in arm_condexec_mask. */
953 int arm_condexec_masklen = 0;
955 /* Nonzero if chip supports the ARMv8 CRC instructions. */
956 int arm_arch_crc = 0;
958 /* Nonzero if chip supports the ARMv8-M security extensions. */
959 int arm_arch_cmse = 0;
961 /* Nonzero if the core has a very small, high-latency, multiply unit. */
962 int arm_m_profile_small_mul = 0;
964 /* The condition codes of the ARM, and the inverse function. */
965 static const char * const arm_condition_codes[] =
967 "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
968 "hi", "ls", "ge", "lt", "gt", "le", "al", "nv"
971 /* The register numbers in sequence, for passing to arm_gen_load_multiple. */
972 int arm_regs_in_sequence[] =
974 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
977 #define ARM_LSL_NAME "lsl"
978 #define streq(string1, string2) (strcmp (string1, string2) == 0)
980 #define THUMB2_WORK_REGS (0xff & ~( (1 << THUMB_HARD_FRAME_POINTER_REGNUM) \
981 | (1 << SP_REGNUM) | (1 << PC_REGNUM) \
982 | (1 << PIC_OFFSET_TABLE_REGNUM)))
984 /* Initialization code. */
986 struct cpu_tune
988 enum processor_type scheduler;
989 unsigned int tune_flags;
990 const struct tune_params *tune;
993 #define ARM_PREFETCH_NOT_BENEFICIAL { 0, -1, -1 }
994 #define ARM_PREFETCH_BENEFICIAL(num_slots,l1_size,l1_line_size) \
996 num_slots, \
997 l1_size, \
998 l1_line_size \
1001 /* arm generic vectorizer costs. */
1002 static const
1003 struct cpu_vec_costs arm_default_vec_cost = {
1004 1, /* scalar_stmt_cost. */
1005 1, /* scalar load_cost. */
1006 1, /* scalar_store_cost. */
1007 1, /* vec_stmt_cost. */
1008 1, /* vec_to_scalar_cost. */
1009 1, /* scalar_to_vec_cost. */
1010 1, /* vec_align_load_cost. */
1011 1, /* vec_unalign_load_cost. */
1012 1, /* vec_unalign_store_cost. */
1013 1, /* vec_store_cost. */
1014 3, /* cond_taken_branch_cost. */
1015 1, /* cond_not_taken_branch_cost. */
1018 /* Cost tables for AArch32 + AArch64 cores should go in aarch-cost-tables.h */
1019 #include "aarch-cost-tables.h"
1023 const struct cpu_cost_table cortexa9_extra_costs =
1025 /* ALU */
1027 0, /* arith. */
1028 0, /* logical. */
1029 0, /* shift. */
1030 COSTS_N_INSNS (1), /* shift_reg. */
1031 COSTS_N_INSNS (1), /* arith_shift. */
1032 COSTS_N_INSNS (2), /* arith_shift_reg. */
1033 0, /* log_shift. */
1034 COSTS_N_INSNS (1), /* log_shift_reg. */
1035 COSTS_N_INSNS (1), /* extend. */
1036 COSTS_N_INSNS (2), /* extend_arith. */
1037 COSTS_N_INSNS (1), /* bfi. */
1038 COSTS_N_INSNS (1), /* bfx. */
1039 0, /* clz. */
1040 0, /* rev. */
1041 0, /* non_exec. */
1042 true /* non_exec_costs_exec. */
1045 /* MULT SImode */
1047 COSTS_N_INSNS (3), /* simple. */
1048 COSTS_N_INSNS (3), /* flag_setting. */
1049 COSTS_N_INSNS (2), /* extend. */
1050 COSTS_N_INSNS (3), /* add. */
1051 COSTS_N_INSNS (2), /* extend_add. */
1052 COSTS_N_INSNS (30) /* idiv. No HW div on Cortex A9. */
1054 /* MULT DImode */
1056 0, /* simple (N/A). */
1057 0, /* flag_setting (N/A). */
1058 COSTS_N_INSNS (4), /* extend. */
1059 0, /* add (N/A). */
1060 COSTS_N_INSNS (4), /* extend_add. */
1061 0 /* idiv (N/A). */
1064 /* LD/ST */
1066 COSTS_N_INSNS (2), /* load. */
1067 COSTS_N_INSNS (2), /* load_sign_extend. */
1068 COSTS_N_INSNS (2), /* ldrd. */
1069 COSTS_N_INSNS (2), /* ldm_1st. */
1070 1, /* ldm_regs_per_insn_1st. */
1071 2, /* ldm_regs_per_insn_subsequent. */
1072 COSTS_N_INSNS (5), /* loadf. */
1073 COSTS_N_INSNS (5), /* loadd. */
1074 COSTS_N_INSNS (1), /* load_unaligned. */
1075 COSTS_N_INSNS (2), /* store. */
1076 COSTS_N_INSNS (2), /* strd. */
1077 COSTS_N_INSNS (2), /* stm_1st. */
1078 1, /* stm_regs_per_insn_1st. */
1079 2, /* stm_regs_per_insn_subsequent. */
1080 COSTS_N_INSNS (1), /* storef. */
1081 COSTS_N_INSNS (1), /* stored. */
1082 COSTS_N_INSNS (1), /* store_unaligned. */
1083 COSTS_N_INSNS (1), /* loadv. */
1084 COSTS_N_INSNS (1) /* storev. */
1087 /* FP SFmode */
1089 COSTS_N_INSNS (14), /* div. */
1090 COSTS_N_INSNS (4), /* mult. */
1091 COSTS_N_INSNS (7), /* mult_addsub. */
1092 COSTS_N_INSNS (30), /* fma. */
1093 COSTS_N_INSNS (3), /* addsub. */
1094 COSTS_N_INSNS (1), /* fpconst. */
1095 COSTS_N_INSNS (1), /* neg. */
1096 COSTS_N_INSNS (3), /* compare. */
1097 COSTS_N_INSNS (3), /* widen. */
1098 COSTS_N_INSNS (3), /* narrow. */
1099 COSTS_N_INSNS (3), /* toint. */
1100 COSTS_N_INSNS (3), /* fromint. */
1101 COSTS_N_INSNS (3) /* roundint. */
1103 /* FP DFmode */
1105 COSTS_N_INSNS (24), /* div. */
1106 COSTS_N_INSNS (5), /* mult. */
1107 COSTS_N_INSNS (8), /* mult_addsub. */
1108 COSTS_N_INSNS (30), /* fma. */
1109 COSTS_N_INSNS (3), /* addsub. */
1110 COSTS_N_INSNS (1), /* fpconst. */
1111 COSTS_N_INSNS (1), /* neg. */
1112 COSTS_N_INSNS (3), /* compare. */
1113 COSTS_N_INSNS (3), /* widen. */
1114 COSTS_N_INSNS (3), /* narrow. */
1115 COSTS_N_INSNS (3), /* toint. */
1116 COSTS_N_INSNS (3), /* fromint. */
1117 COSTS_N_INSNS (3) /* roundint. */
1120 /* Vector */
1122 COSTS_N_INSNS (1) /* alu. */
1126 const struct cpu_cost_table cortexa8_extra_costs =
1128 /* ALU */
1130 0, /* arith. */
1131 0, /* logical. */
1132 COSTS_N_INSNS (1), /* shift. */
1133 0, /* shift_reg. */
1134 COSTS_N_INSNS (1), /* arith_shift. */
1135 0, /* arith_shift_reg. */
1136 COSTS_N_INSNS (1), /* log_shift. */
1137 0, /* log_shift_reg. */
1138 0, /* extend. */
1139 0, /* extend_arith. */
1140 0, /* bfi. */
1141 0, /* bfx. */
1142 0, /* clz. */
1143 0, /* rev. */
1144 0, /* non_exec. */
1145 true /* non_exec_costs_exec. */
1148 /* MULT SImode */
1150 COSTS_N_INSNS (1), /* simple. */
1151 COSTS_N_INSNS (1), /* flag_setting. */
1152 COSTS_N_INSNS (1), /* extend. */
1153 COSTS_N_INSNS (1), /* add. */
1154 COSTS_N_INSNS (1), /* extend_add. */
1155 COSTS_N_INSNS (30) /* idiv. No HW div on Cortex A8. */
1157 /* MULT DImode */
1159 0, /* simple (N/A). */
1160 0, /* flag_setting (N/A). */
1161 COSTS_N_INSNS (2), /* extend. */
1162 0, /* add (N/A). */
1163 COSTS_N_INSNS (2), /* extend_add. */
1164 0 /* idiv (N/A). */
1167 /* LD/ST */
1169 COSTS_N_INSNS (1), /* load. */
1170 COSTS_N_INSNS (1), /* load_sign_extend. */
1171 COSTS_N_INSNS (1), /* ldrd. */
1172 COSTS_N_INSNS (1), /* ldm_1st. */
1173 1, /* ldm_regs_per_insn_1st. */
1174 2, /* ldm_regs_per_insn_subsequent. */
1175 COSTS_N_INSNS (1), /* loadf. */
1176 COSTS_N_INSNS (1), /* loadd. */
1177 COSTS_N_INSNS (1), /* load_unaligned. */
1178 COSTS_N_INSNS (1), /* store. */
1179 COSTS_N_INSNS (1), /* strd. */
1180 COSTS_N_INSNS (1), /* stm_1st. */
1181 1, /* stm_regs_per_insn_1st. */
1182 2, /* stm_regs_per_insn_subsequent. */
1183 COSTS_N_INSNS (1), /* storef. */
1184 COSTS_N_INSNS (1), /* stored. */
1185 COSTS_N_INSNS (1), /* store_unaligned. */
1186 COSTS_N_INSNS (1), /* loadv. */
1187 COSTS_N_INSNS (1) /* storev. */
1190 /* FP SFmode */
1192 COSTS_N_INSNS (36), /* div. */
1193 COSTS_N_INSNS (11), /* mult. */
1194 COSTS_N_INSNS (20), /* mult_addsub. */
1195 COSTS_N_INSNS (30), /* fma. */
1196 COSTS_N_INSNS (9), /* addsub. */
1197 COSTS_N_INSNS (3), /* fpconst. */
1198 COSTS_N_INSNS (3), /* neg. */
1199 COSTS_N_INSNS (6), /* compare. */
1200 COSTS_N_INSNS (4), /* widen. */
1201 COSTS_N_INSNS (4), /* narrow. */
1202 COSTS_N_INSNS (8), /* toint. */
1203 COSTS_N_INSNS (8), /* fromint. */
1204 COSTS_N_INSNS (8) /* roundint. */
1206 /* FP DFmode */
1208 COSTS_N_INSNS (64), /* div. */
1209 COSTS_N_INSNS (16), /* mult. */
1210 COSTS_N_INSNS (25), /* mult_addsub. */
1211 COSTS_N_INSNS (30), /* fma. */
1212 COSTS_N_INSNS (9), /* addsub. */
1213 COSTS_N_INSNS (3), /* fpconst. */
1214 COSTS_N_INSNS (3), /* neg. */
1215 COSTS_N_INSNS (6), /* compare. */
1216 COSTS_N_INSNS (6), /* widen. */
1217 COSTS_N_INSNS (6), /* narrow. */
1218 COSTS_N_INSNS (8), /* toint. */
1219 COSTS_N_INSNS (8), /* fromint. */
1220 COSTS_N_INSNS (8) /* roundint. */
1223 /* Vector */
1225 COSTS_N_INSNS (1) /* alu. */
1229 const struct cpu_cost_table cortexa5_extra_costs =
1231 /* ALU */
1233 0, /* arith. */
1234 0, /* logical. */
1235 COSTS_N_INSNS (1), /* shift. */
1236 COSTS_N_INSNS (1), /* shift_reg. */
1237 COSTS_N_INSNS (1), /* arith_shift. */
1238 COSTS_N_INSNS (1), /* arith_shift_reg. */
1239 COSTS_N_INSNS (1), /* log_shift. */
1240 COSTS_N_INSNS (1), /* log_shift_reg. */
1241 COSTS_N_INSNS (1), /* extend. */
1242 COSTS_N_INSNS (1), /* extend_arith. */
1243 COSTS_N_INSNS (1), /* bfi. */
1244 COSTS_N_INSNS (1), /* bfx. */
1245 COSTS_N_INSNS (1), /* clz. */
1246 COSTS_N_INSNS (1), /* rev. */
1247 0, /* non_exec. */
1248 true /* non_exec_costs_exec. */
1252 /* MULT SImode */
1254 0, /* simple. */
1255 COSTS_N_INSNS (1), /* flag_setting. */
1256 COSTS_N_INSNS (1), /* extend. */
1257 COSTS_N_INSNS (1), /* add. */
1258 COSTS_N_INSNS (1), /* extend_add. */
1259 COSTS_N_INSNS (7) /* idiv. */
1261 /* MULT DImode */
1263 0, /* simple (N/A). */
1264 0, /* flag_setting (N/A). */
1265 COSTS_N_INSNS (1), /* extend. */
1266 0, /* add. */
1267 COSTS_N_INSNS (2), /* extend_add. */
1268 0 /* idiv (N/A). */
1271 /* LD/ST */
1273 COSTS_N_INSNS (1), /* load. */
1274 COSTS_N_INSNS (1), /* load_sign_extend. */
1275 COSTS_N_INSNS (6), /* ldrd. */
1276 COSTS_N_INSNS (1), /* ldm_1st. */
1277 1, /* ldm_regs_per_insn_1st. */
1278 2, /* ldm_regs_per_insn_subsequent. */
1279 COSTS_N_INSNS (2), /* loadf. */
1280 COSTS_N_INSNS (4), /* loadd. */
1281 COSTS_N_INSNS (1), /* load_unaligned. */
1282 COSTS_N_INSNS (1), /* store. */
1283 COSTS_N_INSNS (3), /* strd. */
1284 COSTS_N_INSNS (1), /* stm_1st. */
1285 1, /* stm_regs_per_insn_1st. */
1286 2, /* stm_regs_per_insn_subsequent. */
1287 COSTS_N_INSNS (2), /* storef. */
1288 COSTS_N_INSNS (2), /* stored. */
1289 COSTS_N_INSNS (1), /* store_unaligned. */
1290 COSTS_N_INSNS (1), /* loadv. */
1291 COSTS_N_INSNS (1) /* storev. */
1294 /* FP SFmode */
1296 COSTS_N_INSNS (15), /* div. */
1297 COSTS_N_INSNS (3), /* mult. */
1298 COSTS_N_INSNS (7), /* mult_addsub. */
1299 COSTS_N_INSNS (7), /* fma. */
1300 COSTS_N_INSNS (3), /* addsub. */
1301 COSTS_N_INSNS (3), /* fpconst. */
1302 COSTS_N_INSNS (3), /* neg. */
1303 COSTS_N_INSNS (3), /* compare. */
1304 COSTS_N_INSNS (3), /* widen. */
1305 COSTS_N_INSNS (3), /* narrow. */
1306 COSTS_N_INSNS (3), /* toint. */
1307 COSTS_N_INSNS (3), /* fromint. */
1308 COSTS_N_INSNS (3) /* roundint. */
1310 /* FP DFmode */
1312 COSTS_N_INSNS (30), /* div. */
1313 COSTS_N_INSNS (6), /* mult. */
1314 COSTS_N_INSNS (10), /* mult_addsub. */
1315 COSTS_N_INSNS (7), /* fma. */
1316 COSTS_N_INSNS (3), /* addsub. */
1317 COSTS_N_INSNS (3), /* fpconst. */
1318 COSTS_N_INSNS (3), /* neg. */
1319 COSTS_N_INSNS (3), /* compare. */
1320 COSTS_N_INSNS (3), /* widen. */
1321 COSTS_N_INSNS (3), /* narrow. */
1322 COSTS_N_INSNS (3), /* toint. */
1323 COSTS_N_INSNS (3), /* fromint. */
1324 COSTS_N_INSNS (3) /* roundint. */
1327 /* Vector */
1329 COSTS_N_INSNS (1) /* alu. */
1334 const struct cpu_cost_table cortexa7_extra_costs =
1336 /* ALU */
1338 0, /* arith. */
1339 0, /* logical. */
1340 COSTS_N_INSNS (1), /* shift. */
1341 COSTS_N_INSNS (1), /* shift_reg. */
1342 COSTS_N_INSNS (1), /* arith_shift. */
1343 COSTS_N_INSNS (1), /* arith_shift_reg. */
1344 COSTS_N_INSNS (1), /* log_shift. */
1345 COSTS_N_INSNS (1), /* log_shift_reg. */
1346 COSTS_N_INSNS (1), /* extend. */
1347 COSTS_N_INSNS (1), /* extend_arith. */
1348 COSTS_N_INSNS (1), /* bfi. */
1349 COSTS_N_INSNS (1), /* bfx. */
1350 COSTS_N_INSNS (1), /* clz. */
1351 COSTS_N_INSNS (1), /* rev. */
1352 0, /* non_exec. */
1353 true /* non_exec_costs_exec. */
1357 /* MULT SImode */
1359 0, /* simple. */
1360 COSTS_N_INSNS (1), /* flag_setting. */
1361 COSTS_N_INSNS (1), /* extend. */
1362 COSTS_N_INSNS (1), /* add. */
1363 COSTS_N_INSNS (1), /* extend_add. */
1364 COSTS_N_INSNS (7) /* idiv. */
1366 /* MULT DImode */
1368 0, /* simple (N/A). */
1369 0, /* flag_setting (N/A). */
1370 COSTS_N_INSNS (1), /* extend. */
1371 0, /* add. */
1372 COSTS_N_INSNS (2), /* extend_add. */
1373 0 /* idiv (N/A). */
1376 /* LD/ST */
1378 COSTS_N_INSNS (1), /* load. */
1379 COSTS_N_INSNS (1), /* load_sign_extend. */
1380 COSTS_N_INSNS (3), /* ldrd. */
1381 COSTS_N_INSNS (1), /* ldm_1st. */
1382 1, /* ldm_regs_per_insn_1st. */
1383 2, /* ldm_regs_per_insn_subsequent. */
1384 COSTS_N_INSNS (2), /* loadf. */
1385 COSTS_N_INSNS (2), /* loadd. */
1386 COSTS_N_INSNS (1), /* load_unaligned. */
1387 COSTS_N_INSNS (1), /* store. */
1388 COSTS_N_INSNS (3), /* strd. */
1389 COSTS_N_INSNS (1), /* stm_1st. */
1390 1, /* stm_regs_per_insn_1st. */
1391 2, /* stm_regs_per_insn_subsequent. */
1392 COSTS_N_INSNS (2), /* storef. */
1393 COSTS_N_INSNS (2), /* stored. */
1394 COSTS_N_INSNS (1), /* store_unaligned. */
1395 COSTS_N_INSNS (1), /* loadv. */
1396 COSTS_N_INSNS (1) /* storev. */
1399 /* FP SFmode */
1401 COSTS_N_INSNS (15), /* div. */
1402 COSTS_N_INSNS (3), /* mult. */
1403 COSTS_N_INSNS (7), /* mult_addsub. */
1404 COSTS_N_INSNS (7), /* fma. */
1405 COSTS_N_INSNS (3), /* addsub. */
1406 COSTS_N_INSNS (3), /* fpconst. */
1407 COSTS_N_INSNS (3), /* neg. */
1408 COSTS_N_INSNS (3), /* compare. */
1409 COSTS_N_INSNS (3), /* widen. */
1410 COSTS_N_INSNS (3), /* narrow. */
1411 COSTS_N_INSNS (3), /* toint. */
1412 COSTS_N_INSNS (3), /* fromint. */
1413 COSTS_N_INSNS (3) /* roundint. */
1415 /* FP DFmode */
1417 COSTS_N_INSNS (30), /* div. */
1418 COSTS_N_INSNS (6), /* mult. */
1419 COSTS_N_INSNS (10), /* mult_addsub. */
1420 COSTS_N_INSNS (7), /* fma. */
1421 COSTS_N_INSNS (3), /* addsub. */
1422 COSTS_N_INSNS (3), /* fpconst. */
1423 COSTS_N_INSNS (3), /* neg. */
1424 COSTS_N_INSNS (3), /* compare. */
1425 COSTS_N_INSNS (3), /* widen. */
1426 COSTS_N_INSNS (3), /* narrow. */
1427 COSTS_N_INSNS (3), /* toint. */
1428 COSTS_N_INSNS (3), /* fromint. */
1429 COSTS_N_INSNS (3) /* roundint. */
1432 /* Vector */
1434 COSTS_N_INSNS (1) /* alu. */
1438 const struct cpu_cost_table cortexa12_extra_costs =
1440 /* ALU */
1442 0, /* arith. */
1443 0, /* logical. */
1444 0, /* shift. */
1445 COSTS_N_INSNS (1), /* shift_reg. */
1446 COSTS_N_INSNS (1), /* arith_shift. */
1447 COSTS_N_INSNS (1), /* arith_shift_reg. */
1448 COSTS_N_INSNS (1), /* log_shift. */
1449 COSTS_N_INSNS (1), /* log_shift_reg. */
1450 0, /* extend. */
1451 COSTS_N_INSNS (1), /* extend_arith. */
1452 0, /* bfi. */
1453 COSTS_N_INSNS (1), /* bfx. */
1454 COSTS_N_INSNS (1), /* clz. */
1455 COSTS_N_INSNS (1), /* rev. */
1456 0, /* non_exec. */
1457 true /* non_exec_costs_exec. */
1459 /* MULT SImode */
1462 COSTS_N_INSNS (2), /* simple. */
1463 COSTS_N_INSNS (3), /* flag_setting. */
1464 COSTS_N_INSNS (2), /* extend. */
1465 COSTS_N_INSNS (3), /* add. */
1466 COSTS_N_INSNS (2), /* extend_add. */
1467 COSTS_N_INSNS (18) /* idiv. */
1469 /* MULT DImode */
1471 0, /* simple (N/A). */
1472 0, /* flag_setting (N/A). */
1473 COSTS_N_INSNS (3), /* extend. */
1474 0, /* add (N/A). */
1475 COSTS_N_INSNS (3), /* extend_add. */
1476 0 /* idiv (N/A). */
1479 /* LD/ST */
1481 COSTS_N_INSNS (3), /* load. */
1482 COSTS_N_INSNS (3), /* load_sign_extend. */
1483 COSTS_N_INSNS (3), /* ldrd. */
1484 COSTS_N_INSNS (3), /* ldm_1st. */
1485 1, /* ldm_regs_per_insn_1st. */
1486 2, /* ldm_regs_per_insn_subsequent. */
1487 COSTS_N_INSNS (3), /* loadf. */
1488 COSTS_N_INSNS (3), /* loadd. */
1489 0, /* load_unaligned. */
1490 0, /* store. */
1491 0, /* strd. */
1492 0, /* stm_1st. */
1493 1, /* stm_regs_per_insn_1st. */
1494 2, /* stm_regs_per_insn_subsequent. */
1495 COSTS_N_INSNS (2), /* storef. */
1496 COSTS_N_INSNS (2), /* stored. */
1497 0, /* store_unaligned. */
1498 COSTS_N_INSNS (1), /* loadv. */
1499 COSTS_N_INSNS (1) /* storev. */
1502 /* FP SFmode */
1504 COSTS_N_INSNS (17), /* div. */
1505 COSTS_N_INSNS (4), /* mult. */
1506 COSTS_N_INSNS (8), /* mult_addsub. */
1507 COSTS_N_INSNS (8), /* fma. */
1508 COSTS_N_INSNS (4), /* addsub. */
1509 COSTS_N_INSNS (2), /* fpconst. */
1510 COSTS_N_INSNS (2), /* neg. */
1511 COSTS_N_INSNS (2), /* compare. */
1512 COSTS_N_INSNS (4), /* widen. */
1513 COSTS_N_INSNS (4), /* narrow. */
1514 COSTS_N_INSNS (4), /* toint. */
1515 COSTS_N_INSNS (4), /* fromint. */
1516 COSTS_N_INSNS (4) /* roundint. */
1518 /* FP DFmode */
1520 COSTS_N_INSNS (31), /* div. */
1521 COSTS_N_INSNS (4), /* mult. */
1522 COSTS_N_INSNS (8), /* mult_addsub. */
1523 COSTS_N_INSNS (8), /* fma. */
1524 COSTS_N_INSNS (4), /* addsub. */
1525 COSTS_N_INSNS (2), /* fpconst. */
1526 COSTS_N_INSNS (2), /* neg. */
1527 COSTS_N_INSNS (2), /* compare. */
1528 COSTS_N_INSNS (4), /* widen. */
1529 COSTS_N_INSNS (4), /* narrow. */
1530 COSTS_N_INSNS (4), /* toint. */
1531 COSTS_N_INSNS (4), /* fromint. */
1532 COSTS_N_INSNS (4) /* roundint. */
1535 /* Vector */
1537 COSTS_N_INSNS (1) /* alu. */
1541 const struct cpu_cost_table cortexa15_extra_costs =
1543 /* ALU */
1545 0, /* arith. */
1546 0, /* logical. */
1547 0, /* shift. */
1548 0, /* shift_reg. */
1549 COSTS_N_INSNS (1), /* arith_shift. */
1550 COSTS_N_INSNS (1), /* arith_shift_reg. */
1551 COSTS_N_INSNS (1), /* log_shift. */
1552 COSTS_N_INSNS (1), /* log_shift_reg. */
1553 0, /* extend. */
1554 COSTS_N_INSNS (1), /* extend_arith. */
1555 COSTS_N_INSNS (1), /* bfi. */
1556 0, /* bfx. */
1557 0, /* clz. */
1558 0, /* rev. */
1559 0, /* non_exec. */
1560 true /* non_exec_costs_exec. */
1562 /* MULT SImode */
1565 COSTS_N_INSNS (2), /* simple. */
1566 COSTS_N_INSNS (3), /* flag_setting. */
1567 COSTS_N_INSNS (2), /* extend. */
1568 COSTS_N_INSNS (2), /* add. */
1569 COSTS_N_INSNS (2), /* extend_add. */
1570 COSTS_N_INSNS (18) /* idiv. */
1572 /* MULT DImode */
1574 0, /* simple (N/A). */
1575 0, /* flag_setting (N/A). */
1576 COSTS_N_INSNS (3), /* extend. */
1577 0, /* add (N/A). */
1578 COSTS_N_INSNS (3), /* extend_add. */
1579 0 /* idiv (N/A). */
1582 /* LD/ST */
1584 COSTS_N_INSNS (3), /* load. */
1585 COSTS_N_INSNS (3), /* load_sign_extend. */
1586 COSTS_N_INSNS (3), /* ldrd. */
1587 COSTS_N_INSNS (4), /* ldm_1st. */
1588 1, /* ldm_regs_per_insn_1st. */
1589 2, /* ldm_regs_per_insn_subsequent. */
1590 COSTS_N_INSNS (4), /* loadf. */
1591 COSTS_N_INSNS (4), /* loadd. */
1592 0, /* load_unaligned. */
1593 0, /* store. */
1594 0, /* strd. */
1595 COSTS_N_INSNS (1), /* stm_1st. */
1596 1, /* stm_regs_per_insn_1st. */
1597 2, /* stm_regs_per_insn_subsequent. */
1598 0, /* storef. */
1599 0, /* stored. */
1600 0, /* store_unaligned. */
1601 COSTS_N_INSNS (1), /* loadv. */
1602 COSTS_N_INSNS (1) /* storev. */
1605 /* FP SFmode */
1607 COSTS_N_INSNS (17), /* div. */
1608 COSTS_N_INSNS (4), /* mult. */
1609 COSTS_N_INSNS (8), /* mult_addsub. */
1610 COSTS_N_INSNS (8), /* fma. */
1611 COSTS_N_INSNS (4), /* addsub. */
1612 COSTS_N_INSNS (2), /* fpconst. */
1613 COSTS_N_INSNS (2), /* neg. */
1614 COSTS_N_INSNS (5), /* compare. */
1615 COSTS_N_INSNS (4), /* widen. */
1616 COSTS_N_INSNS (4), /* narrow. */
1617 COSTS_N_INSNS (4), /* toint. */
1618 COSTS_N_INSNS (4), /* fromint. */
1619 COSTS_N_INSNS (4) /* roundint. */
1621 /* FP DFmode */
1623 COSTS_N_INSNS (31), /* div. */
1624 COSTS_N_INSNS (4), /* mult. */
1625 COSTS_N_INSNS (8), /* mult_addsub. */
1626 COSTS_N_INSNS (8), /* fma. */
1627 COSTS_N_INSNS (4), /* addsub. */
1628 COSTS_N_INSNS (2), /* fpconst. */
1629 COSTS_N_INSNS (2), /* neg. */
1630 COSTS_N_INSNS (2), /* compare. */
1631 COSTS_N_INSNS (4), /* widen. */
1632 COSTS_N_INSNS (4), /* narrow. */
1633 COSTS_N_INSNS (4), /* toint. */
1634 COSTS_N_INSNS (4), /* fromint. */
1635 COSTS_N_INSNS (4) /* roundint. */
1638 /* Vector */
1640 COSTS_N_INSNS (1) /* alu. */
1644 const struct cpu_cost_table v7m_extra_costs =
1646 /* ALU */
1648 0, /* arith. */
1649 0, /* logical. */
1650 0, /* shift. */
1651 0, /* shift_reg. */
1652 0, /* arith_shift. */
1653 COSTS_N_INSNS (1), /* arith_shift_reg. */
1654 0, /* log_shift. */
1655 COSTS_N_INSNS (1), /* log_shift_reg. */
1656 0, /* extend. */
1657 COSTS_N_INSNS (1), /* extend_arith. */
1658 0, /* bfi. */
1659 0, /* bfx. */
1660 0, /* clz. */
1661 0, /* rev. */
1662 COSTS_N_INSNS (1), /* non_exec. */
1663 false /* non_exec_costs_exec. */
1666 /* MULT SImode */
1668 COSTS_N_INSNS (1), /* simple. */
1669 COSTS_N_INSNS (1), /* flag_setting. */
1670 COSTS_N_INSNS (2), /* extend. */
1671 COSTS_N_INSNS (1), /* add. */
1672 COSTS_N_INSNS (3), /* extend_add. */
1673 COSTS_N_INSNS (8) /* idiv. */
1675 /* MULT DImode */
1677 0, /* simple (N/A). */
1678 0, /* flag_setting (N/A). */
1679 COSTS_N_INSNS (2), /* extend. */
1680 0, /* add (N/A). */
1681 COSTS_N_INSNS (3), /* extend_add. */
1682 0 /* idiv (N/A). */
1685 /* LD/ST */
1687 COSTS_N_INSNS (2), /* load. */
1688 0, /* load_sign_extend. */
1689 COSTS_N_INSNS (3), /* ldrd. */
1690 COSTS_N_INSNS (2), /* ldm_1st. */
1691 1, /* ldm_regs_per_insn_1st. */
1692 1, /* ldm_regs_per_insn_subsequent. */
1693 COSTS_N_INSNS (2), /* loadf. */
1694 COSTS_N_INSNS (3), /* loadd. */
1695 COSTS_N_INSNS (1), /* load_unaligned. */
1696 COSTS_N_INSNS (2), /* store. */
1697 COSTS_N_INSNS (3), /* strd. */
1698 COSTS_N_INSNS (2), /* stm_1st. */
1699 1, /* stm_regs_per_insn_1st. */
1700 1, /* stm_regs_per_insn_subsequent. */
1701 COSTS_N_INSNS (2), /* storef. */
1702 COSTS_N_INSNS (3), /* stored. */
1703 COSTS_N_INSNS (1), /* store_unaligned. */
1704 COSTS_N_INSNS (1), /* loadv. */
1705 COSTS_N_INSNS (1) /* storev. */
1708 /* FP SFmode */
1710 COSTS_N_INSNS (7), /* div. */
1711 COSTS_N_INSNS (2), /* mult. */
1712 COSTS_N_INSNS (5), /* mult_addsub. */
1713 COSTS_N_INSNS (3), /* fma. */
1714 COSTS_N_INSNS (1), /* addsub. */
1715 0, /* fpconst. */
1716 0, /* neg. */
1717 0, /* compare. */
1718 0, /* widen. */
1719 0, /* narrow. */
1720 0, /* toint. */
1721 0, /* fromint. */
1722 0 /* roundint. */
1724 /* FP DFmode */
1726 COSTS_N_INSNS (15), /* div. */
1727 COSTS_N_INSNS (5), /* mult. */
1728 COSTS_N_INSNS (7), /* mult_addsub. */
1729 COSTS_N_INSNS (7), /* fma. */
1730 COSTS_N_INSNS (3), /* addsub. */
1731 0, /* fpconst. */
1732 0, /* neg. */
1733 0, /* compare. */
1734 0, /* widen. */
1735 0, /* narrow. */
1736 0, /* toint. */
1737 0, /* fromint. */
1738 0 /* roundint. */
1741 /* Vector */
1743 COSTS_N_INSNS (1) /* alu. */
1747 const struct tune_params arm_slowmul_tune =
1749 &generic_extra_costs, /* Insn extra costs. */
1750 NULL, /* Sched adj cost. */
1751 arm_default_branch_cost,
1752 &arm_default_vec_cost,
1753 3, /* Constant limit. */
1754 5, /* Max cond insns. */
1755 8, /* Memset max inline. */
1756 1, /* Issue rate. */
1757 ARM_PREFETCH_NOT_BENEFICIAL,
1758 tune_params::PREF_CONST_POOL_TRUE,
1759 tune_params::PREF_LDRD_FALSE,
1760 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1761 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1762 tune_params::DISPARAGE_FLAGS_NEITHER,
1763 tune_params::PREF_NEON_64_FALSE,
1764 tune_params::PREF_NEON_STRINGOPS_FALSE,
1765 tune_params::FUSE_NOTHING,
1766 tune_params::SCHED_AUTOPREF_OFF
1769 const struct tune_params arm_fastmul_tune =
1771 &generic_extra_costs, /* Insn extra costs. */
1772 NULL, /* Sched adj cost. */
1773 arm_default_branch_cost,
1774 &arm_default_vec_cost,
1775 1, /* Constant limit. */
1776 5, /* Max cond insns. */
1777 8, /* Memset max inline. */
1778 1, /* Issue rate. */
1779 ARM_PREFETCH_NOT_BENEFICIAL,
1780 tune_params::PREF_CONST_POOL_TRUE,
1781 tune_params::PREF_LDRD_FALSE,
1782 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1783 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1784 tune_params::DISPARAGE_FLAGS_NEITHER,
1785 tune_params::PREF_NEON_64_FALSE,
1786 tune_params::PREF_NEON_STRINGOPS_FALSE,
1787 tune_params::FUSE_NOTHING,
1788 tune_params::SCHED_AUTOPREF_OFF
1791 /* StrongARM has early execution of branches, so a sequence that is worth
1792 skipping is shorter. Set max_insns_skipped to a lower value. */
1794 const struct tune_params arm_strongarm_tune =
1796 &generic_extra_costs, /* Insn extra costs. */
1797 NULL, /* Sched adj cost. */
1798 arm_default_branch_cost,
1799 &arm_default_vec_cost,
1800 1, /* Constant limit. */
1801 3, /* Max cond insns. */
1802 8, /* Memset max inline. */
1803 1, /* Issue rate. */
1804 ARM_PREFETCH_NOT_BENEFICIAL,
1805 tune_params::PREF_CONST_POOL_TRUE,
1806 tune_params::PREF_LDRD_FALSE,
1807 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1808 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1809 tune_params::DISPARAGE_FLAGS_NEITHER,
1810 tune_params::PREF_NEON_64_FALSE,
1811 tune_params::PREF_NEON_STRINGOPS_FALSE,
1812 tune_params::FUSE_NOTHING,
1813 tune_params::SCHED_AUTOPREF_OFF
1816 const struct tune_params arm_xscale_tune =
1818 &generic_extra_costs, /* Insn extra costs. */
1819 xscale_sched_adjust_cost,
1820 arm_default_branch_cost,
1821 &arm_default_vec_cost,
1822 2, /* Constant limit. */
1823 3, /* Max cond insns. */
1824 8, /* Memset max inline. */
1825 1, /* Issue rate. */
1826 ARM_PREFETCH_NOT_BENEFICIAL,
1827 tune_params::PREF_CONST_POOL_TRUE,
1828 tune_params::PREF_LDRD_FALSE,
1829 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1830 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1831 tune_params::DISPARAGE_FLAGS_NEITHER,
1832 tune_params::PREF_NEON_64_FALSE,
1833 tune_params::PREF_NEON_STRINGOPS_FALSE,
1834 tune_params::FUSE_NOTHING,
1835 tune_params::SCHED_AUTOPREF_OFF
1838 const struct tune_params arm_9e_tune =
1840 &generic_extra_costs, /* Insn extra costs. */
1841 NULL, /* Sched adj cost. */
1842 arm_default_branch_cost,
1843 &arm_default_vec_cost,
1844 1, /* Constant limit. */
1845 5, /* Max cond insns. */
1846 8, /* Memset max inline. */
1847 1, /* Issue rate. */
1848 ARM_PREFETCH_NOT_BENEFICIAL,
1849 tune_params::PREF_CONST_POOL_TRUE,
1850 tune_params::PREF_LDRD_FALSE,
1851 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1852 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1853 tune_params::DISPARAGE_FLAGS_NEITHER,
1854 tune_params::PREF_NEON_64_FALSE,
1855 tune_params::PREF_NEON_STRINGOPS_FALSE,
1856 tune_params::FUSE_NOTHING,
1857 tune_params::SCHED_AUTOPREF_OFF
1860 const struct tune_params arm_marvell_pj4_tune =
1862 &generic_extra_costs, /* Insn extra costs. */
1863 NULL, /* Sched adj cost. */
1864 arm_default_branch_cost,
1865 &arm_default_vec_cost,
1866 1, /* Constant limit. */
1867 5, /* Max cond insns. */
1868 8, /* Memset max inline. */
1869 2, /* Issue rate. */
1870 ARM_PREFETCH_NOT_BENEFICIAL,
1871 tune_params::PREF_CONST_POOL_TRUE,
1872 tune_params::PREF_LDRD_FALSE,
1873 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1874 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1875 tune_params::DISPARAGE_FLAGS_NEITHER,
1876 tune_params::PREF_NEON_64_FALSE,
1877 tune_params::PREF_NEON_STRINGOPS_FALSE,
1878 tune_params::FUSE_NOTHING,
1879 tune_params::SCHED_AUTOPREF_OFF
1882 const struct tune_params arm_v6t2_tune =
1884 &generic_extra_costs, /* Insn extra costs. */
1885 NULL, /* Sched adj cost. */
1886 arm_default_branch_cost,
1887 &arm_default_vec_cost,
1888 1, /* Constant limit. */
1889 5, /* Max cond insns. */
1890 8, /* Memset max inline. */
1891 1, /* Issue rate. */
1892 ARM_PREFETCH_NOT_BENEFICIAL,
1893 tune_params::PREF_CONST_POOL_FALSE,
1894 tune_params::PREF_LDRD_FALSE,
1895 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1896 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1897 tune_params::DISPARAGE_FLAGS_NEITHER,
1898 tune_params::PREF_NEON_64_FALSE,
1899 tune_params::PREF_NEON_STRINGOPS_FALSE,
1900 tune_params::FUSE_NOTHING,
1901 tune_params::SCHED_AUTOPREF_OFF
1905 /* Generic Cortex tuning. Use more specific tunings if appropriate. */
1906 const struct tune_params arm_cortex_tune =
1908 &generic_extra_costs,
1909 NULL, /* Sched adj cost. */
1910 arm_default_branch_cost,
1911 &arm_default_vec_cost,
1912 1, /* Constant limit. */
1913 5, /* Max cond insns. */
1914 8, /* Memset max inline. */
1915 2, /* Issue rate. */
1916 ARM_PREFETCH_NOT_BENEFICIAL,
1917 tune_params::PREF_CONST_POOL_FALSE,
1918 tune_params::PREF_LDRD_FALSE,
1919 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1920 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1921 tune_params::DISPARAGE_FLAGS_NEITHER,
1922 tune_params::PREF_NEON_64_FALSE,
1923 tune_params::PREF_NEON_STRINGOPS_FALSE,
1924 tune_params::FUSE_NOTHING,
1925 tune_params::SCHED_AUTOPREF_OFF
1928 const struct tune_params arm_cortex_a8_tune =
1930 &cortexa8_extra_costs,
1931 NULL, /* Sched adj cost. */
1932 arm_default_branch_cost,
1933 &arm_default_vec_cost,
1934 1, /* Constant limit. */
1935 5, /* Max cond insns. */
1936 8, /* Memset max inline. */
1937 2, /* Issue rate. */
1938 ARM_PREFETCH_NOT_BENEFICIAL,
1939 tune_params::PREF_CONST_POOL_FALSE,
1940 tune_params::PREF_LDRD_FALSE,
1941 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1942 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1943 tune_params::DISPARAGE_FLAGS_NEITHER,
1944 tune_params::PREF_NEON_64_FALSE,
1945 tune_params::PREF_NEON_STRINGOPS_TRUE,
1946 tune_params::FUSE_NOTHING,
1947 tune_params::SCHED_AUTOPREF_OFF
1950 const struct tune_params arm_cortex_a7_tune =
1952 &cortexa7_extra_costs,
1953 NULL, /* Sched adj cost. */
1954 arm_default_branch_cost,
1955 &arm_default_vec_cost,
1956 1, /* Constant limit. */
1957 5, /* Max cond insns. */
1958 8, /* Memset max inline. */
1959 2, /* Issue rate. */
1960 ARM_PREFETCH_NOT_BENEFICIAL,
1961 tune_params::PREF_CONST_POOL_FALSE,
1962 tune_params::PREF_LDRD_FALSE,
1963 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1964 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1965 tune_params::DISPARAGE_FLAGS_NEITHER,
1966 tune_params::PREF_NEON_64_FALSE,
1967 tune_params::PREF_NEON_STRINGOPS_TRUE,
1968 tune_params::FUSE_NOTHING,
1969 tune_params::SCHED_AUTOPREF_OFF
1972 const struct tune_params arm_cortex_a15_tune =
1974 &cortexa15_extra_costs,
1975 NULL, /* Sched adj cost. */
1976 arm_default_branch_cost,
1977 &arm_default_vec_cost,
1978 1, /* Constant limit. */
1979 2, /* Max cond insns. */
1980 8, /* Memset max inline. */
1981 3, /* Issue rate. */
1982 ARM_PREFETCH_NOT_BENEFICIAL,
1983 tune_params::PREF_CONST_POOL_FALSE,
1984 tune_params::PREF_LDRD_TRUE,
1985 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1986 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1987 tune_params::DISPARAGE_FLAGS_ALL,
1988 tune_params::PREF_NEON_64_FALSE,
1989 tune_params::PREF_NEON_STRINGOPS_TRUE,
1990 tune_params::FUSE_NOTHING,
1991 tune_params::SCHED_AUTOPREF_FULL
1994 const struct tune_params arm_cortex_a35_tune =
1996 &cortexa53_extra_costs,
1997 NULL, /* Sched adj cost. */
1998 arm_default_branch_cost,
1999 &arm_default_vec_cost,
2000 1, /* Constant limit. */
2001 5, /* Max cond insns. */
2002 8, /* Memset max inline. */
2003 1, /* Issue rate. */
2004 ARM_PREFETCH_NOT_BENEFICIAL,
2005 tune_params::PREF_CONST_POOL_FALSE,
2006 tune_params::PREF_LDRD_FALSE,
2007 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2008 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2009 tune_params::DISPARAGE_FLAGS_NEITHER,
2010 tune_params::PREF_NEON_64_FALSE,
2011 tune_params::PREF_NEON_STRINGOPS_TRUE,
2012 FUSE_OPS (tune_params::FUSE_MOVW_MOVT),
2013 tune_params::SCHED_AUTOPREF_OFF
2016 const struct tune_params arm_cortex_a53_tune =
2018 &cortexa53_extra_costs,
2019 NULL, /* Sched adj cost. */
2020 arm_default_branch_cost,
2021 &arm_default_vec_cost,
2022 1, /* Constant limit. */
2023 5, /* Max cond insns. */
2024 8, /* Memset max inline. */
2025 2, /* Issue rate. */
2026 ARM_PREFETCH_NOT_BENEFICIAL,
2027 tune_params::PREF_CONST_POOL_FALSE,
2028 tune_params::PREF_LDRD_FALSE,
2029 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2030 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2031 tune_params::DISPARAGE_FLAGS_NEITHER,
2032 tune_params::PREF_NEON_64_FALSE,
2033 tune_params::PREF_NEON_STRINGOPS_TRUE,
2034 FUSE_OPS (tune_params::FUSE_MOVW_MOVT | tune_params::FUSE_AES_AESMC),
2035 tune_params::SCHED_AUTOPREF_OFF
2038 const struct tune_params arm_cortex_a57_tune =
2040 &cortexa57_extra_costs,
2041 NULL, /* Sched adj cost. */
2042 arm_default_branch_cost,
2043 &arm_default_vec_cost,
2044 1, /* Constant limit. */
2045 2, /* Max cond insns. */
2046 8, /* Memset max inline. */
2047 3, /* Issue rate. */
2048 ARM_PREFETCH_NOT_BENEFICIAL,
2049 tune_params::PREF_CONST_POOL_FALSE,
2050 tune_params::PREF_LDRD_TRUE,
2051 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2052 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2053 tune_params::DISPARAGE_FLAGS_ALL,
2054 tune_params::PREF_NEON_64_FALSE,
2055 tune_params::PREF_NEON_STRINGOPS_TRUE,
2056 FUSE_OPS (tune_params::FUSE_MOVW_MOVT | tune_params::FUSE_AES_AESMC),
2057 tune_params::SCHED_AUTOPREF_FULL
2060 const struct tune_params arm_exynosm1_tune =
2062 &exynosm1_extra_costs,
2063 NULL, /* Sched adj cost. */
2064 arm_default_branch_cost,
2065 &arm_default_vec_cost,
2066 1, /* Constant limit. */
2067 2, /* Max cond insns. */
2068 8, /* Memset max inline. */
2069 3, /* Issue rate. */
2070 ARM_PREFETCH_NOT_BENEFICIAL,
2071 tune_params::PREF_CONST_POOL_FALSE,
2072 tune_params::PREF_LDRD_TRUE,
2073 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* Thumb. */
2074 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* ARM. */
2075 tune_params::DISPARAGE_FLAGS_ALL,
2076 tune_params::PREF_NEON_64_FALSE,
2077 tune_params::PREF_NEON_STRINGOPS_TRUE,
2078 tune_params::FUSE_NOTHING,
2079 tune_params::SCHED_AUTOPREF_OFF
2082 const struct tune_params arm_xgene1_tune =
2084 &xgene1_extra_costs,
2085 NULL, /* Sched adj cost. */
2086 arm_default_branch_cost,
2087 &arm_default_vec_cost,
2088 1, /* Constant limit. */
2089 2, /* Max cond insns. */
2090 32, /* Memset max inline. */
2091 4, /* Issue rate. */
2092 ARM_PREFETCH_NOT_BENEFICIAL,
2093 tune_params::PREF_CONST_POOL_FALSE,
2094 tune_params::PREF_LDRD_TRUE,
2095 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2096 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2097 tune_params::DISPARAGE_FLAGS_ALL,
2098 tune_params::PREF_NEON_64_FALSE,
2099 tune_params::PREF_NEON_STRINGOPS_FALSE,
2100 tune_params::FUSE_NOTHING,
2101 tune_params::SCHED_AUTOPREF_OFF
2104 /* Branches can be dual-issued on Cortex-A5, so conditional execution is
2105 less appealing. Set max_insns_skipped to a low value. */
2107 const struct tune_params arm_cortex_a5_tune =
2109 &cortexa5_extra_costs,
2110 NULL, /* Sched adj cost. */
2111 arm_cortex_a5_branch_cost,
2112 &arm_default_vec_cost,
2113 1, /* Constant limit. */
2114 1, /* Max cond insns. */
2115 8, /* Memset max inline. */
2116 2, /* Issue rate. */
2117 ARM_PREFETCH_NOT_BENEFICIAL,
2118 tune_params::PREF_CONST_POOL_FALSE,
2119 tune_params::PREF_LDRD_FALSE,
2120 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* Thumb. */
2121 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* ARM. */
2122 tune_params::DISPARAGE_FLAGS_NEITHER,
2123 tune_params::PREF_NEON_64_FALSE,
2124 tune_params::PREF_NEON_STRINGOPS_TRUE,
2125 tune_params::FUSE_NOTHING,
2126 tune_params::SCHED_AUTOPREF_OFF
2129 const struct tune_params arm_cortex_a9_tune =
2131 &cortexa9_extra_costs,
2132 cortex_a9_sched_adjust_cost,
2133 arm_default_branch_cost,
2134 &arm_default_vec_cost,
2135 1, /* Constant limit. */
2136 5, /* Max cond insns. */
2137 8, /* Memset max inline. */
2138 2, /* Issue rate. */
2139 ARM_PREFETCH_BENEFICIAL(4,32,32),
2140 tune_params::PREF_CONST_POOL_FALSE,
2141 tune_params::PREF_LDRD_FALSE,
2142 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2143 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2144 tune_params::DISPARAGE_FLAGS_NEITHER,
2145 tune_params::PREF_NEON_64_FALSE,
2146 tune_params::PREF_NEON_STRINGOPS_FALSE,
2147 tune_params::FUSE_NOTHING,
2148 tune_params::SCHED_AUTOPREF_OFF
2151 const struct tune_params arm_cortex_a12_tune =
2153 &cortexa12_extra_costs,
2154 NULL, /* Sched adj cost. */
2155 arm_default_branch_cost,
2156 &arm_default_vec_cost, /* Vectorizer costs. */
2157 1, /* Constant limit. */
2158 2, /* Max cond insns. */
2159 8, /* Memset max inline. */
2160 2, /* Issue rate. */
2161 ARM_PREFETCH_NOT_BENEFICIAL,
2162 tune_params::PREF_CONST_POOL_FALSE,
2163 tune_params::PREF_LDRD_TRUE,
2164 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2165 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2166 tune_params::DISPARAGE_FLAGS_ALL,
2167 tune_params::PREF_NEON_64_FALSE,
2168 tune_params::PREF_NEON_STRINGOPS_TRUE,
2169 FUSE_OPS (tune_params::FUSE_MOVW_MOVT),
2170 tune_params::SCHED_AUTOPREF_OFF
2173 const struct tune_params arm_cortex_a73_tune =
2175 &cortexa57_extra_costs,
2176 NULL, /* Sched adj cost. */
2177 arm_default_branch_cost,
2178 &arm_default_vec_cost, /* Vectorizer costs. */
2179 1, /* Constant limit. */
2180 2, /* Max cond insns. */
2181 8, /* Memset max inline. */
2182 2, /* Issue rate. */
2183 ARM_PREFETCH_NOT_BENEFICIAL,
2184 tune_params::PREF_CONST_POOL_FALSE,
2185 tune_params::PREF_LDRD_TRUE,
2186 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2187 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2188 tune_params::DISPARAGE_FLAGS_ALL,
2189 tune_params::PREF_NEON_64_FALSE,
2190 tune_params::PREF_NEON_STRINGOPS_TRUE,
2191 FUSE_OPS (tune_params::FUSE_AES_AESMC | tune_params::FUSE_MOVW_MOVT),
2192 tune_params::SCHED_AUTOPREF_FULL
2195 /* armv7m tuning. On Cortex-M4 cores, for example, MOVW and MOVT each take a
2196 single cycle to execute, so a MOVW/MOVT pair takes two cycles. An LDR from the
2197 constant pool also takes two cycles to execute, but mildly increases pipelining
2198 opportunity (consecutive loads/stores can be pipelined together, saving one
2199 cycle), and may also improve icache utilisation. Hence we prefer the constant
2200 pool for such processors. */
2202 const struct tune_params arm_v7m_tune =
2204 &v7m_extra_costs,
2205 NULL, /* Sched adj cost. */
2206 arm_cortex_m_branch_cost,
2207 &arm_default_vec_cost,
2208 1, /* Constant limit. */
2209 2, /* Max cond insns. */
2210 8, /* Memset max inline. */
2211 1, /* Issue rate. */
2212 ARM_PREFETCH_NOT_BENEFICIAL,
2213 tune_params::PREF_CONST_POOL_TRUE,
2214 tune_params::PREF_LDRD_FALSE,
2215 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* Thumb. */
2216 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* ARM. */
2217 tune_params::DISPARAGE_FLAGS_NEITHER,
2218 tune_params::PREF_NEON_64_FALSE,
2219 tune_params::PREF_NEON_STRINGOPS_FALSE,
2220 tune_params::FUSE_NOTHING,
2221 tune_params::SCHED_AUTOPREF_OFF
2224 /* Cortex-M7 tuning. */
2226 const struct tune_params arm_cortex_m7_tune =
2228 &v7m_extra_costs,
2229 NULL, /* Sched adj cost. */
2230 arm_cortex_m7_branch_cost,
2231 &arm_default_vec_cost,
2232 0, /* Constant limit. */
2233 1, /* Max cond insns. */
2234 8, /* Memset max inline. */
2235 2, /* Issue rate. */
2236 ARM_PREFETCH_NOT_BENEFICIAL,
2237 tune_params::PREF_CONST_POOL_TRUE,
2238 tune_params::PREF_LDRD_FALSE,
2239 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2240 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2241 tune_params::DISPARAGE_FLAGS_NEITHER,
2242 tune_params::PREF_NEON_64_FALSE,
2243 tune_params::PREF_NEON_STRINGOPS_FALSE,
2244 tune_params::FUSE_NOTHING,
2245 tune_params::SCHED_AUTOPREF_OFF
2248 /* The arm_v6m_tune is duplicated from arm_cortex_tune, rather than
2249 arm_v6t2_tune. It is used for cortex-m0, cortex-m1, cortex-m0plus and
2250 cortex-m23. */
2251 const struct tune_params arm_v6m_tune =
2253 &generic_extra_costs, /* Insn extra costs. */
2254 NULL, /* Sched adj cost. */
2255 arm_default_branch_cost,
2256 &arm_default_vec_cost, /* Vectorizer costs. */
2257 1, /* Constant limit. */
2258 5, /* Max cond insns. */
2259 8, /* Memset max inline. */
2260 1, /* Issue rate. */
2261 ARM_PREFETCH_NOT_BENEFICIAL,
2262 tune_params::PREF_CONST_POOL_FALSE,
2263 tune_params::PREF_LDRD_FALSE,
2264 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* Thumb. */
2265 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* ARM. */
2266 tune_params::DISPARAGE_FLAGS_NEITHER,
2267 tune_params::PREF_NEON_64_FALSE,
2268 tune_params::PREF_NEON_STRINGOPS_FALSE,
2269 tune_params::FUSE_NOTHING,
2270 tune_params::SCHED_AUTOPREF_OFF
2273 const struct tune_params arm_fa726te_tune =
2275 &generic_extra_costs, /* Insn extra costs. */
2276 fa726te_sched_adjust_cost,
2277 arm_default_branch_cost,
2278 &arm_default_vec_cost,
2279 1, /* Constant limit. */
2280 5, /* Max cond insns. */
2281 8, /* Memset max inline. */
2282 2, /* Issue rate. */
2283 ARM_PREFETCH_NOT_BENEFICIAL,
2284 tune_params::PREF_CONST_POOL_TRUE,
2285 tune_params::PREF_LDRD_FALSE,
2286 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2287 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2288 tune_params::DISPARAGE_FLAGS_NEITHER,
2289 tune_params::PREF_NEON_64_FALSE,
2290 tune_params::PREF_NEON_STRINGOPS_FALSE,
2291 tune_params::FUSE_NOTHING,
2292 tune_params::SCHED_AUTOPREF_OFF
2295 /* Auto-generated CPU, FPU and architecture tables. */
2296 #include "arm-cpu-data.h"
2298 /* The name of the preprocessor macro to define for this architecture. PROFILE
2299 is replaced by the architecture name (e.g. 8A) in arm_option_override () and
2300 is thus chosen to be big enough to hold the longest architecture name. */
2302 char arm_arch_name[] = "__ARM_ARCH_PROFILE__";
2304 /* Supported TLS relocations. */
2306 enum tls_reloc {
2307 TLS_GD32,
2308 TLS_LDM32,
2309 TLS_LDO32,
2310 TLS_IE32,
2311 TLS_LE32,
2312 TLS_DESCSEQ /* GNU scheme */
2315 /* The maximum number of insns to be used when loading a constant. */
2316 inline static int
2317 arm_constant_limit (bool size_p)
2319 return size_p ? 1 : current_tune->constant_limit;
2322 /* Emit an insn that's a simple single-set. Both the operands must be known
2323 to be valid. */
2324 inline static rtx_insn *
2325 emit_set_insn (rtx x, rtx y)
2327 return emit_insn (gen_rtx_SET (x, y));
2330 /* Return the number of bits set in VALUE. */
2331 static unsigned
2332 bit_count (unsigned long value)
2334 unsigned long count = 0;
2336 while (value)
2338 count++;
2339 value &= value - 1; /* Clear the least-significant set bit. */
2342 return count;
2345 /* Return the number of bits set in BMAP. */
2346 static unsigned
2347 bitmap_popcount (const sbitmap bmap)
2349 unsigned int count = 0;
2350 unsigned int n = 0;
2351 sbitmap_iterator sbi;
2353 EXECUTE_IF_SET_IN_BITMAP (bmap, 0, n, sbi)
2354 count++;
2355 return count;
2358 typedef struct
2360 machine_mode mode;
2361 const char *name;
2362 } arm_fixed_mode_set;
2364 /* A small helper for setting fixed-point library libfuncs. */
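/* For example, arm_set_fixed_optab_libfunc (add_optab, QQmode, "add", "qq", 3)
   registers the routine name "__gnu_addqq3" for QQmode addition.  */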
2366 static void
2367 arm_set_fixed_optab_libfunc (optab optable, machine_mode mode,
2368 const char *funcname, const char *modename,
2369 int num_suffix)
2371 char buffer[50];
2373 if (num_suffix == 0)
2374 sprintf (buffer, "__gnu_%s%s", funcname, modename);
2375 else
2376 sprintf (buffer, "__gnu_%s%s%d", funcname, modename, num_suffix);
2378 set_optab_libfunc (optable, mode, buffer);
2381 static void
2382 arm_set_fixed_conv_libfunc (convert_optab optable, machine_mode to,
2383 machine_mode from, const char *funcname,
2384 const char *toname, const char *fromname)
2386 char buffer[50];
2387 const char *maybe_suffix_2 = "";
2389 /* Follow the logic for selecting a "2" suffix in fixed-bit.h. */
2390 if (ALL_FIXED_POINT_MODE_P (from) && ALL_FIXED_POINT_MODE_P (to)
2391 && UNSIGNED_FIXED_POINT_MODE_P (from) == UNSIGNED_FIXED_POINT_MODE_P (to)
2392 && ALL_FRACT_MODE_P (from) == ALL_FRACT_MODE_P (to))
2393 maybe_suffix_2 = "2";
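  /* E.g. a signed QQmode to HQmode conversion is registered as
     "__gnu_fractqqhq2", whereas QQmode to SImode is just "__gnu_fractqqsi".  */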
2395 sprintf (buffer, "__gnu_%s%s%s%s", funcname, fromname, toname,
2396 maybe_suffix_2);
2398 set_conv_libfunc (optable, to, from, buffer);
2401 /* Set up library functions unique to ARM. */
2403 static void
2404 arm_init_libfuncs (void)
2406 /* For Linux, we have access to kernel support for atomic operations. */
2407 if (arm_abi == ARM_ABI_AAPCS_LINUX)
2408 init_sync_libfuncs (MAX_SYNC_LIBFUNC_SIZE);
2410 /* There are no special library functions unless we are using the
2411 ARM BPABI. */
2412 if (!TARGET_BPABI)
2413 return;
2415 /* The functions below are described in Section 4 of the "Run-Time
2416 ABI for the ARM architecture", Version 1.0. */
2418 /* Double-precision floating-point arithmetic. Table 2. */
2419 set_optab_libfunc (add_optab, DFmode, "__aeabi_dadd");
2420 set_optab_libfunc (sdiv_optab, DFmode, "__aeabi_ddiv");
2421 set_optab_libfunc (smul_optab, DFmode, "__aeabi_dmul");
2422 set_optab_libfunc (neg_optab, DFmode, "__aeabi_dneg");
2423 set_optab_libfunc (sub_optab, DFmode, "__aeabi_dsub");
2425 /* Double-precision comparisons. Table 3. */
2426 set_optab_libfunc (eq_optab, DFmode, "__aeabi_dcmpeq");
2427 set_optab_libfunc (ne_optab, DFmode, NULL);
2428 set_optab_libfunc (lt_optab, DFmode, "__aeabi_dcmplt");
2429 set_optab_libfunc (le_optab, DFmode, "__aeabi_dcmple");
2430 set_optab_libfunc (ge_optab, DFmode, "__aeabi_dcmpge");
2431 set_optab_libfunc (gt_optab, DFmode, "__aeabi_dcmpgt");
2432 set_optab_libfunc (unord_optab, DFmode, "__aeabi_dcmpun");
2434 /* Single-precision floating-point arithmetic. Table 4. */
2435 set_optab_libfunc (add_optab, SFmode, "__aeabi_fadd");
2436 set_optab_libfunc (sdiv_optab, SFmode, "__aeabi_fdiv");
2437 set_optab_libfunc (smul_optab, SFmode, "__aeabi_fmul");
2438 set_optab_libfunc (neg_optab, SFmode, "__aeabi_fneg");
2439 set_optab_libfunc (sub_optab, SFmode, "__aeabi_fsub");
2441 /* Single-precision comparisons. Table 5. */
2442 set_optab_libfunc (eq_optab, SFmode, "__aeabi_fcmpeq");
2443 set_optab_libfunc (ne_optab, SFmode, NULL);
2444 set_optab_libfunc (lt_optab, SFmode, "__aeabi_fcmplt");
2445 set_optab_libfunc (le_optab, SFmode, "__aeabi_fcmple");
2446 set_optab_libfunc (ge_optab, SFmode, "__aeabi_fcmpge");
2447 set_optab_libfunc (gt_optab, SFmode, "__aeabi_fcmpgt");
2448 set_optab_libfunc (unord_optab, SFmode, "__aeabi_fcmpun");
2450 /* Floating-point to integer conversions. Table 6. */
2451 set_conv_libfunc (sfix_optab, SImode, DFmode, "__aeabi_d2iz");
2452 set_conv_libfunc (ufix_optab, SImode, DFmode, "__aeabi_d2uiz");
2453 set_conv_libfunc (sfix_optab, DImode, DFmode, "__aeabi_d2lz");
2454 set_conv_libfunc (ufix_optab, DImode, DFmode, "__aeabi_d2ulz");
2455 set_conv_libfunc (sfix_optab, SImode, SFmode, "__aeabi_f2iz");
2456 set_conv_libfunc (ufix_optab, SImode, SFmode, "__aeabi_f2uiz");
2457 set_conv_libfunc (sfix_optab, DImode, SFmode, "__aeabi_f2lz");
2458 set_conv_libfunc (ufix_optab, DImode, SFmode, "__aeabi_f2ulz");
2460 /* Conversions between floating types. Table 7. */
2461 set_conv_libfunc (trunc_optab, SFmode, DFmode, "__aeabi_d2f");
2462 set_conv_libfunc (sext_optab, DFmode, SFmode, "__aeabi_f2d");
2464 /* Integer to floating-point conversions. Table 8. */
2465 set_conv_libfunc (sfloat_optab, DFmode, SImode, "__aeabi_i2d");
2466 set_conv_libfunc (ufloat_optab, DFmode, SImode, "__aeabi_ui2d");
2467 set_conv_libfunc (sfloat_optab, DFmode, DImode, "__aeabi_l2d");
2468 set_conv_libfunc (ufloat_optab, DFmode, DImode, "__aeabi_ul2d");
2469 set_conv_libfunc (sfloat_optab, SFmode, SImode, "__aeabi_i2f");
2470 set_conv_libfunc (ufloat_optab, SFmode, SImode, "__aeabi_ui2f");
2471 set_conv_libfunc (sfloat_optab, SFmode, DImode, "__aeabi_l2f");
2472 set_conv_libfunc (ufloat_optab, SFmode, DImode, "__aeabi_ul2f");
2474 /* Long long. Table 9. */
2475 set_optab_libfunc (smul_optab, DImode, "__aeabi_lmul");
2476 set_optab_libfunc (sdivmod_optab, DImode, "__aeabi_ldivmod");
2477 set_optab_libfunc (udivmod_optab, DImode, "__aeabi_uldivmod");
2478 set_optab_libfunc (ashl_optab, DImode, "__aeabi_llsl");
2479 set_optab_libfunc (lshr_optab, DImode, "__aeabi_llsr");
2480 set_optab_libfunc (ashr_optab, DImode, "__aeabi_lasr");
2481 set_optab_libfunc (cmp_optab, DImode, "__aeabi_lcmp");
2482 set_optab_libfunc (ucmp_optab, DImode, "__aeabi_ulcmp");
2484 /* Integer (32/32->32) division. \S 4.3.1. */
2485 set_optab_libfunc (sdivmod_optab, SImode, "__aeabi_idivmod");
2486 set_optab_libfunc (udivmod_optab, SImode, "__aeabi_uidivmod");
2488 /* The divmod functions are designed so that they can be used for
2489 plain division, even though they return both the quotient and the
2490 remainder. The quotient is returned in the usual location (i.e.,
2491 r0 for SImode, {r0, r1} for DImode), just as would be expected
2492 for an ordinary division routine. Because the AAPCS calling
2493 conventions specify that all of { r0, r1, r2, r3 } are
2494 call-clobbered registers, there is no need to tell the compiler
2495 explicitly that those registers are clobbered by these
2496 routines. */
2497 set_optab_libfunc (sdiv_optab, DImode, "__aeabi_ldivmod");
2498 set_optab_libfunc (udiv_optab, DImode, "__aeabi_uldivmod");
2500 /* For SImode division the ABI provides div-without-mod routines,
2501 which are faster. */
2502 set_optab_libfunc (sdiv_optab, SImode, "__aeabi_idiv");
2503 set_optab_libfunc (udiv_optab, SImode, "__aeabi_uidiv");
2505 /* We don't have mod libcalls. Fortunately gcc knows how to use the
2506 divmod libcalls instead. */
2507 set_optab_libfunc (smod_optab, DImode, NULL);
2508 set_optab_libfunc (umod_optab, DImode, NULL);
2509 set_optab_libfunc (smod_optab, SImode, NULL);
2510 set_optab_libfunc (umod_optab, SImode, NULL);
2512 /* Half-precision float operations. The compiler handles all operations
2513 with NULL libfuncs by converting to SFmode. */
2514 switch (arm_fp16_format)
2516 case ARM_FP16_FORMAT_IEEE:
2517 case ARM_FP16_FORMAT_ALTERNATIVE:
2519 /* Conversions. */
2520 set_conv_libfunc (trunc_optab, HFmode, SFmode,
2521 (arm_fp16_format == ARM_FP16_FORMAT_IEEE
2522 ? "__gnu_f2h_ieee"
2523 : "__gnu_f2h_alternative"));
2524 set_conv_libfunc (sext_optab, SFmode, HFmode,
2525 (arm_fp16_format == ARM_FP16_FORMAT_IEEE
2526 ? "__gnu_h2f_ieee"
2527 : "__gnu_h2f_alternative"));
2529 set_conv_libfunc (trunc_optab, HFmode, DFmode,
2530 (arm_fp16_format == ARM_FP16_FORMAT_IEEE
2531 ? "__gnu_d2h_ieee"
2532 : "__gnu_d2h_alternative"));
2534 /* Arithmetic. */
2535 set_optab_libfunc (add_optab, HFmode, NULL);
2536 set_optab_libfunc (sdiv_optab, HFmode, NULL);
2537 set_optab_libfunc (smul_optab, HFmode, NULL);
2538 set_optab_libfunc (neg_optab, HFmode, NULL);
2539 set_optab_libfunc (sub_optab, HFmode, NULL);
2541 /* Comparisons. */
2542 set_optab_libfunc (eq_optab, HFmode, NULL);
2543 set_optab_libfunc (ne_optab, HFmode, NULL);
2544 set_optab_libfunc (lt_optab, HFmode, NULL);
2545 set_optab_libfunc (le_optab, HFmode, NULL);
2546 set_optab_libfunc (ge_optab, HFmode, NULL);
2547 set_optab_libfunc (gt_optab, HFmode, NULL);
2548 set_optab_libfunc (unord_optab, HFmode, NULL);
2549 break;
2551 default:
2552 break;
2555 /* Use names prefixed with __gnu_ for fixed-point helper functions. */
2557 const arm_fixed_mode_set fixed_arith_modes[] =
2559 { E_QQmode, "qq" },
2560 { E_UQQmode, "uqq" },
2561 { E_HQmode, "hq" },
2562 { E_UHQmode, "uhq" },
2563 { E_SQmode, "sq" },
2564 { E_USQmode, "usq" },
2565 { E_DQmode, "dq" },
2566 { E_UDQmode, "udq" },
2567 { E_TQmode, "tq" },
2568 { E_UTQmode, "utq" },
2569 { E_HAmode, "ha" },
2570 { E_UHAmode, "uha" },
2571 { E_SAmode, "sa" },
2572 { E_USAmode, "usa" },
2573 { E_DAmode, "da" },
2574 { E_UDAmode, "uda" },
2575 { E_TAmode, "ta" },
2576 { E_UTAmode, "uta" }
2578 const arm_fixed_mode_set fixed_conv_modes[] =
2580 { E_QQmode, "qq" },
2581 { E_UQQmode, "uqq" },
2582 { E_HQmode, "hq" },
2583 { E_UHQmode, "uhq" },
2584 { E_SQmode, "sq" },
2585 { E_USQmode, "usq" },
2586 { E_DQmode, "dq" },
2587 { E_UDQmode, "udq" },
2588 { E_TQmode, "tq" },
2589 { E_UTQmode, "utq" },
2590 { E_HAmode, "ha" },
2591 { E_UHAmode, "uha" },
2592 { E_SAmode, "sa" },
2593 { E_USAmode, "usa" },
2594 { E_DAmode, "da" },
2595 { E_UDAmode, "uda" },
2596 { E_TAmode, "ta" },
2597 { E_UTAmode, "uta" },
2598 { E_QImode, "qi" },
2599 { E_HImode, "hi" },
2600 { E_SImode, "si" },
2601 { E_DImode, "di" },
2602 { E_TImode, "ti" },
2603 { E_SFmode, "sf" },
2604 { E_DFmode, "df" }
2606 unsigned int i, j;
2608 for (i = 0; i < ARRAY_SIZE (fixed_arith_modes); i++)
2610 arm_set_fixed_optab_libfunc (add_optab, fixed_arith_modes[i].mode,
2611 "add", fixed_arith_modes[i].name, 3);
2612 arm_set_fixed_optab_libfunc (ssadd_optab, fixed_arith_modes[i].mode,
2613 "ssadd", fixed_arith_modes[i].name, 3);
2614 arm_set_fixed_optab_libfunc (usadd_optab, fixed_arith_modes[i].mode,
2615 "usadd", fixed_arith_modes[i].name, 3);
2616 arm_set_fixed_optab_libfunc (sub_optab, fixed_arith_modes[i].mode,
2617 "sub", fixed_arith_modes[i].name, 3);
2618 arm_set_fixed_optab_libfunc (sssub_optab, fixed_arith_modes[i].mode,
2619 "sssub", fixed_arith_modes[i].name, 3);
2620 arm_set_fixed_optab_libfunc (ussub_optab, fixed_arith_modes[i].mode,
2621 "ussub", fixed_arith_modes[i].name, 3);
2622 arm_set_fixed_optab_libfunc (smul_optab, fixed_arith_modes[i].mode,
2623 "mul", fixed_arith_modes[i].name, 3);
2624 arm_set_fixed_optab_libfunc (ssmul_optab, fixed_arith_modes[i].mode,
2625 "ssmul", fixed_arith_modes[i].name, 3);
2626 arm_set_fixed_optab_libfunc (usmul_optab, fixed_arith_modes[i].mode,
2627 "usmul", fixed_arith_modes[i].name, 3);
2628 arm_set_fixed_optab_libfunc (sdiv_optab, fixed_arith_modes[i].mode,
2629 "div", fixed_arith_modes[i].name, 3);
2630 arm_set_fixed_optab_libfunc (udiv_optab, fixed_arith_modes[i].mode,
2631 "udiv", fixed_arith_modes[i].name, 3);
2632 arm_set_fixed_optab_libfunc (ssdiv_optab, fixed_arith_modes[i].mode,
2633 "ssdiv", fixed_arith_modes[i].name, 3);
2634 arm_set_fixed_optab_libfunc (usdiv_optab, fixed_arith_modes[i].mode,
2635 "usdiv", fixed_arith_modes[i].name, 3);
2636 arm_set_fixed_optab_libfunc (neg_optab, fixed_arith_modes[i].mode,
2637 "neg", fixed_arith_modes[i].name, 2);
2638 arm_set_fixed_optab_libfunc (ssneg_optab, fixed_arith_modes[i].mode,
2639 "ssneg", fixed_arith_modes[i].name, 2);
2640 arm_set_fixed_optab_libfunc (usneg_optab, fixed_arith_modes[i].mode,
2641 "usneg", fixed_arith_modes[i].name, 2);
2642 arm_set_fixed_optab_libfunc (ashl_optab, fixed_arith_modes[i].mode,
2643 "ashl", fixed_arith_modes[i].name, 3);
2644 arm_set_fixed_optab_libfunc (ashr_optab, fixed_arith_modes[i].mode,
2645 "ashr", fixed_arith_modes[i].name, 3);
2646 arm_set_fixed_optab_libfunc (lshr_optab, fixed_arith_modes[i].mode,
2647 "lshr", fixed_arith_modes[i].name, 3);
2648 arm_set_fixed_optab_libfunc (ssashl_optab, fixed_arith_modes[i].mode,
2649 "ssashl", fixed_arith_modes[i].name, 3);
2650 arm_set_fixed_optab_libfunc (usashl_optab, fixed_arith_modes[i].mode,
2651 "usashl", fixed_arith_modes[i].name, 3);
2652 arm_set_fixed_optab_libfunc (cmp_optab, fixed_arith_modes[i].mode,
2653 "cmp", fixed_arith_modes[i].name, 2);
2656 for (i = 0; i < ARRAY_SIZE (fixed_conv_modes); i++)
2657 for (j = 0; j < ARRAY_SIZE (fixed_conv_modes); j++)
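  /* Skip identity conversions, and pairs where neither mode is fixed-point;
     only conversions involving at least one fixed-point mode need the
     __gnu_-prefixed helpers.  */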
2659 if (i == j
2660 || (!ALL_FIXED_POINT_MODE_P (fixed_conv_modes[i].mode)
2661 && !ALL_FIXED_POINT_MODE_P (fixed_conv_modes[j].mode)))
2662 continue;
2664 arm_set_fixed_conv_libfunc (fract_optab, fixed_conv_modes[i].mode,
2665 fixed_conv_modes[j].mode, "fract",
2666 fixed_conv_modes[i].name,
2667 fixed_conv_modes[j].name);
2668 arm_set_fixed_conv_libfunc (satfract_optab,
2669 fixed_conv_modes[i].mode,
2670 fixed_conv_modes[j].mode, "satfract",
2671 fixed_conv_modes[i].name,
2672 fixed_conv_modes[j].name);
2673 arm_set_fixed_conv_libfunc (fractuns_optab,
2674 fixed_conv_modes[i].mode,
2675 fixed_conv_modes[j].mode, "fractuns",
2676 fixed_conv_modes[i].name,
2677 fixed_conv_modes[j].name);
2678 arm_set_fixed_conv_libfunc (satfractuns_optab,
2679 fixed_conv_modes[i].mode,
2680 fixed_conv_modes[j].mode, "satfractuns",
2681 fixed_conv_modes[i].name,
2682 fixed_conv_modes[j].name);
2686 if (TARGET_AAPCS_BASED)
2687 synchronize_libfunc = init_one_libfunc ("__sync_synchronize");
2690 /* On AAPCS systems, this is the "struct __va_list". */
2691 static GTY(()) tree va_list_type;
2693 /* Return the type to use as __builtin_va_list. */
2694 static tree
2695 arm_build_builtin_va_list (void)
2697 tree va_list_name;
2698 tree ap_field;
2700 if (!TARGET_AAPCS_BASED)
2701 return std_build_builtin_va_list ();
2703 /* AAPCS \S 7.1.4 requires that va_list be a typedef for a type
2704 defined as:
2706 struct __va_list
2708 void *__ap;
2711 The C Library ABI further reinforces this definition in \S
2712 4.1.
2714 We must follow this definition exactly. The structure tag
2715 name is visible in C++ mangled names, and thus forms a part
2716 of the ABI. The field name may be used by people who
2717 #include <stdarg.h>. */
2718 /* Create the type. */
2719 va_list_type = lang_hooks.types.make_type (RECORD_TYPE);
2720 /* Give it the required name. */
2721 va_list_name = build_decl (BUILTINS_LOCATION,
2722 TYPE_DECL,
2723 get_identifier ("__va_list"),
2724 va_list_type);
2725 DECL_ARTIFICIAL (va_list_name) = 1;
2726 TYPE_NAME (va_list_type) = va_list_name;
2727 TYPE_STUB_DECL (va_list_type) = va_list_name;
2728 /* Create the __ap field. */
2729 ap_field = build_decl (BUILTINS_LOCATION,
2730 FIELD_DECL,
2731 get_identifier ("__ap"),
2732 ptr_type_node);
2733 DECL_ARTIFICIAL (ap_field) = 1;
2734 DECL_FIELD_CONTEXT (ap_field) = va_list_type;
2735 TYPE_FIELDS (va_list_type) = ap_field;
2736 /* Compute its layout. */
2737 layout_type (va_list_type);
2739 return va_list_type;
2742 /* Return an expression of type "void *" pointing to the next
2743 available argument in a variable-argument list. VALIST is the
2744 user-level va_list object, of type __builtin_va_list. */
2745 static tree
2746 arm_extract_valist_ptr (tree valist)
2748 if (TREE_TYPE (valist) == error_mark_node)
2749 return error_mark_node;
2751 /* On an AAPCS target, the pointer is stored within "struct
2752 va_list". */
2753 if (TARGET_AAPCS_BASED)
2755 tree ap_field = TYPE_FIELDS (TREE_TYPE (valist));
2756 valist = build3 (COMPONENT_REF, TREE_TYPE (ap_field),
2757 valist, ap_field, NULL_TREE);
2760 return valist;
2763 /* Implement TARGET_EXPAND_BUILTIN_VA_START. */
2764 static void
2765 arm_expand_builtin_va_start (tree valist, rtx nextarg)
2767 valist = arm_extract_valist_ptr (valist);
2768 std_expand_builtin_va_start (valist, nextarg);
2771 /* Implement TARGET_GIMPLIFY_VA_ARG_EXPR. */
2772 static tree
2773 arm_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
2774 gimple_seq *post_p)
2776 valist = arm_extract_valist_ptr (valist);
2777 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
2780 /* Check any incompatible options that the user has specified. */
2781 static void
2782 arm_option_check_internal (struct gcc_options *opts)
2784 int flags = opts->x_target_flags;
2786 /* iWMMXt and NEON are incompatible. */
2787 if (TARGET_IWMMXT
2788 && bitmap_bit_p (arm_active_target.isa, isa_bit_neon))
2789 error ("iWMMXt and NEON are incompatible");
2791 /* Make sure that the processor choice does not conflict with any of the
2792 other command line choices. */
2793 if (TARGET_ARM_P (flags)
2794 && !bitmap_bit_p (arm_active_target.isa, isa_bit_notm))
2795 error ("target CPU does not support ARM mode");
2797 /* TARGET_BACKTRACE cannot be used here as crtl->is_leaf is not set yet. */
2798 if ((TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME) && TARGET_ARM_P (flags))
2799 warning (0, "enabling backtrace support is only meaningful when compiling for the Thumb");
2801 if (TARGET_ARM_P (flags) && TARGET_CALLEE_INTERWORKING)
2802 warning (0, "enabling callee interworking support is only meaningful when compiling for the Thumb");
2804 /* If this target is normally configured to use APCS frames, warn if they
2805 are turned off and debugging is turned on. */
2806 if (TARGET_ARM_P (flags)
2807 && write_symbols != NO_DEBUG
2808 && !TARGET_APCS_FRAME
2809 && (TARGET_DEFAULT & MASK_APCS_FRAME))
2810 warning (0, "-g with -mno-apcs-frame may not give sensible debugging");
2812 /* iWMMXt unsupported under Thumb mode. */
2813 if (TARGET_THUMB_P (flags) && TARGET_IWMMXT)
2814 error ("iWMMXt unsupported under Thumb mode");
2816 if (TARGET_HARD_TP && TARGET_THUMB1_P (flags))
2817 error ("can not use -mtp=cp15 with 16-bit Thumb");
2819 if (TARGET_THUMB_P (flags) && TARGET_VXWORKS_RTP && flag_pic)
2821 error ("RTP PIC is incompatible with Thumb");
2822 flag_pic = 0;
2825 /* We only support -mpure-code and -mslow-flash-data on M-profile targets
2826 with MOVT. */
2827 if ((target_pure_code || target_slow_flash_data)
2828 && (!TARGET_HAVE_MOVT || arm_arch_notm || flag_pic || TARGET_NEON))
2830 const char *flag = (target_pure_code ? "-mpure-code" :
2831 "-mslow-flash-data");
2832 error ("%s only supports non-pic code on M-profile targets with the "
2833 "MOVT instruction", flag);
2838 /* Recompute the global settings depending on target attribute options. */
2840 static void
2841 arm_option_params_internal (void)
2843 /* If we are not using the default (ARM mode) section anchor offset
2844 ranges, then set the correct ranges now. */
2845 if (TARGET_THUMB1)
2847 /* Thumb-1 LDR instructions cannot have negative offsets.
2848 Permissible positive offset ranges are 5-bit (for byte loads),
2849 6-bit (for halfword loads), or 7-bit (for word loads).
2850 Empirical results suggest a 7-bit anchor range gives the best
2851 overall code size. */
2852 targetm.min_anchor_offset = 0;
2853 targetm.max_anchor_offset = 127;
2855 else if (TARGET_THUMB2)
2857 /* The minimum is set such that the total size of the block
2858 for a particular anchor is 248 + 1 + 4095 bytes, which is
2859 divisible by eight, ensuring natural spacing of anchors. */
2860 targetm.min_anchor_offset = -248;
2861 targetm.max_anchor_offset = 4095;
2863 else
2865 targetm.min_anchor_offset = TARGET_MIN_ANCHOR_OFFSET;
2866 targetm.max_anchor_offset = TARGET_MAX_ANCHOR_OFFSET;
2869 if (optimize_size)
2871 /* If optimizing for size, bump the number of instructions that we
2872 are prepared to conditionally execute (even on a StrongARM). */
2873 max_insns_skipped = 6;
2875 /* For THUMB2, we limit the conditional sequence to one IT block. */
2876 if (TARGET_THUMB2)
2877 max_insns_skipped = arm_restrict_it ? 1 : 4;
2879 else
2880 /* When -mrestrict-it is in use, tone down the if-conversion. */
2881 max_insns_skipped = (TARGET_THUMB2 && arm_restrict_it)
2882 ? 1 : current_tune->max_insns_skipped;
2885 /* True if -mflip-thumb should next add an attribute for the default
2886 mode, false if it should next add an attribute for the opposite mode. */
2887 static GTY(()) bool thumb_flipper;
2889 /* Options after initial target override. */
2890 static GTY(()) tree init_optimize;
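/* Default function alignment: 2 bytes when optimizing Thumb code for size,
   4 bytes otherwise, unless the user gave an explicit -falign-functions
   value.  */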
2892 static void
2893 arm_override_options_after_change_1 (struct gcc_options *opts)
2895 if (opts->x_align_functions <= 0)
2896 opts->x_align_functions = TARGET_THUMB_P (opts->x_target_flags)
2897 && opts->x_optimize_size ? 2 : 4;
2900 /* Implement targetm.override_options_after_change. */
2902 static void
2903 arm_override_options_after_change (void)
2905 arm_configure_build_target (&arm_active_target,
2906 TREE_TARGET_OPTION (target_option_default_node),
2907 &global_options_set, false);
2909 arm_override_options_after_change_1 (&global_options);
2912 /* Implement TARGET_OPTION_SAVE. */
2913 static void
2914 arm_option_save (struct cl_target_option *ptr, struct gcc_options *opts)
2916 ptr->x_arm_arch_string = opts->x_arm_arch_string;
2917 ptr->x_arm_cpu_string = opts->x_arm_cpu_string;
2918 ptr->x_arm_tune_string = opts->x_arm_tune_string;
2921 /* Implement TARGET_OPTION_RESTORE. */
2922 static void
2923 arm_option_restore (struct gcc_options *opts, struct cl_target_option *ptr)
2925 opts->x_arm_arch_string = ptr->x_arm_arch_string;
2926 opts->x_arm_cpu_string = ptr->x_arm_cpu_string;
2927 opts->x_arm_tune_string = ptr->x_arm_tune_string;
2928 arm_configure_build_target (&arm_active_target, ptr, &global_options_set,
2929 false);
2932 /* Reset options between modes that the user has specified. */
2933 static void
2934 arm_option_override_internal (struct gcc_options *opts,
2935 struct gcc_options *opts_set)
2937 arm_override_options_after_change_1 (opts);
2939 if (TARGET_INTERWORK && !bitmap_bit_p (arm_active_target.isa, isa_bit_thumb))
2941 /* The default is to enable interworking, so this warning message would
2942 be confusing to users who have just compiled with, e.g., -march=armv3. */
2943 /* warning (0, "ignoring -minterwork because target CPU does not support THUMB"); */
2944 opts->x_target_flags &= ~MASK_INTERWORK;
2947 if (TARGET_THUMB_P (opts->x_target_flags)
2948 && !bitmap_bit_p (arm_active_target.isa, isa_bit_thumb))
2950 warning (0, "target CPU does not support THUMB instructions");
2951 opts->x_target_flags &= ~MASK_THUMB;
2954 if (TARGET_APCS_FRAME && TARGET_THUMB_P (opts->x_target_flags))
2956 /* warning (0, "ignoring -mapcs-frame because -mthumb was used"); */
2957 opts->x_target_flags &= ~MASK_APCS_FRAME;
2960 /* Callee super interworking implies thumb interworking. Adding
2961 this to the flags here simplifies the logic elsewhere. */
2962 if (TARGET_THUMB_P (opts->x_target_flags) && TARGET_CALLEE_INTERWORKING)
2963 opts->x_target_flags |= MASK_INTERWORK;
2965 /* Need to remember the initial values so that combinations of options like
2966 -mflip-thumb -mthumb -fno-schedule-insns work for any attribute. */
2967 cl_optimization *to = TREE_OPTIMIZATION (init_optimize);
2969 if (! opts_set->x_arm_restrict_it)
2970 opts->x_arm_restrict_it = arm_arch8;
2972 /* ARM execution state and M profile don't have [restrict] IT. */
2973 if (!TARGET_THUMB2_P (opts->x_target_flags) || !arm_arch_notm)
2974 opts->x_arm_restrict_it = 0;
2976 /* Enable -munaligned-access by default for
2977 - all ARMv6 architecture-based processors when compiling for a 32-bit ISA
2978 i.e. Thumb2 and ARM state only.
2979 - ARMv7-A, ARMv7-R, and ARMv7-M architecture-based processors.
2980 - ARMv8 architecture-based processors.
2982 Disable -munaligned-access by default for
2983 - all pre-ARMv6 architecture-based processors
2984 - ARMv6-M architecture-based processors
2985 - ARMv8-M Baseline processors. */
2987 if (! opts_set->x_unaligned_access)
2989 opts->x_unaligned_access = (TARGET_32BIT_P (opts->x_target_flags)
2990 && arm_arch6 && (arm_arch_notm || arm_arch7));
2992 else if (opts->x_unaligned_access == 1
2993 && !(arm_arch6 && (arm_arch_notm || arm_arch7)))
2995 warning (0, "target CPU does not support unaligned accesses");
2996 opts->x_unaligned_access = 0;
2999 /* Don't warn since it's on by default in -O2. */
3000 if (TARGET_THUMB1_P (opts->x_target_flags))
3001 opts->x_flag_schedule_insns = 0;
3002 else
3003 opts->x_flag_schedule_insns = to->x_flag_schedule_insns;
3005 /* Disable shrink-wrap when optimizing a function for size, since it tends to
3006 generate additional returns. */
3007 if (optimize_function_for_size_p (cfun)
3008 && TARGET_THUMB2_P (opts->x_target_flags))
3009 opts->x_flag_shrink_wrap = false;
3010 else
3011 opts->x_flag_shrink_wrap = to->x_flag_shrink_wrap;
3013 /* In Thumb1 mode, we emit the epilogue in RTL, but the last insn
3014 - epilogue_insns - does not accurately model the corresponding insns
3015 emitted in the asm file. In particular, see the comment in thumb_exit
3016 'Find out how many of the (return) argument registers we can corrupt'.
3017 As a consequence, the epilogue may clobber registers without fipa-ra
3018 finding out about it. Therefore, disable fipa-ra in Thumb1 mode.
3019 TODO: Accurately model clobbers for epilogue_insns and reenable
3020 fipa-ra. */
3021 if (TARGET_THUMB1_P (opts->x_target_flags))
3022 opts->x_flag_ipa_ra = 0;
3023 else
3024 opts->x_flag_ipa_ra = to->x_flag_ipa_ra;
3026 /* Thumb2 inline assembly code should always use unified syntax.
3027 This will apply to ARM and Thumb1 eventually. */
3028 opts->x_inline_asm_unified = TARGET_THUMB2_P (opts->x_target_flags);
3030 #ifdef SUBTARGET_OVERRIDE_INTERNAL_OPTIONS
3031 SUBTARGET_OVERRIDE_INTERNAL_OPTIONS;
3032 #endif
3035 static sbitmap isa_all_fpubits;
3036 static sbitmap isa_quirkbits;
3038 /* Configure a build target TARGET from the user-specified options OPTS and
3039 OPTS_SET. If WARN_COMPATIBLE, emit a diagnostic if both the CPU and
3040 architecture have been specified, but the two are not identical. */
3041 void
3042 arm_configure_build_target (struct arm_build_target *target,
3043 struct cl_target_option *opts,
3044 struct gcc_options *opts_set,
3045 bool warn_compatible)
3047 const cpu_option *arm_selected_tune = NULL;
3048 const arch_option *arm_selected_arch = NULL;
3049 const cpu_option *arm_selected_cpu = NULL;
3050 const arm_fpu_desc *arm_selected_fpu = NULL;
3051 const char *tune_opts = NULL;
3052 const char *arch_opts = NULL;
3053 const char *cpu_opts = NULL;
3055 bitmap_clear (target->isa);
3056 target->core_name = NULL;
3057 target->arch_name = NULL;
3059 if (opts_set->x_arm_arch_string)
3061 arm_selected_arch = arm_parse_arch_option_name (all_architectures,
3062 "-march",
3063 opts->x_arm_arch_string);
3064 arch_opts = strchr (opts->x_arm_arch_string, '+');
3067 if (opts_set->x_arm_cpu_string)
3069 arm_selected_cpu = arm_parse_cpu_option_name (all_cores, "-mcpu",
3070 opts->x_arm_cpu_string);
3071 cpu_opts = strchr (opts->x_arm_cpu_string, '+');
3072 arm_selected_tune = arm_selected_cpu;
3073 /* If taking the tuning from -mcpu, we don't need to rescan the
3074 options for tuning. */
3077 if (opts_set->x_arm_tune_string)
3079 arm_selected_tune = arm_parse_cpu_option_name (all_cores, "-mtune",
3080 opts->x_arm_tune_string);
3081 tune_opts = strchr (opts->x_arm_tune_string, '+');
3084 if (arm_selected_arch)
3086 arm_initialize_isa (target->isa, arm_selected_arch->common.isa_bits);
3087 arm_parse_option_features (target->isa, &arm_selected_arch->common,
3088 arch_opts);
3090 if (arm_selected_cpu)
3092 auto_sbitmap cpu_isa (isa_num_bits);
3093 auto_sbitmap isa_delta (isa_num_bits);
3095 arm_initialize_isa (cpu_isa, arm_selected_cpu->common.isa_bits);
3096 arm_parse_option_features (cpu_isa, &arm_selected_cpu->common,
3097 cpu_opts);
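      /* Check how the CPU's capabilities differ from those implied by the
	 selected architecture; any remaining difference (beyond quirk and FPU
	 bits) means the -mcpu and -march options conflict.  */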
3098 bitmap_xor (isa_delta, cpu_isa, target->isa);
3099 /* Ignore any bits that are quirk bits. */
3100 bitmap_and_compl (isa_delta, isa_delta, isa_quirkbits);
3101 /* Ignore (for now) any bits that might be set by -mfpu. */
3102 bitmap_and_compl (isa_delta, isa_delta, isa_all_fpubits);
3104 if (!bitmap_empty_p (isa_delta))
3106 if (warn_compatible)
3107 warning (0, "switch -mcpu=%s conflicts with -march=%s switch",
3108 arm_selected_cpu->common.name,
3109 arm_selected_arch->common.name);
3110 /* -march wins for code generation.
3111 -mcpu wins for default tuning. */
3112 if (!arm_selected_tune)
3113 arm_selected_tune = arm_selected_cpu;
3115 arm_selected_cpu = all_cores + arm_selected_arch->tune_id;
3116 target->arch_name = arm_selected_arch->common.name;
3118 else
3120 /* Architecture and CPU are essentially the same.
3121 Prefer the CPU setting. */
3122 arm_selected_arch = all_architectures + arm_selected_cpu->arch;
3123 target->core_name = arm_selected_cpu->common.name;
3124 /* Copy the CPU's capabilities, so that we inherit the
3125 appropriate extensions and quirks. */
3126 bitmap_copy (target->isa, cpu_isa);
3129 else
3131 /* Pick a CPU based on the architecture. */
3132 arm_selected_cpu = all_cores + arm_selected_arch->tune_id;
3133 target->arch_name = arm_selected_arch->common.name;
3134 /* Note: target->core_name is left unset in this path. */
3137 else if (arm_selected_cpu)
3139 target->core_name = arm_selected_cpu->common.name;
3140 arm_initialize_isa (target->isa, arm_selected_cpu->common.isa_bits);
3141 arm_parse_option_features (target->isa, &arm_selected_cpu->common,
3142 cpu_opts);
3143 arm_selected_arch = all_architectures + arm_selected_cpu->arch;
3145 /* If the user did not specify a processor or architecture, choose
3146 one for them. */
3147 else
3149 const cpu_option *sel;
3150 auto_sbitmap sought_isa (isa_num_bits);
3151 bitmap_clear (sought_isa);
3152 auto_sbitmap default_isa (isa_num_bits);
3154 arm_selected_cpu = arm_parse_cpu_option_name (all_cores, "default CPU",
3155 TARGET_CPU_DEFAULT);
3156 cpu_opts = strchr (TARGET_CPU_DEFAULT, '+');
3157 gcc_assert (arm_selected_cpu->common.name);
3159 /* RWE: All of the selection logic below (to the end of this
3160 'if' clause) looks somewhat suspect. It appears to be mostly
3161 there to support forcing thumb support when the default CPU
3162 does not have thumb (somewhat dubious in terms of what the
3163 user might be expecting). I think it should be removed once
3164 support for the pre-thumb era cores is removed. */
3165 sel = arm_selected_cpu;
3166 arm_initialize_isa (default_isa, sel->common.isa_bits);
3167 arm_parse_option_features (default_isa, &arm_selected_cpu->common,
3168 cpu_opts);
3170 /* Now check to see if the user has specified any command line
3171 switches that require certain abilities from the cpu. */
3173 if (TARGET_INTERWORK || TARGET_THUMB)
3175 bitmap_set_bit (sought_isa, isa_bit_thumb);
3176 bitmap_set_bit (sought_isa, isa_bit_mode32);
3178 /* There are no ARM processors that support both APCS-26 and
3179 interworking. Therefore we forcibly remove MODE26 from
3180 the isa features here (if it was set), so that the
3181 search below will always be able to find a compatible
3182 processor. */
3183 bitmap_clear_bit (default_isa, isa_bit_mode26);
3186 /* If there are such requirements and the default CPU does not
3187 satisfy them, we need to run over the complete list of
3188 cores looking for one that is satisfactory. */
3189 if (!bitmap_empty_p (sought_isa)
3190 && !bitmap_subset_p (sought_isa, default_isa))
3192 auto_sbitmap candidate_isa (isa_num_bits);
3193 /* We're only interested in a CPU with at least the
3194 capabilities of the default CPU and the required
3195 additional features. */
3196 bitmap_ior (default_isa, default_isa, sought_isa);
3198 /* Try to locate a CPU type that supports all of the abilities
3199 of the default CPU, plus the extra abilities requested by
3200 the user. */
3201 for (sel = all_cores; sel->common.name != NULL; sel++)
3203 arm_initialize_isa (candidate_isa, sel->common.isa_bits);
3204 /* An exact match? */
3205 if (bitmap_equal_p (default_isa, candidate_isa))
3206 break;
3209 if (sel->common.name == NULL)
3211 unsigned current_bit_count = isa_num_bits;
3212 const cpu_option *best_fit = NULL;
3214 /* Ideally we would like to issue an error message here
3215 saying that it was not possible to find a CPU compatible
3216 with the default CPU, but which also supports the command
3217 line options specified by the programmer, and so they
3218 ought to use the -mcpu=<name> command line option to
3219 override the default CPU type.
3221 If we cannot find a CPU that has exactly the
3222 characteristics of the default CPU and the given
3223 command line options we scan the array again looking
3224 for a best match. The best match must have at least
3225 the capabilities of the perfect match. */
3226 for (sel = all_cores; sel->common.name != NULL; sel++)
3228 arm_initialize_isa (candidate_isa, sel->common.isa_bits);
3230 if (bitmap_subset_p (default_isa, candidate_isa))
3232 unsigned count;
3234 bitmap_and_compl (candidate_isa, candidate_isa,
3235 default_isa);
3236 count = bitmap_popcount (candidate_isa);
3238 if (count < current_bit_count)
3240 best_fit = sel;
3241 current_bit_count = count;
3245 gcc_assert (best_fit);
3246 sel = best_fit;
3249 arm_selected_cpu = sel;
3252 /* Now we know the CPU, we can finally initialize the target
3253 structure. */
3254 target->core_name = arm_selected_cpu->common.name;
3255 arm_initialize_isa (target->isa, arm_selected_cpu->common.isa_bits);
3256 arm_parse_option_features (target->isa, &arm_selected_cpu->common,
3257 cpu_opts);
3258 arm_selected_arch = all_architectures + arm_selected_cpu->arch;
3261 gcc_assert (arm_selected_cpu);
3262 gcc_assert (arm_selected_arch);
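  /* If an FPU was given explicitly with -mfpu, replace whatever FPU feature
     bits the CPU/architecture implied with the bits of the requested FPU.  */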
3264 if (opts->x_arm_fpu_index != TARGET_FPU_auto)
3266 arm_selected_fpu = &all_fpus[opts->x_arm_fpu_index];
3267 auto_sbitmap fpu_bits (isa_num_bits);
3269 arm_initialize_isa (fpu_bits, arm_selected_fpu->isa_bits);
3270 bitmap_and_compl (target->isa, target->isa, isa_all_fpubits);
3271 bitmap_ior (target->isa, target->isa, fpu_bits);
3274 if (!arm_selected_tune)
3275 arm_selected_tune = arm_selected_cpu;
3276 else /* Validate the features passed to -mtune. */
3277 arm_parse_option_features (NULL, &arm_selected_tune->common, tune_opts);
3279 const cpu_tune *tune_data = &all_tunes[arm_selected_tune - all_cores];
3281 /* Finish initializing the target structure. */
3282 target->arch_pp_name = arm_selected_arch->arch;
3283 target->base_arch = arm_selected_arch->base_arch;
3284 target->profile = arm_selected_arch->profile;
3286 target->tune_flags = tune_data->tune_flags;
3287 target->tune = tune_data->tune;
3288 target->tune_core = tune_data->scheduler;
3291 /* Fix up any incompatible options that the user has specified. */
3292 static void
3293 arm_option_override (void)
3295 static const enum isa_feature fpu_bitlist[]
3296 = { ISA_ALL_FPU_INTERNAL, isa_nobit };
3297 static const enum isa_feature quirk_bitlist[] = { ISA_ALL_QUIRKS, isa_nobit};
3298 cl_target_option opts;
3300 isa_quirkbits = sbitmap_alloc (isa_num_bits);
3301 arm_initialize_isa (isa_quirkbits, quirk_bitlist);
3303 isa_all_fpubits = sbitmap_alloc (isa_num_bits);
3304 arm_initialize_isa (isa_all_fpubits, fpu_bitlist);
3306 arm_active_target.isa = sbitmap_alloc (isa_num_bits);
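  /* -mfpu was not given on the command line: record the "auto" setting so the
     FPU capabilities are later derived from the CPU/architecture description.  */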
3308 if (!global_options_set.x_arm_fpu_index)
3310 bool ok;
3311 int fpu_index;
3313 ok = opt_enum_arg_to_value (OPT_mfpu_, FPUTYPE_AUTO, &fpu_index,
3314 CL_TARGET);
3315 gcc_assert (ok);
3316 arm_fpu_index = (enum fpu_type) fpu_index;
3319 cl_target_option_save (&opts, &global_options);
3320 arm_configure_build_target (&arm_active_target, &opts, &global_options_set,
3321 true);
3323 #ifdef SUBTARGET_OVERRIDE_OPTIONS
3324 SUBTARGET_OVERRIDE_OPTIONS;
3325 #endif
3327 sprintf (arm_arch_name, "__ARM_ARCH_%s__", arm_active_target.arch_pp_name);
3328 arm_base_arch = arm_active_target.base_arch;
3330 arm_tune = arm_active_target.tune_core;
3331 tune_flags = arm_active_target.tune_flags;
3332 current_tune = arm_active_target.tune;
3334 /* TBD: Dwarf info for apcs frame is not handled yet. */
3335 if (TARGET_APCS_FRAME)
3336 flag_shrink_wrap = false;
3338 /* BPABI targets use linker tricks to allow interworking on cores
3339 without thumb support. */
3340 if (TARGET_INTERWORK
3341 && !TARGET_BPABI
3342 && !bitmap_bit_p (arm_active_target.isa, isa_bit_thumb))
 3344 	      warning (0, "target CPU does not support interworking");
3345 target_flags &= ~MASK_INTERWORK;
3348 if (TARGET_APCS_STACK && !TARGET_APCS_FRAME)
3350 warning (0, "-mapcs-stack-check incompatible with -mno-apcs-frame");
3351 target_flags |= MASK_APCS_FRAME;
3354 if (TARGET_POKE_FUNCTION_NAME)
3355 target_flags |= MASK_APCS_FRAME;
3357 if (TARGET_APCS_REENT && flag_pic)
3358 error ("-fpic and -mapcs-reent are incompatible");
3360 if (TARGET_APCS_REENT)
3361 warning (0, "APCS reentrant code not supported. Ignored");
3363 /* Initialize boolean versions of the architectural flags, for use
3364 in the arm.md file. */
3365 arm_arch3m = bitmap_bit_p (arm_active_target.isa, isa_bit_ARMv3m);
3366 arm_arch4 = bitmap_bit_p (arm_active_target.isa, isa_bit_ARMv4);
3367 arm_arch4t = arm_arch4 && bitmap_bit_p (arm_active_target.isa, isa_bit_thumb);
3368 arm_arch5 = bitmap_bit_p (arm_active_target.isa, isa_bit_ARMv5);
3369 arm_arch5e = bitmap_bit_p (arm_active_target.isa, isa_bit_ARMv5e);
3370 arm_arch5te = arm_arch5e
3371 && bitmap_bit_p (arm_active_target.isa, isa_bit_thumb);
3372 arm_arch6 = bitmap_bit_p (arm_active_target.isa, isa_bit_ARMv6);
3373 arm_arch6k = bitmap_bit_p (arm_active_target.isa, isa_bit_ARMv6k);
3374 arm_arch_notm = bitmap_bit_p (arm_active_target.isa, isa_bit_notm);
3375 arm_arch6m = arm_arch6 && !arm_arch_notm;
3376 arm_arch7 = bitmap_bit_p (arm_active_target.isa, isa_bit_ARMv7);
3377 arm_arch7em = bitmap_bit_p (arm_active_target.isa, isa_bit_ARMv7em);
3378 arm_arch8 = bitmap_bit_p (arm_active_target.isa, isa_bit_ARMv8);
3379 arm_arch8_1 = bitmap_bit_p (arm_active_target.isa, isa_bit_ARMv8_1);
3380 arm_arch8_2 = bitmap_bit_p (arm_active_target.isa, isa_bit_ARMv8_2);
3381 arm_arch_thumb1 = bitmap_bit_p (arm_active_target.isa, isa_bit_thumb);
3382 arm_arch_thumb2 = bitmap_bit_p (arm_active_target.isa, isa_bit_thumb2);
3383 arm_arch_xscale = bitmap_bit_p (arm_active_target.isa, isa_bit_xscale);
3384 arm_arch_iwmmxt = bitmap_bit_p (arm_active_target.isa, isa_bit_iwmmxt);
3385 arm_arch_iwmmxt2 = bitmap_bit_p (arm_active_target.isa, isa_bit_iwmmxt2);
3386 arm_arch_thumb_hwdiv = bitmap_bit_p (arm_active_target.isa, isa_bit_tdiv);
3387 arm_arch_arm_hwdiv = bitmap_bit_p (arm_active_target.isa, isa_bit_adiv);
3388 arm_arch_crc = bitmap_bit_p (arm_active_target.isa, isa_bit_crc32);
3389 arm_arch_cmse = bitmap_bit_p (arm_active_target.isa, isa_bit_cmse);
3390 arm_fp16_inst = bitmap_bit_p (arm_active_target.isa, isa_bit_fp16);
3391 arm_arch_lpae = bitmap_bit_p (arm_active_target.isa, isa_bit_lpae);
3392 if (arm_fp16_inst)
3394 if (arm_fp16_format == ARM_FP16_FORMAT_ALTERNATIVE)
3395 error ("selected fp16 options are incompatible");
3396 arm_fp16_format = ARM_FP16_FORMAT_IEEE;
3400 /* Set up some tuning parameters. */
3401 arm_ld_sched = (tune_flags & TF_LDSCHED) != 0;
3402 arm_tune_strongarm = (tune_flags & TF_STRONG) != 0;
3403 arm_tune_wbuf = (tune_flags & TF_WBUF) != 0;
3404 arm_tune_xscale = (tune_flags & TF_XSCALE) != 0;
3405 arm_tune_cortex_a9 = (arm_tune == TARGET_CPU_cortexa9) != 0;
3406 arm_m_profile_small_mul = (tune_flags & TF_SMALLMUL) != 0;
3408 /* And finally, set up some quirks. */
3409 arm_arch_no_volatile_ce
3410 = bitmap_bit_p (arm_active_target.isa, isa_quirk_no_volatile_ce);
3411 arm_arch6kz
3412 = arm_arch6k && bitmap_bit_p (arm_active_target.isa, isa_quirk_ARMv6kz);
3414 /* V5 code we generate is completely interworking capable, so we turn off
3415 TARGET_INTERWORK here to avoid many tests later on. */
3417 /* XXX However, we must pass the right pre-processor defines to CPP
3418 or GLD can get confused. This is a hack. */
3419 if (TARGET_INTERWORK)
3420 arm_cpp_interwork = 1;
3422 if (arm_arch5)
3423 target_flags &= ~MASK_INTERWORK;
3425 if (TARGET_IWMMXT && !ARM_DOUBLEWORD_ALIGN)
3426 error ("iwmmxt requires an AAPCS compatible ABI for proper operation");
3428 if (TARGET_IWMMXT_ABI && !TARGET_IWMMXT)
3429 error ("iwmmxt abi requires an iwmmxt capable cpu");
3431 /* If soft-float is specified then don't use FPU. */
3432 if (TARGET_SOFT_FLOAT)
3433 arm_fpu_attr = FPU_NONE;
3434 else
3435 arm_fpu_attr = FPU_VFP;
3437 if (TARGET_AAPCS_BASED)
3439 if (TARGET_CALLER_INTERWORKING)
3440 error ("AAPCS does not support -mcaller-super-interworking");
3441 else
3442 if (TARGET_CALLEE_INTERWORKING)
3443 error ("AAPCS does not support -mcallee-super-interworking");
3446 /* __fp16 support currently assumes the core has ldrh. */
3447 if (!arm_arch4 && arm_fp16_format != ARM_FP16_FORMAT_NONE)
3448 sorry ("__fp16 and no ldrh");
3450 if (TARGET_AAPCS_BASED)
3452 if (arm_abi == ARM_ABI_IWMMXT)
3453 arm_pcs_default = ARM_PCS_AAPCS_IWMMXT;
3454 else if (TARGET_HARD_FLOAT_ABI)
3456 arm_pcs_default = ARM_PCS_AAPCS_VFP;
3457 if (!bitmap_bit_p (arm_active_target.isa, isa_bit_VFPv2))
3458 error ("-mfloat-abi=hard: selected processor lacks an FPU");
3460 else
3461 arm_pcs_default = ARM_PCS_AAPCS;
3463 else
3465 if (arm_float_abi == ARM_FLOAT_ABI_HARD)
3466 sorry ("-mfloat-abi=hard and VFP");
3468 if (arm_abi == ARM_ABI_APCS)
3469 arm_pcs_default = ARM_PCS_APCS;
3470 else
3471 arm_pcs_default = ARM_PCS_ATPCS;
3474 /* For arm2/3 there is no need to do any scheduling if we are doing
3475 software floating-point. */
3476 if (TARGET_SOFT_FLOAT && (tune_flags & TF_NO_MODE32))
3477 flag_schedule_insns = flag_schedule_insns_after_reload = 0;
3479 /* Use the cp15 method if it is available. */
3480 if (target_thread_pointer == TP_AUTO)
3482 if (arm_arch6k && !TARGET_THUMB1)
3483 target_thread_pointer = TP_CP15;
3484 else
3485 target_thread_pointer = TP_SOFT;
3488 /* Override the default structure alignment for AAPCS ABI. */
3489 if (!global_options_set.x_arm_structure_size_boundary)
3491 if (TARGET_AAPCS_BASED)
3492 arm_structure_size_boundary = 8;
3494 else
3496 warning (0, "option %<-mstructure-size-boundary%> is deprecated");
3498 if (arm_structure_size_boundary != 8
3499 && arm_structure_size_boundary != 32
3500 && !(ARM_DOUBLEWORD_ALIGN && arm_structure_size_boundary == 64))
3502 if (ARM_DOUBLEWORD_ALIGN)
3503 warning (0,
3504 "structure size boundary can only be set to 8, 32 or 64");
3505 else
3506 warning (0, "structure size boundary can only be set to 8 or 32");
3507 arm_structure_size_boundary
3508 = (TARGET_AAPCS_BASED ? 8 : DEFAULT_STRUCTURE_SIZE_BOUNDARY);
3512 if (TARGET_VXWORKS_RTP)
3514 if (!global_options_set.x_arm_pic_data_is_text_relative)
3515 arm_pic_data_is_text_relative = 0;
3517 else if (flag_pic
3518 && !arm_pic_data_is_text_relative
3519 && !(global_options_set.x_target_flags & MASK_SINGLE_PIC_BASE))
3520 /* When text & data segments don't have a fixed displacement, the
3521 intended use is with a single, read only, pic base register.
3522 Unless the user explicitly requested not to do that, set
3523 it. */
3524 target_flags |= MASK_SINGLE_PIC_BASE;
3526 /* If stack checking is disabled, we can use r10 as the PIC register,
3527 which keeps r9 available. The EABI specifies r9 as the PIC register. */
3528 if (flag_pic && TARGET_SINGLE_PIC_BASE)
3530 if (TARGET_VXWORKS_RTP)
3531 warning (0, "RTP PIC is incompatible with -msingle-pic-base");
3532 arm_pic_register = (TARGET_APCS_STACK || TARGET_AAPCS_BASED) ? 9 : 10;
3535 if (flag_pic && TARGET_VXWORKS_RTP)
3536 arm_pic_register = 9;
3538 if (arm_pic_register_string != NULL)
3540 int pic_register = decode_reg_name (arm_pic_register_string);
3542 if (!flag_pic)
3543 warning (0, "-mpic-register= is useless without -fpic");
3545 /* Prevent the user from choosing an obviously stupid PIC register. */
3546 else if (pic_register < 0 || call_used_regs[pic_register]
3547 || pic_register == HARD_FRAME_POINTER_REGNUM
3548 || pic_register == STACK_POINTER_REGNUM
3549 || pic_register >= PC_REGNUM
3550 || (TARGET_VXWORKS_RTP
3551 && (unsigned int) pic_register != arm_pic_register))
3552 error ("unable to use '%s' for PIC register", arm_pic_register_string);
3553 else
3554 arm_pic_register = pic_register;
3557 /* Enable -mfix-cortex-m3-ldrd by default for Cortex-M3 cores. */
3558 if (fix_cm3_ldrd == 2)
3560 if (bitmap_bit_p (arm_active_target.isa, isa_quirk_cm3_ldrd))
3561 fix_cm3_ldrd = 1;
3562 else
3563 fix_cm3_ldrd = 0;
3566 /* Hot/Cold partitioning is not currently supported, since we can't
3567 handle literal pool placement in that case. */
3568 if (flag_reorder_blocks_and_partition)
3570 inform (input_location,
3571 "-freorder-blocks-and-partition not supported on this architecture");
3572 flag_reorder_blocks_and_partition = 0;
3573 flag_reorder_blocks = 1;
3576 if (flag_pic)
3577 /* Hoisting PIC address calculations more aggressively provides a small,
3578 but measurable, size reduction for PIC code. Therefore, we decrease
3579 the bar for unrestricted expression hoisting to the cost of PIC address
3580 calculation, which is 2 instructions. */
3581 maybe_set_param_value (PARAM_GCSE_UNRESTRICTED_COST, 2,
3582 global_options.x_param_values,
3583 global_options_set.x_param_values);
3585 /* ARM EABI defaults to strict volatile bitfields. */
3586 if (TARGET_AAPCS_BASED && flag_strict_volatile_bitfields < 0
3587 && abi_version_at_least(2))
3588 flag_strict_volatile_bitfields = 1;
 3590   /* Enable sw prefetching at -O3 for CPUs that have prefetch, and we
3591 have deemed it beneficial (signified by setting
3592 prefetch.num_slots to 1 or more). */
3593 if (flag_prefetch_loop_arrays < 0
3594 && HAVE_prefetch
3595 && optimize >= 3
3596 && current_tune->prefetch.num_slots > 0)
3597 flag_prefetch_loop_arrays = 1;
3599 /* Set up parameters to be used in prefetching algorithm. Do not
3600 override the defaults unless we are tuning for a core we have
3601 researched values for. */
3602 if (current_tune->prefetch.num_slots > 0)
3603 maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES,
3604 current_tune->prefetch.num_slots,
3605 global_options.x_param_values,
3606 global_options_set.x_param_values);
3607 if (current_tune->prefetch.l1_cache_line_size >= 0)
3608 maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE,
3609 current_tune->prefetch.l1_cache_line_size,
3610 global_options.x_param_values,
3611 global_options_set.x_param_values);
3612 if (current_tune->prefetch.l1_cache_size >= 0)
3613 maybe_set_param_value (PARAM_L1_CACHE_SIZE,
3614 current_tune->prefetch.l1_cache_size,
3615 global_options.x_param_values,
3616 global_options_set.x_param_values);
 3618   /* Use Neon to perform 64-bit operations rather than core
3619 registers. */
3620 prefer_neon_for_64bits = current_tune->prefer_neon_for_64bits;
3621 if (use_neon_for_64bits == 1)
3622 prefer_neon_for_64bits = true;
3624 /* Use the alternative scheduling-pressure algorithm by default. */
3625 maybe_set_param_value (PARAM_SCHED_PRESSURE_ALGORITHM, SCHED_PRESSURE_MODEL,
3626 global_options.x_param_values,
3627 global_options_set.x_param_values);
3629 /* Look through ready list and all of queue for instructions
3630 relevant for L2 auto-prefetcher. */
3631 int param_sched_autopref_queue_depth;
3633 switch (current_tune->sched_autopref)
3635 case tune_params::SCHED_AUTOPREF_OFF:
3636 param_sched_autopref_queue_depth = -1;
3637 break;
3639 case tune_params::SCHED_AUTOPREF_RANK:
3640 param_sched_autopref_queue_depth = 0;
3641 break;
3643 case tune_params::SCHED_AUTOPREF_FULL:
3644 param_sched_autopref_queue_depth = max_insn_queue_index + 1;
3645 break;
3647 default:
3648 gcc_unreachable ();
3651 maybe_set_param_value (PARAM_SCHED_AUTOPREF_QUEUE_DEPTH,
3652 param_sched_autopref_queue_depth,
3653 global_options.x_param_values,
3654 global_options_set.x_param_values);
3656 /* Currently, for slow flash data, we just disable literal pools. We also
3657 disable it for pure-code. */
3658 if (target_slow_flash_data || target_pure_code)
3659 arm_disable_literal_pool = true;
3661 if (use_cmse && !arm_arch_cmse)
3662 error ("target CPU does not support ARMv8-M Security Extensions");
 3664   /* Disable scheduling fusion by default if the processor is not ARMv7
3665 or doesn't prefer ldrd/strd. */
3666 if (flag_schedule_fusion == 2
3667 && (!arm_arch7 || !current_tune->prefer_ldrd_strd))
3668 flag_schedule_fusion = 0;
 3670   /* Need to remember initial options before they are overridden.  */
3671 init_optimize = build_optimization_node (&global_options);
3673 arm_option_override_internal (&global_options, &global_options_set);
3674 arm_option_check_internal (&global_options);
3675 arm_option_params_internal ();
3677 /* Create the default target_options structure. */
3678 target_option_default_node = target_option_current_node
3679 = build_target_option_node (&global_options);
3681 /* Register global variables with the garbage collector. */
3682 arm_add_gc_roots ();
 3684   /* Record the initial mode for testing.  */
3685 thumb_flipper = TARGET_THUMB;
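/* As an illustration of how the information computed above is consumed
   (hypothetical user code, not part of this file): arm_arch_name, built
   with the sprintf earlier in this function, is later emitted as a
   preprocessor built-in by the target's CPP-builtins hook, so source code
   can test macros such as

     #if defined (__ARM_ARCH_7A__)
       ... use ARMv7-A specific sequences ...
     #endif

   together with the other feature macros derived from arm_active_target.  */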
3688 static void
3689 arm_add_gc_roots (void)
3691 gcc_obstack_init(&minipool_obstack);
3692 minipool_startobj = (char *) obstack_alloc (&minipool_obstack, 0);
3695 /* A table of known ARM exception types.
3696 For use with the interrupt function attribute. */
3698 typedef struct
3700 const char *const arg;
3701 const unsigned long return_value;
3703 isr_attribute_arg;
3705 static const isr_attribute_arg isr_attribute_args [] =
3707 { "IRQ", ARM_FT_ISR },
3708 { "irq", ARM_FT_ISR },
3709 { "FIQ", ARM_FT_FIQ },
3710 { "fiq", ARM_FT_FIQ },
3711 { "ABORT", ARM_FT_ISR },
3712 { "abort", ARM_FT_ISR },
3713 { "ABORT", ARM_FT_ISR },
3714 { "abort", ARM_FT_ISR },
3715 { "UNDEF", ARM_FT_EXCEPTION },
3716 { "undef", ARM_FT_EXCEPTION },
3717 { "SWI", ARM_FT_EXCEPTION },
3718 { "swi", ARM_FT_EXCEPTION },
3719 { NULL, ARM_FT_NORMAL }
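/* For illustration only (hypothetical user code, not part of this file):
   the strings above are the arguments accepted by the "interrupt"/"isr"
   function attribute, for example

     void uart_handler (void) __attribute__ ((interrupt ("IRQ")));
     void fault_handler (void) __attribute__ ((interrupt ("ABORT")));

   A string that does not appear in the table makes arm_isr_value below
   return ARM_FT_UNKNOWN.  */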
3722 /* Returns the (interrupt) function type of the current
3723 function, or ARM_FT_UNKNOWN if the type cannot be determined. */
3725 static unsigned long
3726 arm_isr_value (tree argument)
3728 const isr_attribute_arg * ptr;
3729 const char * arg;
3731 if (!arm_arch_notm)
3732 return ARM_FT_NORMAL | ARM_FT_STACKALIGN;
3734 /* No argument - default to IRQ. */
3735 if (argument == NULL_TREE)
3736 return ARM_FT_ISR;
3738 /* Get the value of the argument. */
3739 if (TREE_VALUE (argument) == NULL_TREE
3740 || TREE_CODE (TREE_VALUE (argument)) != STRING_CST)
3741 return ARM_FT_UNKNOWN;
3743 arg = TREE_STRING_POINTER (TREE_VALUE (argument));
3745 /* Check it against the list of known arguments. */
3746 for (ptr = isr_attribute_args; ptr->arg != NULL; ptr++)
3747 if (streq (arg, ptr->arg))
3748 return ptr->return_value;
3750 /* An unrecognized interrupt type. */
3751 return ARM_FT_UNKNOWN;
3754 /* Computes the type of the current function. */
3756 static unsigned long
3757 arm_compute_func_type (void)
3759 unsigned long type = ARM_FT_UNKNOWN;
3760 tree a;
3761 tree attr;
3763 gcc_assert (TREE_CODE (current_function_decl) == FUNCTION_DECL);
3765 /* Decide if the current function is volatile. Such functions
3766 never return, and many memory cycles can be saved by not storing
3767 register values that will never be needed again. This optimization
3768 was added to speed up context switching in a kernel application. */
3769 if (optimize > 0
3770 && (TREE_NOTHROW (current_function_decl)
3771 || !(flag_unwind_tables
3772 || (flag_exceptions
3773 && arm_except_unwind_info (&global_options) != UI_SJLJ)))
3774 && TREE_THIS_VOLATILE (current_function_decl))
3775 type |= ARM_FT_VOLATILE;
3777 if (cfun->static_chain_decl != NULL)
3778 type |= ARM_FT_NESTED;
3780 attr = DECL_ATTRIBUTES (current_function_decl);
3782 a = lookup_attribute ("naked", attr);
3783 if (a != NULL_TREE)
3784 type |= ARM_FT_NAKED;
3786 a = lookup_attribute ("isr", attr);
3787 if (a == NULL_TREE)
3788 a = lookup_attribute ("interrupt", attr);
3790 if (a == NULL_TREE)
3791 type |= TARGET_INTERWORK ? ARM_FT_INTERWORKED : ARM_FT_NORMAL;
3792 else
3793 type |= arm_isr_value (TREE_VALUE (a));
3795 if (lookup_attribute ("cmse_nonsecure_entry", attr))
3796 type |= ARM_FT_CMSE_ENTRY;
3798 return type;
3801 /* Returns the type of the current function. */
3803 unsigned long
3804 arm_current_func_type (void)
3806 if (ARM_FUNC_TYPE (cfun->machine->func_type) == ARM_FT_UNKNOWN)
3807 cfun->machine->func_type = arm_compute_func_type ();
3809 return cfun->machine->func_type;
3812 bool
3813 arm_allocate_stack_slots_for_args (void)
3815 /* Naked functions should not allocate stack slots for arguments. */
3816 return !IS_NAKED (arm_current_func_type ());
3819 static bool
3820 arm_warn_func_return (tree decl)
3822 /* Naked functions are implemented entirely in assembly, including the
3823 return sequence, so suppress warnings about this. */
3824 return lookup_attribute ("naked", DECL_ATTRIBUTES (decl)) == NULL_TREE;
3828 /* Output assembler code for a block containing the constant parts
3829 of a trampoline, leaving space for the variable parts.
3831 On the ARM, (if r8 is the static chain regnum, and remembering that
3832 referencing pc adds an offset of 8) the trampoline looks like:
3833 ldr r8, [pc, #0]
3834 ldr pc, [pc]
3835 .word static chain value
3836 .word function's address
3837 XXX FIXME: When the trampoline returns, r8 will be clobbered. */
3839 static void
3840 arm_asm_trampoline_template (FILE *f)
3842 fprintf (f, "\t.syntax unified\n");
3844 if (TARGET_ARM)
3846 fprintf (f, "\t.arm\n");
3847 asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", STATIC_CHAIN_REGNUM, PC_REGNUM);
3848 asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", PC_REGNUM, PC_REGNUM);
3850 else if (TARGET_THUMB2)
3852 fprintf (f, "\t.thumb\n");
3853 /* The Thumb-2 trampoline is similar to the arm implementation.
3854 Unlike 16-bit Thumb, we enter the stub in thumb mode. */
3855 asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n",
3856 STATIC_CHAIN_REGNUM, PC_REGNUM);
3857 asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n", PC_REGNUM, PC_REGNUM);
3859 else
3861 ASM_OUTPUT_ALIGN (f, 2);
3862 fprintf (f, "\t.code\t16\n");
3863 fprintf (f, ".Ltrampoline_start:\n");
3864 asm_fprintf (f, "\tpush\t{r0, r1}\n");
3865 asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
3866 asm_fprintf (f, "\tmov\t%r, r0\n", STATIC_CHAIN_REGNUM);
3867 asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
3868 asm_fprintf (f, "\tstr\tr0, [%r, #4]\n", SP_REGNUM);
3869 asm_fprintf (f, "\tpop\t{r0, %r}\n", PC_REGNUM);
3871 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
3872 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
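/* For illustration only (hypothetical user code, not part of this file):
   a trampoline such as the template above is materialized when the
   address of a nested function (a GNU C extension) escapes, because the
   nested function needs its static chain set up before it runs:

     int apply (int (*fn) (int), int x) { return fn (x); }

     int
     outer (int bias)
     {
       int add_bias (int v) { return v + bias; }
       return apply (add_bias, 3);
     }

   Here "add_bias" is reached through a stack trampoline that loads the
   static chain register and then branches to the real code.  */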
3875 /* Emit RTL insns to initialize the variable parts of a trampoline. */
3877 static void
3878 arm_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
3880 rtx fnaddr, mem, a_tramp;
3882 emit_block_move (m_tramp, assemble_trampoline_template (),
3883 GEN_INT (TRAMPOLINE_SIZE), BLOCK_OP_NORMAL);
3885 mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 8 : 12);
3886 emit_move_insn (mem, chain_value);
3888 mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 12 : 16);
3889 fnaddr = XEXP (DECL_RTL (fndecl), 0);
3890 emit_move_insn (mem, fnaddr);
3892 a_tramp = XEXP (m_tramp, 0);
3893 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__clear_cache"),
3894 LCT_NORMAL, VOIDmode, a_tramp, Pmode,
3895 plus_constant (Pmode, a_tramp, TRAMPOLINE_SIZE), Pmode);
3898 /* Thumb trampolines should be entered in thumb mode, so set
3899 the bottom bit of the address. */
3901 static rtx
3902 arm_trampoline_adjust_address (rtx addr)
3904 if (TARGET_THUMB)
3905 addr = expand_simple_binop (Pmode, IOR, addr, const1_rtx,
3906 NULL, 0, OPTAB_LIB_WIDEN);
3907 return addr;
3910 /* Return 1 if it is possible to return using a single instruction.
3911 If SIBLING is non-null, this is a test for a return before a sibling
3912 call. SIBLING is the call insn, so we can examine its register usage. */
3915 use_return_insn (int iscond, rtx sibling)
3917 int regno;
3918 unsigned int func_type;
3919 unsigned long saved_int_regs;
3920 unsigned HOST_WIDE_INT stack_adjust;
3921 arm_stack_offsets *offsets;
3923 /* Never use a return instruction before reload has run. */
3924 if (!reload_completed)
3925 return 0;
3927 func_type = arm_current_func_type ();
3929 /* Naked, volatile and stack alignment functions need special
3930 consideration. */
3931 if (func_type & (ARM_FT_VOLATILE | ARM_FT_NAKED | ARM_FT_STACKALIGN))
3932 return 0;
3934 /* So do interrupt functions that use the frame pointer and Thumb
3935 interrupt functions. */
3936 if (IS_INTERRUPT (func_type) && (frame_pointer_needed || TARGET_THUMB))
3937 return 0;
3939 if (TARGET_LDRD && current_tune->prefer_ldrd_strd
3940 && !optimize_function_for_size_p (cfun))
3941 return 0;
3943 offsets = arm_get_frame_offsets ();
3944 stack_adjust = offsets->outgoing_args - offsets->saved_regs;
3946 /* As do variadic functions. */
3947 if (crtl->args.pretend_args_size
3948 || cfun->machine->uses_anonymous_args
3949 /* Or if the function calls __builtin_eh_return () */
3950 || crtl->calls_eh_return
3951 /* Or if the function calls alloca */
3952 || cfun->calls_alloca
3953 /* Or if there is a stack adjustment. However, if the stack pointer
3954 is saved on the stack, we can use a pre-incrementing stack load. */
3955 || !(stack_adjust == 0 || (TARGET_APCS_FRAME && frame_pointer_needed
3956 && stack_adjust == 4))
3957 /* Or if the static chain register was saved above the frame, under the
3958 assumption that the stack pointer isn't saved on the stack. */
3959 || (!(TARGET_APCS_FRAME && frame_pointer_needed)
3960 && arm_compute_static_chain_stack_bytes() != 0))
3961 return 0;
3963 saved_int_regs = offsets->saved_regs_mask;
3965 /* Unfortunately, the insn
3967 ldmib sp, {..., sp, ...}
3969 triggers a bug on most SA-110 based devices, such that the stack
3970 pointer won't be correctly restored if the instruction takes a
3971 page fault. We work around this problem by popping r3 along with
3972 the other registers, since that is never slower than executing
3973 another instruction.
3975 We test for !arm_arch5 here, because code for any architecture
3976 less than this could potentially be run on one of the buggy
3977 chips. */
3978 if (stack_adjust == 4 && !arm_arch5 && TARGET_ARM)
3980 /* Validate that r3 is a call-clobbered register (always true in
3981 the default abi) ... */
3982 if (!call_used_regs[3])
3983 return 0;
3985 /* ... that it isn't being used for a return value ... */
3986 if (arm_size_return_regs () >= (4 * UNITS_PER_WORD))
3987 return 0;
3989 /* ... or for a tail-call argument ... */
3990 if (sibling)
3992 gcc_assert (CALL_P (sibling));
3994 if (find_regno_fusage (sibling, USE, 3))
3995 return 0;
3998 /* ... and that there are no call-saved registers in r0-r2
3999 (always true in the default ABI). */
4000 if (saved_int_regs & 0x7)
4001 return 0;
4004 /* Can't be done if interworking with Thumb, and any registers have been
4005 stacked. */
4006 if (TARGET_INTERWORK && saved_int_regs != 0 && !IS_INTERRUPT(func_type))
4007 return 0;
4009 /* On StrongARM, conditional returns are expensive if they aren't
4010 taken and multiple registers have been stacked. */
4011 if (iscond && arm_tune_strongarm)
4013 /* Conditional return when just the LR is stored is a simple
4014 conditional-load instruction, that's not expensive. */
4015 if (saved_int_regs != 0 && saved_int_regs != (1 << LR_REGNUM))
4016 return 0;
4018 if (flag_pic
4019 && arm_pic_register != INVALID_REGNUM
4020 && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM))
4021 return 0;
 4024   /* ARMv8-M nonsecure entry functions need to use bxns to return and thus need
4025 several instructions if anything needs to be popped. */
4026 if (saved_int_regs && IS_CMSE_ENTRY (func_type))
4027 return 0;
4029 /* If there are saved registers but the LR isn't saved, then we need
4030 two instructions for the return. */
4031 if (saved_int_regs && !(saved_int_regs & (1 << LR_REGNUM)))
4032 return 0;
4034 /* Can't be done if any of the VFP regs are pushed,
4035 since this also requires an insn. */
4036 if (TARGET_HARD_FLOAT)
4037 for (regno = FIRST_VFP_REGNUM; regno <= LAST_VFP_REGNUM; regno++)
4038 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
4039 return 0;
4041 if (TARGET_REALLY_IWMMXT)
4042 for (regno = FIRST_IWMMXT_REGNUM; regno <= LAST_IWMMXT_REGNUM; regno++)
4043 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
4044 return 0;
4046 return 1;
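/* For illustration only (hypothetical user code, not part of this file):
   a simple leaf function such as

     int add (int a, int b) { return a + b; }

   saves no registers and needs no stack adjustment, so this predicate
   returns nonzero and the epilogue can be a single return instruction
   (e.g. "bx lr").  A function that uses alloca, variadic arguments or
   __builtin_eh_return is rejected by the checks above and needs a
   multi-instruction epilogue.  */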
4049 /* Return TRUE if we should try to use a simple_return insn, i.e. perform
4050 shrink-wrapping if possible. This is the case if we need to emit a
4051 prologue, which we can test by looking at the offsets. */
4052 bool
4053 use_simple_return_p (void)
4055 arm_stack_offsets *offsets;
4057 /* Note this function can be called before or after reload. */
4058 if (!reload_completed)
4059 arm_compute_frame_layout ();
4061 offsets = arm_get_frame_offsets ();
4062 return offsets->outgoing_args != 0;
4065 /* Return TRUE if int I is a valid immediate ARM constant. */
4068 const_ok_for_arm (HOST_WIDE_INT i)
4070 int lowbit;
4072 /* For machines with >32 bit HOST_WIDE_INT, the bits above bit 31 must
4073 be all zero, or all one. */
4074 if ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff) != 0
4075 && ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff)
4076 != ((~(unsigned HOST_WIDE_INT) 0)
4077 & ~(unsigned HOST_WIDE_INT) 0xffffffff)))
4078 return FALSE;
4080 i &= (unsigned HOST_WIDE_INT) 0xffffffff;
4082 /* Fast return for 0 and small values. We must do this for zero, since
4083 the code below can't handle that one case. */
4084 if ((i & ~(unsigned HOST_WIDE_INT) 0xff) == 0)
4085 return TRUE;
4087 /* Get the number of trailing zeros. */
4088 lowbit = ffs((int) i) - 1;
4090 /* Only even shifts are allowed in ARM mode so round down to the
4091 nearest even number. */
4092 if (TARGET_ARM)
4093 lowbit &= ~1;
4095 if ((i & ~(((unsigned HOST_WIDE_INT) 0xff) << lowbit)) == 0)
4096 return TRUE;
4098 if (TARGET_ARM)
4100 /* Allow rotated constants in ARM mode. */
4101 if (lowbit <= 4
4102 && ((i & ~0xc000003f) == 0
4103 || (i & ~0xf000000f) == 0
4104 || (i & ~0xfc000003) == 0))
4105 return TRUE;
4107 else if (TARGET_THUMB2)
4109 HOST_WIDE_INT v;
4111 /* Allow repeated patterns 0x00XY00XY or 0xXYXYXYXY. */
4112 v = i & 0xff;
4113 v |= v << 16;
4114 if (i == v || i == (v | (v << 8)))
4115 return TRUE;
4117 /* Allow repeated pattern 0xXY00XY00. */
4118 v = i & 0xff00;
4119 v |= v << 16;
4120 if (i == v)
4121 return TRUE;
4123 else if (TARGET_HAVE_MOVT)
4125 /* Thumb-1 Targets with MOVT. */
4126 if (i > 0xffff)
4127 return FALSE;
4128 else
4129 return TRUE;
4132 return FALSE;
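/* A few concrete examples of the rules above (illustrative only):

     0x000000ff   valid: fits in 8 bits
     0x0000ff00   valid: 8 bits shifted left by 8
     0x00ff0000   valid: 8 bits at an even bit position
     0xff00ff00   valid only as a Thumb-2 replicated pattern (0xXY00XY00)
     0x00000101   not a valid data-processing immediate (it spans 9 bits),
                  although movw can load it directly on cores with MOVT;
                  otherwise the splitters below use two instructions.  */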
4135 /* Return true if I is a valid constant for the operation CODE. */
4137 const_ok_for_op (HOST_WIDE_INT i, enum rtx_code code)
4139 if (const_ok_for_arm (i))
4140 return 1;
4142 switch (code)
4144 case SET:
4145 /* See if we can use movw. */
4146 if (TARGET_HAVE_MOVT && (i & 0xffff0000) == 0)
4147 return 1;
4148 else
4149 /* Otherwise, try mvn. */
4150 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
4152 case PLUS:
4153 /* See if we can use addw or subw. */
4154 if (TARGET_THUMB2
4155 && ((i & 0xfffff000) == 0
4156 || ((-i) & 0xfffff000) == 0))
4157 return 1;
4158 /* Fall through. */
4159 case COMPARE:
4160 case EQ:
4161 case NE:
4162 case GT:
4163 case LE:
4164 case LT:
4165 case GE:
4166 case GEU:
4167 case LTU:
4168 case GTU:
4169 case LEU:
4170 case UNORDERED:
4171 case ORDERED:
4172 case UNEQ:
4173 case UNGE:
4174 case UNLT:
4175 case UNGT:
4176 case UNLE:
4177 return const_ok_for_arm (ARM_SIGN_EXTEND (-i));
4179 case MINUS: /* Should only occur with (MINUS I reg) => rsb */
4180 case XOR:
4181 return 0;
4183 case IOR:
4184 if (TARGET_THUMB2)
4185 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
4186 return 0;
4188 case AND:
4189 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
4191 default:
4192 gcc_unreachable ();
4196 /* Return true if I is a valid di mode constant for the operation CODE. */
4198 const_ok_for_dimode_op (HOST_WIDE_INT i, enum rtx_code code)
4200 HOST_WIDE_INT hi_val = (i >> 32) & 0xFFFFFFFF;
4201 HOST_WIDE_INT lo_val = i & 0xFFFFFFFF;
4202 rtx hi = GEN_INT (hi_val);
4203 rtx lo = GEN_INT (lo_val);
4205 if (TARGET_THUMB1)
4206 return 0;
4208 switch (code)
4210 case AND:
4211 case IOR:
4212 case XOR:
4213 return (const_ok_for_op (hi_val, code) || hi_val == 0xFFFFFFFF)
4214 && (const_ok_for_op (lo_val, code) || lo_val == 0xFFFFFFFF);
4215 case PLUS:
4216 return arm_not_operand (hi, SImode) && arm_add_operand (lo, SImode);
4218 default:
4219 return 0;
4223 /* Emit a sequence of insns to handle a large constant.
4224 CODE is the code of the operation required, it can be any of SET, PLUS,
4225 IOR, AND, XOR, MINUS;
4226 MODE is the mode in which the operation is being performed;
4227 VAL is the integer to operate on;
4228 SOURCE is the other operand (a register, or a null-pointer for SET);
4229 SUBTARGETS means it is safe to create scratch registers if that will
4230 either produce a simpler sequence, or we will want to cse the values.
4231 Return value is the number of insns emitted. */
4233 /* ??? Tweak this for thumb2. */
4235 arm_split_constant (enum rtx_code code, machine_mode mode, rtx insn,
4236 HOST_WIDE_INT val, rtx target, rtx source, int subtargets)
4238 rtx cond;
4240 if (insn && GET_CODE (PATTERN (insn)) == COND_EXEC)
4241 cond = COND_EXEC_TEST (PATTERN (insn));
4242 else
4243 cond = NULL_RTX;
4245 if (subtargets || code == SET
4246 || (REG_P (target) && REG_P (source)
4247 && REGNO (target) != REGNO (source)))
4249 /* After arm_reorg has been called, we can't fix up expensive
4250 constants by pushing them into memory so we must synthesize
4251 them in-line, regardless of the cost. This is only likely to
4252 be more costly on chips that have load delay slots and we are
4253 compiling without running the scheduler (so no splitting
4254 occurred before the final instruction emission).
4256 Ref: gcc -O1 -mcpu=strongarm gcc.c-torture/compile/980506-2.c
4258 if (!cfun->machine->after_arm_reorg
4259 && !cond
4260 && (arm_gen_constant (code, mode, NULL_RTX, val, target, source,
4261 1, 0)
4262 > (arm_constant_limit (optimize_function_for_size_p (cfun))
4263 + (code != SET))))
4265 if (code == SET)
 4267 	  /* Currently SET is the only monadic value for CODE; all
 4268 	     the rest are dyadic.  */
4269 if (TARGET_USE_MOVT)
4270 arm_emit_movpair (target, GEN_INT (val));
4271 else
4272 emit_set_insn (target, GEN_INT (val));
4274 return 1;
4276 else
4278 rtx temp = subtargets ? gen_reg_rtx (mode) : target;
4280 if (TARGET_USE_MOVT)
4281 arm_emit_movpair (temp, GEN_INT (val));
4282 else
4283 emit_set_insn (temp, GEN_INT (val));
4285 /* For MINUS, the value is subtracted from, since we never
4286 have subtraction of a constant. */
4287 if (code == MINUS)
4288 emit_set_insn (target, gen_rtx_MINUS (mode, temp, source));
4289 else
4290 emit_set_insn (target,
4291 gen_rtx_fmt_ee (code, mode, source, temp));
4292 return 2;
4297 return arm_gen_constant (code, mode, cond, val, target, source, subtargets,
4301 /* Return a sequence of integers, in RETURN_SEQUENCE that fit into
4302 ARM/THUMB2 immediates, and add up to VAL.
 4303    The function return value gives the number of insns required.  */
4304 static int
4305 optimal_immediate_sequence (enum rtx_code code, unsigned HOST_WIDE_INT val,
4306 struct four_ints *return_sequence)
4308 int best_consecutive_zeros = 0;
4309 int i;
4310 int best_start = 0;
4311 int insns1, insns2;
4312 struct four_ints tmp_sequence;
4314 /* If we aren't targeting ARM, the best place to start is always at
4315 the bottom, otherwise look more closely. */
4316 if (TARGET_ARM)
4318 for (i = 0; i < 32; i += 2)
4320 int consecutive_zeros = 0;
4322 if (!(val & (3 << i)))
4324 while ((i < 32) && !(val & (3 << i)))
4326 consecutive_zeros += 2;
4327 i += 2;
4329 if (consecutive_zeros > best_consecutive_zeros)
4331 best_consecutive_zeros = consecutive_zeros;
4332 best_start = i - consecutive_zeros;
4334 i -= 2;
4339 /* So long as it won't require any more insns to do so, it's
4340 desirable to emit a small constant (in bits 0...9) in the last
4341 insn. This way there is more chance that it can be combined with
4342 a later addressing insn to form a pre-indexed load or store
4343 operation. Consider:
4345 *((volatile int *)0xe0000100) = 1;
4346 *((volatile int *)0xe0000110) = 2;
4348 We want this to wind up as:
4350 mov rA, #0xe0000000
4351 mov rB, #1
4352 str rB, [rA, #0x100]
4353 mov rB, #2
4354 str rB, [rA, #0x110]
4356 rather than having to synthesize both large constants from scratch.
4358 Therefore, we calculate how many insns would be required to emit
4359 the constant starting from `best_start', and also starting from
4360 zero (i.e. with bit 31 first to be output). If `best_start' doesn't
4361 yield a shorter sequence, we may as well use zero. */
4362 insns1 = optimal_immediate_sequence_1 (code, val, return_sequence, best_start);
4363 if (best_start != 0
4364 && ((HOST_WIDE_INT_1U << best_start) < val))
4366 insns2 = optimal_immediate_sequence_1 (code, val, &tmp_sequence, 0);
4367 if (insns2 <= insns1)
4369 *return_sequence = tmp_sequence;
4370 insns1 = insns2;
4374 return insns1;
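/* A concrete example of the splitting performed here (illustrative only):
   0x0ff00ff0 is not a valid single immediate, but it is the sum of two
   valid ones, so the returned sequence is { 0x00000ff0, 0x0ff00000 } (in
   some order) and the caller can emit something like

     mov     rD, #0x00000ff0
     orr     rD, rD, #0x0ff00000

   (an add or other operation instead of orr, depending on CODE), i.e. two
   instructions rather than a load from the literal pool.  */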
4377 /* As for optimal_immediate_sequence, but starting at bit-position I. */
4378 static int
4379 optimal_immediate_sequence_1 (enum rtx_code code, unsigned HOST_WIDE_INT val,
4380 struct four_ints *return_sequence, int i)
4382 int remainder = val & 0xffffffff;
4383 int insns = 0;
4385 /* Try and find a way of doing the job in either two or three
4386 instructions.
4388 In ARM mode we can use 8-bit constants, rotated to any 2-bit aligned
4389 location. We start at position I. This may be the MSB, or
 4390      optimal_immediate_sequence may have positioned it at the largest block
4391 of zeros that are aligned on a 2-bit boundary. We then fill up the temps,
4392 wrapping around to the top of the word when we drop off the bottom.
4393 In the worst case this code should produce no more than four insns.
4395 In Thumb2 mode, we can use 32/16-bit replicated constants, and 8-bit
4396 constants, shifted to any arbitrary location. We should always start
4397 at the MSB. */
4400 int end;
4401 unsigned int b1, b2, b3, b4;
4402 unsigned HOST_WIDE_INT result;
4403 int loc;
4405 gcc_assert (insns < 4);
4407 if (i <= 0)
4408 i += 32;
4410 /* First, find the next normal 12/8-bit shifted/rotated immediate. */
4411 if (remainder & ((TARGET_ARM ? (3 << (i - 2)) : (1 << (i - 1)))))
4413 loc = i;
4414 if (i <= 12 && TARGET_THUMB2 && code == PLUS)
4415 /* We can use addw/subw for the last 12 bits. */
4416 result = remainder;
4417 else
4419 /* Use an 8-bit shifted/rotated immediate. */
4420 end = i - 8;
4421 if (end < 0)
4422 end += 32;
4423 result = remainder & ((0x0ff << end)
4424 | ((i < end) ? (0xff >> (32 - end))
4425 : 0));
4426 i -= 8;
4429 else
4431 /* Arm allows rotates by a multiple of two. Thumb-2 allows
4432 arbitrary shifts. */
4433 i -= TARGET_ARM ? 2 : 1;
4434 continue;
4437 /* Next, see if we can do a better job with a thumb2 replicated
4438 constant.
4440 We do it this way around to catch the cases like 0x01F001E0 where
4441 two 8-bit immediates would work, but a replicated constant would
4442 make it worse.
4444 TODO: 16-bit constants that don't clear all the bits, but still win.
4445 TODO: Arithmetic splitting for set/add/sub, rather than bitwise. */
4446 if (TARGET_THUMB2)
4448 b1 = (remainder & 0xff000000) >> 24;
4449 b2 = (remainder & 0x00ff0000) >> 16;
4450 b3 = (remainder & 0x0000ff00) >> 8;
4451 b4 = remainder & 0xff;
4453 if (loc > 24)
4455 /* The 8-bit immediate already found clears b1 (and maybe b2),
4456 but must leave b3 and b4 alone. */
4458 /* First try to find a 32-bit replicated constant that clears
4459 almost everything. We can assume that we can't do it in one,
4460 or else we wouldn't be here. */
4461 unsigned int tmp = b1 & b2 & b3 & b4;
4462 unsigned int tmp2 = tmp + (tmp << 8) + (tmp << 16)
4463 + (tmp << 24);
4464 unsigned int matching_bytes = (tmp == b1) + (tmp == b2)
4465 + (tmp == b3) + (tmp == b4);
4466 if (tmp
4467 && (matching_bytes >= 3
4468 || (matching_bytes == 2
4469 && const_ok_for_op (remainder & ~tmp2, code))))
4471 /* At least 3 of the bytes match, and the fourth has at
4472 least as many bits set, or two of the bytes match
4473 and it will only require one more insn to finish. */
4474 result = tmp2;
4475 i = tmp != b1 ? 32
4476 : tmp != b2 ? 24
4477 : tmp != b3 ? 16
4478 : 8;
4481 /* Second, try to find a 16-bit replicated constant that can
4482 leave three of the bytes clear. If b2 or b4 is already
4483 zero, then we can. If the 8-bit from above would not
4484 clear b2 anyway, then we still win. */
4485 else if (b1 == b3 && (!b2 || !b4
4486 || (remainder & 0x00ff0000 & ~result)))
4488 result = remainder & 0xff00ff00;
4489 i = 24;
4492 else if (loc > 16)
4494 /* The 8-bit immediate already found clears b2 (and maybe b3)
 4495 	     and we don't get here unless b1 is already clear, but it will
4496 leave b4 unchanged. */
4498 /* If we can clear b2 and b4 at once, then we win, since the
4499 8-bits couldn't possibly reach that far. */
4500 if (b2 == b4)
4502 result = remainder & 0x00ff00ff;
4503 i = 16;
4508 return_sequence->i[insns++] = result;
4509 remainder &= ~result;
4511 if (code == SET || code == MINUS)
4512 code = PLUS;
4514 while (remainder);
4516 return insns;
4519 /* Emit an instruction with the indicated PATTERN. If COND is
4520 non-NULL, conditionalize the execution of the instruction on COND
4521 being true. */
4523 static void
4524 emit_constant_insn (rtx cond, rtx pattern)
4526 if (cond)
4527 pattern = gen_rtx_COND_EXEC (VOIDmode, copy_rtx (cond), pattern);
4528 emit_insn (pattern);
4531 /* As above, but extra parameter GENERATE which, if clear, suppresses
4532 RTL generation. */
4534 static int
4535 arm_gen_constant (enum rtx_code code, machine_mode mode, rtx cond,
4536 unsigned HOST_WIDE_INT val, rtx target, rtx source,
4537 int subtargets, int generate)
4539 int can_invert = 0;
4540 int can_negate = 0;
4541 int final_invert = 0;
4542 int i;
4543 int set_sign_bit_copies = 0;
4544 int clear_sign_bit_copies = 0;
4545 int clear_zero_bit_copies = 0;
4546 int set_zero_bit_copies = 0;
4547 int insns = 0, neg_insns, inv_insns;
4548 unsigned HOST_WIDE_INT temp1, temp2;
4549 unsigned HOST_WIDE_INT remainder = val & 0xffffffff;
4550 struct four_ints *immediates;
4551 struct four_ints pos_immediates, neg_immediates, inv_immediates;
4553 /* Find out which operations are safe for a given CODE. Also do a quick
4554 check for degenerate cases; these can occur when DImode operations
4555 are split. */
4556 switch (code)
4558 case SET:
4559 can_invert = 1;
4560 break;
4562 case PLUS:
4563 can_negate = 1;
4564 break;
4566 case IOR:
4567 if (remainder == 0xffffffff)
4569 if (generate)
4570 emit_constant_insn (cond,
4571 gen_rtx_SET (target,
4572 GEN_INT (ARM_SIGN_EXTEND (val))));
4573 return 1;
4576 if (remainder == 0)
4578 if (reload_completed && rtx_equal_p (target, source))
4579 return 0;
4581 if (generate)
4582 emit_constant_insn (cond, gen_rtx_SET (target, source));
4583 return 1;
4585 break;
4587 case AND:
4588 if (remainder == 0)
4590 if (generate)
4591 emit_constant_insn (cond, gen_rtx_SET (target, const0_rtx));
4592 return 1;
4594 if (remainder == 0xffffffff)
4596 if (reload_completed && rtx_equal_p (target, source))
4597 return 0;
4598 if (generate)
4599 emit_constant_insn (cond, gen_rtx_SET (target, source));
4600 return 1;
4602 can_invert = 1;
4603 break;
4605 case XOR:
4606 if (remainder == 0)
4608 if (reload_completed && rtx_equal_p (target, source))
4609 return 0;
4610 if (generate)
4611 emit_constant_insn (cond, gen_rtx_SET (target, source));
4612 return 1;
4615 if (remainder == 0xffffffff)
4617 if (generate)
4618 emit_constant_insn (cond,
4619 gen_rtx_SET (target,
4620 gen_rtx_NOT (mode, source)));
4621 return 1;
4623 final_invert = 1;
4624 break;
4626 case MINUS:
4627 /* We treat MINUS as (val - source), since (source - val) is always
4628 passed as (source + (-val)). */
4629 if (remainder == 0)
4631 if (generate)
4632 emit_constant_insn (cond,
4633 gen_rtx_SET (target,
4634 gen_rtx_NEG (mode, source)));
4635 return 1;
4637 if (const_ok_for_arm (val))
4639 if (generate)
4640 emit_constant_insn (cond,
4641 gen_rtx_SET (target,
4642 gen_rtx_MINUS (mode, GEN_INT (val),
4643 source)));
4644 return 1;
4647 break;
4649 default:
4650 gcc_unreachable ();
4653 /* If we can do it in one insn get out quickly. */
4654 if (const_ok_for_op (val, code))
4656 if (generate)
4657 emit_constant_insn (cond,
4658 gen_rtx_SET (target,
4659 (source
4660 ? gen_rtx_fmt_ee (code, mode, source,
4661 GEN_INT (val))
4662 : GEN_INT (val))));
4663 return 1;
4666 /* On targets with UXTH/UBFX, we can deal with AND (2^N)-1 in a single
4667 insn. */
4668 if (code == AND && (i = exact_log2 (remainder + 1)) > 0
4669 && (arm_arch_thumb2 || (i == 16 && arm_arch6 && mode == SImode)))
4671 if (generate)
4673 if (mode == SImode && i == 16)
4674 /* Use UXTH in preference to UBFX, since on Thumb2 it's a
4675 smaller insn. */
4676 emit_constant_insn (cond,
4677 gen_zero_extendhisi2
4678 (target, gen_lowpart (HImode, source)));
4679 else
4680 /* Extz only supports SImode, but we can coerce the operands
4681 into that mode. */
4682 emit_constant_insn (cond,
4683 gen_extzv_t2 (gen_lowpart (SImode, target),
4684 gen_lowpart (SImode, source),
4685 GEN_INT (i), const0_rtx));
4688 return 1;
4691 /* Calculate a few attributes that may be useful for specific
4692 optimizations. */
4693 /* Count number of leading zeros. */
4694 for (i = 31; i >= 0; i--)
4696 if ((remainder & (1 << i)) == 0)
4697 clear_sign_bit_copies++;
4698 else
4699 break;
4702 /* Count number of leading 1's. */
4703 for (i = 31; i >= 0; i--)
4705 if ((remainder & (1 << i)) != 0)
4706 set_sign_bit_copies++;
4707 else
4708 break;
 4711   /* Count number of trailing zeros.  */
4712 for (i = 0; i <= 31; i++)
4714 if ((remainder & (1 << i)) == 0)
4715 clear_zero_bit_copies++;
4716 else
4717 break;
4720 /* Count number of trailing 1's. */
4721 for (i = 0; i <= 31; i++)
4723 if ((remainder & (1 << i)) != 0)
4724 set_zero_bit_copies++;
4725 else
4726 break;
4729 switch (code)
4731 case SET:
4732 /* See if we can do this by sign_extending a constant that is known
 4733 	 to be negative.  This is a good way of doing it, since the shift
4734 may well merge into a subsequent insn. */
4735 if (set_sign_bit_copies > 1)
4737 if (const_ok_for_arm
4738 (temp1 = ARM_SIGN_EXTEND (remainder
4739 << (set_sign_bit_copies - 1))))
4741 if (generate)
4743 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4744 emit_constant_insn (cond,
4745 gen_rtx_SET (new_src, GEN_INT (temp1)));
4746 emit_constant_insn (cond,
4747 gen_ashrsi3 (target, new_src,
4748 GEN_INT (set_sign_bit_copies - 1)));
4750 return 2;
4752 /* For an inverted constant, we will need to set the low bits,
4753 these will be shifted out of harm's way. */
4754 temp1 |= (1 << (set_sign_bit_copies - 1)) - 1;
4755 if (const_ok_for_arm (~temp1))
4757 if (generate)
4759 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4760 emit_constant_insn (cond,
4761 gen_rtx_SET (new_src, GEN_INT (temp1)));
4762 emit_constant_insn (cond,
4763 gen_ashrsi3 (target, new_src,
4764 GEN_INT (set_sign_bit_copies - 1)));
4766 return 2;
4770 /* See if we can calculate the value as the difference between two
4771 valid immediates. */
4772 if (clear_sign_bit_copies + clear_zero_bit_copies <= 16)
4774 int topshift = clear_sign_bit_copies & ~1;
4776 temp1 = ARM_SIGN_EXTEND ((remainder + (0x00800000 >> topshift))
4777 & (0xff000000 >> topshift));
4779 /* If temp1 is zero, then that means the 9 most significant
4780 bits of remainder were 1 and we've caused it to overflow.
4781 When topshift is 0 we don't need to do anything since we
4782 can borrow from 'bit 32'. */
4783 if (temp1 == 0 && topshift != 0)
4784 temp1 = 0x80000000 >> (topshift - 1);
4786 temp2 = ARM_SIGN_EXTEND (temp1 - remainder);
4788 if (const_ok_for_arm (temp2))
4790 if (generate)
4792 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4793 emit_constant_insn (cond,
4794 gen_rtx_SET (new_src, GEN_INT (temp1)));
4795 emit_constant_insn (cond,
4796 gen_addsi3 (target, new_src,
4797 GEN_INT (-temp2)));
4800 return 2;
4804 /* See if we can generate this by setting the bottom (or the top)
4805 16 bits, and then shifting these into the other half of the
4806 word. We only look for the simplest cases, to do more would cost
4807 too much. Be careful, however, not to generate this when the
4808 alternative would take fewer insns. */
4809 if (val & 0xffff0000)
4811 temp1 = remainder & 0xffff0000;
4812 temp2 = remainder & 0x0000ffff;
4814 /* Overlaps outside this range are best done using other methods. */
4815 for (i = 9; i < 24; i++)
4817 if ((((temp2 | (temp2 << i)) & 0xffffffff) == remainder)
4818 && !const_ok_for_arm (temp2))
4820 rtx new_src = (subtargets
4821 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
4822 : target);
4823 insns = arm_gen_constant (code, mode, cond, temp2, new_src,
4824 source, subtargets, generate);
4825 source = new_src;
4826 if (generate)
4827 emit_constant_insn
4828 (cond,
4829 gen_rtx_SET
4830 (target,
4831 gen_rtx_IOR (mode,
4832 gen_rtx_ASHIFT (mode, source,
4833 GEN_INT (i)),
4834 source)));
4835 return insns + 1;
4839 /* Don't duplicate cases already considered. */
4840 for (i = 17; i < 24; i++)
4842 if (((temp1 | (temp1 >> i)) == remainder)
4843 && !const_ok_for_arm (temp1))
4845 rtx new_src = (subtargets
4846 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
4847 : target);
4848 insns = arm_gen_constant (code, mode, cond, temp1, new_src,
4849 source, subtargets, generate);
4850 source = new_src;
4851 if (generate)
4852 emit_constant_insn
4853 (cond,
4854 gen_rtx_SET (target,
4855 gen_rtx_IOR
4856 (mode,
4857 gen_rtx_LSHIFTRT (mode, source,
4858 GEN_INT (i)),
4859 source)));
4860 return insns + 1;
4864 break;
4866 case IOR:
4867 case XOR:
4868 /* If we have IOR or XOR, and the constant can be loaded in a
4869 single instruction, and we can find a temporary to put it in,
4870 then this can be done in two instructions instead of 3-4. */
4871 if (subtargets
4872 /* TARGET can't be NULL if SUBTARGETS is 0 */
4873 || (reload_completed && !reg_mentioned_p (target, source)))
4875 if (const_ok_for_arm (ARM_SIGN_EXTEND (~val)))
4877 if (generate)
4879 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
4881 emit_constant_insn (cond,
4882 gen_rtx_SET (sub, GEN_INT (val)));
4883 emit_constant_insn (cond,
4884 gen_rtx_SET (target,
4885 gen_rtx_fmt_ee (code, mode,
4886 source, sub)));
4888 return 2;
4892 if (code == XOR)
4893 break;
4895 /* Convert.
 4896 	 x = y | constant (which is composed of set_sign_bit_copies leading 1s
 4897                     and the remaining bits 0, e.g. 0xfff00000)
4898 x = ~(~(y ashift set_sign_bit_copies) lshiftrt set_sign_bit_copies)
4900 This can be done in 2 instructions by using shifts with mov or mvn.
4901 e.g. for
4902 x = x | 0xfff00000;
4903 we generate.
4904 mvn r0, r0, asl #12
4905 mvn r0, r0, lsr #12 */
4906 if (set_sign_bit_copies > 8
4907 && (val & (HOST_WIDE_INT_M1U << (32 - set_sign_bit_copies))) == val)
4909 if (generate)
4911 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
4912 rtx shift = GEN_INT (set_sign_bit_copies);
4914 emit_constant_insn
4915 (cond,
4916 gen_rtx_SET (sub,
4917 gen_rtx_NOT (mode,
4918 gen_rtx_ASHIFT (mode,
4919 source,
4920 shift))));
4921 emit_constant_insn
4922 (cond,
4923 gen_rtx_SET (target,
4924 gen_rtx_NOT (mode,
4925 gen_rtx_LSHIFTRT (mode, sub,
4926 shift))));
4928 return 2;
4931 /* Convert
4932 x = y | constant (which has set_zero_bit_copies number of trailing ones).
4934 x = ~((~y lshiftrt set_zero_bit_copies) ashift set_zero_bit_copies).
4936 For eg. r0 = r0 | 0xfff
4937 mvn r0, r0, lsr #12
4938 mvn r0, r0, asl #12
4941 if (set_zero_bit_copies > 8
4942 && (remainder & ((1 << set_zero_bit_copies) - 1)) == remainder)
4944 if (generate)
4946 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
4947 rtx shift = GEN_INT (set_zero_bit_copies);
4949 emit_constant_insn
4950 (cond,
4951 gen_rtx_SET (sub,
4952 gen_rtx_NOT (mode,
4953 gen_rtx_LSHIFTRT (mode,
4954 source,
4955 shift))));
4956 emit_constant_insn
4957 (cond,
4958 gen_rtx_SET (target,
4959 gen_rtx_NOT (mode,
4960 gen_rtx_ASHIFT (mode, sub,
4961 shift))));
4963 return 2;
4966 /* This will never be reached for Thumb2 because orn is a valid
 4967 	 instruction. This is for Thumb1 and the 32-bit ARM cases.
4969 x = y | constant (such that ~constant is a valid constant)
4970 Transform this to
4971 x = ~(~y & ~constant).
4973 if (const_ok_for_arm (temp1 = ARM_SIGN_EXTEND (~val)))
4975 if (generate)
4977 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
4978 emit_constant_insn (cond,
4979 gen_rtx_SET (sub,
4980 gen_rtx_NOT (mode, source)));
4981 source = sub;
4982 if (subtargets)
4983 sub = gen_reg_rtx (mode);
4984 emit_constant_insn (cond,
4985 gen_rtx_SET (sub,
4986 gen_rtx_AND (mode, source,
4987 GEN_INT (temp1))));
4988 emit_constant_insn (cond,
4989 gen_rtx_SET (target,
4990 gen_rtx_NOT (mode, sub)));
4992 return 3;
4994 break;
4996 case AND:
4997 /* See if two shifts will do 2 or more insn's worth of work. */
4998 if (clear_sign_bit_copies >= 16 && clear_sign_bit_copies < 24)
5000 HOST_WIDE_INT shift_mask = ((0xffffffff
5001 << (32 - clear_sign_bit_copies))
5002 & 0xffffffff);
5004 if ((remainder | shift_mask) != 0xffffffff)
5006 HOST_WIDE_INT new_val
5007 = ARM_SIGN_EXTEND (remainder | shift_mask);
5009 if (generate)
5011 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
5012 insns = arm_gen_constant (AND, SImode, cond, new_val,
5013 new_src, source, subtargets, 1);
5014 source = new_src;
5016 else
5018 rtx targ = subtargets ? NULL_RTX : target;
5019 insns = arm_gen_constant (AND, mode, cond, new_val,
5020 targ, source, subtargets, 0);
5024 if (generate)
5026 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
5027 rtx shift = GEN_INT (clear_sign_bit_copies);
5029 emit_insn (gen_ashlsi3 (new_src, source, shift));
5030 emit_insn (gen_lshrsi3 (target, new_src, shift));
5033 return insns + 2;
5036 if (clear_zero_bit_copies >= 16 && clear_zero_bit_copies < 24)
5038 HOST_WIDE_INT shift_mask = (1 << clear_zero_bit_copies) - 1;
5040 if ((remainder | shift_mask) != 0xffffffff)
5042 HOST_WIDE_INT new_val
5043 = ARM_SIGN_EXTEND (remainder | shift_mask);
5044 if (generate)
5046 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
5048 insns = arm_gen_constant (AND, mode, cond, new_val,
5049 new_src, source, subtargets, 1);
5050 source = new_src;
5052 else
5054 rtx targ = subtargets ? NULL_RTX : target;
5056 insns = arm_gen_constant (AND, mode, cond, new_val,
5057 targ, source, subtargets, 0);
5061 if (generate)
5063 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
5064 rtx shift = GEN_INT (clear_zero_bit_copies);
5066 emit_insn (gen_lshrsi3 (new_src, source, shift));
5067 emit_insn (gen_ashlsi3 (target, new_src, shift));
5070 return insns + 2;
5073 break;
5075 default:
5076 break;
5079 /* Calculate what the instruction sequences would be if we generated it
5080 normally, negated, or inverted. */
5081 if (code == AND)
5082 /* AND cannot be split into multiple insns, so invert and use BIC. */
5083 insns = 99;
5084 else
5085 insns = optimal_immediate_sequence (code, remainder, &pos_immediates);
5087 if (can_negate)
5088 neg_insns = optimal_immediate_sequence (code, (-remainder) & 0xffffffff,
5089 &neg_immediates);
5090 else
5091 neg_insns = 99;
5093 if (can_invert || final_invert)
5094 inv_insns = optimal_immediate_sequence (code, remainder ^ 0xffffffff,
5095 &inv_immediates);
5096 else
5097 inv_insns = 99;
5099 immediates = &pos_immediates;
5101 /* Is the negated immediate sequence more efficient? */
5102 if (neg_insns < insns && neg_insns <= inv_insns)
5104 insns = neg_insns;
5105 immediates = &neg_immediates;
5107 else
5108 can_negate = 0;
5110 /* Is the inverted immediate sequence more efficient?
5111 We must allow for an extra NOT instruction for XOR operations, although
5112 there is some chance that the final 'mvn' will get optimized later. */
5113 if ((inv_insns + 1) < insns || (!final_invert && inv_insns < insns))
5115 insns = inv_insns;
5116 immediates = &inv_immediates;
5118 else
5120 can_invert = 0;
5121 final_invert = 0;
5124 /* Now output the chosen sequence as instructions. */
5125 if (generate)
5127 for (i = 0; i < insns; i++)
5129 rtx new_src, temp1_rtx;
5131 temp1 = immediates->i[i];
5133 if (code == SET || code == MINUS)
5134 new_src = (subtargets ? gen_reg_rtx (mode) : target);
5135 else if ((final_invert || i < (insns - 1)) && subtargets)
5136 new_src = gen_reg_rtx (mode);
5137 else
5138 new_src = target;
5140 if (can_invert)
5141 temp1 = ~temp1;
5142 else if (can_negate)
5143 temp1 = -temp1;
5145 temp1 = trunc_int_for_mode (temp1, mode);
5146 temp1_rtx = GEN_INT (temp1);
5148 if (code == SET)
5150 else if (code == MINUS)
5151 temp1_rtx = gen_rtx_MINUS (mode, temp1_rtx, source);
5152 else
5153 temp1_rtx = gen_rtx_fmt_ee (code, mode, source, temp1_rtx);
5155 emit_constant_insn (cond, gen_rtx_SET (new_src, temp1_rtx));
5156 source = new_src;
5158 if (code == SET)
5160 can_negate = can_invert;
5161 can_invert = 0;
5162 code = PLUS;
5164 else if (code == MINUS)
5165 code = PLUS;
5169 if (final_invert)
5171 if (generate)
5172 emit_constant_insn (cond, gen_rtx_SET (target,
5173 gen_rtx_NOT (mode, source)));
5174 insns++;
5177 return insns;
5180 /* Canonicalize a comparison so that we are more likely to recognize it.
5181 This can be done for a few constant compares, where we can make the
5182 immediate value easier to load. */
5184 static void
5185 arm_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
5186 bool op0_preserve_value)
5188 machine_mode mode;
5189 unsigned HOST_WIDE_INT i, maxval;
5191 mode = GET_MODE (*op0);
5192 if (mode == VOIDmode)
5193 mode = GET_MODE (*op1);
5195 maxval = (HOST_WIDE_INT_1U << (GET_MODE_BITSIZE (mode) - 1)) - 1;
5197 /* For DImode, we have GE/LT/GEU/LTU comparisons. In ARM mode
5198 we can also use cmp/cmpeq for GTU/LEU. GT/LE must be either
5199 reversed or (for constant OP1) adjusted to GE/LT. Similarly
5200 for GTU/LEU in Thumb mode. */
5201 if (mode == DImode)
5204 if (*code == GT || *code == LE
5205 || (!TARGET_ARM && (*code == GTU || *code == LEU)))
5207 /* Missing comparison. First try to use an available
5208 comparison. */
5209 if (CONST_INT_P (*op1))
5211 i = INTVAL (*op1);
5212 switch (*code)
5214 case GT:
5215 case LE:
5216 if (i != maxval
5217 && arm_const_double_by_immediates (GEN_INT (i + 1)))
5219 *op1 = GEN_INT (i + 1);
5220 *code = *code == GT ? GE : LT;
5221 return;
5223 break;
5224 case GTU:
5225 case LEU:
5226 if (i != ~((unsigned HOST_WIDE_INT) 0)
5227 && arm_const_double_by_immediates (GEN_INT (i + 1)))
5229 *op1 = GEN_INT (i + 1);
5230 *code = *code == GTU ? GEU : LTU;
5231 return;
5233 break;
5234 default:
5235 gcc_unreachable ();
5239 /* If that did not work, reverse the condition. */
5240 if (!op0_preserve_value)
5242 std::swap (*op0, *op1);
5243 *code = (int)swap_condition ((enum rtx_code)*code);
5246 return;
5249 /* If *op0 is (zero_extend:SI (subreg:QI (reg:SI) 0)) and comparing
5250 with const0_rtx, change it to (and:SI (reg:SI) (const_int 255)),
5251 to facilitate possible combining with a cmp into 'ands'. */
5252 if (mode == SImode
5253 && GET_CODE (*op0) == ZERO_EXTEND
5254 && GET_CODE (XEXP (*op0, 0)) == SUBREG
5255 && GET_MODE (XEXP (*op0, 0)) == QImode
5256 && GET_MODE (SUBREG_REG (XEXP (*op0, 0))) == SImode
5257 && subreg_lowpart_p (XEXP (*op0, 0))
5258 && *op1 == const0_rtx)
5259 *op0 = gen_rtx_AND (SImode, SUBREG_REG (XEXP (*op0, 0)),
5260 GEN_INT (255));
5262 /* Comparisons smaller than DImode. Only adjust comparisons against
5263 an out-of-range constant. */
5264 if (!CONST_INT_P (*op1)
5265 || const_ok_for_arm (INTVAL (*op1))
5266 || const_ok_for_arm (- INTVAL (*op1)))
5267 return;
5269 i = INTVAL (*op1);
5271 switch (*code)
5273 case EQ:
5274 case NE:
5275 return;
5277 case GT:
5278 case LE:
5279 if (i != maxval
5280 && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
5282 *op1 = GEN_INT (ARM_SIGN_EXTEND (i + 1));
5283 *code = *code == GT ? GE : LT;
5284 return;
5286 break;
5288 case GE:
5289 case LT:
5290 if (i != ~maxval
5291 && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
5293 *op1 = GEN_INT (i - 1);
5294 *code = *code == GE ? GT : LE;
5295 return;
5297 break;
5299 case GTU:
5300 case LEU:
5301 if (i != ~((unsigned HOST_WIDE_INT) 0)
5302 && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
5304 *op1 = GEN_INT (ARM_SIGN_EXTEND (i + 1));
5305 *code = *code == GTU ? GEU : LTU;
5306 return;
5308 break;
5310 case GEU:
5311 case LTU:
5312 if (i != 0
5313 && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
5315 *op1 = GEN_INT (i - 1);
5316 *code = *code == GEU ? GTU : LEU;
5317 return;
5319 break;
5321 default:
5322 gcc_unreachable ();
5327 /* Define how to find the value returned by a function. */
5329 static rtx
5330 arm_function_value(const_tree type, const_tree func,
5331 bool outgoing ATTRIBUTE_UNUSED)
5333 machine_mode mode;
5334 int unsignedp ATTRIBUTE_UNUSED;
5335 rtx r ATTRIBUTE_UNUSED;
5337 mode = TYPE_MODE (type);
5339 if (TARGET_AAPCS_BASED)
5340 return aapcs_allocate_return_reg (mode, type, func);
5342 /* Promote integer types. */
5343 if (INTEGRAL_TYPE_P (type))
5344 mode = arm_promote_function_mode (type, mode, &unsignedp, func, 1);
5346 /* Promotes small structs returned in a register to full-word size
5347 for big-endian AAPCS. */
5348 if (arm_return_in_msb (type))
5350 HOST_WIDE_INT size = int_size_in_bytes (type);
5351 if (size % UNITS_PER_WORD != 0)
5353 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
5354 mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
5358 return arm_libcall_value_1 (mode);
5361 /* libcall hashtable helpers. */
5363 struct libcall_hasher : nofree_ptr_hash <const rtx_def>
5365 static inline hashval_t hash (const rtx_def *);
5366 static inline bool equal (const rtx_def *, const rtx_def *);
5367 static inline void remove (rtx_def *);
5370 inline bool
5371 libcall_hasher::equal (const rtx_def *p1, const rtx_def *p2)
5373 return rtx_equal_p (p1, p2);
5376 inline hashval_t
5377 libcall_hasher::hash (const rtx_def *p1)
5379 return hash_rtx (p1, VOIDmode, NULL, NULL, FALSE);
5382 typedef hash_table<libcall_hasher> libcall_table_type;
5384 static void
5385 add_libcall (libcall_table_type *htab, rtx libcall)
5387 *htab->find_slot (libcall, INSERT) = libcall;
5390 static bool
5391 arm_libcall_uses_aapcs_base (const_rtx libcall)
5393 static bool init_done = false;
5394 static libcall_table_type *libcall_htab = NULL;
5396 if (!init_done)
5398 init_done = true;
5400 libcall_htab = new libcall_table_type (31);
5401 add_libcall (libcall_htab,
5402 convert_optab_libfunc (sfloat_optab, SFmode, SImode));
5403 add_libcall (libcall_htab,
5404 convert_optab_libfunc (sfloat_optab, DFmode, SImode));
5405 add_libcall (libcall_htab,
5406 convert_optab_libfunc (sfloat_optab, SFmode, DImode));
5407 add_libcall (libcall_htab,
5408 convert_optab_libfunc (sfloat_optab, DFmode, DImode));
5410 add_libcall (libcall_htab,
5411 convert_optab_libfunc (ufloat_optab, SFmode, SImode));
5412 add_libcall (libcall_htab,
5413 convert_optab_libfunc (ufloat_optab, DFmode, SImode));
5414 add_libcall (libcall_htab,
5415 convert_optab_libfunc (ufloat_optab, SFmode, DImode));
5416 add_libcall (libcall_htab,
5417 convert_optab_libfunc (ufloat_optab, DFmode, DImode));
5419 add_libcall (libcall_htab,
5420 convert_optab_libfunc (sext_optab, SFmode, HFmode));
5421 add_libcall (libcall_htab,
5422 convert_optab_libfunc (trunc_optab, HFmode, SFmode));
5423 add_libcall (libcall_htab,
5424 convert_optab_libfunc (sfix_optab, SImode, DFmode));
5425 add_libcall (libcall_htab,
5426 convert_optab_libfunc (ufix_optab, SImode, DFmode));
5427 add_libcall (libcall_htab,
5428 convert_optab_libfunc (sfix_optab, DImode, DFmode));
5429 add_libcall (libcall_htab,
5430 convert_optab_libfunc (ufix_optab, DImode, DFmode));
5431 add_libcall (libcall_htab,
5432 convert_optab_libfunc (sfix_optab, DImode, SFmode));
5433 add_libcall (libcall_htab,
5434 convert_optab_libfunc (ufix_optab, DImode, SFmode));
5436 /* Values from double-precision helper functions are returned in core
5437 registers if the selected core only supports single-precision
5438 arithmetic, even if we are using the hard-float ABI. The same is
5439 true for single-precision helpers, but we will never be using the
5440 hard-float ABI on a CPU which doesn't support single-precision
5441 operations in hardware. */
5442 add_libcall (libcall_htab, optab_libfunc (add_optab, DFmode));
5443 add_libcall (libcall_htab, optab_libfunc (sdiv_optab, DFmode));
5444 add_libcall (libcall_htab, optab_libfunc (smul_optab, DFmode));
5445 add_libcall (libcall_htab, optab_libfunc (neg_optab, DFmode));
5446 add_libcall (libcall_htab, optab_libfunc (sub_optab, DFmode));
5447 add_libcall (libcall_htab, optab_libfunc (eq_optab, DFmode));
5448 add_libcall (libcall_htab, optab_libfunc (lt_optab, DFmode));
5449 add_libcall (libcall_htab, optab_libfunc (le_optab, DFmode));
5450 add_libcall (libcall_htab, optab_libfunc (ge_optab, DFmode));
5451 add_libcall (libcall_htab, optab_libfunc (gt_optab, DFmode));
5452 add_libcall (libcall_htab, optab_libfunc (unord_optab, DFmode));
5453 add_libcall (libcall_htab, convert_optab_libfunc (sext_optab, DFmode,
5454 SFmode));
5455 add_libcall (libcall_htab, convert_optab_libfunc (trunc_optab, SFmode,
5456 DFmode));
5457 add_libcall (libcall_htab,
5458 convert_optab_libfunc (trunc_optab, HFmode, DFmode));
5461 return libcall && libcall_htab->find (libcall) != NULL;
5464 static rtx
5465 arm_libcall_value_1 (machine_mode mode)
5467 if (TARGET_AAPCS_BASED)
5468 return aapcs_libcall_value (mode);
5469 else if (TARGET_IWMMXT_ABI
5470 && arm_vector_mode_supported_p (mode))
5471 return gen_rtx_REG (mode, FIRST_IWMMXT_REGNUM);
5472 else
5473 return gen_rtx_REG (mode, ARG_REGISTER (1));
5476 /* Define how to find the value returned by a library function
5477 assuming the value has mode MODE. */
5479 static rtx
5480 arm_libcall_value (machine_mode mode, const_rtx libcall)
5482 if (TARGET_AAPCS_BASED && arm_pcs_default != ARM_PCS_AAPCS
5483 && GET_MODE_CLASS (mode) == MODE_FLOAT)
5485 /* The following libcalls return their result in integer registers,
5486 even though they return a floating point value. */
5487 if (arm_libcall_uses_aapcs_base (libcall))
5488 return gen_rtx_REG (mode, ARG_REGISTER(1));
5492 return arm_libcall_value_1 (mode);
5495 /* Implement TARGET_FUNCTION_VALUE_REGNO_P. */
5497 static bool
5498 arm_function_value_regno_p (const unsigned int regno)
5500 if (regno == ARG_REGISTER (1)
5501 || (TARGET_32BIT
5502 && TARGET_AAPCS_BASED
5503 && TARGET_HARD_FLOAT
5504 && regno == FIRST_VFP_REGNUM)
5505 || (TARGET_IWMMXT_ABI
5506 && regno == FIRST_IWMMXT_REGNUM))
5507 return true;
5509 return false;
5512 /* Determine the amount of memory needed to store the possible return
5513 registers of an untyped call. */
5514 int
5515 arm_apply_result_size (void)
5517 int size = 16;
5519 if (TARGET_32BIT)
5521 if (TARGET_HARD_FLOAT_ABI)
5522 size += 32;
5523 if (TARGET_IWMMXT_ABI)
5524 size += 8;
5527 return size;
5530 /* Decide whether TYPE should be returned in memory (true)
5531 or in a register (false). FNTYPE is the type of the function making
5532 the call. */
5533 static bool
5534 arm_return_in_memory (const_tree type, const_tree fntype)
5536 HOST_WIDE_INT size;
5538 size = int_size_in_bytes (type); /* Negative if not fixed size. */
5540 if (TARGET_AAPCS_BASED)
5542 /* Simple, non-aggregate types (i.e. not including vectors and
5543 complex) are always returned in a register (or registers).
5544 We don't care about which register here, so we can short-cut
5545 some of the detail. */
5546 if (!AGGREGATE_TYPE_P (type)
5547 && TREE_CODE (type) != VECTOR_TYPE
5548 && TREE_CODE (type) != COMPLEX_TYPE)
5549 return false;
5551 /* Any return value that is no larger than one word can be
5552 returned in r0. */
5553 if (((unsigned HOST_WIDE_INT) size) <= UNITS_PER_WORD)
5554 return false;
5556 /* Check any available co-processors to see if they accept the
5557 type as a register candidate (VFP, for example, can return
5558 some aggregates in consecutive registers). These aren't
5559 available if the call is variadic. */
5560 if (aapcs_select_return_coproc (type, fntype) >= 0)
5561 return false;
5563 /* Vector values should be returned using ARM registers, not
5564 memory (unless they're over 16 bytes, which will break since
5565 we only have four call-clobbered registers to play with). */
5566 if (TREE_CODE (type) == VECTOR_TYPE)
5567 return (size < 0 || size > (4 * UNITS_PER_WORD));
5569 /* The rest go in memory. */
5570 return true;
5573 if (TREE_CODE (type) == VECTOR_TYPE)
5574 return (size < 0 || size > (4 * UNITS_PER_WORD));
5576 if (!AGGREGATE_TYPE_P (type) &&
5577 (TREE_CODE (type) != VECTOR_TYPE))
5578 /* All simple types are returned in registers. */
5579 return false;
5581 if (arm_abi != ARM_ABI_APCS)
5583 /* ATPCS and later return aggregate types in memory only if they are
5584 larger than a word (or are variable size). */
5585 return (size < 0 || size > UNITS_PER_WORD);
5588 /* For the arm-wince targets we choose to be compatible with Microsoft's
5589 ARM and Thumb compilers, which always return aggregates in memory. */
5590 #ifndef ARM_WINCE
5591 /* All structures/unions bigger than one word are returned in memory.
5592 Also catch the case where int_size_in_bytes returns -1. In this case
5593 the aggregate is either huge or of variable size, and in either case
5594 we will want to return it via memory and not in a register. */
5595 if (size < 0 || size > UNITS_PER_WORD)
5596 return true;
5598 if (TREE_CODE (type) == RECORD_TYPE)
5600 tree field;
5602 /* For a struct the APCS says that we only return in a register
5603 if the type is 'integer like' and every addressable element
5604 has an offset of zero. For practical purposes this means
5605 that the structure can have at most one non bit-field element
5606 and that this element must be the first one in the structure. */
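/* An illustrative sketch of these rules: struct { int i; } is returned
   in r0, while struct { float f; } and struct { short s; char c; } are
   returned in memory even though each fits in a word -- the first
   because its leading field is a float, the second because it has an
   addressable element after the first field.  */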
5608 /* Find the first field, ignoring non FIELD_DECL things which will
5609 have been created by C++. */
5610 for (field = TYPE_FIELDS (type);
5611 field && TREE_CODE (field) != FIELD_DECL;
5612 field = DECL_CHAIN (field))
5613 continue;
5615 if (field == NULL)
5616 return false; /* An empty structure. Allowed by an extension to ANSI C. */
5618 /* Check that the first field is valid for returning in a register. */
5620 /* ... Floats are not allowed */
5621 if (FLOAT_TYPE_P (TREE_TYPE (field)))
5622 return true;
5624 /* ... Aggregates that are not themselves valid for returning in
5625 a register are not allowed. */
5626 if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
5627 return true;
5629 /* Now check the remaining fields, if any. Only bitfields are allowed,
5630 since they are not addressable. */
5631 for (field = DECL_CHAIN (field);
5632 field;
5633 field = DECL_CHAIN (field))
5635 if (TREE_CODE (field) != FIELD_DECL)
5636 continue;
5638 if (!DECL_BIT_FIELD_TYPE (field))
5639 return true;
5642 return false;
5645 if (TREE_CODE (type) == UNION_TYPE)
5647 tree field;
5649 /* Unions can be returned in registers if every element is
5650 integral, or can be returned in an integer register. */
5651 for (field = TYPE_FIELDS (type);
5652 field;
5653 field = DECL_CHAIN (field))
5655 if (TREE_CODE (field) != FIELD_DECL)
5656 continue;
5658 if (FLOAT_TYPE_P (TREE_TYPE (field)))
5659 return true;
5661 if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
5662 return true;
5665 return false;
5667 #endif /* not ARM_WINCE */
5669 /* Return all other types in memory. */
5670 return true;
5673 const struct pcs_attribute_arg
5675 const char *arg;
5676 enum arm_pcs value;
5677 } pcs_attribute_args[] =
5679 {"aapcs", ARM_PCS_AAPCS},
5680 {"aapcs-vfp", ARM_PCS_AAPCS_VFP},
5681 #if 0
5682 /* We could recognize these, but changes would be needed elsewhere
5683 * to implement them. */
5684 {"aapcs-iwmmxt", ARM_PCS_AAPCS_IWMMXT},
5685 {"atpcs", ARM_PCS_ATPCS},
5686 {"apcs", ARM_PCS_APCS},
5687 #endif
5688 {NULL, ARM_PCS_UNKNOWN}
5691 static enum arm_pcs
5692 arm_pcs_from_attribute (tree attr)
5694 const struct pcs_attribute_arg *ptr;
5695 const char *arg;
5697 /* Get the value of the argument. */
5698 if (TREE_VALUE (attr) == NULL_TREE
5699 || TREE_CODE (TREE_VALUE (attr)) != STRING_CST)
5700 return ARM_PCS_UNKNOWN;
5702 arg = TREE_STRING_POINTER (TREE_VALUE (attr));
5704 /* Check it against the list of known arguments. */
5705 for (ptr = pcs_attribute_args; ptr->arg != NULL; ptr++)
5706 if (streq (arg, ptr->arg))
5707 return ptr->value;
5709 /* An unrecognized PCS name.  */
5710 return ARM_PCS_UNKNOWN;
5713 /* Get the PCS variant to use for this call. TYPE is the function's type
5714 specification, DECL is the specific declaration.  DECL may be null if
5715 the call could be indirect or if this is a library call. */
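/* A usage sketch (see the GCC manual for the authoritative description
   of the "pcs" attribute):

       double dadd (double, double) __attribute__ ((pcs ("aapcs")));

   forces the base variant for that declaration, so its arguments and
   result stay in core registers even when the default is aapcs-vfp.  */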
5716 static enum arm_pcs
5717 arm_get_pcs_model (const_tree type, const_tree decl)
5719 bool user_convention = false;
5720 enum arm_pcs user_pcs = arm_pcs_default;
5721 tree attr;
5723 gcc_assert (type);
5725 attr = lookup_attribute ("pcs", TYPE_ATTRIBUTES (type));
5726 if (attr)
5728 user_pcs = arm_pcs_from_attribute (TREE_VALUE (attr));
5729 user_convention = true;
5732 if (TARGET_AAPCS_BASED)
5734 /* Detect varargs functions. These always use the base rules
5735 (no argument is ever a candidate for a co-processor
5736 register). */
5737 bool base_rules = stdarg_p (type);
5739 if (user_convention)
5741 if (user_pcs > ARM_PCS_AAPCS_LOCAL)
5742 sorry ("non-AAPCS derived PCS variant");
5743 else if (base_rules && user_pcs != ARM_PCS_AAPCS)
5744 error ("variadic functions must use the base AAPCS variant");
5747 if (base_rules)
5748 return ARM_PCS_AAPCS;
5749 else if (user_convention)
5750 return user_pcs;
5751 else if (decl && flag_unit_at_a_time)
5753 /* Local functions never leak outside this compilation unit,
5754 so we are free to use whatever conventions are
5755 appropriate. */
5756 /* FIXME: remove CONST_CAST_TREE when cgraph is constified. */
5757 cgraph_local_info *i = cgraph_node::local_info (CONST_CAST_TREE(decl));
5758 if (i && i->local)
5759 return ARM_PCS_AAPCS_LOCAL;
5762 else if (user_convention && user_pcs != arm_pcs_default)
5763 sorry ("PCS variant");
5765 /* For everything else we use the target's default. */
5766 return arm_pcs_default;
5770 static void
5771 aapcs_vfp_cum_init (CUMULATIVE_ARGS *pcum ATTRIBUTE_UNUSED,
5772 const_tree fntype ATTRIBUTE_UNUSED,
5773 rtx libcall ATTRIBUTE_UNUSED,
5774 const_tree fndecl ATTRIBUTE_UNUSED)
5776 /* Record the unallocated VFP registers. */
5777 pcum->aapcs_vfp_regs_free = (1 << NUM_VFP_ARG_REGS) - 1;
5778 pcum->aapcs_vfp_reg_alloc = 0;
5781 /* Walk down the type tree of TYPE counting consecutive base elements.
5782 If *MODEP is VOIDmode, then set it to the first valid floating point
5783 type. If a non-floating point type is found, or if a floating point
5784 type that doesn't match a non-VOIDmode *MODEP is found, then return -1,
5785 otherwise return the count in the sub-tree. */
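/* A worked sketch: for struct { double x, y; } each field contributes 1
   with *MODEP set to DFmode, giving a count of 2 -- a homogeneous
   aggregate of two doubles.  For double[4] the count is 4.  For
   struct { float f; double d; } the base types differ, so the result is
   -1 and the argument is not a VFP candidate.  */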
5786 static int
5787 aapcs_vfp_sub_candidate (const_tree type, machine_mode *modep)
5789 machine_mode mode;
5790 HOST_WIDE_INT size;
5792 switch (TREE_CODE (type))
5794 case REAL_TYPE:
5795 mode = TYPE_MODE (type);
5796 if (mode != DFmode && mode != SFmode && mode != HFmode)
5797 return -1;
5799 if (*modep == VOIDmode)
5800 *modep = mode;
5802 if (*modep == mode)
5803 return 1;
5805 break;
5807 case COMPLEX_TYPE:
5808 mode = TYPE_MODE (TREE_TYPE (type));
5809 if (mode != DFmode && mode != SFmode)
5810 return -1;
5812 if (*modep == VOIDmode)
5813 *modep = mode;
5815 if (*modep == mode)
5816 return 2;
5818 break;
5820 case VECTOR_TYPE:
5821 /* Use V2SImode and V4SImode as representatives of all 64-bit
5822 and 128-bit vector types, whether or not those modes are
5823 supported with the present options. */
5824 size = int_size_in_bytes (type);
5825 switch (size)
5827 case 8:
5828 mode = V2SImode;
5829 break;
5830 case 16:
5831 mode = V4SImode;
5832 break;
5833 default:
5834 return -1;
5837 if (*modep == VOIDmode)
5838 *modep = mode;
5840 /* Vector modes are considered to be opaque: two vectors are
5841 equivalent for the purposes of being homogeneous aggregates
5842 if they are the same size. */
5843 if (*modep == mode)
5844 return 1;
5846 break;
5848 case ARRAY_TYPE:
5850 int count;
5851 tree index = TYPE_DOMAIN (type);
5853 /* Can't handle incomplete types nor sizes that are not
5854 fixed. */
5855 if (!COMPLETE_TYPE_P (type)
5856 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
5857 return -1;
5859 count = aapcs_vfp_sub_candidate (TREE_TYPE (type), modep);
5860 if (count == -1
5861 || !index
5862 || !TYPE_MAX_VALUE (index)
5863 || !tree_fits_uhwi_p (TYPE_MAX_VALUE (index))
5864 || !TYPE_MIN_VALUE (index)
5865 || !tree_fits_uhwi_p (TYPE_MIN_VALUE (index))
5866 || count < 0)
5867 return -1;
5869 count *= (1 + tree_to_uhwi (TYPE_MAX_VALUE (index))
5870 - tree_to_uhwi (TYPE_MIN_VALUE (index)));
5872 /* There must be no padding. */
5873 if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
5874 return -1;
5876 return count;
5879 case RECORD_TYPE:
5881 int count = 0;
5882 int sub_count;
5883 tree field;
5885 /* Can't handle incomplete types nor sizes that are not
5886 fixed. */
5887 if (!COMPLETE_TYPE_P (type)
5888 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
5889 return -1;
5891 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
5893 if (TREE_CODE (field) != FIELD_DECL)
5894 continue;
5896 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
5897 if (sub_count < 0)
5898 return -1;
5899 count += sub_count;
5902 /* There must be no padding. */
5903 if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
5904 return -1;
5906 return count;
5909 case UNION_TYPE:
5910 case QUAL_UNION_TYPE:
5912 /* These aren't very interesting except in a degenerate case. */
5913 int count = 0;
5914 int sub_count;
5915 tree field;
5917 /* Can't handle incomplete types nor sizes that are not
5918 fixed. */
5919 if (!COMPLETE_TYPE_P (type)
5920 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
5921 return -1;
5923 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
5925 if (TREE_CODE (field) != FIELD_DECL)
5926 continue;
5928 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
5929 if (sub_count < 0)
5930 return -1;
5931 count = count > sub_count ? count : sub_count;
5934 /* There must be no padding. */
5935 if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
5936 return -1;
5938 return count;
5941 default:
5942 break;
5945 return -1;
5948 /* Return true if PCS_VARIANT should use VFP registers. */
5949 static bool
5950 use_vfp_abi (enum arm_pcs pcs_variant, bool is_double)
5952 if (pcs_variant == ARM_PCS_AAPCS_VFP)
5954 static bool seen_thumb1_vfp = false;
5956 if (TARGET_THUMB1 && !seen_thumb1_vfp)
5958 sorry ("Thumb-1 hard-float VFP ABI");
5959 /* sorry() is not immediately fatal, so only display this once. */
5960 seen_thumb1_vfp = true;
5963 return true;
5966 if (pcs_variant != ARM_PCS_AAPCS_LOCAL)
5967 return false;
5969 return (TARGET_32BIT && TARGET_HARD_FLOAT &&
5970 (TARGET_VFP_DOUBLE || !is_double));
5973 /* Return true if an argument whose type is TYPE, or mode is MODE, is
5974 suitable for passing or returning in VFP registers for the PCS
5975 variant selected. If it is, then *BASE_MODE is updated to contain
5976 a machine mode describing each element of the argument's type and
5977 *COUNT to hold the number of such elements. */
5978 static bool
5979 aapcs_vfp_is_call_or_return_candidate (enum arm_pcs pcs_variant,
5980 machine_mode mode, const_tree type,
5981 machine_mode *base_mode, int *count)
5983 machine_mode new_mode = VOIDmode;
5985 /* If we have the type information, prefer that to working things
5986 out from the mode. */
5987 if (type)
5989 int ag_count = aapcs_vfp_sub_candidate (type, &new_mode);
5991 if (ag_count > 0 && ag_count <= 4)
5992 *count = ag_count;
5993 else
5994 return false;
5996 else if (GET_MODE_CLASS (mode) == MODE_FLOAT
5997 || GET_MODE_CLASS (mode) == MODE_VECTOR_INT
5998 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
6000 *count = 1;
6001 new_mode = mode;
6003 else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
6005 *count = 2;
6006 new_mode = (mode == DCmode ? DFmode : SFmode);
6008 else
6009 return false;
6012 if (!use_vfp_abi (pcs_variant, ARM_NUM_REGS (new_mode) > 1))
6013 return false;
6015 *base_mode = new_mode;
6016 return true;
6019 static bool
6020 aapcs_vfp_is_return_candidate (enum arm_pcs pcs_variant,
6021 machine_mode mode, const_tree type)
6023 int count ATTRIBUTE_UNUSED;
6024 machine_mode ag_mode ATTRIBUTE_UNUSED;
6026 if (!use_vfp_abi (pcs_variant, false))
6027 return false;
6028 return aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type,
6029 &ag_mode, &count);
6032 static bool
6033 aapcs_vfp_is_call_candidate (CUMULATIVE_ARGS *pcum, machine_mode mode,
6034 const_tree type)
6036 if (!use_vfp_abi (pcum->pcs_variant, false))
6037 return false;
6039 return aapcs_vfp_is_call_or_return_candidate (pcum->pcs_variant, mode, type,
6040 &pcum->aapcs_vfp_rmode,
6041 &pcum->aapcs_vfp_rcount);
6044 /* Implement the allocate field in aapcs_cp_arg_layout. See the comment there
6045 for the behaviour of this function. */
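/* A sketch of the allocation below for a homogeneous aggregate of two
   doubles (aapcs_vfp_rmode == DFmode, aapcs_vfp_rcount == 2): shift is
   2 because one DFmode value covers two SFmode slots, mask is 0xf, and
   the loop scans for four consecutive free single-precision slots
   starting at s0, s2, s4, ..., i.e. on a D-register boundary.  */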
6047 static bool
6048 aapcs_vfp_allocate (CUMULATIVE_ARGS *pcum, machine_mode mode,
6049 const_tree type ATTRIBUTE_UNUSED)
6051 int rmode_size
6052 = MAX (GET_MODE_SIZE (pcum->aapcs_vfp_rmode), GET_MODE_SIZE (SFmode));
6053 int shift = rmode_size / GET_MODE_SIZE (SFmode);
6054 unsigned mask = (1 << (shift * pcum->aapcs_vfp_rcount)) - 1;
6055 int regno;
6057 for (regno = 0; regno < NUM_VFP_ARG_REGS; regno += shift)
6058 if (((pcum->aapcs_vfp_regs_free >> regno) & mask) == mask)
6060 pcum->aapcs_vfp_reg_alloc = mask << regno;
6061 if (mode == BLKmode
6062 || (mode == TImode && ! TARGET_NEON)
6063 || ! arm_hard_regno_mode_ok (FIRST_VFP_REGNUM + regno, mode))
6065 int i;
6066 int rcount = pcum->aapcs_vfp_rcount;
6067 int rshift = shift;
6068 machine_mode rmode = pcum->aapcs_vfp_rmode;
6069 rtx par;
6070 if (!TARGET_NEON)
6072 /* Avoid using unsupported vector modes. */
6073 if (rmode == V2SImode)
6074 rmode = DImode;
6075 else if (rmode == V4SImode)
6077 rmode = DImode;
6078 rcount *= 2;
6079 rshift /= 2;
6082 par = gen_rtx_PARALLEL (mode, rtvec_alloc (rcount));
6083 for (i = 0; i < rcount; i++)
6085 rtx tmp = gen_rtx_REG (rmode,
6086 FIRST_VFP_REGNUM + regno + i * rshift);
6087 tmp = gen_rtx_EXPR_LIST
6088 (VOIDmode, tmp,
6089 GEN_INT (i * GET_MODE_SIZE (rmode)));
6090 XVECEXP (par, 0, i) = tmp;
6093 pcum->aapcs_reg = par;
6095 else
6096 pcum->aapcs_reg = gen_rtx_REG (mode, FIRST_VFP_REGNUM + regno);
6097 return true;
6099 return false;
6102 /* Implement the allocate_return_reg field in aapcs_cp_arg_layout. See the
6103 comment there for the behaviour of this function. */
6105 static rtx
6106 aapcs_vfp_allocate_return_reg (enum arm_pcs pcs_variant ATTRIBUTE_UNUSED,
6107 machine_mode mode,
6108 const_tree type ATTRIBUTE_UNUSED)
6110 if (!use_vfp_abi (pcs_variant, false))
6111 return NULL;
6113 if (mode == BLKmode
6114 || (GET_MODE_CLASS (mode) == MODE_INT
6115 && GET_MODE_SIZE (mode) >= GET_MODE_SIZE (TImode)
6116 && !TARGET_NEON))
6118 int count;
6119 machine_mode ag_mode;
6120 int i;
6121 rtx par;
6122 int shift;
6124 aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type,
6125 &ag_mode, &count);
6127 if (!TARGET_NEON)
6129 if (ag_mode == V2SImode)
6130 ag_mode = DImode;
6131 else if (ag_mode == V4SImode)
6133 ag_mode = DImode;
6134 count *= 2;
6137 shift = GET_MODE_SIZE(ag_mode) / GET_MODE_SIZE(SFmode);
6138 par = gen_rtx_PARALLEL (mode, rtvec_alloc (count));
6139 for (i = 0; i < count; i++)
6141 rtx tmp = gen_rtx_REG (ag_mode, FIRST_VFP_REGNUM + i * shift);
6142 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
6143 GEN_INT (i * GET_MODE_SIZE (ag_mode)));
6144 XVECEXP (par, 0, i) = tmp;
6147 return par;
6150 return gen_rtx_REG (mode, FIRST_VFP_REGNUM);
6153 static void
6154 aapcs_vfp_advance (CUMULATIVE_ARGS *pcum ATTRIBUTE_UNUSED,
6155 machine_mode mode ATTRIBUTE_UNUSED,
6156 const_tree type ATTRIBUTE_UNUSED)
6158 pcum->aapcs_vfp_regs_free &= ~pcum->aapcs_vfp_reg_alloc;
6159 pcum->aapcs_vfp_reg_alloc = 0;
6160 return;
6163 #define AAPCS_CP(X) \
6165 aapcs_ ## X ## _cum_init, \
6166 aapcs_ ## X ## _is_call_candidate, \
6167 aapcs_ ## X ## _allocate, \
6168 aapcs_ ## X ## _is_return_candidate, \
6169 aapcs_ ## X ## _allocate_return_reg, \
6170 aapcs_ ## X ## _advance \
6173 /* Table of co-processors that can be used to pass arguments in
6174 registers.  Ideally no argument should be a candidate for more than
6175 one co-processor table entry, but the table is processed in order
6176 and stops after the first match. If that entry then fails to put
6177 the argument into a co-processor register, the argument will go on
6178 the stack. */
6179 static struct
6181 /* Initialize co-processor related state in CUMULATIVE_ARGS structure. */
6182 void (*cum_init) (CUMULATIVE_ARGS *, const_tree, rtx, const_tree);
6184 /* Return true if an argument of mode MODE (or type TYPE if MODE is
6185 BLKmode) is a candidate for this co-processor's registers; this
6186 function should ignore any position-dependent state in
6187 CUMULATIVE_ARGS and only use call-type dependent information. */
6188 bool (*is_call_candidate) (CUMULATIVE_ARGS *, machine_mode, const_tree);
6190 /* Return true if the argument does get a co-processor register; it
6191 should set aapcs_reg to an RTX of the register allocated as is
6192 required for a return from FUNCTION_ARG. */
6193 bool (*allocate) (CUMULATIVE_ARGS *, machine_mode, const_tree);
6195 /* Return true if a result of mode MODE (or type TYPE if MODE is BLKmode) can
6196 be returned in this co-processor's registers. */
6197 bool (*is_return_candidate) (enum arm_pcs, machine_mode, const_tree);
6199 /* Allocate and return an RTX element to hold the return type of a call. This
6200 routine must not fail and will only be called if is_return_candidate
6201 returned true with the same parameters. */
6202 rtx (*allocate_return_reg) (enum arm_pcs, machine_mode, const_tree);
6204 /* Finish processing this argument and prepare to start processing
6205 the next one. */
6206 void (*advance) (CUMULATIVE_ARGS *, machine_mode, const_tree);
6207 } aapcs_cp_arg_layout[ARM_NUM_COPROC_SLOTS] =
6209 AAPCS_CP(vfp)
6212 #undef AAPCS_CP
6214 static int
6215 aapcs_select_call_coproc (CUMULATIVE_ARGS *pcum, machine_mode mode,
6216 const_tree type)
6218 int i;
6220 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
6221 if (aapcs_cp_arg_layout[i].is_call_candidate (pcum, mode, type))
6222 return i;
6224 return -1;
6227 static int
6228 aapcs_select_return_coproc (const_tree type, const_tree fntype)
6230 /* We aren't passed a decl, so we can't check that a call is local.
6231 However, it isn't clear that that would be a win anyway, since it
6232 might limit some tail-calling opportunities. */
6233 enum arm_pcs pcs_variant;
6235 if (fntype)
6237 const_tree fndecl = NULL_TREE;
6239 if (TREE_CODE (fntype) == FUNCTION_DECL)
6241 fndecl = fntype;
6242 fntype = TREE_TYPE (fntype);
6245 pcs_variant = arm_get_pcs_model (fntype, fndecl);
6247 else
6248 pcs_variant = arm_pcs_default;
6250 if (pcs_variant != ARM_PCS_AAPCS)
6252 int i;
6254 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
6255 if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant,
6256 TYPE_MODE (type),
6257 type))
6258 return i;
6260 return -1;
6263 static rtx
6264 aapcs_allocate_return_reg (machine_mode mode, const_tree type,
6265 const_tree fntype)
6267 /* We aren't passed a decl, so we can't check that a call is local.
6268 However, it isn't clear that that would be a win anyway, since it
6269 might limit some tail-calling opportunities. */
6270 enum arm_pcs pcs_variant;
6271 int unsignedp ATTRIBUTE_UNUSED;
6273 if (fntype)
6275 const_tree fndecl = NULL_TREE;
6277 if (TREE_CODE (fntype) == FUNCTION_DECL)
6279 fndecl = fntype;
6280 fntype = TREE_TYPE (fntype);
6283 pcs_variant = arm_get_pcs_model (fntype, fndecl);
6285 else
6286 pcs_variant = arm_pcs_default;
6288 /* Promote integer types. */
6289 if (type && INTEGRAL_TYPE_P (type))
6290 mode = arm_promote_function_mode (type, mode, &unsignedp, fntype, 1);
6292 if (pcs_variant != ARM_PCS_AAPCS)
6294 int i;
6296 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
6297 if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant, mode,
6298 type))
6299 return aapcs_cp_arg_layout[i].allocate_return_reg (pcs_variant,
6300 mode, type);
6303 /* Promotes small structs returned in a register to full-word size
6304 for big-endian AAPCS. */
6305 if (type && arm_return_in_msb (type))
6307 HOST_WIDE_INT size = int_size_in_bytes (type);
6308 if (size % UNITS_PER_WORD != 0)
6310 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
6311 mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
6315 return gen_rtx_REG (mode, R0_REGNUM);
6318 static rtx
6319 aapcs_libcall_value (machine_mode mode)
6321 if (BYTES_BIG_ENDIAN && ALL_FIXED_POINT_MODE_P (mode)
6322 && GET_MODE_SIZE (mode) <= 4)
6323 mode = SImode;
6325 return aapcs_allocate_return_reg (mode, NULL_TREE, NULL_TREE);
6328 /* Lay out a function argument using the AAPCS rules. The rule
6329 numbers referred to here are those in the AAPCS. */
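/* A worked sketch for f (int a, long long b) under the base AAPCS:
   a occupies r0 (rule C4); b needs doubleword alignment, so C3 rounds
   the NCRN from 1 up to 2 and b occupies r2-r3 (C4).  A further
   doubleword argument would find no core registers left and would be
   placed on the stack (C6-C8).  */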
6330 static void
6331 aapcs_layout_arg (CUMULATIVE_ARGS *pcum, machine_mode mode,
6332 const_tree type, bool named)
6334 int nregs, nregs2;
6335 int ncrn;
6337 /* We only need to do this once per argument. */
6338 if (pcum->aapcs_arg_processed)
6339 return;
6341 pcum->aapcs_arg_processed = true;
6343 /* Special case: if named is false then we are handling an incoming
6344 anonymous argument which is on the stack. */
6345 if (!named)
6346 return;
6348 /* Is this a potential co-processor register candidate? */
6349 if (pcum->pcs_variant != ARM_PCS_AAPCS)
6351 int slot = aapcs_select_call_coproc (pcum, mode, type);
6352 pcum->aapcs_cprc_slot = slot;
6354 /* We don't have to apply any of the rules from part B of the
6355 preparation phase; these are handled elsewhere in the
6356 compiler. */
6358 if (slot >= 0)
6360 /* A Co-processor register candidate goes either in its own
6361 class of registers or on the stack. */
6362 if (!pcum->aapcs_cprc_failed[slot])
6364 /* C1.cp - Try to allocate the argument to co-processor
6365 registers. */
6366 if (aapcs_cp_arg_layout[slot].allocate (pcum, mode, type))
6367 return;
6369 /* C2.cp - Put the argument on the stack and note that we
6370 can't assign any more candidates in this slot. We also
6371 need to note that we have allocated stack space, so that
6372 we won't later try to split a non-cprc candidate between
6373 core registers and the stack. */
6374 pcum->aapcs_cprc_failed[slot] = true;
6375 pcum->can_split = false;
6378 /* We didn't get a register, so this argument goes on the
6379 stack. */
6380 gcc_assert (pcum->can_split == false);
6381 return;
6385 /* C3 - For double-word aligned arguments, round the NCRN up to the
6386 next even number. */
6387 ncrn = pcum->aapcs_ncrn;
6388 if (ncrn & 1)
6390 int res = arm_needs_doubleword_align (mode, type);
6391 /* Only warn during RTL expansion of call stmts, otherwise we would
6392 warn e.g. during gimplification even on functions that will be
6393 always inlined, and we'd warn multiple times. Don't warn when
6394 called in expand_function_start either, as we warn instead in
6395 arm_function_arg_boundary in that case. */
6396 if (res < 0 && warn_psabi && currently_expanding_gimple_stmt)
6397 inform (input_location, "parameter passing for argument of type "
6398 "%qT changed in GCC 7.1", type);
6399 else if (res > 0)
6400 ncrn++;
6403 nregs = ARM_NUM_REGS2(mode, type);
6405 /* Sigh, this test should really assert that nregs > 0, but a GCC
6406 extension allows empty structs and then gives them empty size; it
6407 then allows such a structure to be passed by value. For some of
6408 the code below we have to pretend that such an argument has
6409 non-zero size so that we 'locate' it correctly either in
6410 registers or on the stack. */
6411 gcc_assert (nregs >= 0);
6413 nregs2 = nregs ? nregs : 1;
6415 /* C4 - Argument fits entirely in core registers. */
6416 if (ncrn + nregs2 <= NUM_ARG_REGS)
6418 pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
6419 pcum->aapcs_next_ncrn = ncrn + nregs;
6420 return;
6423 /* C5 - Some core registers left and there are no arguments already
6424 on the stack: split this argument between the remaining core
6425 registers and the stack. */
6426 if (ncrn < NUM_ARG_REGS && pcum->can_split)
6428 pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
6429 pcum->aapcs_next_ncrn = NUM_ARG_REGS;
6430 pcum->aapcs_partial = (NUM_ARG_REGS - ncrn) * UNITS_PER_WORD;
6431 return;
6434 /* C6 - NCRN is set to 4. */
6435 pcum->aapcs_next_ncrn = NUM_ARG_REGS;
6437 /* C7,C8 - argument goes on the stack.  We have nothing to do here.  */
6438 return;
6441 /* Initialize a variable CUM of type CUMULATIVE_ARGS
6442 for a call to a function whose data type is FNTYPE.
6443 For a library call, FNTYPE is NULL. */
6444 void
6445 arm_init_cumulative_args (CUMULATIVE_ARGS *pcum, tree fntype,
6446 rtx libname,
6447 tree fndecl ATTRIBUTE_UNUSED)
6449 /* Long call handling. */
6450 if (fntype)
6451 pcum->pcs_variant = arm_get_pcs_model (fntype, fndecl);
6452 else
6453 pcum->pcs_variant = arm_pcs_default;
6455 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
6457 if (arm_libcall_uses_aapcs_base (libname))
6458 pcum->pcs_variant = ARM_PCS_AAPCS;
6460 pcum->aapcs_ncrn = pcum->aapcs_next_ncrn = 0;
6461 pcum->aapcs_reg = NULL_RTX;
6462 pcum->aapcs_partial = 0;
6463 pcum->aapcs_arg_processed = false;
6464 pcum->aapcs_cprc_slot = -1;
6465 pcum->can_split = true;
6467 if (pcum->pcs_variant != ARM_PCS_AAPCS)
6469 int i;
6471 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
6473 pcum->aapcs_cprc_failed[i] = false;
6474 aapcs_cp_arg_layout[i].cum_init (pcum, fntype, libname, fndecl);
6477 return;
6480 /* Legacy ABIs */
6482 /* On the ARM, the offset starts at 0. */
6483 pcum->nregs = 0;
6484 pcum->iwmmxt_nregs = 0;
6485 pcum->can_split = true;
6487 /* Varargs vectors are treated the same as long long.
6488 named_count avoids having to change the way arm handles 'named'.  */
6489 pcum->named_count = 0;
6490 pcum->nargs = 0;
6492 if (TARGET_REALLY_IWMMXT && fntype)
6494 tree fn_arg;
6496 for (fn_arg = TYPE_ARG_TYPES (fntype);
6497 fn_arg;
6498 fn_arg = TREE_CHAIN (fn_arg))
6499 pcum->named_count += 1;
6501 if (! pcum->named_count)
6502 pcum->named_count = INT_MAX;
6506 /* Return 1 if double word alignment is required for argument passing.
6507 Return -1 if double word alignment used to be required for argument
6508 passing before PR77728 ABI fix, but is not required anymore.
6509 Return 0 if double word alignment is not required and wasn't required
6510 before either. */
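/* Illustrative cases (a sketch): long long and double have 64-bit
   alignment, so this returns 1 and such arguments start in an
   even-numbered core register; plain int returns 0.  A C++ aggregate
   whose only members with greater-than-PARM_BOUNDARY alignment are not
   FIELD_DECLs (an over-aligned static data member, for instance) used
   to force doubleword alignment before the PR77728 fix and now yields
   -1, so that -Wpsabi can point out the change.  */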
6511 static int
6512 arm_needs_doubleword_align (machine_mode mode, const_tree type)
6514 if (!type)
6515 return GET_MODE_ALIGNMENT (mode) > PARM_BOUNDARY;
6517 /* Scalar and vector types: Use natural alignment, i.e. of base type. */
6518 if (!AGGREGATE_TYPE_P (type))
6519 return TYPE_ALIGN (TYPE_MAIN_VARIANT (type)) > PARM_BOUNDARY;
6521 /* Array types: Use member alignment of element type. */
6522 if (TREE_CODE (type) == ARRAY_TYPE)
6523 return TYPE_ALIGN (TREE_TYPE (type)) > PARM_BOUNDARY;
6525 int ret = 0;
6526 /* Record/aggregate types: Use greatest member alignment of any member. */
6527 for (tree field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6528 if (DECL_ALIGN (field) > PARM_BOUNDARY)
6530 if (TREE_CODE (field) == FIELD_DECL)
6531 return 1;
6532 else
6533 /* Before PR77728 fix, we were incorrectly considering also
6534 other aggregate fields, like VAR_DECLs, TYPE_DECLs etc.
6535 Make sure we can warn about that with -Wpsabi. */
6536 ret = -1;
6539 return ret;
6543 /* Determine where to put an argument to a function.
6544 Value is zero to push the argument on the stack,
6545 or a hard register in which to store the argument.
6547 MODE is the argument's machine mode.
6548 TYPE is the data type of the argument (as a tree).
6549 This is null for libcalls where that information may
6550 not be available.
6551 CUM is a variable of type CUMULATIVE_ARGS which gives info about
6552 the preceding args and about the function being called.
6553 NAMED is nonzero if this argument is a named parameter
6554 (otherwise it is an extra parameter matching an ellipsis).
6556 On the ARM, normally the first 16 bytes are passed in registers r0-r3; all
6557 other arguments are passed on the stack. If (NAMED == 0) (which happens
6558 only in assign_parms, since TARGET_SETUP_INCOMING_VARARGS is
6559 defined), say it is passed in the stack (function_prologue will
6560 indeed make it pass in the stack if necessary). */
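/* For example (a sketch): for f (int a, int b, int c, int d, int e),
   a..d are passed in r0-r3; for e no core register is left, so
   NULL_RTX is returned below and e goes on the stack.  */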
6562 static rtx
6563 arm_function_arg (cumulative_args_t pcum_v, machine_mode mode,
6564 const_tree type, bool named)
6566 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
6567 int nregs;
6569 /* Handle the special case quickly. Pick an arbitrary value for op2 of
6570 a call insn (op3 of a call_value insn). */
6571 if (mode == VOIDmode)
6572 return const0_rtx;
6574 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
6576 aapcs_layout_arg (pcum, mode, type, named);
6577 return pcum->aapcs_reg;
6580 /* Varargs vectors are treated the same as long long.
6581 named_count avoids having to change the way arm handles 'named'.  */
6582 if (TARGET_IWMMXT_ABI
6583 && arm_vector_mode_supported_p (mode)
6584 && pcum->named_count > pcum->nargs + 1)
6586 if (pcum->iwmmxt_nregs <= 9)
6587 return gen_rtx_REG (mode, pcum->iwmmxt_nregs + FIRST_IWMMXT_REGNUM);
6588 else
6590 pcum->can_split = false;
6591 return NULL_RTX;
6595 /* Put doubleword aligned quantities in even register pairs. */
6596 if ((pcum->nregs & 1) && ARM_DOUBLEWORD_ALIGN)
6598 int res = arm_needs_doubleword_align (mode, type);
6599 if (res < 0 && warn_psabi)
6600 inform (input_location, "parameter passing for argument of type "
6601 "%qT changed in GCC 7.1", type);
6602 else if (res > 0)
6603 pcum->nregs++;
6606 /* Only allow splitting an arg between regs and memory if all preceding
6607 args were allocated to regs. For args passed by reference we only count
6608 the reference pointer. */
6609 if (pcum->can_split)
6610 nregs = 1;
6611 else
6612 nregs = ARM_NUM_REGS2 (mode, type);
6614 if (!named || pcum->nregs + nregs > NUM_ARG_REGS)
6615 return NULL_RTX;
6617 return gen_rtx_REG (mode, pcum->nregs);
6620 static unsigned int
6621 arm_function_arg_boundary (machine_mode mode, const_tree type)
6623 if (!ARM_DOUBLEWORD_ALIGN)
6624 return PARM_BOUNDARY;
6626 int res = arm_needs_doubleword_align (mode, type);
6627 if (res < 0 && warn_psabi)
6628 inform (input_location, "parameter passing for argument of type %qT "
6629 "changed in GCC 7.1", type);
6631 return res > 0 ? DOUBLEWORD_ALIGNMENT : PARM_BOUNDARY;
6634 static int
6635 arm_arg_partial_bytes (cumulative_args_t pcum_v, machine_mode mode,
6636 tree type, bool named)
6638 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
6639 int nregs = pcum->nregs;
6641 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
6643 aapcs_layout_arg (pcum, mode, type, named);
6644 return pcum->aapcs_partial;
6647 if (TARGET_IWMMXT_ABI && arm_vector_mode_supported_p (mode))
6648 return 0;
6650 if (NUM_ARG_REGS > nregs
6651 && (NUM_ARG_REGS < nregs + ARM_NUM_REGS2 (mode, type))
6652 && pcum->can_split)
6653 return (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
6655 return 0;
6658 /* Update the data in PCUM to advance over an argument
6659 of mode MODE and data type TYPE.
6660 (TYPE is null for libcalls where that information may not be available.) */
6662 static void
6663 arm_function_arg_advance (cumulative_args_t pcum_v, machine_mode mode,
6664 const_tree type, bool named)
6666 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
6668 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
6670 aapcs_layout_arg (pcum, mode, type, named);
6672 if (pcum->aapcs_cprc_slot >= 0)
6674 aapcs_cp_arg_layout[pcum->aapcs_cprc_slot].advance (pcum, mode,
6675 type);
6676 pcum->aapcs_cprc_slot = -1;
6679 /* Generic stuff. */
6680 pcum->aapcs_arg_processed = false;
6681 pcum->aapcs_ncrn = pcum->aapcs_next_ncrn;
6682 pcum->aapcs_reg = NULL_RTX;
6683 pcum->aapcs_partial = 0;
6685 else
6687 pcum->nargs += 1;
6688 if (arm_vector_mode_supported_p (mode)
6689 && pcum->named_count > pcum->nargs
6690 && TARGET_IWMMXT_ABI)
6691 pcum->iwmmxt_nregs += 1;
6692 else
6693 pcum->nregs += ARM_NUM_REGS2 (mode, type);
6697 /* Variable sized types are passed by reference. This is a GCC
6698 extension to the ARM ABI. */
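/* For instance (a GNU C sketch): an object of a local type such as
   struct vla { char buf[n]; } has a non-constant TYPE_SIZE, so handing
   it to a callee passes a pointer to it rather than a copy.  */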
6700 static bool
6701 arm_pass_by_reference (cumulative_args_t cum ATTRIBUTE_UNUSED,
6702 machine_mode mode ATTRIBUTE_UNUSED,
6703 const_tree type, bool named ATTRIBUTE_UNUSED)
6705 return type && TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST;
6708 /* Encode the current state of the #pragma [no_]long_calls. */
6709 typedef enum
6711 OFF, /* No #pragma [no_]long_calls is in effect. */
6712 LONG, /* #pragma long_calls is in effect. */
6713 SHORT /* #pragma no_long_calls is in effect. */
6714 } arm_pragma_enum;
6716 static arm_pragma_enum arm_pragma_long_calls = OFF;
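/* A usage sketch for these pragmas:

       #pragma long_calls
       void far_away (void);      (declared with the long_call attribute)
       #pragma long_calls_off
       void nearby (void);        (back to the command-line default)
*/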
6718 void
6719 arm_pr_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
6721 arm_pragma_long_calls = LONG;
6724 void
6725 arm_pr_no_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
6727 arm_pragma_long_calls = SHORT;
6730 void
6731 arm_pr_long_calls_off (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
6733 arm_pragma_long_calls = OFF;
6736 /* Handle an attribute requiring a FUNCTION_DECL;
6737 arguments as in struct attribute_spec.handler. */
6738 static tree
6739 arm_handle_fndecl_attribute (tree *node, tree name, tree args ATTRIBUTE_UNUSED,
6740 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
6742 if (TREE_CODE (*node) != FUNCTION_DECL)
6744 warning (OPT_Wattributes, "%qE attribute only applies to functions",
6745 name);
6746 *no_add_attrs = true;
6749 return NULL_TREE;
6752 /* Handle an "interrupt" or "isr" attribute;
6753 arguments as in struct attribute_spec.handler. */
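/* A usage sketch (the argument names the exception kind, e.g. "IRQ",
   "FIQ", "SWI", "ABORT" or "UNDEF"):

       void dma_done (void) __attribute__ ((interrupt ("IRQ")));

   marks dma_done as an IRQ handler so the appropriate entry and exit
   sequences are generated for it.  */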
6754 static tree
6755 arm_handle_isr_attribute (tree *node, tree name, tree args, int flags,
6756 bool *no_add_attrs)
6758 if (DECL_P (*node))
6760 if (TREE_CODE (*node) != FUNCTION_DECL)
6762 warning (OPT_Wattributes, "%qE attribute only applies to functions",
6763 name);
6764 *no_add_attrs = true;
6766 /* FIXME: the argument if any is checked for type attributes;
6767 should it be checked for decl ones? */
6769 else
6771 if (TREE_CODE (*node) == FUNCTION_TYPE
6772 || TREE_CODE (*node) == METHOD_TYPE)
6774 if (arm_isr_value (args) == ARM_FT_UNKNOWN)
6776 warning (OPT_Wattributes, "%qE attribute ignored",
6777 name);
6778 *no_add_attrs = true;
6781 else if (TREE_CODE (*node) == POINTER_TYPE
6782 && (TREE_CODE (TREE_TYPE (*node)) == FUNCTION_TYPE
6783 || TREE_CODE (TREE_TYPE (*node)) == METHOD_TYPE)
6784 && arm_isr_value (args) != ARM_FT_UNKNOWN)
6786 *node = build_variant_type_copy (*node);
6787 TREE_TYPE (*node) = build_type_attribute_variant
6788 (TREE_TYPE (*node),
6789 tree_cons (name, args, TYPE_ATTRIBUTES (TREE_TYPE (*node))));
6790 *no_add_attrs = true;
6792 else
6794 /* Possibly pass this attribute on from the type to a decl. */
6795 if (flags & ((int) ATTR_FLAG_DECL_NEXT
6796 | (int) ATTR_FLAG_FUNCTION_NEXT
6797 | (int) ATTR_FLAG_ARRAY_NEXT))
6799 *no_add_attrs = true;
6800 return tree_cons (name, args, NULL_TREE);
6802 else
6804 warning (OPT_Wattributes, "%qE attribute ignored",
6805 name);
6810 return NULL_TREE;
6813 /* Handle a "pcs" attribute; arguments as in struct
6814 attribute_spec.handler. */
6815 static tree
6816 arm_handle_pcs_attribute (tree *node ATTRIBUTE_UNUSED, tree name, tree args,
6817 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
6819 if (arm_pcs_from_attribute (args) == ARM_PCS_UNKNOWN)
6821 warning (OPT_Wattributes, "%qE attribute ignored", name);
6822 *no_add_attrs = true;
6824 return NULL_TREE;
6827 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
6828 /* Handle the "notshared" attribute. This attribute is another way of
6829 requesting hidden visibility. ARM's compiler supports
6830 "__declspec(notshared)"; we support the same thing via an
6831 attribute. */
6833 static tree
6834 arm_handle_notshared_attribute (tree *node,
6835 tree name ATTRIBUTE_UNUSED,
6836 tree args ATTRIBUTE_UNUSED,
6837 int flags ATTRIBUTE_UNUSED,
6838 bool *no_add_attrs)
6840 tree decl = TYPE_NAME (*node);
6842 if (decl)
6844 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
6845 DECL_VISIBILITY_SPECIFIED (decl) = 1;
6846 *no_add_attrs = false;
6848 return NULL_TREE;
6850 #endif
6852 /* This function returns true if a function with declaration FNDECL and type
6853 FNTYPE uses the stack to pass arguments or to return values, and false
6854 otherwise. This is used for functions with the attributes
6855 'cmse_nonsecure_call' or 'cmse_nonsecure_entry'; diagnostic messages are
6856 issued if the stack is used. NAME is the name of the attribute
6857 used. */
6859 static bool
6860 cmse_func_args_or_return_in_stack (tree fndecl, tree name, tree fntype)
6862 function_args_iterator args_iter;
6863 CUMULATIVE_ARGS args_so_far_v;
6864 cumulative_args_t args_so_far;
6865 bool first_param = true;
6866 tree arg_type, prev_arg_type = NULL_TREE, ret_type;
6868 /* Error out if any argument is passed on the stack. */
6869 arm_init_cumulative_args (&args_so_far_v, fntype, NULL_RTX, fndecl);
6870 args_so_far = pack_cumulative_args (&args_so_far_v);
6871 FOREACH_FUNCTION_ARGS (fntype, arg_type, args_iter)
6873 rtx arg_rtx;
6874 machine_mode arg_mode = TYPE_MODE (arg_type);
6876 prev_arg_type = arg_type;
6877 if (VOID_TYPE_P (arg_type))
6878 continue;
6880 if (!first_param)
6881 arm_function_arg_advance (args_so_far, arg_mode, arg_type, true);
6882 arg_rtx = arm_function_arg (args_so_far, arg_mode, arg_type, true);
6883 if (!arg_rtx
6884 || arm_arg_partial_bytes (args_so_far, arg_mode, arg_type, true))
6886 error ("%qE attribute not available to functions with arguments "
6887 "passed on the stack", name);
6888 return true;
6890 first_param = false;
6893 /* Error out for variadic functions since we cannot control how many
6894 arguments will be passed and thus stack could be used. stdarg_p () is not
6895 used for the checking to avoid browsing arguments twice. */
6896 if (prev_arg_type != NULL_TREE && !VOID_TYPE_P (prev_arg_type))
6898 error ("%qE attribute not available to functions with variable number "
6899 "of arguments", name);
6900 return true;
6903 /* Error out if return value is passed on the stack. */
6904 ret_type = TREE_TYPE (fntype);
6905 if (arm_return_in_memory (ret_type, fntype))
6907 error ("%qE attribute not available to functions that return value on "
6908 "the stack", name);
6909 return true;
6911 return false;
6914 /* Called upon detection of the use of the cmse_nonsecure_entry attribute, this
6915 function will check whether the attribute is allowed here and will add the
6916 attribute to the function declaration tree or otherwise issue a warning. */
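/* A usage sketch, assuming compilation with -mcmse:

       int __attribute__ ((cmse_nonsecure_entry)) get_key (int slot);

   The checks below reject, among other things, entry functions whose
   arguments or return value would have to be passed on the stack.  */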
6918 static tree
6919 arm_handle_cmse_nonsecure_entry (tree *node, tree name,
6920 tree /* args */,
6921 int /* flags */,
6922 bool *no_add_attrs)
6924 tree fndecl;
6926 if (!use_cmse)
6928 *no_add_attrs = true;
6929 warning (OPT_Wattributes, "%qE attribute ignored without -mcmse option.",
6930 name);
6931 return NULL_TREE;
6934 /* Ignore attribute for function types. */
6935 if (TREE_CODE (*node) != FUNCTION_DECL)
6937 warning (OPT_Wattributes, "%qE attribute only applies to functions",
6938 name);
6939 *no_add_attrs = true;
6940 return NULL_TREE;
6943 fndecl = *node;
6945 /* Warn for static linkage functions. */
6946 if (!TREE_PUBLIC (fndecl))
6948 warning (OPT_Wattributes, "%qE attribute has no effect on functions "
6949 "with static linkage", name);
6950 *no_add_attrs = true;
6951 return NULL_TREE;
6954 *no_add_attrs |= cmse_func_args_or_return_in_stack (fndecl, name,
6955 TREE_TYPE (fndecl));
6956 return NULL_TREE;
6960 /* Called upon detection of the use of the cmse_nonsecure_call attribute, this
6961 function will check whether the attribute is allowed here and will add the
6962 attribute to the function type tree or otherwise issue a diagnostic. The
6963 reason we check this at declaration time is to only allow the use of the
6964 attribute with declarations of function pointers and not function
6965 declarations. This function checks NODE is of the expected type and issues
6966 diagnostics otherwise using NAME. If it is not of the expected type
6967 *NO_ADD_ATTRS will be set to true. */
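/* A usage sketch, assuming compilation with -mcmse:

       typedef void __attribute__ ((cmse_nonsecure_call)) ns_cb (int);
       ns_cb *callback;

   The attribute is accepted on function-pointer (or function-typedef)
   declarations such as these, not on ordinary function declarations.  */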
6969 static tree
6970 arm_handle_cmse_nonsecure_call (tree *node, tree name,
6971 tree /* args */,
6972 int /* flags */,
6973 bool *no_add_attrs)
6975 tree decl = NULL_TREE, fntype = NULL_TREE;
6976 tree type;
6978 if (!use_cmse)
6980 *no_add_attrs = true;
6981 warning (OPT_Wattributes, "%qE attribute ignored without -mcmse option.",
6982 name);
6983 return NULL_TREE;
6986 if (TREE_CODE (*node) == VAR_DECL || TREE_CODE (*node) == TYPE_DECL)
6988 decl = *node;
6989 fntype = TREE_TYPE (decl);
6992 while (fntype != NULL_TREE && TREE_CODE (fntype) == POINTER_TYPE)
6993 fntype = TREE_TYPE (fntype);
6995 if (!decl || TREE_CODE (fntype) != FUNCTION_TYPE)
6997 warning (OPT_Wattributes, "%qE attribute only applies to base type of a "
6998 "function pointer", name);
6999 *no_add_attrs = true;
7000 return NULL_TREE;
7003 *no_add_attrs |= cmse_func_args_or_return_in_stack (NULL, name, fntype);
7005 if (*no_add_attrs)
7006 return NULL_TREE;
7008 /* Prevent trees being shared among function types with and without
7009 cmse_nonsecure_call attribute. */
7010 type = TREE_TYPE (decl);
7012 type = build_distinct_type_copy (type);
7013 TREE_TYPE (decl) = type;
7014 fntype = type;
7016 while (TREE_CODE (fntype) != FUNCTION_TYPE)
7018 type = fntype;
7019 fntype = TREE_TYPE (fntype);
7020 fntype = build_distinct_type_copy (fntype);
7021 TREE_TYPE (type) = fntype;
7024 /* Construct a type attribute and add it to the function type. */
7025 tree attrs = tree_cons (get_identifier ("cmse_nonsecure_call"), NULL_TREE,
7026 TYPE_ATTRIBUTES (fntype));
7027 TYPE_ATTRIBUTES (fntype) = attrs;
7028 return NULL_TREE;
7031 /* Return 0 if the attributes for two types are incompatible, 1 if they
7032 are compatible, and 2 if they are nearly compatible (which causes a
7033 warning to be generated). */
7034 static int
7035 arm_comp_type_attributes (const_tree type1, const_tree type2)
7037 int l1, l2, s1, s2;
7039 /* Check for mismatch of non-default calling convention. */
7040 if (TREE_CODE (type1) != FUNCTION_TYPE)
7041 return 1;
7043 /* Check for mismatched call attributes. */
7044 l1 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type1)) != NULL;
7045 l2 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type2)) != NULL;
7046 s1 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type1)) != NULL;
7047 s2 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type2)) != NULL;
7049 /* Only bother to check if an attribute is defined. */
7050 if (l1 | l2 | s1 | s2)
7052 /* If one type has an attribute, the other must have the same attribute. */
7053 if ((l1 != l2) || (s1 != s2))
7054 return 0;
7056 /* Disallow mixed attributes. */
7057 if ((l1 & s2) || (l2 & s1))
7058 return 0;
7061 /* Check for mismatched ISR attribute. */
7062 l1 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type1)) != NULL;
7063 if (! l1)
7064 l1 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type1)) != NULL;
7065 l2 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type2)) != NULL;
7066 if (! l2)
7067 l2 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type2)) != NULL;
7068 if (l1 != l2)
7069 return 0;
7071 l1 = lookup_attribute ("cmse_nonsecure_call",
7072 TYPE_ATTRIBUTES (type1)) != NULL;
7073 l2 = lookup_attribute ("cmse_nonsecure_call",
7074 TYPE_ATTRIBUTES (type2)) != NULL;
7076 if (l1 != l2)
7077 return 0;
7079 return 1;
7082 /* Assigns default attributes to newly defined type. This is used to
7083 set short_call/long_call attributes for function types of
7084 functions defined inside corresponding #pragma scopes. */
7085 static void
7086 arm_set_default_type_attributes (tree type)
7088 /* Add __attribute__ ((long_call)) to all functions, when
7089 inside #pragma long_calls or __attribute__ ((short_call)),
7090 when inside #pragma no_long_calls. */
7091 if (TREE_CODE (type) == FUNCTION_TYPE || TREE_CODE (type) == METHOD_TYPE)
7093 tree type_attr_list, attr_name;
7094 type_attr_list = TYPE_ATTRIBUTES (type);
7096 if (arm_pragma_long_calls == LONG)
7097 attr_name = get_identifier ("long_call");
7098 else if (arm_pragma_long_calls == SHORT)
7099 attr_name = get_identifier ("short_call");
7100 else
7101 return;
7103 type_attr_list = tree_cons (attr_name, NULL_TREE, type_attr_list);
7104 TYPE_ATTRIBUTES (type) = type_attr_list;
7108 /* Return true if DECL is known to be linked into section SECTION. */
7110 static bool
7111 arm_function_in_section_p (tree decl, section *section)
7113 /* We can only be certain about the prevailing symbol definition. */
7114 if (!decl_binds_to_current_def_p (decl))
7115 return false;
7117 /* If DECL_SECTION_NAME is set, assume it is trustworthy. */
7118 if (!DECL_SECTION_NAME (decl))
7120 /* Make sure that we will not create a unique section for DECL. */
7121 if (flag_function_sections || DECL_COMDAT_GROUP (decl))
7122 return false;
7125 return function_section (decl) == section;
7128 /* Return nonzero if a 32-bit "long_call" should be generated for
7129 a call from the current function to DECL. We generate a long_call
7130 if the function:
7132 a. has an __attribute__((long_call))
7133 or b. is within the scope of a #pragma long_calls
7134 or c. the -mlong-calls command line switch has been specified
7136 However we do not generate a long call if the function:
7138 d. has an __attribute__ ((short_call))
7139 or e. is inside the scope of a #pragma no_long_calls
7140 or f. is defined in the same section as the current function. */
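/* For example (a sketch): building with -mlong-calls, or declaring

       void far_away (void) __attribute__ ((long_call));

   makes calls to far_away use a full 32-bit address sequence, unless
   one of the exceptions d.-f. above applies.  */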
7142 bool
7143 arm_is_long_call_p (tree decl)
7145 tree attrs;
7147 if (!decl)
7148 return TARGET_LONG_CALLS;
7150 attrs = TYPE_ATTRIBUTES (TREE_TYPE (decl));
7151 if (lookup_attribute ("short_call", attrs))
7152 return false;
7154 /* For "f", be conservative, and only cater for cases in which the
7155 whole of the current function is placed in the same section. */
7156 if (!flag_reorder_blocks_and_partition
7157 && TREE_CODE (decl) == FUNCTION_DECL
7158 && arm_function_in_section_p (decl, current_function_section ()))
7159 return false;
7161 if (lookup_attribute ("long_call", attrs))
7162 return true;
7164 return TARGET_LONG_CALLS;
7167 /* Return nonzero if it is ok to make a tail-call to DECL. */
7168 static bool
7169 arm_function_ok_for_sibcall (tree decl, tree exp)
7171 unsigned long func_type;
7173 if (cfun->machine->sibcall_blocked)
7174 return false;
7176 /* Never tailcall something if we are generating code for Thumb-1. */
7177 if (TARGET_THUMB1)
7178 return false;
7180 /* The PIC register is live on entry to VxWorks PLT entries, so we
7181 must make the call before restoring the PIC register. */
7182 if (TARGET_VXWORKS_RTP && flag_pic && decl && !targetm.binds_local_p (decl))
7183 return false;
7185 /* ??? Cannot tail-call to long calls with APCS frame and VFP, because IP
7186 may be used both as target of the call and base register for restoring
7187 the VFP registers.  */
7188 if (TARGET_APCS_FRAME && TARGET_ARM
7189 && TARGET_HARD_FLOAT
7190 && decl && arm_is_long_call_p (decl))
7191 return false;
7193 /* If we are interworking and the function is not declared static
7194 then we can't tail-call it unless we know that it exists in this
7195 compilation unit (since it might be a Thumb routine). */
7196 if (TARGET_INTERWORK && decl && TREE_PUBLIC (decl)
7197 && !TREE_ASM_WRITTEN (decl))
7198 return false;
7200 func_type = arm_current_func_type ();
7201 /* Never tailcall from an ISR routine - it needs a special exit sequence. */
7202 if (IS_INTERRUPT (func_type))
7203 return false;
7205 /* ARMv8-M non-secure entry functions need to return with bxns which is only
7206 generated for entry functions themselves. */
7207 if (IS_CMSE_ENTRY (arm_current_func_type ()))
7208 return false;
7210 /* We do not allow ARMv8-M non-secure calls to be turned into sibling calls,
7211 this would complicate matters for later code generation. */
7212 if (TREE_CODE (exp) == CALL_EXPR)
7214 tree fntype = TREE_TYPE (TREE_TYPE (CALL_EXPR_FN (exp)));
7215 if (lookup_attribute ("cmse_nonsecure_call", TYPE_ATTRIBUTES (fntype)))
7216 return false;
7219 if (!VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
7221 /* Check that the return value locations are the same. For
7222 example that we aren't returning a value from the sibling in
7223 a VFP register but then need to transfer it to a core
7224 register. */
7225 rtx a, b;
7226 tree decl_or_type = decl;
7228 /* If it is an indirect function pointer, get the function type. */
7229 if (!decl)
7230 decl_or_type = TREE_TYPE (TREE_TYPE (CALL_EXPR_FN (exp)));
7232 a = arm_function_value (TREE_TYPE (exp), decl_or_type, false);
7233 b = arm_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
7234 cfun->decl, false);
7235 if (!rtx_equal_p (a, b))
7236 return false;
7239 /* Never tailcall if function may be called with a misaligned SP. */
7240 if (IS_STACKALIGN (func_type))
7241 return false;
7243 /* The AAPCS says that, on bare-metal, calls to unresolved weak
7244 references should become a NOP. Don't convert such calls into
7245 sibling calls. */
7246 if (TARGET_AAPCS_BASED
7247 && arm_abi == ARM_ABI_AAPCS
7248 && decl
7249 && DECL_WEAK (decl))
7250 return false;
7252 /* We cannot do a tailcall for an indirect call by descriptor if all the
7253 argument registers are used because the only register left to load the
7254 address is IP and it will already contain the static chain. */
7255 if (!decl && CALL_EXPR_BY_DESCRIPTOR (exp) && !flag_trampolines)
7257 tree fntype = TREE_TYPE (TREE_TYPE (CALL_EXPR_FN (exp)));
7258 CUMULATIVE_ARGS cum;
7259 cumulative_args_t cum_v;
7261 arm_init_cumulative_args (&cum, fntype, NULL_RTX, NULL_TREE);
7262 cum_v = pack_cumulative_args (&cum);
7264 for (tree t = TYPE_ARG_TYPES (fntype); t; t = TREE_CHAIN (t))
7266 tree type = TREE_VALUE (t);
7267 if (!VOID_TYPE_P (type))
7268 arm_function_arg_advance (cum_v, TYPE_MODE (type), type, true);
7271 if (!arm_function_arg (cum_v, SImode, integer_type_node, true))
7272 return false;
7275 /* Everything else is ok. */
7276 return true;
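/* One illustrative example (an assumption about a typical use, not an
   exhaustive list): on an AAPCS bare-metal target a call to

     extern void hook (void) __attribute__ ((weak));

   is never turned into a sibling call, because an unresolved weak
   reference must be able to degrade to a NOP (the DECL_WEAK check
   above).  */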
7280 /* Addressing mode support functions. */
7282 /* Return nonzero if X is a legitimate immediate operand when compiling
7283 for PIC. We know that X satisfies CONSTANT_P and flag_pic is true. */
7285 legitimate_pic_operand_p (rtx x)
7287 if (GET_CODE (x) == SYMBOL_REF
7288 || (GET_CODE (x) == CONST
7289 && GET_CODE (XEXP (x, 0)) == PLUS
7290 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF))
7291 return 0;
7293 return 1;
7296 /* Record that the current function needs a PIC register. Initialize
7297 cfun->machine->pic_reg if we have not already done so. */
7299 static void
7300 require_pic_register (void)
7302 /* A lot of the logic here is made obscure by the fact that this
7303 routine gets called as part of the rtx cost estimation process.
7304 We don't want those calls to affect any assumptions about the real
7305 function; and further, we can't call entry_of_function() until we
7306 start the real expansion process. */
7307 if (!crtl->uses_pic_offset_table)
7309 gcc_assert (can_create_pseudo_p ());
7310 if (arm_pic_register != INVALID_REGNUM
7311 && !(TARGET_THUMB1 && arm_pic_register > LAST_LO_REGNUM))
7313 if (!cfun->machine->pic_reg)
7314 cfun->machine->pic_reg = gen_rtx_REG (Pmode, arm_pic_register);
7316 /* Play games to avoid marking the function as needing pic
7317 if we are being called as part of the cost-estimation
7318 process. */
7319 if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
7320 crtl->uses_pic_offset_table = 1;
7322 else
7324 rtx_insn *seq, *insn;
7326 if (!cfun->machine->pic_reg)
7327 cfun->machine->pic_reg = gen_reg_rtx (Pmode);
7329 /* Play games to avoid marking the function as needing pic
7330 if we are being called as part of the cost-estimation
7331 process. */
7332 if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
7334 crtl->uses_pic_offset_table = 1;
7335 start_sequence ();
7337 if (TARGET_THUMB1 && arm_pic_register != INVALID_REGNUM
7338 && arm_pic_register > LAST_LO_REGNUM)
7339 emit_move_insn (cfun->machine->pic_reg,
7340 gen_rtx_REG (Pmode, arm_pic_register));
7341 else
7342 arm_load_pic_register (0UL);
7344 seq = get_insns ();
7345 end_sequence ();
7347 for (insn = seq; insn; insn = NEXT_INSN (insn))
7348 if (INSN_P (insn))
7349 INSN_LOCATION (insn) = prologue_location;
7351 /* We can be called during expansion of PHI nodes, where
7352 we can't yet emit instructions directly in the final
7353 insn stream. Queue the insns on the entry edge, they will
7354 be committed after everything else is expanded. */
7355 insert_insn_on_edge (seq,
7356 single_succ_edge (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
7363 legitimize_pic_address (rtx orig, machine_mode mode, rtx reg)
7365 if (GET_CODE (orig) == SYMBOL_REF
7366 || GET_CODE (orig) == LABEL_REF)
7368 if (reg == 0)
7370 gcc_assert (can_create_pseudo_p ());
7371 reg = gen_reg_rtx (Pmode);
7374 /* VxWorks does not impose a fixed gap between segments; the run-time
7375 gap can be different from the object-file gap. We therefore can't
7376 use GOTOFF unless we are absolutely sure that the symbol is in the
7377 same segment as the GOT. Unfortunately, the flexibility of linker
7378 scripts means that we can't be sure of that in general, so assume
7379 that GOTOFF is never valid on VxWorks. */
7380 /* References to weak symbols cannot be resolved locally: they
7381 may be overridden by a non-weak definition at link time. */
7382 rtx_insn *insn;
7383 if ((GET_CODE (orig) == LABEL_REF
7384 || (GET_CODE (orig) == SYMBOL_REF
7385 && SYMBOL_REF_LOCAL_P (orig)
7386 && (SYMBOL_REF_DECL (orig)
7387 ? !DECL_WEAK (SYMBOL_REF_DECL (orig)) : 1)))
7388 && NEED_GOT_RELOC
7389 && arm_pic_data_is_text_relative)
7390 insn = arm_pic_static_addr (orig, reg);
7391 else
7393 rtx pat;
7394 rtx mem;
7396 /* If this function doesn't have a pic register, create one now. */
7397 require_pic_register ();
7399 pat = gen_calculate_pic_address (reg, cfun->machine->pic_reg, orig);
7401 /* Make the MEM as close to a constant as possible. */
7402 mem = SET_SRC (pat);
7403 gcc_assert (MEM_P (mem) && !MEM_VOLATILE_P (mem));
7404 MEM_READONLY_P (mem) = 1;
7405 MEM_NOTRAP_P (mem) = 1;
7407 insn = emit_insn (pat);
7410 /* Put a REG_EQUAL note on this insn, so that it can be optimized
7411 by the loop optimizer. */
7412 set_unique_reg_note (insn, REG_EQUAL, orig);
7414 return reg;
7416 else if (GET_CODE (orig) == CONST)
7418 rtx base, offset;
7420 if (GET_CODE (XEXP (orig, 0)) == PLUS
7421 && XEXP (XEXP (orig, 0), 0) == cfun->machine->pic_reg)
7422 return orig;
7424 /* Handle the case where we have: const (UNSPEC_TLS). */
7425 if (GET_CODE (XEXP (orig, 0)) == UNSPEC
7426 && XINT (XEXP (orig, 0), 1) == UNSPEC_TLS)
7427 return orig;
7429 /* Handle the case where we have:
7430 const (plus (UNSPEC_TLS) (ADDEND)). The ADDEND must be a
7431 CONST_INT. */
7432 if (GET_CODE (XEXP (orig, 0)) == PLUS
7433 && GET_CODE (XEXP (XEXP (orig, 0), 0)) == UNSPEC
7434 && XINT (XEXP (XEXP (orig, 0), 0), 1) == UNSPEC_TLS)
7436 gcc_assert (CONST_INT_P (XEXP (XEXP (orig, 0), 1)));
7437 return orig;
7440 if (reg == 0)
7442 gcc_assert (can_create_pseudo_p ());
7443 reg = gen_reg_rtx (Pmode);
7446 gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);
7448 base = legitimize_pic_address (XEXP (XEXP (orig, 0), 0), Pmode, reg);
7449 offset = legitimize_pic_address (XEXP (XEXP (orig, 0), 1), Pmode,
7450 base == reg ? 0 : reg);
7452 if (CONST_INT_P (offset))
7454 /* The base register doesn't really matter; we only want to
7455 test the index for the appropriate mode. */
7456 if (!arm_legitimate_index_p (mode, offset, SET, 0))
7458 gcc_assert (can_create_pseudo_p ());
7459 offset = force_reg (Pmode, offset);
7462 if (CONST_INT_P (offset))
7463 return plus_constant (Pmode, base, INTVAL (offset));
7466 if (GET_MODE_SIZE (mode) > 4
7467 && (GET_MODE_CLASS (mode) == MODE_INT
7468 || TARGET_SOFT_FLOAT))
7470 emit_insn (gen_addsi3 (reg, base, offset));
7471 return reg;
7474 return gen_rtx_PLUS (Pmode, base, offset);
7477 return orig;
7481 /* Find a spare register to use during the prolog of a function. */
7483 static int
7484 thumb_find_work_register (unsigned long pushed_regs_mask)
7486 int reg;
7488 /* Check the argument registers first as these are call-used. The
7489 register allocation order means that sometimes r3 might be used
7490 but earlier argument registers might not, so check them all. */
7491 for (reg = LAST_ARG_REGNUM; reg >= 0; reg --)
7492 if (!df_regs_ever_live_p (reg))
7493 return reg;
7495 /* Before going on to check the call-saved registers we can try a couple
7496 more ways of deducing that r3 is available. The first is when we are
7497 pushing anonymous arguments onto the stack and we have fewer than 4
7498 registers' worth of fixed arguments (*). In this case r3 will be part of
7499 the variable argument list and so we can be sure that it will be
7500 pushed right at the start of the function. Hence it will be available
7501 for the rest of the prologue.
7502 (*): i.e. crtl->args.pretend_args_size is greater than 0. */
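/* For example (illustrative only): in a varargs function such as
   int f (int a, ...) only r0 carries a named argument; r1-r3 are
   stored at function entry as pretend arguments, so
   crtl->args.pretend_args_size is 12 and r3 is free to be reused
   here.  */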
7503 if (cfun->machine->uses_anonymous_args
7504 && crtl->args.pretend_args_size > 0)
7505 return LAST_ARG_REGNUM;
7507 /* The other case is when we have fixed arguments but fewer than 4 registers'
7508 worth. In this case r3 might be used in the body of the function, but
7509 it is not being used to convey an argument into the function. In theory
7510 we could just check crtl->args.size to see how many bytes are
7511 being passed in argument registers, but it seems that it is unreliable.
7512 Sometimes it will have the value 0 when in fact arguments are being
7513 passed. (See testcase execute/20021111-1.c for an example). So we also
7514 check the args_info.nregs field as well. The problem with this field is
7515 that it makes no allowances for arguments that are passed to the
7516 function but which are not used. Hence we could miss an opportunity
7517 when a function has an unused argument in r3. But it is better to be
7518 safe than to be sorry. */
7519 if (! cfun->machine->uses_anonymous_args
7520 && crtl->args.size >= 0
7521 && crtl->args.size <= (LAST_ARG_REGNUM * UNITS_PER_WORD)
7522 && (TARGET_AAPCS_BASED
7523 ? crtl->args.info.aapcs_ncrn < 4
7524 : crtl->args.info.nregs < 4))
7525 return LAST_ARG_REGNUM;
7527 /* Otherwise look for a call-saved register that is going to be pushed. */
7528 for (reg = LAST_LO_REGNUM; reg > LAST_ARG_REGNUM; reg --)
7529 if (pushed_regs_mask & (1 << reg))
7530 return reg;
7532 if (TARGET_THUMB2)
7534 /* Thumb-2 can use high regs. */
7535 for (reg = FIRST_HI_REGNUM; reg < 15; reg ++)
7536 if (pushed_regs_mask & (1 << reg))
7537 return reg;
7539 /* Something went wrong - thumb_compute_save_reg_mask()
7540 should have arranged for a suitable register to be pushed. */
7541 gcc_unreachable ();
7544 static GTY(()) int pic_labelno;
7546 /* Generate code to load the PIC register. In thumb mode SCRATCH is a
7547 low register. */
7549 void
7550 arm_load_pic_register (unsigned long saved_regs ATTRIBUTE_UNUSED)
7552 rtx l1, labelno, pic_tmp, pic_rtx, pic_reg;
7554 if (crtl->uses_pic_offset_table == 0 || TARGET_SINGLE_PIC_BASE)
7555 return;
7557 gcc_assert (flag_pic);
7559 pic_reg = cfun->machine->pic_reg;
7560 if (TARGET_VXWORKS_RTP)
7562 pic_rtx = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE);
7563 pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
7564 emit_insn (gen_pic_load_addr_32bit (pic_reg, pic_rtx));
7566 emit_insn (gen_rtx_SET (pic_reg, gen_rtx_MEM (Pmode, pic_reg)));
7568 pic_tmp = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
7569 emit_insn (gen_pic_offset_arm (pic_reg, pic_reg, pic_tmp));
7571 else
7573 /* We use an UNSPEC rather than a LABEL_REF because this label
7574 never appears in the code stream. */
7576 labelno = GEN_INT (pic_labelno++);
7577 l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
7578 l1 = gen_rtx_CONST (VOIDmode, l1);
7580 /* On the ARM the PC register contains 'dot + 8' at the time of the
7581 addition, on the Thumb it is 'dot + 4'. */
7582 pic_rtx = plus_constant (Pmode, l1, TARGET_ARM ? 8 : 4);
7583 pic_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, pic_rtx),
7584 UNSPEC_GOTSYM_OFF);
7585 pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
7587 if (TARGET_32BIT)
7589 emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
7591 else /* TARGET_THUMB1 */
7593 if (arm_pic_register != INVALID_REGNUM
7594 && REGNO (pic_reg) > LAST_LO_REGNUM)
7596 /* We will have pushed the pic register, so we should always be
7597 able to find a work register. */
7598 pic_tmp = gen_rtx_REG (SImode,
7599 thumb_find_work_register (saved_regs));
7600 emit_insn (gen_pic_load_addr_thumb1 (pic_tmp, pic_rtx));
7601 emit_insn (gen_movsi (pic_offset_table_rtx, pic_tmp));
7602 emit_insn (gen_pic_add_dot_plus_four (pic_reg, pic_reg, labelno));
7604 else if (arm_pic_register != INVALID_REGNUM
7605 && arm_pic_register > LAST_LO_REGNUM
7606 && REGNO (pic_reg) <= LAST_LO_REGNUM)
7608 emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
7609 emit_move_insn (gen_rtx_REG (Pmode, arm_pic_register), pic_reg);
7610 emit_use (gen_rtx_REG (Pmode, arm_pic_register));
7612 else
7613 emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
7617 /* Need to emit this whether or not we obey regdecls,
7618 since setjmp/longjmp can cause life info to screw up. */
7619 emit_use (pic_reg);
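/* For the common ELF case the code above amounts to a sequence roughly
   like the following (illustrative only; the exact register, labels and
   literal placement vary):

	ldr	rP, .LCn	@ .LCn: .word _GLOBAL_OFFSET_TABLE_-(.LPIC0+8)
   .LPIC0:
	add	rP, pc, rP	@ the 8 becomes 4 when compiling for Thumb

   after which rP holds the GOT base for the rest of the function.  */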
7622 /* Generate code to load the address of a static var when flag_pic is set. */
7623 static rtx_insn *
7624 arm_pic_static_addr (rtx orig, rtx reg)
7626 rtx l1, labelno, offset_rtx;
7628 gcc_assert (flag_pic);
7630 /* We use an UNSPEC rather than a LABEL_REF because this label
7631 never appears in the code stream. */
7632 labelno = GEN_INT (pic_labelno++);
7633 l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
7634 l1 = gen_rtx_CONST (VOIDmode, l1);
7636 /* On the ARM the PC register contains 'dot + 8' at the time of the
7637 addition, on the Thumb it is 'dot + 4'. */
7638 offset_rtx = plus_constant (Pmode, l1, TARGET_ARM ? 8 : 4);
7639 offset_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, orig, offset_rtx),
7640 UNSPEC_SYMBOL_OFFSET);
7641 offset_rtx = gen_rtx_CONST (Pmode, offset_rtx);
7643 return emit_insn (gen_pic_load_addr_unified (reg, offset_rtx, labelno));
7646 /* Return nonzero if X is valid as an ARM state addressing register. */
7647 static int
7648 arm_address_register_rtx_p (rtx x, int strict_p)
7650 int regno;
7652 if (!REG_P (x))
7653 return 0;
7655 regno = REGNO (x);
7657 if (strict_p)
7658 return ARM_REGNO_OK_FOR_BASE_P (regno);
7660 return (regno <= LAST_ARM_REGNUM
7661 || regno >= FIRST_PSEUDO_REGISTER
7662 || regno == FRAME_POINTER_REGNUM
7663 || regno == ARG_POINTER_REGNUM);
7666 /* Return TRUE if this rtx is the difference of a symbol and a label,
7667 and will reduce to a PC-relative relocation in the object file.
7668 Expressions like this can be left alone when generating PIC, rather
7669 than forced through the GOT. */
7670 static int
7671 pcrel_constant_p (rtx x)
7673 if (GET_CODE (x) == MINUS)
7674 return symbol_mentioned_p (XEXP (x, 0)) && label_mentioned_p (XEXP (x, 1));
7676 return FALSE;
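/* For instance, an expression such as
     (minus (symbol_ref ("some_var")) (label_ref L))
   satisfies this test and can be emitted as a PC-relative relocation
   instead of being forced through the GOT.  */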
7679 /* Return true if X will surely end up in an index register after next
7680 splitting pass. */
7681 static bool
7682 will_be_in_index_register (const_rtx x)
7684 /* arm.md: calculate_pic_address will split this into a register. */
7685 return GET_CODE (x) == UNSPEC && (XINT (x, 1) == UNSPEC_PIC_SYM);
7688 /* Return nonzero if X is a valid ARM state address operand. */
7690 arm_legitimate_address_outer_p (machine_mode mode, rtx x, RTX_CODE outer,
7691 int strict_p)
7693 bool use_ldrd;
7694 enum rtx_code code = GET_CODE (x);
7696 if (arm_address_register_rtx_p (x, strict_p))
7697 return 1;
7699 use_ldrd = (TARGET_LDRD
7700 && (mode == DImode || mode == DFmode));
7702 if (code == POST_INC || code == PRE_DEC
7703 || ((code == PRE_INC || code == POST_DEC)
7704 && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
7705 return arm_address_register_rtx_p (XEXP (x, 0), strict_p);
7707 else if ((code == POST_MODIFY || code == PRE_MODIFY)
7708 && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
7709 && GET_CODE (XEXP (x, 1)) == PLUS
7710 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
7712 rtx addend = XEXP (XEXP (x, 1), 1);
7714 /* Don't allow ldrd post increment by register because it's hard
7715 to fixup invalid register choices. */
7716 if (use_ldrd
7717 && GET_CODE (x) == POST_MODIFY
7718 && REG_P (addend))
7719 return 0;
7721 return ((use_ldrd || GET_MODE_SIZE (mode) <= 4)
7722 && arm_legitimate_index_p (mode, addend, outer, strict_p));
7725 /* After reload constants split into minipools will have addresses
7726 from a LABEL_REF. */
7727 else if (reload_completed
7728 && (code == LABEL_REF
7729 || (code == CONST
7730 && GET_CODE (XEXP (x, 0)) == PLUS
7731 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
7732 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
7733 return 1;
7735 else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))
7736 return 0;
7738 else if (code == PLUS)
7740 rtx xop0 = XEXP (x, 0);
7741 rtx xop1 = XEXP (x, 1);
7743 return ((arm_address_register_rtx_p (xop0, strict_p)
7744 && ((CONST_INT_P (xop1)
7745 && arm_legitimate_index_p (mode, xop1, outer, strict_p))
7746 || (!strict_p && will_be_in_index_register (xop1))))
7747 || (arm_address_register_rtx_p (xop1, strict_p)
7748 && arm_legitimate_index_p (mode, xop0, outer, strict_p)));
7751 #if 0
7752 /* Reload currently can't handle MINUS, so disable this for now */
7753 else if (GET_CODE (x) == MINUS)
7755 rtx xop0 = XEXP (x, 0);
7756 rtx xop1 = XEXP (x, 1);
7758 return (arm_address_register_rtx_p (xop0, strict_p)
7759 && arm_legitimate_index_p (mode, xop1, outer, strict_p));
7761 #endif
7763 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
7764 && code == SYMBOL_REF
7765 && CONSTANT_POOL_ADDRESS_P (x)
7766 && ! (flag_pic
7767 && symbol_mentioned_p (get_pool_constant (x))
7768 && ! pcrel_constant_p (get_pool_constant (x))))
7769 return 1;
7771 return 0;
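/* Roughly, the cases accepted above correspond to ARM addressing modes
   such as (illustrative, ignoring the per-mode size restrictions):
     [rN]                  plain base register
     [rN], #4 / [rN, #4]!  post/pre increment and decrement
     [rN, #imm]            base plus legitimate constant index
     [rN, rM, lsl #2]      base plus (possibly scaled) index register
   together with LABEL_REF-based minipool references after reload and
   references into the function's constant pool.  */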
7774 /* Return true if we can avoid creating a constant pool entry for x. */
7775 static bool
7776 can_avoid_literal_pool_for_label_p (rtx x)
7778 /* Normally we can assign constant values to target registers without
7779 the help of the constant pool. But there are cases where we have to use the
7780 constant pool, for example:
7781 1) assigning a label to a register.
7782 2) sign-extending an 8-bit value to 32 bits and then assigning it to a register.
7784 A constant pool access of the form:
7785 (set (reg r0) (mem (symbol_ref (".LC0"))))
7786 will cause the use of a literal pool (later, in arm_reorg).
7787 So here we mark such a format as invalid; the compiler will then
7788 adjust it into:
7789 (set (reg r0) (symbol_ref (".LC0")))
7790 (set (reg r0) (mem (reg r0))).
7791 No extra register is required, and (mem (reg r0)) won't cause the use
7792 of literal pools. */
7793 if (arm_disable_literal_pool && GET_CODE (x) == SYMBOL_REF
7794 && CONSTANT_POOL_ADDRESS_P (x))
7795 return 1;
7796 return 0;
7800 /* Return nonzero if X is a valid Thumb-2 address operand. */
7801 static int
7802 thumb2_legitimate_address_p (machine_mode mode, rtx x, int strict_p)
7804 bool use_ldrd;
7805 enum rtx_code code = GET_CODE (x);
7807 if (arm_address_register_rtx_p (x, strict_p))
7808 return 1;
7810 use_ldrd = (TARGET_LDRD
7811 && (mode == DImode || mode == DFmode));
7813 if (code == POST_INC || code == PRE_DEC
7814 || ((code == PRE_INC || code == POST_DEC)
7815 && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
7816 return arm_address_register_rtx_p (XEXP (x, 0), strict_p);
7818 else if ((code == POST_MODIFY || code == PRE_MODIFY)
7819 && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
7820 && GET_CODE (XEXP (x, 1)) == PLUS
7821 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
7823 /* Thumb-2 only has autoincrement by constant. */
7824 rtx addend = XEXP (XEXP (x, 1), 1);
7825 HOST_WIDE_INT offset;
7827 if (!CONST_INT_P (addend))
7828 return 0;
7830 offset = INTVAL(addend);
7831 if (GET_MODE_SIZE (mode) <= 4)
7832 return (offset > -256 && offset < 256);
7834 return (use_ldrd && offset > -1024 && offset < 1024
7835 && (offset & 3) == 0);
7838 /* After reload constants split into minipools will have addresses
7839 from a LABEL_REF. */
7840 else if (reload_completed
7841 && (code == LABEL_REF
7842 || (code == CONST
7843 && GET_CODE (XEXP (x, 0)) == PLUS
7844 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
7845 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
7846 return 1;
7848 else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))
7849 return 0;
7851 else if (code == PLUS)
7853 rtx xop0 = XEXP (x, 0);
7854 rtx xop1 = XEXP (x, 1);
7856 return ((arm_address_register_rtx_p (xop0, strict_p)
7857 && (thumb2_legitimate_index_p (mode, xop1, strict_p)
7858 || (!strict_p && will_be_in_index_register (xop1))))
7859 || (arm_address_register_rtx_p (xop1, strict_p)
7860 && thumb2_legitimate_index_p (mode, xop0, strict_p)));
7863 else if (can_avoid_literal_pool_for_label_p (x))
7864 return 0;
7866 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
7867 && code == SYMBOL_REF
7868 && CONSTANT_POOL_ADDRESS_P (x)
7869 && ! (flag_pic
7870 && symbol_mentioned_p (get_pool_constant (x))
7871 && ! pcrel_constant_p (get_pool_constant (x))))
7872 return 1;
7874 return 0;
7877 /* Return nonzero if INDEX is valid for an address index operand in
7878 ARM state. */
7879 static int
7880 arm_legitimate_index_p (machine_mode mode, rtx index, RTX_CODE outer,
7881 int strict_p)
7883 HOST_WIDE_INT range;
7884 enum rtx_code code = GET_CODE (index);
7886 /* Standard coprocessor addressing modes. */
7887 if (TARGET_HARD_FLOAT
7888 && (mode == SFmode || mode == DFmode))
7889 return (code == CONST_INT && INTVAL (index) < 1024
7890 && INTVAL (index) > -1024
7891 && (INTVAL (index) & 3) == 0);
7893 /* For quad modes, we restrict the constant offset to be slightly less
7894 than what the instruction format permits. We do this because for
7895 quad mode moves, we will actually decompose them into two separate
7896 double-mode reads or writes. INDEX must therefore be a valid
7897 (double-mode) offset and so should INDEX+8. */
7898 if (TARGET_NEON && VALID_NEON_QREG_MODE (mode))
7899 return (code == CONST_INT
7900 && INTVAL (index) < 1016
7901 && INTVAL (index) > -1024
7902 && (INTVAL (index) & 3) == 0);
7904 /* We have no such constraint on double mode offsets, so we permit the
7905 full range of the instruction format. */
7906 if (TARGET_NEON && VALID_NEON_DREG_MODE (mode))
7907 return (code == CONST_INT
7908 && INTVAL (index) < 1024
7909 && INTVAL (index) > -1024
7910 && (INTVAL (index) & 3) == 0);
7912 if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
7913 return (code == CONST_INT
7914 && INTVAL (index) < 1024
7915 && INTVAL (index) > -1024
7916 && (INTVAL (index) & 3) == 0);
7918 if (arm_address_register_rtx_p (index, strict_p)
7919 && (GET_MODE_SIZE (mode) <= 4))
7920 return 1;
7922 if (mode == DImode || mode == DFmode)
7924 if (code == CONST_INT)
7926 HOST_WIDE_INT val = INTVAL (index);
7928 if (TARGET_LDRD)
7929 return val > -256 && val < 256;
7930 else
7931 return val > -4096 && val < 4092;
7934 return TARGET_LDRD && arm_address_register_rtx_p (index, strict_p);
7937 if (GET_MODE_SIZE (mode) <= 4
7938 && ! (arm_arch4
7939 && (mode == HImode
7940 || mode == HFmode
7941 || (mode == QImode && outer == SIGN_EXTEND))))
7943 if (code == MULT)
7945 rtx xiop0 = XEXP (index, 0);
7946 rtx xiop1 = XEXP (index, 1);
7948 return ((arm_address_register_rtx_p (xiop0, strict_p)
7949 && power_of_two_operand (xiop1, SImode))
7950 || (arm_address_register_rtx_p (xiop1, strict_p)
7951 && power_of_two_operand (xiop0, SImode)));
7953 else if (code == LSHIFTRT || code == ASHIFTRT
7954 || code == ASHIFT || code == ROTATERT)
7956 rtx op = XEXP (index, 1);
7958 return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
7959 && CONST_INT_P (op)
7960 && INTVAL (op) > 0
7961 && INTVAL (op) <= 31);
7965 /* For ARM v4 we may be doing a sign-extend operation during the
7966 load. */
7967 if (arm_arch4)
7969 if (mode == HImode
7970 || mode == HFmode
7971 || (outer == SIGN_EXTEND && mode == QImode))
7972 range = 256;
7973 else
7974 range = 4096;
7976 else
7977 range = (mode == HImode || mode == HFmode) ? 4095 : 4096;
7979 return (code == CONST_INT
7980 && INTVAL (index) < range
7981 && INTVAL (index) > -range);
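/* To summarize the constant ranges accepted above (exclusive bounds, as
   in the code): VFP SF/DF accesses take word-aligned offsets in
   (-1024, 1024); Neon quad-reg modes take (-1024, 1016) and double-reg
   modes (-1024, 1024), again word-aligned; DImode/DFmode takes
   (-256, 256) with LDRD, otherwise (-4096, 4092); with ARMv4, halfword,
   half-float and sign-extended byte accesses take (-256, 256); other
   word and byte accesses take (-4096, 4096).  */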
7984 /* Return true if OP is a valid index scaling factor for Thumb-2 address
7985 index operand, i.e. 1, 2, 4 or 8. */
7986 static bool
7987 thumb2_index_mul_operand (rtx op)
7989 HOST_WIDE_INT val;
7991 if (!CONST_INT_P (op))
7992 return false;
7994 val = INTVAL(op);
7995 return (val == 1 || val == 2 || val == 4 || val == 8);
7998 /* Return nonzero if INDEX is a valid Thumb-2 address index operand. */
7999 static int
8000 thumb2_legitimate_index_p (machine_mode mode, rtx index, int strict_p)
8002 enum rtx_code code = GET_CODE (index);
8004 /* ??? Combine arm and thumb2 coprocessor addressing modes. */
8005 /* Standard coprocessor addressing modes. */
8006 if (TARGET_HARD_FLOAT
8007 && (mode == SFmode || mode == DFmode))
8008 return (code == CONST_INT && INTVAL (index) < 1024
8009 /* Thumb-2 allows only a > -256 index range for its core register
8010 load/stores. Since we allow SF/DF in core registers, we have
8011 to use the intersection between -256~4096 (core) and -1024~1024
8012 (coprocessor). */
8013 && INTVAL (index) > -256
8014 && (INTVAL (index) & 3) == 0);
8016 if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
8018 /* For DImode assume values will usually live in core regs
8019 and only allow LDRD addressing modes. */
8020 if (!TARGET_LDRD || mode != DImode)
8021 return (code == CONST_INT
8022 && INTVAL (index) < 1024
8023 && INTVAL (index) > -1024
8024 && (INTVAL (index) & 3) == 0);
8027 /* For quad modes, we restrict the constant offset to be slightly less
8028 than what the instruction format permits. We do this because for
8029 quad mode moves, we will actually decompose them into two separate
8030 double-mode reads or writes. INDEX must therefore be a valid
8031 (double-mode) offset and so should INDEX+8. */
8032 if (TARGET_NEON && VALID_NEON_QREG_MODE (mode))
8033 return (code == CONST_INT
8034 && INTVAL (index) < 1016
8035 && INTVAL (index) > -1024
8036 && (INTVAL (index) & 3) == 0);
8038 /* We have no such constraint on double mode offsets, so we permit the
8039 full range of the instruction format. */
8040 if (TARGET_NEON && VALID_NEON_DREG_MODE (mode))
8041 return (code == CONST_INT
8042 && INTVAL (index) < 1024
8043 && INTVAL (index) > -1024
8044 && (INTVAL (index) & 3) == 0);
8046 if (arm_address_register_rtx_p (index, strict_p)
8047 && (GET_MODE_SIZE (mode) <= 4))
8048 return 1;
8050 if (mode == DImode || mode == DFmode)
8052 if (code == CONST_INT)
8054 HOST_WIDE_INT val = INTVAL (index);
8055 /* ??? Can we assume ldrd for thumb2? */
8056 /* Thumb-2 ldrd only has reg+const addressing modes. */
8057 /* ldrd supports offsets of +-1020.
8058 However the ldr fallback does not. */
8059 return val > -256 && val < 256 && (val & 3) == 0;
8061 else
8062 return 0;
8065 if (code == MULT)
8067 rtx xiop0 = XEXP (index, 0);
8068 rtx xiop1 = XEXP (index, 1);
8070 return ((arm_address_register_rtx_p (xiop0, strict_p)
8071 && thumb2_index_mul_operand (xiop1))
8072 || (arm_address_register_rtx_p (xiop1, strict_p)
8073 && thumb2_index_mul_operand (xiop0)));
8075 else if (code == ASHIFT)
8077 rtx op = XEXP (index, 1);
8079 return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
8080 && CONST_INT_P (op)
8081 && INTVAL (op) > 0
8082 && INTVAL (op) <= 3);
8085 return (code == CONST_INT
8086 && INTVAL (index) < 4096
8087 && INTVAL (index) > -256);
8090 /* Return nonzero if X is valid as a 16-bit Thumb state base register. */
8091 static int
8092 thumb1_base_register_rtx_p (rtx x, machine_mode mode, int strict_p)
8094 int regno;
8096 if (!REG_P (x))
8097 return 0;
8099 regno = REGNO (x);
8101 if (strict_p)
8102 return THUMB1_REGNO_MODE_OK_FOR_BASE_P (regno, mode);
8104 return (regno <= LAST_LO_REGNUM
8105 || regno > LAST_VIRTUAL_REGISTER
8106 || regno == FRAME_POINTER_REGNUM
8107 || (GET_MODE_SIZE (mode) >= 4
8108 && (regno == STACK_POINTER_REGNUM
8109 || regno >= FIRST_PSEUDO_REGISTER
8110 || x == hard_frame_pointer_rtx
8111 || x == arg_pointer_rtx)));
8114 /* Return nonzero if x is a legitimate index register. This is the case
8115 for any base register that can access a QImode object. */
8116 inline static int
8117 thumb1_index_register_rtx_p (rtx x, int strict_p)
8119 return thumb1_base_register_rtx_p (x, QImode, strict_p);
8122 /* Return nonzero if x is a legitimate 16-bit Thumb-state address.
8124 The AP may be eliminated to either the SP or the FP, so we use the
8125 least common denominator, e.g. SImode, and offsets from 0 to 64.
8127 ??? Verify whether the above is the right approach.
8129 ??? Also, the FP may be eliminated to the SP, so perhaps that
8130 needs special handling also.
8132 ??? Look at how the mips16 port solves this problem. It probably uses
8133 better ways to solve some of these problems.
8135 Although it is not incorrect, we don't accept QImode and HImode
8136 addresses based on the frame pointer or arg pointer until the
8137 reload pass starts. This is so that eliminating such addresses
8138 into stack based ones won't produce impossible code. */
8140 thumb1_legitimate_address_p (machine_mode mode, rtx x, int strict_p)
8142 if (TARGET_HAVE_MOVT && can_avoid_literal_pool_for_label_p (x))
8143 return 0;
8145 /* ??? Not clear if this is right. Experiment. */
8146 if (GET_MODE_SIZE (mode) < 4
8147 && !(reload_in_progress || reload_completed)
8148 && (reg_mentioned_p (frame_pointer_rtx, x)
8149 || reg_mentioned_p (arg_pointer_rtx, x)
8150 || reg_mentioned_p (virtual_incoming_args_rtx, x)
8151 || reg_mentioned_p (virtual_outgoing_args_rtx, x)
8152 || reg_mentioned_p (virtual_stack_dynamic_rtx, x)
8153 || reg_mentioned_p (virtual_stack_vars_rtx, x)))
8154 return 0;
8156 /* Accept any base register. SP only in SImode or larger. */
8157 else if (thumb1_base_register_rtx_p (x, mode, strict_p))
8158 return 1;
8160 /* This is PC relative data before arm_reorg runs. */
8161 else if (GET_MODE_SIZE (mode) >= 4 && CONSTANT_P (x)
8162 && GET_CODE (x) == SYMBOL_REF
8163 && CONSTANT_POOL_ADDRESS_P (x) && !flag_pic)
8164 return 1;
8166 /* This is PC relative data after arm_reorg runs. */
8167 else if ((GET_MODE_SIZE (mode) >= 4 || mode == HFmode)
8168 && reload_completed
8169 && (GET_CODE (x) == LABEL_REF
8170 || (GET_CODE (x) == CONST
8171 && GET_CODE (XEXP (x, 0)) == PLUS
8172 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
8173 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
8174 return 1;
8176 /* Post-inc indexing only supported for SImode and larger. */
8177 else if (GET_CODE (x) == POST_INC && GET_MODE_SIZE (mode) >= 4
8178 && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p))
8179 return 1;
8181 else if (GET_CODE (x) == PLUS)
8183 /* REG+REG address can be any two index registers. */
8184 /* We disallow FRAME+REG addressing since we know that FRAME
8185 will be replaced with STACK, and SP relative addressing only
8186 permits SP+OFFSET. */
8187 if (GET_MODE_SIZE (mode) <= 4
8188 && XEXP (x, 0) != frame_pointer_rtx
8189 && XEXP (x, 1) != frame_pointer_rtx
8190 && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
8191 && (thumb1_index_register_rtx_p (XEXP (x, 1), strict_p)
8192 || (!strict_p && will_be_in_index_register (XEXP (x, 1)))))
8193 return 1;
8195 /* REG+const has a 5- to 7-bit offset for non-SP registers. */
8196 else if ((thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
8197 || XEXP (x, 0) == arg_pointer_rtx)
8198 && CONST_INT_P (XEXP (x, 1))
8199 && thumb_legitimate_offset_p (mode, INTVAL (XEXP (x, 1))))
8200 return 1;
8202 /* REG+const has a 10-bit offset for SP, but only SImode and
8203 larger are supported. */
8204 /* ??? Should probably check for DI/DFmode overflow here
8205 just like GO_IF_LEGITIMATE_OFFSET does. */
8206 else if (REG_P (XEXP (x, 0))
8207 && REGNO (XEXP (x, 0)) == STACK_POINTER_REGNUM
8208 && GET_MODE_SIZE (mode) >= 4
8209 && CONST_INT_P (XEXP (x, 1))
8210 && INTVAL (XEXP (x, 1)) >= 0
8211 && INTVAL (XEXP (x, 1)) + GET_MODE_SIZE (mode) <= 1024
8212 && (INTVAL (XEXP (x, 1)) & 3) == 0)
8213 return 1;
8215 else if (REG_P (XEXP (x, 0))
8216 && (REGNO (XEXP (x, 0)) == FRAME_POINTER_REGNUM
8217 || REGNO (XEXP (x, 0)) == ARG_POINTER_REGNUM
8218 || (REGNO (XEXP (x, 0)) >= FIRST_VIRTUAL_REGISTER
8219 && REGNO (XEXP (x, 0))
8220 <= LAST_VIRTUAL_POINTER_REGISTER))
8221 && GET_MODE_SIZE (mode) >= 4
8222 && CONST_INT_P (XEXP (x, 1))
8223 && (INTVAL (XEXP (x, 1)) & 3) == 0)
8224 return 1;
8227 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
8228 && GET_MODE_SIZE (mode) == 4
8229 && GET_CODE (x) == SYMBOL_REF
8230 && CONSTANT_POOL_ADDRESS_P (x)
8231 && ! (flag_pic
8232 && symbol_mentioned_p (get_pool_constant (x))
8233 && ! pcrel_constant_p (get_pool_constant (x))))
8234 return 1;
8236 return 0;
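/* Concretely, the forms accepted above correspond to Thumb-1 addressing
   such as (illustrative only):
     ldr  r0, [r1, r2]       register plus register
     ldrb r0, [r1, #31]      byte, 5-bit immediate
     ldrh r0, [r1, #62]      halfword, even immediate up to 62
     ldr  r0, [r1, #124]     word, word-aligned immediate up to 124
     ldr  r0, [sp, #1020]    SP-relative, word-aligned, SImode or larger
   plus PC-relative literal loads before and after arm_reorg.  */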
8239 /* Return nonzero if VAL can be used as an offset in a Thumb-state address
8240 instruction of mode MODE. */
8242 thumb_legitimate_offset_p (machine_mode mode, HOST_WIDE_INT val)
8244 switch (GET_MODE_SIZE (mode))
8246 case 1:
8247 return val >= 0 && val < 32;
8249 case 2:
8250 return val >= 0 && val < 64 && (val & 1) == 0;
8252 default:
8253 return (val >= 0
8254 && (val + GET_MODE_SIZE (mode)) <= 128
8255 && (val & 3) == 0);
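/* For example: a QImode access may use offsets 0..31, an HImode access
   even offsets 0..62, and an SImode or larger access word-aligned
   offsets satisfying val + GET_MODE_SIZE (mode) <= 128, i.e. 0..124
   for SImode.  */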
8259 bool
8260 arm_legitimate_address_p (machine_mode mode, rtx x, bool strict_p)
8262 if (TARGET_ARM)
8263 return arm_legitimate_address_outer_p (mode, x, SET, strict_p);
8264 else if (TARGET_THUMB2)
8265 return thumb2_legitimate_address_p (mode, x, strict_p);
8266 else /* if (TARGET_THUMB1) */
8267 return thumb1_legitimate_address_p (mode, x, strict_p);
8270 /* Worker function for TARGET_PREFERRED_RELOAD_CLASS.
8272 Given an rtx X being reloaded into a reg required to be
8273 in class CLASS, return the class of reg to actually use.
8274 In general this is just CLASS, but for the Thumb core registers and
8275 immediate constants we prefer a LO_REGS class or a subset. */
8277 static reg_class_t
8278 arm_preferred_reload_class (rtx x ATTRIBUTE_UNUSED, reg_class_t rclass)
8280 if (TARGET_32BIT)
8281 return rclass;
8282 else
8284 if (rclass == GENERAL_REGS)
8285 return LO_REGS;
8286 else
8287 return rclass;
8291 /* Build the SYMBOL_REF for __tls_get_addr. */
8293 static GTY(()) rtx tls_get_addr_libfunc;
8295 static rtx
8296 get_tls_get_addr (void)
8298 if (!tls_get_addr_libfunc)
8299 tls_get_addr_libfunc = init_one_libfunc ("__tls_get_addr");
8300 return tls_get_addr_libfunc;
8304 arm_load_tp (rtx target)
8306 if (!target)
8307 target = gen_reg_rtx (SImode);
8309 if (TARGET_HARD_TP)
8311 /* Can return in any reg. */
8312 emit_insn (gen_load_tp_hard (target));
8314 else
8316 /* Always returned in r0. Immediately copy the result into a pseudo,
8317 otherwise other uses of r0 (e.g. setting up function arguments) may
8318 clobber the value. */
8320 rtx tmp;
8322 emit_insn (gen_load_tp_soft ());
8324 tmp = gen_rtx_REG (SImode, R0_REGNUM);
8325 emit_move_insn (target, tmp);
8327 return target;
8330 static rtx
8331 load_tls_operand (rtx x, rtx reg)
8333 rtx tmp;
8335 if (reg == NULL_RTX)
8336 reg = gen_reg_rtx (SImode);
8338 tmp = gen_rtx_CONST (SImode, x);
8340 emit_move_insn (reg, tmp);
8342 return reg;
8345 static rtx_insn *
8346 arm_call_tls_get_addr (rtx x, rtx reg, rtx *valuep, int reloc)
8348 rtx label, labelno, sum;
8350 gcc_assert (reloc != TLS_DESCSEQ);
8351 start_sequence ();
8353 labelno = GEN_INT (pic_labelno++);
8354 label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
8355 label = gen_rtx_CONST (VOIDmode, label);
8357 sum = gen_rtx_UNSPEC (Pmode,
8358 gen_rtvec (4, x, GEN_INT (reloc), label,
8359 GEN_INT (TARGET_ARM ? 8 : 4)),
8360 UNSPEC_TLS);
8361 reg = load_tls_operand (sum, reg);
8363 if (TARGET_ARM)
8364 emit_insn (gen_pic_add_dot_plus_eight (reg, reg, labelno));
8365 else
8366 emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
8368 *valuep = emit_library_call_value (get_tls_get_addr (), NULL_RTX,
8369 LCT_PURE, /* LCT_CONST? */
8370 Pmode, reg, Pmode);
8372 rtx_insn *insns = get_insns ();
8373 end_sequence ();
8375 return insns;
8378 static rtx
8379 arm_tls_descseq_addr (rtx x, rtx reg)
8381 rtx labelno = GEN_INT (pic_labelno++);
8382 rtx label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
8383 rtx sum = gen_rtx_UNSPEC (Pmode,
8384 gen_rtvec (4, x, GEN_INT (TLS_DESCSEQ),
8385 gen_rtx_CONST (VOIDmode, label),
8386 GEN_INT (!TARGET_ARM)),
8387 UNSPEC_TLS);
8388 rtx reg0 = load_tls_operand (sum, gen_rtx_REG (SImode, R0_REGNUM));
8390 emit_insn (gen_tlscall (x, labelno));
8391 if (!reg)
8392 reg = gen_reg_rtx (SImode);
8393 else
8394 gcc_assert (REGNO (reg) != R0_REGNUM);
8396 emit_move_insn (reg, reg0);
8398 return reg;
8402 legitimize_tls_address (rtx x, rtx reg)
8404 rtx dest, tp, label, labelno, sum, ret, eqv, addend;
8405 rtx_insn *insns;
8406 unsigned int model = SYMBOL_REF_TLS_MODEL (x);
8408 switch (model)
8410 case TLS_MODEL_GLOBAL_DYNAMIC:
8411 if (TARGET_GNU2_TLS)
8413 reg = arm_tls_descseq_addr (x, reg);
8415 tp = arm_load_tp (NULL_RTX);
8417 dest = gen_rtx_PLUS (Pmode, tp, reg);
8419 else
8421 /* Original scheme */
8422 insns = arm_call_tls_get_addr (x, reg, &ret, TLS_GD32);
8423 dest = gen_reg_rtx (Pmode);
8424 emit_libcall_block (insns, dest, ret, x);
8426 return dest;
8428 case TLS_MODEL_LOCAL_DYNAMIC:
8429 if (TARGET_GNU2_TLS)
8431 reg = arm_tls_descseq_addr (x, reg);
8433 tp = arm_load_tp (NULL_RTX);
8435 dest = gen_rtx_PLUS (Pmode, tp, reg);
8437 else
8439 insns = arm_call_tls_get_addr (x, reg, &ret, TLS_LDM32);
8441 /* Attach a unique REG_EQUIV, to allow the RTL optimizers to
8442 share the LDM result with other LD model accesses. */
8443 eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const1_rtx),
8444 UNSPEC_TLS);
8445 dest = gen_reg_rtx (Pmode);
8446 emit_libcall_block (insns, dest, ret, eqv);
8448 /* Load the addend. */
8449 addend = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, x,
8450 GEN_INT (TLS_LDO32)),
8451 UNSPEC_TLS);
8452 addend = force_reg (SImode, gen_rtx_CONST (SImode, addend));
8453 dest = gen_rtx_PLUS (Pmode, dest, addend);
8455 return dest;
8457 case TLS_MODEL_INITIAL_EXEC:
8458 labelno = GEN_INT (pic_labelno++);
8459 label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
8460 label = gen_rtx_CONST (VOIDmode, label);
8461 sum = gen_rtx_UNSPEC (Pmode,
8462 gen_rtvec (4, x, GEN_INT (TLS_IE32), label,
8463 GEN_INT (TARGET_ARM ? 8 : 4)),
8464 UNSPEC_TLS);
8465 reg = load_tls_operand (sum, reg);
8467 if (TARGET_ARM)
8468 emit_insn (gen_tls_load_dot_plus_eight (reg, reg, labelno));
8469 else if (TARGET_THUMB2)
8470 emit_insn (gen_tls_load_dot_plus_four (reg, NULL, reg, labelno));
8471 else
8473 emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
8474 emit_move_insn (reg, gen_const_mem (SImode, reg));
8477 tp = arm_load_tp (NULL_RTX);
8479 return gen_rtx_PLUS (Pmode, tp, reg);
8481 case TLS_MODEL_LOCAL_EXEC:
8482 tp = arm_load_tp (NULL_RTX);
8484 reg = gen_rtx_UNSPEC (Pmode,
8485 gen_rtvec (2, x, GEN_INT (TLS_LE32)),
8486 UNSPEC_TLS);
8487 reg = force_reg (SImode, gen_rtx_CONST (SImode, reg));
8489 return gen_rtx_PLUS (Pmode, tp, reg);
8491 default:
8492 abort ();
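/* In summary: global- and local-dynamic accesses either use the GNU2
   TLS descriptor sequence or call __tls_get_addr through a TLS_GD32 /
   TLS_LDM32 relocation (local-dynamic then adds a TLS_LDO32 offset);
   initial-exec loads the offset via a TLS_IE32 GOT slot and adds the
   thread pointer; local-exec simply adds a TLS_LE32 offset to the
   thread pointer.  */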
8496 /* Try machine-dependent ways of modifying an illegitimate address
8497 to be legitimate. If we find one, return the new, valid address. */
8499 arm_legitimize_address (rtx x, rtx orig_x, machine_mode mode)
8501 if (arm_tls_referenced_p (x))
8503 rtx addend = NULL;
8505 if (GET_CODE (x) == CONST && GET_CODE (XEXP (x, 0)) == PLUS)
8507 addend = XEXP (XEXP (x, 0), 1);
8508 x = XEXP (XEXP (x, 0), 0);
8511 if (GET_CODE (x) != SYMBOL_REF)
8512 return x;
8514 gcc_assert (SYMBOL_REF_TLS_MODEL (x) != 0);
8516 x = legitimize_tls_address (x, NULL_RTX);
8518 if (addend)
8520 x = gen_rtx_PLUS (SImode, x, addend);
8521 orig_x = x;
8523 else
8524 return x;
8527 if (!TARGET_ARM)
8529 /* TODO: legitimize_address for Thumb2. */
8530 if (TARGET_THUMB2)
8531 return x;
8532 return thumb_legitimize_address (x, orig_x, mode);
8535 if (GET_CODE (x) == PLUS)
8537 rtx xop0 = XEXP (x, 0);
8538 rtx xop1 = XEXP (x, 1);
8540 if (CONSTANT_P (xop0) && !symbol_mentioned_p (xop0))
8541 xop0 = force_reg (SImode, xop0);
8543 if (CONSTANT_P (xop1) && !CONST_INT_P (xop1)
8544 && !symbol_mentioned_p (xop1))
8545 xop1 = force_reg (SImode, xop1);
8547 if (ARM_BASE_REGISTER_RTX_P (xop0)
8548 && CONST_INT_P (xop1))
8550 HOST_WIDE_INT n, low_n;
8551 rtx base_reg, val;
8552 n = INTVAL (xop1);
8554 /* VFP addressing modes actually allow greater offsets, but for
8555 now we just stick with the lowest common denominator. */
8556 if (mode == DImode || mode == DFmode)
8558 low_n = n & 0x0f;
8559 n &= ~0x0f;
8560 if (low_n > 4)
8562 n += 16;
8563 low_n -= 16;
8566 else
8568 low_n = ((mode) == TImode ? 0
8569 : n >= 0 ? (n & 0xfff) : -((-n) & 0xfff));
8570 n -= low_n;
8573 base_reg = gen_reg_rtx (SImode);
8574 val = force_operand (plus_constant (Pmode, xop0, n), NULL_RTX);
8575 emit_move_insn (base_reg, val);
8576 x = plus_constant (Pmode, base_reg, low_n);
8578 else if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
8579 x = gen_rtx_PLUS (SImode, xop0, xop1);
8582 /* XXX We don't allow MINUS any more -- see comment in
8583 arm_legitimate_address_outer_p (). */
8584 else if (GET_CODE (x) == MINUS)
8586 rtx xop0 = XEXP (x, 0);
8587 rtx xop1 = XEXP (x, 1);
8589 if (CONSTANT_P (xop0))
8590 xop0 = force_reg (SImode, xop0);
8592 if (CONSTANT_P (xop1) && ! symbol_mentioned_p (xop1))
8593 xop1 = force_reg (SImode, xop1);
8595 if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
8596 x = gen_rtx_MINUS (SImode, xop0, xop1);
8599 /* Make sure to take full advantage of the pre-indexed addressing mode
8600 with absolute addresses which often allows for the base register to
8601 be factorized for multiple adjacent memory references, and it might
8602 even allow the mini pool to be avoided entirely. */
8603 else if (CONST_INT_P (x) && optimize > 0)
8605 unsigned int bits;
8606 HOST_WIDE_INT mask, base, index;
8607 rtx base_reg;
8609 /* ldr and ldrb can use a 12-bit index, ldrsb and the rest can only
8610 use an 8-bit index. So let's use a 12-bit index for SImode only and
8611 hope that arm_gen_constant will enable ldrb to use more bits. */
8612 bits = (mode == SImode) ? 12 : 8;
8613 mask = (1 << bits) - 1;
8614 base = INTVAL (x) & ~mask;
8615 index = INTVAL (x) & mask;
8616 if (bit_count (base & 0xffffffff) > (32 - bits)/2)
8618 /* It'll most probably be more efficient to generate the base
8619 with more bits set and use a negative index instead. */
8620 base |= mask;
8621 index -= mask;
8623 base_reg = force_reg (SImode, GEN_INT (base));
8624 x = plus_constant (Pmode, base_reg, index);
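/* Worked example (illustrative): for an SImode access to absolute
   address 0x12345678 we get bits == 12, mask == 0xfff,
   base == 0x12345000 and index == 0x678; the base is materialized in a
   register once and the access becomes a reg+offset address, so nearby
   accesses within the same 4K block can share the base register.  */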
8627 if (flag_pic)
8629 /* We need to find and carefully transform any SYMBOL and LABEL
8630 references; so go back to the original address expression. */
8631 rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX);
8633 if (new_x != orig_x)
8634 x = new_x;
8637 return x;
8641 /* Try machine-dependent ways of modifying an illegitimate Thumb address
8642 to be legitimate. If we find one, return the new, valid address. */
8644 thumb_legitimize_address (rtx x, rtx orig_x, machine_mode mode)
8646 if (GET_CODE (x) == PLUS
8647 && CONST_INT_P (XEXP (x, 1))
8648 && (INTVAL (XEXP (x, 1)) >= 32 * GET_MODE_SIZE (mode)
8649 || INTVAL (XEXP (x, 1)) < 0))
8651 rtx xop0 = XEXP (x, 0);
8652 rtx xop1 = XEXP (x, 1);
8653 HOST_WIDE_INT offset = INTVAL (xop1);
8655 /* Try and fold the offset into a biasing of the base register and
8656 then offsetting that. Don't do this when optimizing for space
8657 since it can cause too many CSEs. */
8658 if (optimize_size && offset >= 0
8659 && offset < 256 + 31 * GET_MODE_SIZE (mode))
8661 HOST_WIDE_INT delta;
8663 if (offset >= 256)
8664 delta = offset - (256 - GET_MODE_SIZE (mode));
8665 else if (offset < 32 * GET_MODE_SIZE (mode) + 8)
8666 delta = 31 * GET_MODE_SIZE (mode);
8667 else
8668 delta = offset & (~31 * GET_MODE_SIZE (mode));
8670 xop0 = force_operand (plus_constant (Pmode, xop0, offset - delta),
8671 NULL_RTX);
8672 x = plus_constant (Pmode, xop0, delta);
8674 else if (offset < 0 && offset > -256)
8675 /* Small negative offsets are best done with a subtract before the
8676 dereference, since forcing these into a register normally takes two
8677 instructions. */
8678 x = force_operand (x, NULL_RTX);
8679 else
8681 /* For the remaining cases, force the constant into a register. */
8682 xop1 = force_reg (SImode, xop1);
8683 x = gen_rtx_PLUS (SImode, xop0, xop1);
8686 else if (GET_CODE (x) == PLUS
8687 && s_register_operand (XEXP (x, 1), SImode)
8688 && !s_register_operand (XEXP (x, 0), SImode))
8690 rtx xop0 = force_operand (XEXP (x, 0), NULL_RTX);
8692 x = gen_rtx_PLUS (SImode, xop0, XEXP (x, 1));
8695 if (flag_pic)
8697 /* We need to find and carefully transform any SYMBOL and LABEL
8698 references; so go back to the original address expression. */
8699 rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX);
8701 if (new_x != orig_x)
8702 x = new_x;
8705 return x;
8708 /* Return TRUE if X contains any TLS symbol references. */
8710 bool
8711 arm_tls_referenced_p (rtx x)
8713 if (! TARGET_HAVE_TLS)
8714 return false;
8716 subrtx_iterator::array_type array;
8717 FOR_EACH_SUBRTX (iter, array, x, ALL)
8719 const_rtx x = *iter;
8720 if (GET_CODE (x) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (x) != 0)
8722 /* ARM currently does not provide relocations to encode TLS variables
8723 into AArch32 instructions, only data, so there is currently no way
8724 to implement these if a literal pool is disabled. */
8725 if (arm_disable_literal_pool)
8726 sorry ("accessing thread-local storage is not currently supported "
8727 "with -mpure-code or -mslow-flash-data");
8729 return true;
8732 /* Don't recurse into UNSPEC_TLS looking for TLS symbols; these are
8733 TLS offsets, not real symbol references. */
8734 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
8735 iter.skip_subrtxes ();
8737 return false;
8740 /* Implement TARGET_LEGITIMATE_CONSTANT_P.
8742 On the ARM, allow any integer (invalid ones are removed later by insn
8743 patterns), nice doubles and symbol_refs which refer to the function's
8744 constant pool XXX.
8746 When generating pic allow anything. */
8748 static bool
8749 arm_legitimate_constant_p_1 (machine_mode, rtx x)
8751 return flag_pic || !label_mentioned_p (x);
8754 static bool
8755 thumb_legitimate_constant_p (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
8757 /* Splitters for TARGET_USE_MOVT call arm_emit_movpair which creates high
8758 RTX. These RTX must therefore be allowed for Thumb-1 so that when run
8759 for ARMv8-M Baseline or later the result is valid. */
8760 if (TARGET_HAVE_MOVT && GET_CODE (x) == HIGH)
8761 x = XEXP (x, 0);
8763 return (CONST_INT_P (x)
8764 || CONST_DOUBLE_P (x)
8765 || CONSTANT_ADDRESS_P (x)
8766 || (TARGET_HAVE_MOVT && GET_CODE (x) == SYMBOL_REF)
8767 || flag_pic);
8770 static bool
8771 arm_legitimate_constant_p (machine_mode mode, rtx x)
8773 return (!arm_cannot_force_const_mem (mode, x)
8774 && (TARGET_32BIT
8775 ? arm_legitimate_constant_p_1 (mode, x)
8776 : thumb_legitimate_constant_p (mode, x)));
8779 /* Implement TARGET_CANNOT_FORCE_CONST_MEM. */
8781 static bool
8782 arm_cannot_force_const_mem (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
8784 rtx base, offset;
8786 if (ARM_OFFSETS_MUST_BE_WITHIN_SECTIONS_P)
8788 split_const (x, &base, &offset);
8789 if (GET_CODE (base) == SYMBOL_REF
8790 && !offset_within_block_p (base, INTVAL (offset)))
8791 return true;
8793 return arm_tls_referenced_p (x);
8796 #define REG_OR_SUBREG_REG(X) \
8797 (REG_P (X) \
8798 || (GET_CODE (X) == SUBREG && REG_P (SUBREG_REG (X))))
8800 #define REG_OR_SUBREG_RTX(X) \
8801 (REG_P (X) ? (X) : SUBREG_REG (X))
8803 static inline int
8804 thumb1_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
8806 machine_mode mode = GET_MODE (x);
8807 int total, words;
8809 switch (code)
8811 case ASHIFT:
8812 case ASHIFTRT:
8813 case LSHIFTRT:
8814 case ROTATERT:
8815 return (mode == SImode) ? COSTS_N_INSNS (1) : COSTS_N_INSNS (2);
8817 case PLUS:
8818 case MINUS:
8819 case COMPARE:
8820 case NEG:
8821 case NOT:
8822 return COSTS_N_INSNS (1);
8824 case MULT:
8825 if (arm_arch6m && arm_m_profile_small_mul)
8826 return COSTS_N_INSNS (32);
8828 if (CONST_INT_P (XEXP (x, 1)))
8830 int cycles = 0;
8831 unsigned HOST_WIDE_INT i = INTVAL (XEXP (x, 1));
8833 while (i)
8835 i >>= 2;
8836 cycles++;
8838 return COSTS_N_INSNS (2) + cycles;
8840 return COSTS_N_INSNS (1) + 16;
8842 case SET:
8843 /* A SET doesn't have a mode, so let's look at the SET_DEST to get
8844 the mode. */
8845 words = ARM_NUM_INTS (GET_MODE_SIZE (GET_MODE (SET_DEST (x))));
8846 return (COSTS_N_INSNS (words)
8847 + 4 * ((MEM_P (SET_SRC (x)))
8848 + MEM_P (SET_DEST (x))));
8850 case CONST_INT:
8851 if (outer == SET)
8853 if (UINTVAL (x) < 256
8854 /* 16-bit constant. */
8855 || (TARGET_HAVE_MOVT && !(INTVAL (x) & 0xffff0000)))
8856 return 0;
8857 if (thumb_shiftable_const (INTVAL (x)))
8858 return COSTS_N_INSNS (2);
8859 return COSTS_N_INSNS (3);
8861 else if ((outer == PLUS || outer == COMPARE)
8862 && INTVAL (x) < 256 && INTVAL (x) > -256)
8863 return 0;
8864 else if ((outer == IOR || outer == XOR || outer == AND)
8865 && INTVAL (x) < 256 && INTVAL (x) >= -256)
8866 return COSTS_N_INSNS (1);
8867 else if (outer == AND)
8869 int i;
8870 /* This duplicates the tests in the andsi3 expander. */
8871 for (i = 9; i <= 31; i++)
8872 if ((HOST_WIDE_INT_1 << i) - 1 == INTVAL (x)
8873 || (HOST_WIDE_INT_1 << i) - 1 == ~INTVAL (x))
8874 return COSTS_N_INSNS (2);
8876 else if (outer == ASHIFT || outer == ASHIFTRT
8877 || outer == LSHIFTRT)
8878 return 0;
8879 return COSTS_N_INSNS (2);
8881 case CONST:
8882 case CONST_DOUBLE:
8883 case LABEL_REF:
8884 case SYMBOL_REF:
8885 return COSTS_N_INSNS (3);
8887 case UDIV:
8888 case UMOD:
8889 case DIV:
8890 case MOD:
8891 return 100;
8893 case TRUNCATE:
8894 return 99;
8896 case AND:
8897 case XOR:
8898 case IOR:
8899 /* XXX guess. */
8900 return 8;
8902 case MEM:
8903 /* XXX another guess. */
8904 /* Memory costs quite a lot for the first word, but subsequent words
8905 load at the equivalent of a single insn each. */
8906 return (10 + 4 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
8907 + ((GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
8908 ? 4 : 0));
8910 case IF_THEN_ELSE:
8911 /* XXX a guess. */
8912 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
8913 return 14;
8914 return 2;
8916 case SIGN_EXTEND:
8917 case ZERO_EXTEND:
8918 total = mode == DImode ? COSTS_N_INSNS (1) : 0;
8919 total += thumb1_rtx_costs (XEXP (x, 0), GET_CODE (XEXP (x, 0)), code);
8921 if (mode == SImode)
8922 return total;
8924 if (arm_arch6)
8925 return total + COSTS_N_INSNS (1);
8927 /* Assume a two-shift sequence. Increase the cost slightly so
8928 we prefer actual shifts over an extend operation. */
8929 return total + 1 + COSTS_N_INSNS (2);
8931 default:
8932 return 99;
8936 /* Estimates the size cost of thumb1 instructions.
8937 For now most of the code is copied from thumb1_rtx_costs. We need more
8938 fine-grained tuning when we have more related test cases. */
8939 static inline int
8940 thumb1_size_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
8942 machine_mode mode = GET_MODE (x);
8943 int words, cost;
8945 switch (code)
8947 case ASHIFT:
8948 case ASHIFTRT:
8949 case LSHIFTRT:
8950 case ROTATERT:
8951 return (mode == SImode) ? COSTS_N_INSNS (1) : COSTS_N_INSNS (2);
8953 case PLUS:
8954 case MINUS:
8955 /* Thumb-1 needs two instructions to fulfill shiftadd/shiftsub0/shiftsub1
8956 defined by RTL expansion, especially for the expansion of
8957 multiplication. */
8958 if ((GET_CODE (XEXP (x, 0)) == MULT
8959 && power_of_two_operand (XEXP (XEXP (x,0),1), SImode))
8960 || (GET_CODE (XEXP (x, 1)) == MULT
8961 && power_of_two_operand (XEXP (XEXP (x, 1), 1), SImode)))
8962 return COSTS_N_INSNS (2);
8963 /* Fall through. */
8964 case COMPARE:
8965 case NEG:
8966 case NOT:
8967 return COSTS_N_INSNS (1);
8969 case MULT:
8970 if (CONST_INT_P (XEXP (x, 1)))
8972 /* The Thumb-1 mul instruction can't operate on a constant. We must load it
8973 into a register first. */
8974 int const_size = thumb1_size_rtx_costs (XEXP (x, 1), CONST_INT, SET);
8975 /* For targets which have a very small and high-latency multiply
8976 unit, we prefer to synthesize the mult with up to 5 instructions,
8977 giving a good balance between size and performance. */
8978 if (arm_arch6m && arm_m_profile_small_mul)
8979 return COSTS_N_INSNS (5);
8980 else
8981 return COSTS_N_INSNS (1) + const_size;
8983 return COSTS_N_INSNS (1);
8985 case SET:
8986 /* A SET doesn't have a mode, so let's look at the SET_DEST to get
8987 the mode. */
8988 words = ARM_NUM_INTS (GET_MODE_SIZE (GET_MODE (SET_DEST (x))));
8989 cost = COSTS_N_INSNS (words);
8990 if (satisfies_constraint_J (SET_SRC (x))
8991 || satisfies_constraint_K (SET_SRC (x))
8992 /* Too big an immediate for a 2-byte mov, so MOVT is used. */
8993 || (CONST_INT_P (SET_SRC (x))
8994 && UINTVAL (SET_SRC (x)) >= 256
8995 && TARGET_HAVE_MOVT
8996 && satisfies_constraint_j (SET_SRC (x)))
8997 /* thumb1_movdi_insn. */
8998 || ((words > 1) && MEM_P (SET_SRC (x))))
8999 cost += COSTS_N_INSNS (1);
9000 return cost;
9002 case CONST_INT:
9003 if (outer == SET)
9005 if (UINTVAL (x) < 256)
9006 return COSTS_N_INSNS (1);
9007 /* movw is 4 bytes long. */
9008 if (TARGET_HAVE_MOVT && !(INTVAL (x) & 0xffff0000))
9009 return COSTS_N_INSNS (2);
9010 /* See split "TARGET_THUMB1 && satisfies_constraint_J". */
9011 if (INTVAL (x) >= -255 && INTVAL (x) <= -1)
9012 return COSTS_N_INSNS (2);
9013 /* See split "TARGET_THUMB1 && satisfies_constraint_K". */
9014 if (thumb_shiftable_const (INTVAL (x)))
9015 return COSTS_N_INSNS (2);
9016 return COSTS_N_INSNS (3);
9018 else if ((outer == PLUS || outer == COMPARE)
9019 && INTVAL (x) < 256 && INTVAL (x) > -256)
9020 return 0;
9021 else if ((outer == IOR || outer == XOR || outer == AND)
9022 && INTVAL (x) < 256 && INTVAL (x) >= -256)
9023 return COSTS_N_INSNS (1);
9024 else if (outer == AND)
9026 int i;
9027 /* This duplicates the tests in the andsi3 expander. */
9028 for (i = 9; i <= 31; i++)
9029 if ((HOST_WIDE_INT_1 << i) - 1 == INTVAL (x)
9030 || (HOST_WIDE_INT_1 << i) - 1 == ~INTVAL (x))
9031 return COSTS_N_INSNS (2);
9033 else if (outer == ASHIFT || outer == ASHIFTRT
9034 || outer == LSHIFTRT)
9035 return 0;
9036 return COSTS_N_INSNS (2);
9038 case CONST:
9039 case CONST_DOUBLE:
9040 case LABEL_REF:
9041 case SYMBOL_REF:
9042 return COSTS_N_INSNS (3);
9044 case UDIV:
9045 case UMOD:
9046 case DIV:
9047 case MOD:
9048 return 100;
9050 case TRUNCATE:
9051 return 99;
9053 case AND:
9054 case XOR:
9055 case IOR:
9056 return COSTS_N_INSNS (1);
9058 case MEM:
9059 return (COSTS_N_INSNS (1)
9060 + COSTS_N_INSNS (1)
9061 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
9062 + ((GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
9063 ? COSTS_N_INSNS (1) : 0));
9065 case IF_THEN_ELSE:
9066 /* XXX a guess. */
9067 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
9068 return 14;
9069 return 2;
9071 case ZERO_EXTEND:
9072 /* XXX still guessing. */
9073 switch (GET_MODE (XEXP (x, 0)))
9075 case E_QImode:
9076 return (1 + (mode == DImode ? 4 : 0)
9077 + (MEM_P (XEXP (x, 0)) ? 10 : 0));
9079 case E_HImode:
9080 return (4 + (mode == DImode ? 4 : 0)
9081 + (MEM_P (XEXP (x, 0)) ? 10 : 0));
9083 case E_SImode:
9084 return (1 + (MEM_P (XEXP (x, 0)) ? 10 : 0));
9086 default:
9087 return 99;
9090 default:
9091 return 99;
9095 /* Helper function for arm_rtx_costs. If the operand is a valid shift
9096 operand, then return the operand that is being shifted. If the shift
9097 is not by a constant, then set SHIFT_REG to point to the operand.
9098 Return NULL if OP is not a shifter operand. */
9099 static rtx
9100 shifter_op_p (rtx op, rtx *shift_reg)
9102 enum rtx_code code = GET_CODE (op);
9104 if (code == MULT && CONST_INT_P (XEXP (op, 1))
9105 && exact_log2 (INTVAL (XEXP (op, 1))) > 0)
9106 return XEXP (op, 0);
9107 else if (code == ROTATE && CONST_INT_P (XEXP (op, 1)))
9108 return XEXP (op, 0);
9109 else if (code == ROTATERT || code == ASHIFT || code == LSHIFTRT
9110 || code == ASHIFTRT)
9112 if (!CONST_INT_P (XEXP (op, 1)))
9113 *shift_reg = XEXP (op, 1);
9114 return XEXP (op, 0);
9117 return NULL;
9120 static bool
9121 arm_unspec_cost (rtx x, enum rtx_code /* outer_code */, bool speed_p, int *cost)
9123 const struct cpu_cost_table *extra_cost = current_tune->insn_extra_cost;
9124 rtx_code code = GET_CODE (x);
9125 gcc_assert (code == UNSPEC || code == UNSPEC_VOLATILE);
9127 switch (XINT (x, 1))
9129 case UNSPEC_UNALIGNED_LOAD:
9130 /* We can only do unaligned loads into the integer unit, and we can't
9131 use LDM or LDRD. */
9132 *cost = COSTS_N_INSNS (ARM_NUM_REGS (GET_MODE (x)));
9133 if (speed_p)
9134 *cost += (ARM_NUM_REGS (GET_MODE (x)) * extra_cost->ldst.load
9135 + extra_cost->ldst.load_unaligned);
9137 #ifdef NOT_YET
9138 *cost += arm_address_cost (XEXP (XVECEXP (x, 0, 0), 0), GET_MODE (x),
9139 ADDR_SPACE_GENERIC, speed_p);
9140 #endif
9141 return true;
9143 case UNSPEC_UNALIGNED_STORE:
9144 *cost = COSTS_N_INSNS (ARM_NUM_REGS (GET_MODE (x)));
9145 if (speed_p)
9146 *cost += (ARM_NUM_REGS (GET_MODE (x)) * extra_cost->ldst.store
9147 + extra_cost->ldst.store_unaligned);
9149 *cost += rtx_cost (XVECEXP (x, 0, 0), VOIDmode, UNSPEC, 0, speed_p);
9150 #ifdef NOT_YET
9151 *cost += arm_address_cost (XEXP (XVECEXP (x, 0, 0), 0), GET_MODE (x),
9152 ADDR_SPACE_GENERIC, speed_p);
9153 #endif
9154 return true;
9156 case UNSPEC_VRINTZ:
9157 case UNSPEC_VRINTP:
9158 case UNSPEC_VRINTM:
9159 case UNSPEC_VRINTR:
9160 case UNSPEC_VRINTX:
9161 case UNSPEC_VRINTA:
9162 if (speed_p)
9163 *cost += extra_cost->fp[GET_MODE (x) == DFmode].roundint;
9165 return true;
9166 default:
9167 *cost = COSTS_N_INSNS (2);
9168 break;
9170 return true;
9173 /* Cost of a libcall.  We assume one insn per argument, an amount for the
9174 call itself (just one insn when optimizing for size) and then one for processing the result.  */
9175 #define LIBCALL_COST(N) COSTS_N_INSNS (N + (speed_p ? 18 : 2))
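/* For example (illustrative only): a two-operand libcall is costed as
   LIBCALL_COST (2), which expands to COSTS_N_INSNS (2 + 18) when optimizing
   for speed and COSTS_N_INSNS (2 + 2) when optimizing for size.  */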
9177 #define HANDLE_NARROW_SHIFT_ARITH(OP, IDX) \
9178 do \
9180 shift_op = shifter_op_p (XEXP (x, IDX), &shift_reg); \
9181 if (shift_op != NULL \
9182 && arm_rtx_shift_left_p (XEXP (x, IDX))) \
9184 if (shift_reg) \
9186 if (speed_p) \
9187 *cost += extra_cost->alu.arith_shift_reg; \
9188 *cost += rtx_cost (shift_reg, GET_MODE (shift_reg), \
9189 ASHIFT, 1, speed_p); \
9191 else if (speed_p) \
9192 *cost += extra_cost->alu.arith_shift; \
9194 *cost += (rtx_cost (shift_op, GET_MODE (shift_op), \
9195 ASHIFT, 0, speed_p) \
9196 + rtx_cost (XEXP (x, 1 - IDX), \
9197 GET_MODE (shift_op), \
9198 OP, 1, speed_p)); \
9199 return true; \
9202 while (0);
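/* A sketch of what the macro above does (added for clarity): if operand IDX
   of X is a left shift -- by a constant or by a register -- it charges one
   arithmetic-with-shift operation (arith_shift or arith_shift_reg), adds the
   costs of the shifted operand and of the other operand of X, and makes the
   enclosing case return true; otherwise it falls through to the code that
   follows its use.  */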
9204 /* RTX costs. Make an estimate of the cost of executing the operation
9205 X, which is contained within an operation with code OUTER_CODE.
9206 SPEED_P indicates whether the cost desired is the performance cost,
9207 or the size cost. The estimate is stored in COST and the return
9208 value is TRUE if the cost calculation is final, or FALSE if the
9209 caller should recurse through the operands of X to add additional
9210 costs.
9212 We currently make no attempt to model the size savings of Thumb-2
9213 16-bit instructions. At the normal points in compilation where
9214 this code is called we have no measure of whether the condition
9215 flags are live or not, and thus no realistic way to determine what
9216 the size will eventually be. */
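/* Note (added for clarity): on entry the cost is seeded with
   COSTS_N_INSNS (1); the cases below either adjust that value or overwrite
   it completely before deciding whether the result is final.  */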
9217 static bool
9218 arm_rtx_costs_internal (rtx x, enum rtx_code code, enum rtx_code outer_code,
9219 const struct cpu_cost_table *extra_cost,
9220 int *cost, bool speed_p)
9222 machine_mode mode = GET_MODE (x);
9224 *cost = COSTS_N_INSNS (1);
9226 if (TARGET_THUMB1)
9228 if (speed_p)
9229 *cost = thumb1_rtx_costs (x, code, outer_code);
9230 else
9231 *cost = thumb1_size_rtx_costs (x, code, outer_code);
9232 return true;
9235 switch (code)
9237 case SET:
9238 *cost = 0;
9239 /* SET RTXs don't have a mode so we get it from the destination. */
9240 mode = GET_MODE (SET_DEST (x));
9242 if (REG_P (SET_SRC (x))
9243 && REG_P (SET_DEST (x)))
9245 /* Assume that most copies can be done with a single insn,
9246 unless we don't have HW FP, in which case everything
9247 larger than word mode will require two insns. */
9248 *cost = COSTS_N_INSNS (((!TARGET_HARD_FLOAT
9249 && GET_MODE_SIZE (mode) > 4)
9250 || mode == DImode)
9251 ? 2 : 1);
9252 /* Conditional register moves can be encoded
9253 in 16 bits in Thumb mode. */
9254 if (!speed_p && TARGET_THUMB && outer_code == COND_EXEC)
9255 *cost >>= 1;
9257 return true;
9260 if (CONST_INT_P (SET_SRC (x)))
9262 /* Handle CONST_INT here, since the value doesn't have a mode
9263 and we would otherwise be unable to work out the true cost. */
9264 *cost = rtx_cost (SET_DEST (x), GET_MODE (SET_DEST (x)), SET,
9265 0, speed_p);
9266 outer_code = SET;
9267 /* Slightly lower the cost of setting a core reg to a constant.
9268 This helps break up chains and allows for better scheduling. */
9269 if (REG_P (SET_DEST (x))
9270 && REGNO (SET_DEST (x)) <= LR_REGNUM)
9271 *cost -= 1;
9272 x = SET_SRC (x);
9273 /* Immediate moves with an immediate in the range [0, 255] can be
9274 encoded in 16 bits in Thumb mode. */
9275 if (!speed_p && TARGET_THUMB && GET_MODE (x) == SImode
9276 && INTVAL (x) >= 0 && INTVAL (x) <=255)
9277 *cost >>= 1;
9278 goto const_int_cost;
9281 return false;
9283 case MEM:
9284 /* A memory access costs 1 insn if the mode is small or the address is
9285 a single register; otherwise it costs one insn per word.  */
9286 if (REG_P (XEXP (x, 0)))
9287 *cost = COSTS_N_INSNS (1);
9288 else if (flag_pic
9289 && GET_CODE (XEXP (x, 0)) == PLUS
9290 && will_be_in_index_register (XEXP (XEXP (x, 0), 1)))
9291 /* This will be split into two instructions.
9292 See arm.md:calculate_pic_address. */
9293 *cost = COSTS_N_INSNS (2);
9294 else
9295 *cost = COSTS_N_INSNS (ARM_NUM_REGS (mode));
9297 /* For speed optimizations, add the costs of the address and
9298 accessing memory. */
9299 if (speed_p)
9300 #ifdef NOT_YET
9301 *cost += (extra_cost->ldst.load
9302 + arm_address_cost (XEXP (x, 0), mode,
9303 ADDR_SPACE_GENERIC, speed_p));
9304 #else
9305 *cost += extra_cost->ldst.load;
9306 #endif
9307 return true;
9309 case PARALLEL:
9311 /* Calculations of LDM costs are complex. We assume an initial cost
9312 (ldm_1st) which will load the number of registers mentioned in
9313 ldm_regs_per_insn_1st registers; then each additional
9314 ldm_regs_per_insn_subsequent registers cost one more insn. The
9315 formula for N regs is thus:
9317 ldm_1st + COSTS_N_INSNS ((max (N - ldm_regs_per_insn_1st, 0)
9318 + ldm_regs_per_insn_subsequent - 1)
9319 / ldm_regs_per_insn_subsequent).
9321 Additional costs may also be added for addressing. A similar
9322 formula is used for STM. */
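/* Worked example (purely hypothetical tuning numbers, added for clarity):
   with ldm_regs_per_insn_1st == 4 and ldm_regs_per_insn_subsequent == 2,
   loading 7 registers costs
     ldm_1st + COSTS_N_INSNS ((MAX (7 - 4, 0) + 2 - 1) / 2)
     == ldm_1st + COSTS_N_INSNS (2).  */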
9324 bool is_ldm = load_multiple_operation (x, SImode);
9325 bool is_stm = store_multiple_operation (x, SImode);
9327 if (is_ldm || is_stm)
9329 if (speed_p)
9331 HOST_WIDE_INT nregs = XVECLEN (x, 0);
9332 HOST_WIDE_INT regs_per_insn_1st = is_ldm
9333 ? extra_cost->ldst.ldm_regs_per_insn_1st
9334 : extra_cost->ldst.stm_regs_per_insn_1st;
9335 HOST_WIDE_INT regs_per_insn_sub = is_ldm
9336 ? extra_cost->ldst.ldm_regs_per_insn_subsequent
9337 : extra_cost->ldst.stm_regs_per_insn_subsequent;
9339 *cost += regs_per_insn_1st
9340 + COSTS_N_INSNS (((MAX (nregs - regs_per_insn_1st, 0))
9341 + regs_per_insn_sub - 1)
9342 / regs_per_insn_sub);
9343 return true;
9347 return false;
9349 case DIV:
9350 case UDIV:
9351 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9352 && (mode == SFmode || !TARGET_VFP_SINGLE))
9353 *cost += COSTS_N_INSNS (speed_p
9354 ? extra_cost->fp[mode != SFmode].div : 0);
9355 else if (mode == SImode && TARGET_IDIV)
9356 *cost += COSTS_N_INSNS (speed_p ? extra_cost->mult[0].idiv : 0);
9357 else
9358 *cost = LIBCALL_COST (2);
9360 /* Make the cost of sdiv more expensive so that when both sdiv and udiv
9361 are possible, udiv is preferred.  */
9362 *cost += (code == DIV ? COSTS_N_INSNS (1) : 0);
9363 return false; /* All arguments must be in registers. */
9365 case MOD:
9366 /* MOD by a power of 2 can be expanded as:
9367 rsbs r1, r0, #0
9368 and r0, r0, #(n - 1)
9369 and r1, r1, #(n - 1)
9370 rsbpl r0, r1, #0. */
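/* For instance (illustrative): for x % 16 the masks above are #15 and the
   sequence is 4 instructions in total, matching the base cost of
   COSTS_N_INSNS (1) plus the COSTS_N_INSNS (3) added below.  */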
9371 if (CONST_INT_P (XEXP (x, 1))
9372 && exact_log2 (INTVAL (XEXP (x, 1))) > 0
9373 && mode == SImode)
9375 *cost += COSTS_N_INSNS (3);
9377 if (speed_p)
9378 *cost += 2 * extra_cost->alu.logical
9379 + extra_cost->alu.arith;
9380 return true;
9383 /* Fall-through. */
9384 case UMOD:
9385 /* Make the cost of sdiv more expensive so that when both sdiv and udiv
9386 are possible, udiv is preferred.  */
9387 *cost = LIBCALL_COST (2) + (code == MOD ? COSTS_N_INSNS (1) : 0);
9388 return false; /* All arguments must be in registers. */
9390 case ROTATE:
9391 if (mode == SImode && REG_P (XEXP (x, 1)))
9393 *cost += (COSTS_N_INSNS (1)
9394 + rtx_cost (XEXP (x, 0), mode, code, 0, speed_p));
9395 if (speed_p)
9396 *cost += extra_cost->alu.shift_reg;
9397 return true;
9399 /* Fall through */
9400 case ROTATERT:
9401 case ASHIFT:
9402 case LSHIFTRT:
9403 case ASHIFTRT:
9404 if (mode == DImode && CONST_INT_P (XEXP (x, 1)))
9406 *cost += (COSTS_N_INSNS (2)
9407 + rtx_cost (XEXP (x, 0), mode, code, 0, speed_p));
9408 if (speed_p)
9409 *cost += 2 * extra_cost->alu.shift;
9410 return true;
9412 else if (mode == SImode)
9414 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
9415 /* Slightly disparage register shifts at -Os, but not by much. */
9416 if (!CONST_INT_P (XEXP (x, 1)))
9417 *cost += (speed_p ? extra_cost->alu.shift_reg : 1
9418 + rtx_cost (XEXP (x, 1), mode, code, 1, speed_p));
9419 return true;
9421 else if (GET_MODE_CLASS (mode) == MODE_INT
9422 && GET_MODE_SIZE (mode) < 4)
9424 if (code == ASHIFT)
9426 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
9427 /* Slightly disparage register shifts at -Os, but not by
9428 much. */
9429 if (!CONST_INT_P (XEXP (x, 1)))
9430 *cost += (speed_p ? extra_cost->alu.shift_reg : 1
9431 + rtx_cost (XEXP (x, 1), mode, code, 1, speed_p));
9433 else if (code == LSHIFTRT || code == ASHIFTRT)
9435 if (arm_arch_thumb2 && CONST_INT_P (XEXP (x, 1)))
9437 /* Can use SBFX/UBFX. */
9438 if (speed_p)
9439 *cost += extra_cost->alu.bfx;
9440 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
9442 else
9444 *cost += COSTS_N_INSNS (1);
9445 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
9446 if (speed_p)
9448 if (CONST_INT_P (XEXP (x, 1)))
9449 *cost += 2 * extra_cost->alu.shift;
9450 else
9451 *cost += (extra_cost->alu.shift
9452 + extra_cost->alu.shift_reg);
9454 else
9455 /* Slightly disparage register shifts. */
9456 *cost += !CONST_INT_P (XEXP (x, 1));
9459 else /* Rotates. */
9461 *cost = COSTS_N_INSNS (2 + !CONST_INT_P (XEXP (x, 1)));
9462 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
9463 if (speed_p)
9465 if (CONST_INT_P (XEXP (x, 1)))
9466 *cost += (2 * extra_cost->alu.shift
9467 + extra_cost->alu.log_shift);
9468 else
9469 *cost += (extra_cost->alu.shift
9470 + extra_cost->alu.shift_reg
9471 + extra_cost->alu.log_shift_reg);
9474 return true;
9477 *cost = LIBCALL_COST (2);
9478 return false;
9480 case BSWAP:
9481 if (arm_arch6)
9483 if (mode == SImode)
9485 if (speed_p)
9486 *cost += extra_cost->alu.rev;
9488 return false;
9491 else
9493 /* No rev instruction available. Look at arm_legacy_rev
9494 and thumb_legacy_rev for the form of RTL used then. */
9495 if (TARGET_THUMB)
9497 *cost += COSTS_N_INSNS (9);
9499 if (speed_p)
9501 *cost += 6 * extra_cost->alu.shift;
9502 *cost += 3 * extra_cost->alu.logical;
9505 else
9507 *cost += COSTS_N_INSNS (4);
9509 if (speed_p)
9511 *cost += 2 * extra_cost->alu.shift;
9512 *cost += extra_cost->alu.arith_shift;
9513 *cost += 2 * extra_cost->alu.logical;
9516 return true;
9518 return false;
9520 case MINUS:
9521 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9522 && (mode == SFmode || !TARGET_VFP_SINGLE))
9524 if (GET_CODE (XEXP (x, 0)) == MULT
9525 || GET_CODE (XEXP (x, 1)) == MULT)
9527 rtx mul_op0, mul_op1, sub_op;
9529 if (speed_p)
9530 *cost += extra_cost->fp[mode != SFmode].mult_addsub;
9532 if (GET_CODE (XEXP (x, 0)) == MULT)
9534 mul_op0 = XEXP (XEXP (x, 0), 0);
9535 mul_op1 = XEXP (XEXP (x, 0), 1);
9536 sub_op = XEXP (x, 1);
9538 else
9540 mul_op0 = XEXP (XEXP (x, 1), 0);
9541 mul_op1 = XEXP (XEXP (x, 1), 1);
9542 sub_op = XEXP (x, 0);
9545 /* The first operand of the multiply may be optionally
9546 negated. */
9547 if (GET_CODE (mul_op0) == NEG)
9548 mul_op0 = XEXP (mul_op0, 0);
9550 *cost += (rtx_cost (mul_op0, mode, code, 0, speed_p)
9551 + rtx_cost (mul_op1, mode, code, 0, speed_p)
9552 + rtx_cost (sub_op, mode, code, 0, speed_p));
9554 return true;
9557 if (speed_p)
9558 *cost += extra_cost->fp[mode != SFmode].addsub;
9559 return false;
9562 if (mode == SImode)
9564 rtx shift_by_reg = NULL;
9565 rtx shift_op;
9566 rtx non_shift_op;
9568 shift_op = shifter_op_p (XEXP (x, 0), &shift_by_reg);
9569 if (shift_op == NULL)
9571 shift_op = shifter_op_p (XEXP (x, 1), &shift_by_reg);
9572 non_shift_op = XEXP (x, 0);
9574 else
9575 non_shift_op = XEXP (x, 1);
9577 if (shift_op != NULL)
9579 if (shift_by_reg != NULL)
9581 if (speed_p)
9582 *cost += extra_cost->alu.arith_shift_reg;
9583 *cost += rtx_cost (shift_by_reg, mode, code, 0, speed_p);
9585 else if (speed_p)
9586 *cost += extra_cost->alu.arith_shift;
9588 *cost += rtx_cost (shift_op, mode, code, 0, speed_p);
9589 *cost += rtx_cost (non_shift_op, mode, code, 0, speed_p);
9590 return true;
9593 if (arm_arch_thumb2
9594 && GET_CODE (XEXP (x, 1)) == MULT)
9596 /* MLS. */
9597 if (speed_p)
9598 *cost += extra_cost->mult[0].add;
9599 *cost += rtx_cost (XEXP (x, 0), mode, MINUS, 0, speed_p);
9600 *cost += rtx_cost (XEXP (XEXP (x, 1), 0), mode, MULT, 0, speed_p);
9601 *cost += rtx_cost (XEXP (XEXP (x, 1), 1), mode, MULT, 1, speed_p);
9602 return true;
9605 if (CONST_INT_P (XEXP (x, 0)))
9607 int insns = arm_gen_constant (MINUS, SImode, NULL_RTX,
9608 INTVAL (XEXP (x, 0)), NULL_RTX,
9609 NULL_RTX, 1, 0);
9610 *cost = COSTS_N_INSNS (insns);
9611 if (speed_p)
9612 *cost += insns * extra_cost->alu.arith;
9613 *cost += rtx_cost (XEXP (x, 1), mode, code, 1, speed_p);
9614 return true;
9616 else if (speed_p)
9617 *cost += extra_cost->alu.arith;
9619 return false;
9622 if (GET_MODE_CLASS (mode) == MODE_INT
9623 && GET_MODE_SIZE (mode) < 4)
9625 rtx shift_op, shift_reg;
9626 shift_reg = NULL;
9628 /* We check both sides of the MINUS for shifter operands since,
9629 unlike PLUS, it's not commutative. */
9631 HANDLE_NARROW_SHIFT_ARITH (MINUS, 0)
9632 HANDLE_NARROW_SHIFT_ARITH (MINUS, 1)
9634 /* Slightly disparage, as we might need to widen the result. */
9635 *cost += 1;
9636 if (speed_p)
9637 *cost += extra_cost->alu.arith;
9639 if (CONST_INT_P (XEXP (x, 0)))
9641 *cost += rtx_cost (XEXP (x, 1), mode, code, 1, speed_p);
9642 return true;
9645 return false;
9648 if (mode == DImode)
9650 *cost += COSTS_N_INSNS (1);
9652 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND)
9654 rtx op1 = XEXP (x, 1);
9656 if (speed_p)
9657 *cost += 2 * extra_cost->alu.arith;
9659 if (GET_CODE (op1) == ZERO_EXTEND)
9660 *cost += rtx_cost (XEXP (op1, 0), VOIDmode, ZERO_EXTEND,
9661 0, speed_p);
9662 else
9663 *cost += rtx_cost (op1, mode, MINUS, 1, speed_p);
9664 *cost += rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode, ZERO_EXTEND,
9665 0, speed_p);
9666 return true;
9668 else if (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
9670 if (speed_p)
9671 *cost += extra_cost->alu.arith + extra_cost->alu.arith_shift;
9672 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode, SIGN_EXTEND,
9673 0, speed_p)
9674 + rtx_cost (XEXP (x, 1), mode, MINUS, 1, speed_p));
9675 return true;
9677 else if (GET_CODE (XEXP (x, 1)) == ZERO_EXTEND
9678 || GET_CODE (XEXP (x, 1)) == SIGN_EXTEND)
9680 if (speed_p)
9681 *cost += (extra_cost->alu.arith
9682 + (GET_CODE (XEXP (x, 1)) == ZERO_EXTEND
9683 ? extra_cost->alu.arith
9684 : extra_cost->alu.arith_shift));
9685 *cost += (rtx_cost (XEXP (x, 0), mode, MINUS, 0, speed_p)
9686 + rtx_cost (XEXP (XEXP (x, 1), 0), VOIDmode,
9687 GET_CODE (XEXP (x, 1)), 0, speed_p));
9688 return true;
9691 if (speed_p)
9692 *cost += 2 * extra_cost->alu.arith;
9693 return false;
9696 /* Vector mode? */
9698 *cost = LIBCALL_COST (2);
9699 return false;
9701 case PLUS:
9702 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9703 && (mode == SFmode || !TARGET_VFP_SINGLE))
9705 if (GET_CODE (XEXP (x, 0)) == MULT)
9707 rtx mul_op0, mul_op1, add_op;
9709 if (speed_p)
9710 *cost += extra_cost->fp[mode != SFmode].mult_addsub;
9712 mul_op0 = XEXP (XEXP (x, 0), 0);
9713 mul_op1 = XEXP (XEXP (x, 0), 1);
9714 add_op = XEXP (x, 1);
9716 *cost += (rtx_cost (mul_op0, mode, code, 0, speed_p)
9717 + rtx_cost (mul_op1, mode, code, 0, speed_p)
9718 + rtx_cost (add_op, mode, code, 0, speed_p));
9720 return true;
9723 if (speed_p)
9724 *cost += extra_cost->fp[mode != SFmode].addsub;
9725 return false;
9727 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
9729 *cost = LIBCALL_COST (2);
9730 return false;
9733 /* Narrow modes can be synthesized in SImode, but the range
9734 of useful sub-operations is limited. Check for shift operations
9735 on one of the operands. Only left shifts can be used in the
9736 narrow modes. */
9737 if (GET_MODE_CLASS (mode) == MODE_INT
9738 && GET_MODE_SIZE (mode) < 4)
9740 rtx shift_op, shift_reg;
9741 shift_reg = NULL;
9743 HANDLE_NARROW_SHIFT_ARITH (PLUS, 0)
9745 if (CONST_INT_P (XEXP (x, 1)))
9747 int insns = arm_gen_constant (PLUS, SImode, NULL_RTX,
9748 INTVAL (XEXP (x, 1)), NULL_RTX,
9749 NULL_RTX, 1, 0);
9750 *cost = COSTS_N_INSNS (insns);
9751 if (speed_p)
9752 *cost += insns * extra_cost->alu.arith;
9753 /* Slightly penalize a narrow operation as the result may
9754 need widening. */
9755 *cost += 1 + rtx_cost (XEXP (x, 0), mode, PLUS, 0, speed_p);
9756 return true;
9759 /* Slightly penalize a narrow operation as the result may
9760 need widening. */
9761 *cost += 1;
9762 if (speed_p)
9763 *cost += extra_cost->alu.arith;
9765 return false;
9768 if (mode == SImode)
9770 rtx shift_op, shift_reg;
9772 if (TARGET_INT_SIMD
9773 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
9774 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
9776 /* UXTA[BH] or SXTA[BH]. */
9777 if (speed_p)
9778 *cost += extra_cost->alu.extend_arith;
9779 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode, ZERO_EXTEND,
9780 0, speed_p)
9781 + rtx_cost (XEXP (x, 1), mode, PLUS, 0, speed_p));
9782 return true;
9785 shift_reg = NULL;
9786 shift_op = shifter_op_p (XEXP (x, 0), &shift_reg);
9787 if (shift_op != NULL)
9789 if (shift_reg)
9791 if (speed_p)
9792 *cost += extra_cost->alu.arith_shift_reg;
9793 *cost += rtx_cost (shift_reg, mode, ASHIFT, 1, speed_p);
9795 else if (speed_p)
9796 *cost += extra_cost->alu.arith_shift;
9798 *cost += (rtx_cost (shift_op, mode, ASHIFT, 0, speed_p)
9799 + rtx_cost (XEXP (x, 1), mode, PLUS, 1, speed_p));
9800 return true;
9802 if (GET_CODE (XEXP (x, 0)) == MULT)
9804 rtx mul_op = XEXP (x, 0);
9806 if (TARGET_DSP_MULTIPLY
9807 && ((GET_CODE (XEXP (mul_op, 0)) == SIGN_EXTEND
9808 && (GET_CODE (XEXP (mul_op, 1)) == SIGN_EXTEND
9809 || (GET_CODE (XEXP (mul_op, 1)) == ASHIFTRT
9810 && CONST_INT_P (XEXP (XEXP (mul_op, 1), 1))
9811 && INTVAL (XEXP (XEXP (mul_op, 1), 1)) == 16)))
9812 || (GET_CODE (XEXP (mul_op, 0)) == ASHIFTRT
9813 && CONST_INT_P (XEXP (XEXP (mul_op, 0), 1))
9814 && INTVAL (XEXP (XEXP (mul_op, 0), 1)) == 16
9815 && (GET_CODE (XEXP (mul_op, 1)) == SIGN_EXTEND
9816 || (GET_CODE (XEXP (mul_op, 1)) == ASHIFTRT
9817 && CONST_INT_P (XEXP (XEXP (mul_op, 1), 1))
9818 && (INTVAL (XEXP (XEXP (mul_op, 1), 1))
9819 == 16))))))
9821 /* SMLA[BT][BT]. */
9822 if (speed_p)
9823 *cost += extra_cost->mult[0].extend_add;
9824 *cost += (rtx_cost (XEXP (XEXP (mul_op, 0), 0), mode,
9825 SIGN_EXTEND, 0, speed_p)
9826 + rtx_cost (XEXP (XEXP (mul_op, 1), 0), mode,
9827 SIGN_EXTEND, 0, speed_p)
9828 + rtx_cost (XEXP (x, 1), mode, PLUS, 1, speed_p));
9829 return true;
9832 if (speed_p)
9833 *cost += extra_cost->mult[0].add;
9834 *cost += (rtx_cost (XEXP (mul_op, 0), mode, MULT, 0, speed_p)
9835 + rtx_cost (XEXP (mul_op, 1), mode, MULT, 1, speed_p)
9836 + rtx_cost (XEXP (x, 1), mode, PLUS, 1, speed_p));
9837 return true;
9839 if (CONST_INT_P (XEXP (x, 1)))
9841 int insns = arm_gen_constant (PLUS, SImode, NULL_RTX,
9842 INTVAL (XEXP (x, 1)), NULL_RTX,
9843 NULL_RTX, 1, 0);
9844 *cost = COSTS_N_INSNS (insns);
9845 if (speed_p)
9846 *cost += insns * extra_cost->alu.arith;
9847 *cost += rtx_cost (XEXP (x, 0), mode, PLUS, 0, speed_p);
9848 return true;
9850 else if (speed_p)
9851 *cost += extra_cost->alu.arith;
9853 return false;
9856 if (mode == DImode)
9858 if (arm_arch3m
9859 && GET_CODE (XEXP (x, 0)) == MULT
9860 && ((GET_CODE (XEXP (XEXP (x, 0), 0)) == ZERO_EXTEND
9861 && GET_CODE (XEXP (XEXP (x, 0), 1)) == ZERO_EXTEND)
9862 || (GET_CODE (XEXP (XEXP (x, 0), 0)) == SIGN_EXTEND
9863 && GET_CODE (XEXP (XEXP (x, 0), 1)) == SIGN_EXTEND)))
9865 if (speed_p)
9866 *cost += extra_cost->mult[1].extend_add;
9867 *cost += (rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0), mode,
9868 ZERO_EXTEND, 0, speed_p)
9869 + rtx_cost (XEXP (XEXP (XEXP (x, 0), 1), 0), mode,
9870 ZERO_EXTEND, 0, speed_p)
9871 + rtx_cost (XEXP (x, 1), mode, PLUS, 1, speed_p));
9872 return true;
9875 *cost += COSTS_N_INSNS (1);
9877 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
9878 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
9880 if (speed_p)
9881 *cost += (extra_cost->alu.arith
9882 + (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
9883 ? extra_cost->alu.arith
9884 : extra_cost->alu.arith_shift));
9886 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode, ZERO_EXTEND,
9887 0, speed_p)
9888 + rtx_cost (XEXP (x, 1), mode, PLUS, 1, speed_p));
9889 return true;
9892 if (speed_p)
9893 *cost += 2 * extra_cost->alu.arith;
9894 return false;
9897 /* Vector mode? */
9898 *cost = LIBCALL_COST (2);
9899 return false;
9900 case IOR:
9901 if (mode == SImode && arm_arch6 && aarch_rev16_p (x))
9903 if (speed_p)
9904 *cost += extra_cost->alu.rev;
9906 return true;
9908 /* Fall through. */
9909 case AND: case XOR:
9910 if (mode == SImode)
9912 enum rtx_code subcode = GET_CODE (XEXP (x, 0));
9913 rtx op0 = XEXP (x, 0);
9914 rtx shift_op, shift_reg;
9916 if (subcode == NOT
9917 && (code == AND
9918 || (code == IOR && TARGET_THUMB2)))
9919 op0 = XEXP (op0, 0);
9921 shift_reg = NULL;
9922 shift_op = shifter_op_p (op0, &shift_reg);
9923 if (shift_op != NULL)
9925 if (shift_reg)
9927 if (speed_p)
9928 *cost += extra_cost->alu.log_shift_reg;
9929 *cost += rtx_cost (shift_reg, mode, ASHIFT, 1, speed_p);
9931 else if (speed_p)
9932 *cost += extra_cost->alu.log_shift;
9934 *cost += (rtx_cost (shift_op, mode, ASHIFT, 0, speed_p)
9935 + rtx_cost (XEXP (x, 1), mode, code, 1, speed_p));
9936 return true;
9939 if (CONST_INT_P (XEXP (x, 1)))
9941 int insns = arm_gen_constant (code, SImode, NULL_RTX,
9942 INTVAL (XEXP (x, 1)), NULL_RTX,
9943 NULL_RTX, 1, 0);
9945 *cost = COSTS_N_INSNS (insns);
9946 if (speed_p)
9947 *cost += insns * extra_cost->alu.logical;
9948 *cost += rtx_cost (op0, mode, code, 0, speed_p);
9949 return true;
9952 if (speed_p)
9953 *cost += extra_cost->alu.logical;
9954 *cost += (rtx_cost (op0, mode, code, 0, speed_p)
9955 + rtx_cost (XEXP (x, 1), mode, code, 1, speed_p));
9956 return true;
9959 if (mode == DImode)
9961 rtx op0 = XEXP (x, 0);
9962 enum rtx_code subcode = GET_CODE (op0);
9964 *cost += COSTS_N_INSNS (1);
9966 if (subcode == NOT
9967 && (code == AND
9968 || (code == IOR && TARGET_THUMB2)))
9969 op0 = XEXP (op0, 0);
9971 if (GET_CODE (op0) == ZERO_EXTEND)
9973 if (speed_p)
9974 *cost += 2 * extra_cost->alu.logical;
9976 *cost += (rtx_cost (XEXP (op0, 0), VOIDmode, ZERO_EXTEND,
9977 0, speed_p)
9978 + rtx_cost (XEXP (x, 1), mode, code, 0, speed_p));
9979 return true;
9981 else if (GET_CODE (op0) == SIGN_EXTEND)
9983 if (speed_p)
9984 *cost += extra_cost->alu.logical + extra_cost->alu.log_shift;
9986 *cost += (rtx_cost (XEXP (op0, 0), VOIDmode, SIGN_EXTEND,
9987 0, speed_p)
9988 + rtx_cost (XEXP (x, 1), mode, code, 0, speed_p));
9989 return true;
9992 if (speed_p)
9993 *cost += 2 * extra_cost->alu.logical;
9995 return true;
9997 /* Vector mode? */
9999 *cost = LIBCALL_COST (2);
10000 return false;
10002 case MULT:
10003 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10004 && (mode == SFmode || !TARGET_VFP_SINGLE))
10006 rtx op0 = XEXP (x, 0);
10008 if (GET_CODE (op0) == NEG && !flag_rounding_math)
10009 op0 = XEXP (op0, 0);
10011 if (speed_p)
10012 *cost += extra_cost->fp[mode != SFmode].mult;
10014 *cost += (rtx_cost (op0, mode, MULT, 0, speed_p)
10015 + rtx_cost (XEXP (x, 1), mode, MULT, 1, speed_p));
10016 return true;
10018 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
10020 *cost = LIBCALL_COST (2);
10021 return false;
10024 if (mode == SImode)
10026 if (TARGET_DSP_MULTIPLY
10027 && ((GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
10028 && (GET_CODE (XEXP (x, 1)) == SIGN_EXTEND
10029 || (GET_CODE (XEXP (x, 1)) == ASHIFTRT
10030 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
10031 && INTVAL (XEXP (XEXP (x, 1), 1)) == 16)))
10032 || (GET_CODE (XEXP (x, 0)) == ASHIFTRT
10033 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
10034 && INTVAL (XEXP (XEXP (x, 0), 1)) == 16
10035 && (GET_CODE (XEXP (x, 1)) == SIGN_EXTEND
10036 || (GET_CODE (XEXP (x, 1)) == ASHIFTRT
10037 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
10038 && (INTVAL (XEXP (XEXP (x, 1), 1))
10039 == 16))))))
10041 /* SMUL[TB][TB]. */
10042 if (speed_p)
10043 *cost += extra_cost->mult[0].extend;
10044 *cost += rtx_cost (XEXP (XEXP (x, 0), 0), mode,
10045 SIGN_EXTEND, 0, speed_p);
10046 *cost += rtx_cost (XEXP (XEXP (x, 1), 0), mode,
10047 SIGN_EXTEND, 1, speed_p);
10048 return true;
10050 if (speed_p)
10051 *cost += extra_cost->mult[0].simple;
10052 return false;
10055 if (mode == DImode)
10057 if (arm_arch3m
10058 && ((GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
10059 && GET_CODE (XEXP (x, 1)) == ZERO_EXTEND)
10060 || (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
10061 && GET_CODE (XEXP (x, 1)) == SIGN_EXTEND)))
10063 if (speed_p)
10064 *cost += extra_cost->mult[1].extend;
10065 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode,
10066 ZERO_EXTEND, 0, speed_p)
10067 + rtx_cost (XEXP (XEXP (x, 1), 0), VOIDmode,
10068 ZERO_EXTEND, 0, speed_p));
10069 return true;
10072 *cost = LIBCALL_COST (2);
10073 return false;
10076 /* Vector mode? */
10077 *cost = LIBCALL_COST (2);
10078 return false;
10080 case NEG:
10081 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10082 && (mode == SFmode || !TARGET_VFP_SINGLE))
10084 if (GET_CODE (XEXP (x, 0)) == MULT)
10086 /* VNMUL. */
10087 *cost = rtx_cost (XEXP (x, 0), mode, NEG, 0, speed_p);
10088 return true;
10091 if (speed_p)
10092 *cost += extra_cost->fp[mode != SFmode].neg;
10094 return false;
10096 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
10098 *cost = LIBCALL_COST (1);
10099 return false;
10102 if (mode == SImode)
10104 if (GET_CODE (XEXP (x, 0)) == ABS)
10106 *cost += COSTS_N_INSNS (1);
10107 /* Assume the non-flag-changing variant. */
10108 if (speed_p)
10109 *cost += (extra_cost->alu.log_shift
10110 + extra_cost->alu.arith_shift);
10111 *cost += rtx_cost (XEXP (XEXP (x, 0), 0), mode, ABS, 0, speed_p);
10112 return true;
10115 if (GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMPARE
10116 || GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMM_COMPARE)
10118 *cost += COSTS_N_INSNS (1);
10119 /* No extra cost for MOV imm and MVN imm. */
10120 /* If the comparison op is using the flags, there's no further
10121 cost, otherwise we need to add the cost of the comparison. */
10122 if (!(REG_P (XEXP (XEXP (x, 0), 0))
10123 && REGNO (XEXP (XEXP (x, 0), 0)) == CC_REGNUM
10124 && XEXP (XEXP (x, 0), 1) == const0_rtx))
10126 mode = GET_MODE (XEXP (XEXP (x, 0), 0));
10127 *cost += (COSTS_N_INSNS (1)
10128 + rtx_cost (XEXP (XEXP (x, 0), 0), mode, COMPARE,
10129 0, speed_p)
10130 + rtx_cost (XEXP (XEXP (x, 0), 1), mode, COMPARE,
10131 1, speed_p));
10132 if (speed_p)
10133 *cost += extra_cost->alu.arith;
10135 return true;
10138 if (speed_p)
10139 *cost += extra_cost->alu.arith;
10140 return false;
10143 if (GET_MODE_CLASS (mode) == MODE_INT
10144 && GET_MODE_SIZE (mode) < 4)
10146 /* Slightly disparage, as we might need an extend operation. */
10147 *cost += 1;
10148 if (speed_p)
10149 *cost += extra_cost->alu.arith;
10150 return false;
10153 if (mode == DImode)
10155 *cost += COSTS_N_INSNS (1);
10156 if (speed_p)
10157 *cost += 2 * extra_cost->alu.arith;
10158 return false;
10161 /* Vector mode? */
10162 *cost = LIBCALL_COST (1);
10163 return false;
10165 case NOT:
10166 if (mode == SImode)
10168 rtx shift_op;
10169 rtx shift_reg = NULL;
10171 shift_op = shifter_op_p (XEXP (x, 0), &shift_reg);
10173 if (shift_op)
10175 if (shift_reg != NULL)
10177 if (speed_p)
10178 *cost += extra_cost->alu.log_shift_reg;
10179 *cost += rtx_cost (shift_reg, mode, ASHIFT, 1, speed_p);
10181 else if (speed_p)
10182 *cost += extra_cost->alu.log_shift;
10183 *cost += rtx_cost (shift_op, mode, ASHIFT, 0, speed_p);
10184 return true;
10187 if (speed_p)
10188 *cost += extra_cost->alu.logical;
10189 return false;
10191 if (mode == DImode)
10193 *cost += COSTS_N_INSNS (1);
10194 return false;
10197 /* Vector mode? */
10199 *cost += LIBCALL_COST (1);
10200 return false;
10202 case IF_THEN_ELSE:
10204 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
10206 *cost += COSTS_N_INSNS (3);
10207 return true;
10209 int op1cost = rtx_cost (XEXP (x, 1), mode, SET, 1, speed_p);
10210 int op2cost = rtx_cost (XEXP (x, 2), mode, SET, 1, speed_p);
10212 *cost = rtx_cost (XEXP (x, 0), mode, IF_THEN_ELSE, 0, speed_p);
10213 /* Assume that if one arm of the if_then_else is a register,
10214 it will be tied with the result and eliminate the
10215 conditional insn. */
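/* Illustrative example (added for clarity): for
   (if_then_else (cond) (reg r1) (const_int 0)) the register arm is assumed
   to be tied to the destination, so only the cost of the constant arm is
   added on top of the cost of the condition.  */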
10216 if (REG_P (XEXP (x, 1)))
10217 *cost += op2cost;
10218 else if (REG_P (XEXP (x, 2)))
10219 *cost += op1cost;
10220 else
10222 if (speed_p)
10224 if (extra_cost->alu.non_exec_costs_exec)
10225 *cost += op1cost + op2cost + extra_cost->alu.non_exec;
10226 else
10227 *cost += MAX (op1cost, op2cost) + extra_cost->alu.non_exec;
10229 else
10230 *cost += op1cost + op2cost;
10233 return true;
10235 case COMPARE:
10236 if (cc_register (XEXP (x, 0), VOIDmode) && XEXP (x, 1) == const0_rtx)
10237 *cost = 0;
10238 else
10240 machine_mode op0mode;
10241 /* We'll mostly assume that the cost of a compare is the cost of the
10242 LHS. However, there are some notable exceptions. */
10244 /* Floating point compares are never done as side-effects. */
10245 op0mode = GET_MODE (XEXP (x, 0));
10246 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (op0mode) == MODE_FLOAT
10247 && (op0mode == SFmode || !TARGET_VFP_SINGLE))
10249 if (speed_p)
10250 *cost += extra_cost->fp[op0mode != SFmode].compare;
10252 if (XEXP (x, 1) == CONST0_RTX (op0mode))
10254 *cost += rtx_cost (XEXP (x, 0), op0mode, code, 0, speed_p);
10255 return true;
10258 return false;
10260 else if (GET_MODE_CLASS (op0mode) == MODE_FLOAT)
10262 *cost = LIBCALL_COST (2);
10263 return false;
10266 /* DImode compares normally take two insns. */
10267 if (op0mode == DImode)
10269 *cost += COSTS_N_INSNS (1);
10270 if (speed_p)
10271 *cost += 2 * extra_cost->alu.arith;
10272 return false;
10275 if (op0mode == SImode)
10277 rtx shift_op;
10278 rtx shift_reg;
10280 if (XEXP (x, 1) == const0_rtx
10281 && !(REG_P (XEXP (x, 0))
10282 || (GET_CODE (XEXP (x, 0)) == SUBREG
10283 && REG_P (SUBREG_REG (XEXP (x, 0))))))
10285 *cost = rtx_cost (XEXP (x, 0), op0mode, COMPARE, 0, speed_p);
10287 /* Multiply operations that set the flags are often
10288 significantly more expensive. */
10289 if (speed_p
10290 && GET_CODE (XEXP (x, 0)) == MULT
10291 && !power_of_two_operand (XEXP (XEXP (x, 0), 1), mode))
10292 *cost += extra_cost->mult[0].flag_setting;
10294 if (speed_p
10295 && GET_CODE (XEXP (x, 0)) == PLUS
10296 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
10297 && !power_of_two_operand (XEXP (XEXP (XEXP (x, 0),
10298 0), 1), mode))
10299 *cost += extra_cost->mult[0].flag_setting;
10300 return true;
10303 shift_reg = NULL;
10304 shift_op = shifter_op_p (XEXP (x, 0), &shift_reg);
10305 if (shift_op != NULL)
10307 if (shift_reg != NULL)
10309 *cost += rtx_cost (shift_reg, op0mode, ASHIFT,
10310 1, speed_p);
10311 if (speed_p)
10312 *cost += extra_cost->alu.arith_shift_reg;
10314 else if (speed_p)
10315 *cost += extra_cost->alu.arith_shift;
10316 *cost += rtx_cost (shift_op, op0mode, ASHIFT, 0, speed_p);
10317 *cost += rtx_cost (XEXP (x, 1), op0mode, COMPARE, 1, speed_p);
10318 return true;
10321 if (speed_p)
10322 *cost += extra_cost->alu.arith;
10323 if (CONST_INT_P (XEXP (x, 1))
10324 && const_ok_for_op (INTVAL (XEXP (x, 1)), COMPARE))
10326 *cost += rtx_cost (XEXP (x, 0), op0mode, COMPARE, 0, speed_p);
10327 return true;
10329 return false;
10332 /* Vector mode? */
10334 *cost = LIBCALL_COST (2);
10335 return false;
10337 return true;
10339 case EQ:
10340 case NE:
10341 case LT:
10342 case LE:
10343 case GT:
10344 case GE:
10345 case LTU:
10346 case LEU:
10347 case GEU:
10348 case GTU:
10349 case ORDERED:
10350 case UNORDERED:
10351 case UNEQ:
10352 case UNLE:
10353 case UNLT:
10354 case UNGE:
10355 case UNGT:
10356 case LTGT:
10357 if (outer_code == SET)
10359 /* Is it a store-flag operation? */
10360 if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM
10361 && XEXP (x, 1) == const0_rtx)
10363 /* Thumb also needs an IT insn. */
10364 *cost += COSTS_N_INSNS (TARGET_THUMB ? 2 : 1);
10365 return true;
10367 if (XEXP (x, 1) == const0_rtx)
10369 switch (code)
10371 case LT:
10372 /* LSR Rd, Rn, #31. */
10373 if (speed_p)
10374 *cost += extra_cost->alu.shift;
10375 break;
10377 case EQ:
10378 /* RSBS T1, Rn, #0
10379 ADC Rd, Rn, T1. */
10381 case NE:
10382 /* SUBS T1, Rn, #1
10383 SBC Rd, Rn, T1. */
10384 *cost += COSTS_N_INSNS (1);
10385 break;
10387 case LE:
10388 /* RSBS T1, Rn, Rn, LSR #31
10389 ADC Rd, Rn, T1. */
10390 *cost += COSTS_N_INSNS (1);
10391 if (speed_p)
10392 *cost += extra_cost->alu.arith_shift;
10393 break;
10395 case GT:
10396 /* RSB Rd, Rn, Rn, ASR #1
10397 LSR Rd, Rd, #31. */
10398 *cost += COSTS_N_INSNS (1);
10399 if (speed_p)
10400 *cost += (extra_cost->alu.arith_shift
10401 + extra_cost->alu.shift);
10402 break;
10404 case GE:
10405 /* ASR Rd, Rn, #31
10406 ADD Rd, Rn, #1. */
10407 *cost += COSTS_N_INSNS (1);
10408 if (speed_p)
10409 *cost += extra_cost->alu.shift;
10410 break;
10412 default:
10413 /* Remaining cases are either meaningless or would take
10414 three insns anyway. */
10415 *cost = COSTS_N_INSNS (3);
10416 break;
10418 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
10419 return true;
10421 else
10423 *cost += COSTS_N_INSNS (TARGET_THUMB ? 3 : 2);
10424 if (CONST_INT_P (XEXP (x, 1))
10425 && const_ok_for_op (INTVAL (XEXP (x, 1)), COMPARE))
10427 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
10428 return true;
10431 return false;
10434 /* Not directly inside a set. If it involves the condition code
10435 register it must be the condition for a branch, cond_exec or
10436 I_T_E operation. Since the comparison is performed elsewhere
10437 this is just the control part which has no additional
10438 cost. */
10439 else if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM
10440 && XEXP (x, 1) == const0_rtx)
10442 *cost = 0;
10443 return true;
10445 return false;
10447 case ABS:
10448 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10449 && (mode == SFmode || !TARGET_VFP_SINGLE))
10451 if (speed_p)
10452 *cost += extra_cost->fp[mode != SFmode].neg;
10454 return false;
10456 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
10458 *cost = LIBCALL_COST (1);
10459 return false;
10462 if (mode == SImode)
10464 if (speed_p)
10465 *cost += extra_cost->alu.log_shift + extra_cost->alu.arith_shift;
10466 return false;
10468 /* Vector mode? */
10469 *cost = LIBCALL_COST (1);
10470 return false;
10472 case SIGN_EXTEND:
10473 if ((arm_arch4 || GET_MODE (XEXP (x, 0)) == SImode)
10474 && MEM_P (XEXP (x, 0)))
10476 if (mode == DImode)
10477 *cost += COSTS_N_INSNS (1);
10479 if (!speed_p)
10480 return true;
10482 if (GET_MODE (XEXP (x, 0)) == SImode)
10483 *cost += extra_cost->ldst.load;
10484 else
10485 *cost += extra_cost->ldst.load_sign_extend;
10487 if (mode == DImode)
10488 *cost += extra_cost->alu.shift;
10490 return true;
10493 /* Widening from less than 32 bits requires an extend operation. */
10494 if (GET_MODE (XEXP (x, 0)) != SImode && arm_arch6)
10496 /* We have SXTB/SXTH. */
10497 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
10498 if (speed_p)
10499 *cost += extra_cost->alu.extend;
10501 else if (GET_MODE (XEXP (x, 0)) != SImode)
10503 /* Needs two shifts. */
10504 *cost += COSTS_N_INSNS (1);
10505 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
10506 if (speed_p)
10507 *cost += 2 * extra_cost->alu.shift;
10510 /* Widening beyond 32 bits requires one more insn. */
10511 if (mode == DImode)
10513 *cost += COSTS_N_INSNS (1);
10514 if (speed_p)
10515 *cost += extra_cost->alu.shift;
10518 return true;
10520 case ZERO_EXTEND:
10521 if ((arm_arch4
10522 || GET_MODE (XEXP (x, 0)) == SImode
10523 || GET_MODE (XEXP (x, 0)) == QImode)
10524 && MEM_P (XEXP (x, 0)))
10526 *cost = rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
10528 if (mode == DImode)
10529 *cost += COSTS_N_INSNS (1); /* No speed penalty. */
10531 return true;
10534 /* Widening from less than 32 bits requires an extend operation. */
10535 if (GET_MODE (XEXP (x, 0)) == QImode)
10537 /* UXTB can be a shorter instruction in Thumb2, but it might
10538 be slower than the AND Rd, Rn, #255 alternative. When
10539 optimizing for speed it should never be slower to use
10540 AND, and we don't really model 16-bit vs 32-bit insns
10541 here. */
10542 if (speed_p)
10543 *cost += extra_cost->alu.logical;
10545 else if (GET_MODE (XEXP (x, 0)) != SImode && arm_arch6)
10547 /* We have UXTB/UXTH. */
10548 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
10549 if (speed_p)
10550 *cost += extra_cost->alu.extend;
10552 else if (GET_MODE (XEXP (x, 0)) != SImode)
10554 /* Needs two shifts. It's marginally preferable to use
10555 shifts rather than two BIC instructions as the second
10556 shift may merge with a subsequent insn as a shifter
10557 op. */
10558 *cost = COSTS_N_INSNS (2);
10559 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
10560 if (speed_p)
10561 *cost += 2 * extra_cost->alu.shift;
10564 /* Widening beyond 32 bits requires one more insn. */
10565 if (mode == DImode)
10567 *cost += COSTS_N_INSNS (1); /* No speed penalty. */
10570 return true;
10572 case CONST_INT:
10573 *cost = 0;
10574 /* CONST_INT has no mode, so we cannot tell for sure how many
10575 insns are really going to be needed. The best we can do is
10576 look at the value passed. If it fits in SImode, then assume
10577 that's the mode it will be used for. Otherwise assume it
10578 will be used in DImode. */
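/* For example (illustrative): 0x1234 survives truncation to SImode and is
   costed as a single SImode constant, whereas 0x123456789 does not, so both
   32-bit halves are costed separately below.  */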
10579 if (INTVAL (x) == trunc_int_for_mode (INTVAL (x), SImode))
10580 mode = SImode;
10581 else
10582 mode = DImode;
10584 /* Avoid blowing up in arm_gen_constant (). */
10585 if (!(outer_code == PLUS
10586 || outer_code == AND
10587 || outer_code == IOR
10588 || outer_code == XOR
10589 || outer_code == MINUS))
10590 outer_code = SET;
10592 const_int_cost:
10593 if (mode == SImode)
10595 *cost += COSTS_N_INSNS (arm_gen_constant (outer_code, SImode, NULL,
10596 INTVAL (x), NULL, NULL,
10597 0, 0));
10598 /* Extra costs? */
10600 else
10602 *cost += COSTS_N_INSNS (arm_gen_constant
10603 (outer_code, SImode, NULL,
10604 trunc_int_for_mode (INTVAL (x), SImode),
10605 NULL, NULL, 0, 0)
10606 + arm_gen_constant (outer_code, SImode, NULL,
10607 INTVAL (x) >> 32, NULL,
10608 NULL, 0, 0));
10609 /* Extra costs? */
10612 return true;
10614 case CONST:
10615 case LABEL_REF:
10616 case SYMBOL_REF:
10617 if (speed_p)
10619 if (arm_arch_thumb2 && !flag_pic)
10620 *cost += COSTS_N_INSNS (1);
10621 else
10622 *cost += extra_cost->ldst.load;
10624 else
10625 *cost += COSTS_N_INSNS (1);
10627 if (flag_pic)
10629 *cost += COSTS_N_INSNS (1);
10630 if (speed_p)
10631 *cost += extra_cost->alu.arith;
10634 return true;
10636 case CONST_FIXED:
10637 *cost = COSTS_N_INSNS (4);
10638 /* Fixme. */
10639 return true;
10641 case CONST_DOUBLE:
10642 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10643 && (mode == SFmode || !TARGET_VFP_SINGLE))
10645 if (vfp3_const_double_rtx (x))
10647 if (speed_p)
10648 *cost += extra_cost->fp[mode == DFmode].fpconst;
10649 return true;
10652 if (speed_p)
10654 if (mode == DFmode)
10655 *cost += extra_cost->ldst.loadd;
10656 else
10657 *cost += extra_cost->ldst.loadf;
10659 else
10660 *cost += COSTS_N_INSNS (1 + (mode == DFmode));
10662 return true;
10664 *cost = COSTS_N_INSNS (4);
10665 return true;
10667 case CONST_VECTOR:
10668 /* Fixme. */
10669 if (TARGET_NEON
10670 && TARGET_HARD_FLOAT
10671 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
10672 && neon_immediate_valid_for_move (x, mode, NULL, NULL))
10673 *cost = COSTS_N_INSNS (1);
10674 else
10675 *cost = COSTS_N_INSNS (4);
10676 return true;
10678 case HIGH:
10679 case LO_SUM:
10680 /* When optimizing for size, we prefer constant pool entries to
10681 MOVW/MOVT pairs, so bump the cost of these slightly. */
10682 if (!speed_p)
10683 *cost += 1;
10684 return true;
10686 case CLZ:
10687 if (speed_p)
10688 *cost += extra_cost->alu.clz;
10689 return false;
10691 case SMIN:
10692 if (XEXP (x, 1) == const0_rtx)
10694 if (speed_p)
10695 *cost += extra_cost->alu.log_shift;
10696 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
10697 return true;
10699 /* Fall through. */
10700 case SMAX:
10701 case UMIN:
10702 case UMAX:
10703 *cost += COSTS_N_INSNS (1);
10704 return false;
10706 case TRUNCATE:
10707 if (GET_CODE (XEXP (x, 0)) == ASHIFTRT
10708 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
10709 && INTVAL (XEXP (XEXP (x, 0), 1)) == 32
10710 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
10711 && ((GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == SIGN_EXTEND
10712 && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == SIGN_EXTEND)
10713 || (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == ZERO_EXTEND
10714 && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1))
10715 == ZERO_EXTEND))))
10717 if (speed_p)
10718 *cost += extra_cost->mult[1].extend;
10719 *cost += (rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0), VOIDmode,
10720 ZERO_EXTEND, 0, speed_p)
10721 + rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 1), VOIDmode,
10722 ZERO_EXTEND, 0, speed_p));
10723 return true;
10725 *cost = LIBCALL_COST (1);
10726 return false;
10728 case UNSPEC_VOLATILE:
10729 case UNSPEC:
10730 return arm_unspec_cost (x, outer_code, speed_p, cost);
10732 case PC:
10733 /* Reading the PC is like reading any other register. Writing it
10734 is more expensive, but we take that into account elsewhere. */
10735 *cost = 0;
10736 return true;
10738 case ZERO_EXTRACT:
10739 /* TODO: Simple zero_extract of bottom bits using AND. */
10740 /* Fall through. */
10741 case SIGN_EXTRACT:
10742 if (arm_arch6
10743 && mode == SImode
10744 && CONST_INT_P (XEXP (x, 1))
10745 && CONST_INT_P (XEXP (x, 2)))
10747 if (speed_p)
10748 *cost += extra_cost->alu.bfx;
10749 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
10750 return true;
10752 /* Without UBFX/SBFX, need to resort to shift operations. */
10753 *cost += COSTS_N_INSNS (1);
10754 if (speed_p)
10755 *cost += 2 * extra_cost->alu.shift;
10756 *cost += rtx_cost (XEXP (x, 0), mode, ASHIFT, 0, speed_p);
10757 return true;
10759 case FLOAT_EXTEND:
10760 if (TARGET_HARD_FLOAT)
10762 if (speed_p)
10763 *cost += extra_cost->fp[mode == DFmode].widen;
10764 if (!TARGET_VFP5
10765 && GET_MODE (XEXP (x, 0)) == HFmode)
10767 /* Pre v8, widening HF->DF is a two-step process, first
10768 widening to SFmode. */
10769 *cost += COSTS_N_INSNS (1);
10770 if (speed_p)
10771 *cost += extra_cost->fp[0].widen;
10773 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
10774 return true;
10777 *cost = LIBCALL_COST (1);
10778 return false;
10780 case FLOAT_TRUNCATE:
10781 if (TARGET_HARD_FLOAT)
10783 if (speed_p)
10784 *cost += extra_cost->fp[mode == DFmode].narrow;
10785 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
10786 return true;
10787 /* Vector modes? */
10789 *cost = LIBCALL_COST (1);
10790 return false;
10792 case FMA:
10793 if (TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_FMA)
10795 rtx op0 = XEXP (x, 0);
10796 rtx op1 = XEXP (x, 1);
10797 rtx op2 = XEXP (x, 2);
10800 /* vfms or vfnma. */
10801 if (GET_CODE (op0) == NEG)
10802 op0 = XEXP (op0, 0);
10804 /* vfnms or vfnma. */
10805 if (GET_CODE (op2) == NEG)
10806 op2 = XEXP (op2, 0);
10808 *cost += rtx_cost (op0, mode, FMA, 0, speed_p);
10809 *cost += rtx_cost (op1, mode, FMA, 1, speed_p);
10810 *cost += rtx_cost (op2, mode, FMA, 2, speed_p);
10812 if (speed_p)
10813 *cost += extra_cost->fp[mode == DFmode].fma;
10815 return true;
10818 *cost = LIBCALL_COST (3);
10819 return false;
10821 case FIX:
10822 case UNSIGNED_FIX:
10823 if (TARGET_HARD_FLOAT)
10825 /* The *combine_vcvtf2i reduces a vmul+vcvt into
10826 a vcvt fixed-point conversion. */
10827 if (code == FIX && mode == SImode
10828 && GET_CODE (XEXP (x, 0)) == FIX
10829 && GET_MODE (XEXP (x, 0)) == SFmode
10830 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
10831 && vfp3_const_double_for_bits (XEXP (XEXP (XEXP (x, 0), 0), 1))
10832 > 0)
10834 if (speed_p)
10835 *cost += extra_cost->fp[0].toint;
10837 *cost += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0), mode,
10838 code, 0, speed_p);
10839 return true;
10842 if (GET_MODE_CLASS (mode) == MODE_INT)
10844 mode = GET_MODE (XEXP (x, 0));
10845 if (speed_p)
10846 *cost += extra_cost->fp[mode == DFmode].toint;
10847 /* Strip off the 'cost' of rounding towards zero. */
10848 if (GET_CODE (XEXP (x, 0)) == FIX)
10849 *cost += rtx_cost (XEXP (XEXP (x, 0), 0), mode, code,
10850 0, speed_p);
10851 else
10852 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
10853 /* ??? Increase the cost to deal with transferring from
10854 FP -> CORE registers? */
10855 return true;
10857 else if (GET_MODE_CLASS (mode) == MODE_FLOAT
10858 && TARGET_VFP5)
10860 if (speed_p)
10861 *cost += extra_cost->fp[mode == DFmode].roundint;
10862 return false;
10864 /* Vector costs? */
10866 *cost = LIBCALL_COST (1);
10867 return false;
10869 case FLOAT:
10870 case UNSIGNED_FLOAT:
10871 if (TARGET_HARD_FLOAT)
10873 /* ??? Increase the cost to deal with transferring from CORE
10874 -> FP registers? */
10875 if (speed_p)
10876 *cost += extra_cost->fp[mode == DFmode].fromint;
10877 return false;
10879 *cost = LIBCALL_COST (1);
10880 return false;
10882 case CALL:
10883 return true;
10885 case ASM_OPERANDS:
10887 /* Just a guess. Guess number of instructions in the asm
10888 plus one insn per input. Always a minimum of COSTS_N_INSNS (1)
10889 though (see PR60663). */
10890 int asm_length = MAX (1, asm_str_count (ASM_OPERANDS_TEMPLATE (x)));
10891 int num_operands = ASM_OPERANDS_INPUT_LENGTH (x);
10893 *cost = COSTS_N_INSNS (asm_length + num_operands);
10894 return true;
10896 default:
10897 if (mode != VOIDmode)
10898 *cost = COSTS_N_INSNS (ARM_NUM_REGS (mode));
10899 else
10900 *cost = COSTS_N_INSNS (4); /* Who knows? */
10901 return false;
10905 #undef HANDLE_NARROW_SHIFT_ARITH
10907 /* RTX costs entry point. */
10909 static bool
10910 arm_rtx_costs (rtx x, machine_mode mode ATTRIBUTE_UNUSED, int outer_code,
10911 int opno ATTRIBUTE_UNUSED, int *total, bool speed)
10913 bool result;
10914 int code = GET_CODE (x);
10915 gcc_assert (current_tune->insn_extra_cost);
10917 result = arm_rtx_costs_internal (x, (enum rtx_code) code,
10918 (enum rtx_code) outer_code,
10919 current_tune->insn_extra_cost,
10920 total, speed);
10922 if (dump_file && (dump_flags & TDF_DETAILS))
10924 print_rtl_single (dump_file, x);
10925 fprintf (dump_file, "\n%s cost: %d (%s)\n", speed ? "Hot" : "Cold",
10926 *total, result ? "final" : "partial");
10928 return result;
10931 /* All address computations that can be done are free, but rtx cost returns
10932 the same for practically all of them. So we weight the different types
10933 of address here in the order (most preferred first):
10934 PRE/POST_INC/DEC, SHIFT or NON-INT sum, INT sum, REG, MEM or LABEL. */
10935 static inline int
10936 arm_arm_address_cost (rtx x)
10938 enum rtx_code c = GET_CODE (x);
10940 if (c == PRE_INC || c == PRE_DEC || c == POST_INC || c == POST_DEC)
10941 return 0;
10942 if (c == MEM || c == LABEL_REF || c == SYMBOL_REF)
10943 return 10;
10945 if (c == PLUS)
10947 if (CONST_INT_P (XEXP (x, 1)))
10948 return 2;
10950 if (ARITHMETIC_P (XEXP (x, 0)) || ARITHMETIC_P (XEXP (x, 1)))
10951 return 3;
10953 return 4;
10956 return 6;
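/* Illustrative examples of the weights above (added for clarity):
   (post_inc (reg)) scores 0, (plus (reg) (const_int 8)) scores 2,
   (plus (reg) (mult (reg) (const_int 4))) scores 3, a bare (reg) scores 6,
   and a (symbol_ref ...) scores 10.  */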
10959 static inline int
10960 arm_thumb_address_cost (rtx x)
10962 enum rtx_code c = GET_CODE (x);
10964 if (c == REG)
10965 return 1;
10966 if (c == PLUS
10967 && REG_P (XEXP (x, 0))
10968 && CONST_INT_P (XEXP (x, 1)))
10969 return 1;
10971 return 2;
10974 static int
10975 arm_address_cost (rtx x, machine_mode mode ATTRIBUTE_UNUSED,
10976 addr_space_t as ATTRIBUTE_UNUSED, bool speed ATTRIBUTE_UNUSED)
10978 return TARGET_32BIT ? arm_arm_address_cost (x) : arm_thumb_address_cost (x);
10981 /* Adjust cost hook for XScale. */
10982 static bool
10983 xscale_sched_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep,
10984 int * cost)
10986 /* Some true dependencies can have a higher cost depending
10987 on precisely how certain input operands are used. */
10988 if (dep_type == 0
10989 && recog_memoized (insn) >= 0
10990 && recog_memoized (dep) >= 0)
10992 int shift_opnum = get_attr_shift (insn);
10993 enum attr_type attr_type = get_attr_type (dep);
10995 /* If nonzero, SHIFT_OPNUM contains the operand number of a shifted
10996 operand for INSN. If we have a shifted input operand and the
10997 instruction we depend on is another ALU instruction, then we may
10998 have to account for an additional stall. */
10999 if (shift_opnum != 0
11000 && (attr_type == TYPE_ALU_SHIFT_IMM
11001 || attr_type == TYPE_ALUS_SHIFT_IMM
11002 || attr_type == TYPE_LOGIC_SHIFT_IMM
11003 || attr_type == TYPE_LOGICS_SHIFT_IMM
11004 || attr_type == TYPE_ALU_SHIFT_REG
11005 || attr_type == TYPE_ALUS_SHIFT_REG
11006 || attr_type == TYPE_LOGIC_SHIFT_REG
11007 || attr_type == TYPE_LOGICS_SHIFT_REG
11008 || attr_type == TYPE_MOV_SHIFT
11009 || attr_type == TYPE_MVN_SHIFT
11010 || attr_type == TYPE_MOV_SHIFT_REG
11011 || attr_type == TYPE_MVN_SHIFT_REG))
11013 rtx shifted_operand;
11014 int opno;
11016 /* Get the shifted operand. */
11017 extract_insn (insn);
11018 shifted_operand = recog_data.operand[shift_opnum];
11020 /* Iterate over all the operands in DEP. If we write an operand
11021 that overlaps with SHIFTED_OPERAND, then we have to increase the
11022 cost of this dependency. */
11023 extract_insn (dep);
11024 preprocess_constraints (dep);
11025 for (opno = 0; opno < recog_data.n_operands; opno++)
11027 /* We can ignore strict inputs. */
11028 if (recog_data.operand_type[opno] == OP_IN)
11029 continue;
11031 if (reg_overlap_mentioned_p (recog_data.operand[opno],
11032 shifted_operand))
11034 *cost = 2;
11035 return false;
11040 return true;
11043 /* Adjust cost hook for Cortex A9. */
11044 static bool
11045 cortex_a9_sched_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep,
11046 int * cost)
11048 switch (dep_type)
11050 case REG_DEP_ANTI:
11051 *cost = 0;
11052 return false;
11054 case REG_DEP_TRUE:
11055 case REG_DEP_OUTPUT:
11056 if (recog_memoized (insn) >= 0
11057 && recog_memoized (dep) >= 0)
11059 if (GET_CODE (PATTERN (insn)) == SET)
11061 if (GET_MODE_CLASS
11062 (GET_MODE (SET_DEST (PATTERN (insn)))) == MODE_FLOAT
11063 || GET_MODE_CLASS
11064 (GET_MODE (SET_SRC (PATTERN (insn)))) == MODE_FLOAT)
11066 enum attr_type attr_type_insn = get_attr_type (insn);
11067 enum attr_type attr_type_dep = get_attr_type (dep);
11069 /* By default all dependencies of the form
11070 s0 = s0 <op> s1
11071 s0 = s0 <op> s2
11072 have an extra latency of 1 cycle because
11073 of the input and output dependency in this
11074 case.  However, this gets modeled as a true
11075 dependency and hence all these checks. */
11076 if (REG_P (SET_DEST (PATTERN (insn)))
11077 && reg_set_p (SET_DEST (PATTERN (insn)), dep))
11079 /* FMACS is a special case where the dependent
11080 instruction can be issued 3 cycles before
11081 the normal latency in case of an output
11082 dependency. */
11083 if ((attr_type_insn == TYPE_FMACS
11084 || attr_type_insn == TYPE_FMACD)
11085 && (attr_type_dep == TYPE_FMACS
11086 || attr_type_dep == TYPE_FMACD))
11088 if (dep_type == REG_DEP_OUTPUT)
11089 *cost = insn_default_latency (dep) - 3;
11090 else
11091 *cost = insn_default_latency (dep);
11092 return false;
11094 else
11096 if (dep_type == REG_DEP_OUTPUT)
11097 *cost = insn_default_latency (dep) + 1;
11098 else
11099 *cost = insn_default_latency (dep);
11101 return false;
11106 break;
11108 default:
11109 gcc_unreachable ();
11112 return true;
11115 /* Adjust cost hook for FA726TE. */
11116 static bool
11117 fa726te_sched_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep,
11118 int * cost)
11120 /* For FA726TE, a true dependency on CPSR (i.e. a flag-setting instruction
11121 followed by a predicated one) has a penalty of 3. */
11122 if (dep_type == REG_DEP_TRUE
11123 && recog_memoized (insn) >= 0
11124 && recog_memoized (dep) >= 0
11125 && get_attr_conds (dep) == CONDS_SET)
11127 /* Use of carry (e.g. 64-bit arithmetic) in ALU: 3-cycle latency. */
11128 if (get_attr_conds (insn) == CONDS_USE
11129 && get_attr_type (insn) != TYPE_BRANCH)
11131 *cost = 3;
11132 return false;
11135 if (GET_CODE (PATTERN (insn)) == COND_EXEC
11136 || get_attr_conds (insn) == CONDS_USE)
11138 *cost = 0;
11139 return false;
11143 return true;
11146 /* Implement TARGET_REGISTER_MOVE_COST.
11148 Moves between VFP_REGS and GENERAL_REGS are a single insn, but
11149 such a move is typically more expensive than a single memory access.  We
11150 set the cost to less than that of two memory accesses so that floating-
11151 point to integer conversion does not go through memory.  */
11154 arm_register_move_cost (machine_mode mode ATTRIBUTE_UNUSED,
11155 reg_class_t from, reg_class_t to)
11157 if (TARGET_32BIT)
11159 if ((IS_VFP_CLASS (from) && !IS_VFP_CLASS (to))
11160 || (!IS_VFP_CLASS (from) && IS_VFP_CLASS (to)))
11161 return 15;
11162 else if ((from == IWMMXT_REGS && to != IWMMXT_REGS)
11163 || (from != IWMMXT_REGS && to == IWMMXT_REGS))
11164 return 4;
11165 else if (from == IWMMXT_GR_REGS || to == IWMMXT_GR_REGS)
11166 return 20;
11167 else
11168 return 2;
11170 else
11172 if (from == HI_REGS || to == HI_REGS)
11173 return 4;
11174 else
11175 return 2;
11179 /* Implement TARGET_MEMORY_MOVE_COST. */
11182 arm_memory_move_cost (machine_mode mode, reg_class_t rclass,
11183 bool in ATTRIBUTE_UNUSED)
11185 if (TARGET_32BIT)
11186 return 10;
11187 else
11189 if (GET_MODE_SIZE (mode) < 4)
11190 return 8;
11191 else
11192 return ((2 * GET_MODE_SIZE (mode)) * (rclass == LO_REGS ? 1 : 2));
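/* In the Thumb-1 branch above, for example (illustrative): an SImode value
   (4 bytes) costs 8 for LO_REGS and 16 for any other class, a DImode value
   (8 bytes) costs 16 and 32 respectively, and sub-word modes all cost 8.  */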
11196 /* Vectorizer cost model implementation. */
11198 /* Implement targetm.vectorize.builtin_vectorization_cost. */
11199 static int
11200 arm_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
11201 tree vectype,
11202 int misalign ATTRIBUTE_UNUSED)
11204 unsigned elements;
11206 switch (type_of_cost)
11208 case scalar_stmt:
11209 return current_tune->vec_costs->scalar_stmt_cost;
11211 case scalar_load:
11212 return current_tune->vec_costs->scalar_load_cost;
11214 case scalar_store:
11215 return current_tune->vec_costs->scalar_store_cost;
11217 case vector_stmt:
11218 return current_tune->vec_costs->vec_stmt_cost;
11220 case vector_load:
11221 return current_tune->vec_costs->vec_align_load_cost;
11223 case vector_store:
11224 return current_tune->vec_costs->vec_store_cost;
11226 case vec_to_scalar:
11227 return current_tune->vec_costs->vec_to_scalar_cost;
11229 case scalar_to_vec:
11230 return current_tune->vec_costs->scalar_to_vec_cost;
11232 case unaligned_load:
11233 return current_tune->vec_costs->vec_unalign_load_cost;
11235 case unaligned_store:
11236 return current_tune->vec_costs->vec_unalign_store_cost;
11238 case cond_branch_taken:
11239 return current_tune->vec_costs->cond_taken_branch_cost;
11241 case cond_branch_not_taken:
11242 return current_tune->vec_costs->cond_not_taken_branch_cost;
11244 case vec_perm:
11245 case vec_promote_demote:
11246 return current_tune->vec_costs->vec_stmt_cost;
11248 case vec_construct:
11249 elements = TYPE_VECTOR_SUBPARTS (vectype);
11250 return elements / 2 + 1;
11252 default:
11253 gcc_unreachable ();
11257 /* Implement targetm.vectorize.add_stmt_cost. */
11259 static unsigned
11260 arm_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
11261 struct _stmt_vec_info *stmt_info, int misalign,
11262 enum vect_cost_model_location where)
11264 unsigned *cost = (unsigned *) data;
11265 unsigned retval = 0;
11267 if (flag_vect_cost_model)
11269 tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
11270 int stmt_cost = arm_builtin_vectorization_cost (kind, vectype, misalign);
11272 /* Statements in an inner loop relative to the loop being
11273 vectorized are weighted more heavily. The value here is
11274 arbitrary and could potentially be improved with analysis. */
11275 if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info))
11276 count *= 50; /* FIXME. */
11278 retval = (unsigned) (count * stmt_cost);
11279 cost[where] += retval;
11282 return retval;
11285 /* Return true if and only if this insn can dual-issue only as older. */
11286 static bool
11287 cortexa7_older_only (rtx_insn *insn)
11289 if (recog_memoized (insn) < 0)
11290 return false;
11292 switch (get_attr_type (insn))
11294 case TYPE_ALU_DSP_REG:
11295 case TYPE_ALU_SREG:
11296 case TYPE_ALUS_SREG:
11297 case TYPE_LOGIC_REG:
11298 case TYPE_LOGICS_REG:
11299 case TYPE_ADC_REG:
11300 case TYPE_ADCS_REG:
11301 case TYPE_ADR:
11302 case TYPE_BFM:
11303 case TYPE_REV:
11304 case TYPE_MVN_REG:
11305 case TYPE_SHIFT_IMM:
11306 case TYPE_SHIFT_REG:
11307 case TYPE_LOAD_BYTE:
11308 case TYPE_LOAD1:
11309 case TYPE_STORE1:
11310 case TYPE_FFARITHS:
11311 case TYPE_FADDS:
11312 case TYPE_FFARITHD:
11313 case TYPE_FADDD:
11314 case TYPE_FMOV:
11315 case TYPE_F_CVT:
11316 case TYPE_FCMPS:
11317 case TYPE_FCMPD:
11318 case TYPE_FCONSTS:
11319 case TYPE_FCONSTD:
11320 case TYPE_FMULS:
11321 case TYPE_FMACS:
11322 case TYPE_FMULD:
11323 case TYPE_FMACD:
11324 case TYPE_FDIVS:
11325 case TYPE_FDIVD:
11326 case TYPE_F_MRC:
11327 case TYPE_F_MRRC:
11328 case TYPE_F_FLAG:
11329 case TYPE_F_LOADS:
11330 case TYPE_F_STORES:
11331 return true;
11332 default:
11333 return false;
11337 /* Return true if and only if this insn can dual-issue as younger. */
11338 static bool
11339 cortexa7_younger (FILE *file, int verbose, rtx_insn *insn)
11341 if (recog_memoized (insn) < 0)
11343 if (verbose > 5)
11344 fprintf (file, ";; not cortexa7_younger %d\n", INSN_UID (insn));
11345 return false;
11348 switch (get_attr_type (insn))
11350 case TYPE_ALU_IMM:
11351 case TYPE_ALUS_IMM:
11352 case TYPE_LOGIC_IMM:
11353 case TYPE_LOGICS_IMM:
11354 case TYPE_EXTEND:
11355 case TYPE_MVN_IMM:
11356 case TYPE_MOV_IMM:
11357 case TYPE_MOV_REG:
11358 case TYPE_MOV_SHIFT:
11359 case TYPE_MOV_SHIFT_REG:
11360 case TYPE_BRANCH:
11361 case TYPE_CALL:
11362 return true;
11363 default:
11364 return false;
11369 /* Look for an instruction that can dual issue only as an older
11370 instruction, and move it in front of any instructions that can
11371 dual-issue as younger, while preserving the relative order of all
11372 other instructions in the ready list. This is a heuristic to help
11373 dual-issue in later cycles, by postponing issue of more flexible
11374 instructions. This heuristic may affect dual issue opportunities
11375 in the current cycle. */
11376 static void
11377 cortexa7_sched_reorder (FILE *file, int verbose, rtx_insn **ready,
11378 int *n_readyp, int clock)
11380 int i;
11381 int first_older_only = -1, first_younger = -1;
11383 if (verbose > 5)
11384 fprintf (file,
11385 ";; sched_reorder for cycle %d with %d insns in ready list\n",
11386 clock,
11387 *n_readyp);
11389 /* Traverse the ready list from the head (the instruction to issue
11390 first), looking for the first instruction that can issue as
11391 younger and the first instruction that can dual-issue only as
11392 older. */
11393 for (i = *n_readyp - 1; i >= 0; i--)
11395 rtx_insn *insn = ready[i];
11396 if (cortexa7_older_only (insn))
11398 first_older_only = i;
11399 if (verbose > 5)
11400 fprintf (file, ";; reorder older found %d\n", INSN_UID (insn));
11401 break;
11403 else if (cortexa7_younger (file, verbose, insn) && first_younger == -1)
11404 first_younger = i;
11407 /* Nothing to reorder because either no younger insn found or insn
11408 that can dual-issue only as older appears before any insn that
11409 can dual-issue as younger. */
11410 if (first_younger == -1)
11412 if (verbose > 5)
11413 fprintf (file, ";; sched_reorder nothing to reorder as no younger\n");
11414 return;
11417 /* Nothing to reorder because no older-only insn in the ready list. */
11418 if (first_older_only == -1)
11420 if (verbose > 5)
11421 fprintf (file, ";; sched_reorder nothing to reorder as no older_only\n");
11422 return;
11425 /* Move first_older_only insn before first_younger. */
11426 if (verbose > 5)
11427 fprintf (file, ";; cortexa7_sched_reorder insn %d before %d\n",
11428 INSN_UID(ready [first_older_only]),
11429 INSN_UID(ready [first_younger]));
11430 rtx_insn *first_older_only_insn = ready [first_older_only];
11431 for (i = first_older_only; i < first_younger; i++)
11433 ready[i] = ready[i+1];
11436 ready[i] = first_older_only_insn;
11437 return;
11440 /* Implement TARGET_SCHED_REORDER. */
11441 static int
11442 arm_sched_reorder (FILE *file, int verbose, rtx_insn **ready, int *n_readyp,
11443 int clock)
11445 switch (arm_tune)
11447 case TARGET_CPU_cortexa7:
11448 cortexa7_sched_reorder (file, verbose, ready, n_readyp, clock);
11449 break;
11450 default:
11451 /* Do nothing for other cores. */
11452 break;
11455 return arm_issue_rate ();
11458 /* This function implements the target macro TARGET_SCHED_ADJUST_COST.
11459 It corrects the value of COST based on the relationship between
11460 INSN and DEP through the dependence LINK. It returns the new
11461 value. There is a per-core adjust_cost hook to adjust scheduler costs
11462 and the per-core hook can choose to completely override the generic
11463 adjust_cost function. Only put bits of code into arm_adjust_cost that
11464 are common across all cores. */
11465 static int
11466 arm_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep, int cost,
11467 unsigned int)
11469 rtx i_pat, d_pat;
11471 /* When generating Thumb-1 code, we want to place flag-setting operations
11472 close to a conditional branch which depends on them, so that we can
11473 omit the comparison. */
11474 if (TARGET_THUMB1
11475 && dep_type == 0
11476 && recog_memoized (insn) == CODE_FOR_cbranchsi4_insn
11477 && recog_memoized (dep) >= 0
11478 && get_attr_conds (dep) == CONDS_SET)
11479 return 0;
11481 if (current_tune->sched_adjust_cost != NULL)
11483 if (!current_tune->sched_adjust_cost (insn, dep_type, dep, &cost))
11484 return cost;
11487 /* XXX Is this strictly true? */
11488 if (dep_type == REG_DEP_ANTI
11489 || dep_type == REG_DEP_OUTPUT)
11490 return 0;
11492 /* Call insns don't incur a stall, even if they follow a load. */
11493 if (dep_type == 0
11494 && CALL_P (insn))
11495 return 1;
11497 if ((i_pat = single_set (insn)) != NULL
11498 && MEM_P (SET_SRC (i_pat))
11499 && (d_pat = single_set (dep)) != NULL
11500 && MEM_P (SET_DEST (d_pat)))
11502 rtx src_mem = XEXP (SET_SRC (i_pat), 0);
12503 /* This is a load after a store; there is no conflict if the load reads
11504 from a cached area. Assume that loads from the stack, and from the
11505 constant pool are cached, and that others will miss. This is a
11506 hack. */
11508 if ((GET_CODE (src_mem) == SYMBOL_REF
11509 && CONSTANT_POOL_ADDRESS_P (src_mem))
11510 || reg_mentioned_p (stack_pointer_rtx, src_mem)
11511 || reg_mentioned_p (frame_pointer_rtx, src_mem)
11512 || reg_mentioned_p (hard_frame_pointer_rtx, src_mem))
11513 return 1;
11516 return cost;
11520 arm_max_conditional_execute (void)
11522 return max_insns_skipped;
11525 static int
11526 arm_default_branch_cost (bool speed_p, bool predictable_p ATTRIBUTE_UNUSED)
11528 if (TARGET_32BIT)
11529 return (TARGET_THUMB2 && !speed_p) ? 1 : 4;
11530 else
11531 return (optimize > 0) ? 2 : 0;
11534 static int
11535 arm_cortex_a5_branch_cost (bool speed_p, bool predictable_p)
11537 return speed_p ? 0 : arm_default_branch_cost (speed_p, predictable_p);
11540 /* Thumb-2 branches are relatively cheap on Cortex-M processors ("1 + P cycles"
11541 on Cortex-M4, where P varies from 1 to 3 according to some criteria), since
11542 sequences of non-executed instructions in IT blocks probably take the same
11543 amount of time as executed instructions (and the IT instruction itself takes
11544 space in icache). This function was experimentally determined to give good
11545 results on a popular embedded benchmark. */
11547 static int
11548 arm_cortex_m_branch_cost (bool speed_p, bool predictable_p)
11550 return (TARGET_32BIT && speed_p) ? 1
11551 : arm_default_branch_cost (speed_p, predictable_p);
11554 static int
11555 arm_cortex_m7_branch_cost (bool speed_p, bool predictable_p)
11557 return speed_p ? 0 : arm_default_branch_cost (speed_p, predictable_p);
11560 static bool fp_consts_inited = false;
11562 static REAL_VALUE_TYPE value_fp0;
11564 static void
11565 init_fp_table (void)
11567 REAL_VALUE_TYPE r;
11569 r = REAL_VALUE_ATOF ("0", DFmode);
11570 value_fp0 = r;
11571 fp_consts_inited = true;
11574 /* Return TRUE if rtx X is a valid immediate FP constant. */
11576 arm_const_double_rtx (rtx x)
11578 const REAL_VALUE_TYPE *r;
11580 if (!fp_consts_inited)
11581 init_fp_table ();
11583 r = CONST_DOUBLE_REAL_VALUE (x);
11584 if (REAL_VALUE_MINUS_ZERO (*r))
11585 return 0;
11587 if (real_equal (r, &value_fp0))
11588 return 1;
11590 return 0;
11593 /* VFPv3 has a fairly wide range of representable immediates, formed from
11594 "quarter-precision" floating-point values. These can be evaluated using this
11595 formula (with ^ for exponentiation):
11597 -1^s * n * 2^-r
11599 Where 's' is a sign bit (0/1), 'n' and 'r' are integers such that
11600 16 <= n <= 31 and 0 <= r <= 7.
11602 These values are mapped onto an 8-bit integer ABCDEFGH s.t.
11604 - A (most-significant) is the sign bit.
11605 - BCD are the exponent (encoded as r XOR 3).
11606 - EFGH are the mantissa (encoded as n - 16).
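/* Illustrative sketch, not part of the original file: decode an 8-bit
   ABCDEFGH immediate as described above back into its value, assuming the
   formula -1^s * n * 2^-r.  The helper name is invented for exposition.  */
static double ATTRIBUTE_UNUSED
vfp3_decode_fconst_imm_sketch (unsigned char abcdefgh)
{
  int sign = (abcdefgh >> 7) & 1;			/* A: sign bit.  */
  int r = ((abcdefgh >> 4) & 7) ^ 3;			/* BCD: exponent, stored as r XOR 3.  */
  int n = (abcdefgh & 0xf) + 16;			/* EFGH: mantissa, stored as n - 16.  */
  double magnitude = (double) n / (double) (1 << r);	/* n * 2^-r, with 0 <= r <= 7.  */
  return sign ? -magnitude : magnitude;			/* E.g. 0x70 decodes to 1.0.  */
}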
11609 /* Return an integer index for a VFPv3 immediate operand X suitable for the
11610 fconst[sd] instruction, or -1 if X isn't suitable. */
11611 static int
11612 vfp3_const_double_index (rtx x)
11614 REAL_VALUE_TYPE r, m;
11615 int sign, exponent;
11616 unsigned HOST_WIDE_INT mantissa, mant_hi;
11617 unsigned HOST_WIDE_INT mask;
11618 int point_pos = 2 * HOST_BITS_PER_WIDE_INT - 1;
11619 bool fail;
11621 if (!TARGET_VFP3 || !CONST_DOUBLE_P (x))
11622 return -1;
11624 r = *CONST_DOUBLE_REAL_VALUE (x);
11626 /* We can't represent these things, so detect them first. */
11627 if (REAL_VALUE_ISINF (r) || REAL_VALUE_ISNAN (r) || REAL_VALUE_MINUS_ZERO (r))
11628 return -1;
11630 /* Extract sign, exponent and mantissa. */
11631 sign = REAL_VALUE_NEGATIVE (r) ? 1 : 0;
11632 r = real_value_abs (&r);
11633 exponent = REAL_EXP (&r);
11634 /* For the mantissa, we expand into two HOST_WIDE_INTS, apart from the
11635 highest (sign) bit, with a fixed binary point at bit point_pos.
11636 WARNING: If there's ever a VFP version which uses more than 2 * H_W_I - 1
11637 bits for the mantissa, this may fail (low bits would be lost). */
11638 real_ldexp (&m, &r, point_pos - exponent);
11639 wide_int w = real_to_integer (&m, &fail, HOST_BITS_PER_WIDE_INT * 2);
11640 mantissa = w.elt (0);
11641 mant_hi = w.elt (1);
11643 /* If there are bits set in the low part of the mantissa, we can't
11644 represent this value. */
11645 if (mantissa != 0)
11646 return -1;
11648 /* Now make it so that mantissa contains the most-significant bits, and move
11649 the point_pos to indicate that the least-significant bits have been
11650 discarded. */
11651 point_pos -= HOST_BITS_PER_WIDE_INT;
11652 mantissa = mant_hi;
11654 /* We can permit four significant bits of mantissa only, plus a high bit
11655 which is always 1. */
11656 mask = (HOST_WIDE_INT_1U << (point_pos - 5)) - 1;
11657 if ((mantissa & mask) != 0)
11658 return -1;
11660 /* Now we know the mantissa is in range, chop off the unneeded bits. */
11661 mantissa >>= point_pos - 5;
11663 /* The mantissa may be zero. Disallow that case. (It's possible to load the
11664 floating-point immediate zero with Neon using an integer-zero load, but
11665 that case is handled elsewhere.) */
11666 if (mantissa == 0)
11667 return -1;
11669 gcc_assert (mantissa >= 16 && mantissa <= 31);
11671 /* The value of 5 here would be 4 if GCC used IEEE754-like encoding (where
11672 normalized significands are in the range [1, 2). (Our mantissa is shifted
11673 left 4 places at this point relative to normalized IEEE754 values). GCC
11674 internally uses [0.5, 1) (see real.c), so the exponent returned from
11675 REAL_EXP must be altered. */
11676 exponent = 5 - exponent;
11678 if (exponent < 0 || exponent > 7)
11679 return -1;
11681 /* Sign, mantissa and exponent are now in the correct form to plug into the
11682 formula described in the comment above. */
11683 return (sign << 7) | ((exponent ^ 3) << 4) | (mantissa - 16);
11686 /* Return TRUE if rtx X is a valid immediate VFPv3 constant. */
11688 vfp3_const_double_rtx (rtx x)
11690 if (!TARGET_VFP3)
11691 return 0;
11693 return vfp3_const_double_index (x) != -1;
11696 /* Recognize immediates which can be used in various Neon instructions. Legal
11697 immediates are described by the following table (for VMVN variants, the
11698 bitwise inverse of the constant shown is recognized. In either case, VMOV
11699 is output and the correct instruction to use for a given constant is chosen
11700 by the assembler). The constant shown is replicated across all elements of
11701 the destination vector.
11703 insn elems variant constant (binary)
11704 ---- ----- ------- -----------------
11705 vmov i32 0 00000000 00000000 00000000 abcdefgh
11706 vmov i32 1 00000000 00000000 abcdefgh 00000000
11707 vmov i32 2 00000000 abcdefgh 00000000 00000000
11708 vmov i32 3 abcdefgh 00000000 00000000 00000000
11709 vmov i16 4 00000000 abcdefgh
11710 vmov i16 5 abcdefgh 00000000
11711 vmvn i32 6 00000000 00000000 00000000 abcdefgh
11712 vmvn i32 7 00000000 00000000 abcdefgh 00000000
11713 vmvn i32 8 00000000 abcdefgh 00000000 00000000
11714 vmvn i32 9 abcdefgh 00000000 00000000 00000000
11715 vmvn i16 10 00000000 abcdefgh
11716 vmvn i16 11 abcdefgh 00000000
11717 vmov i32 12 00000000 00000000 abcdefgh 11111111
11718 vmvn i32 13 00000000 00000000 abcdefgh 11111111
11719 vmov i32 14 00000000 abcdefgh 11111111 11111111
11720 vmvn i32 15 00000000 abcdefgh 11111111 11111111
11721 vmov i8 16 abcdefgh
11722 vmov i64 17 aaaaaaaa bbbbbbbb cccccccc dddddddd
11723 eeeeeeee ffffffff gggggggg hhhhhhhh
11724 vmov f32 18 aBbbbbbc defgh000 00000000 00000000
11725 vmov f32 19 00000000 00000000 00000000 00000000
11727 For case 18, B = !b. Representable values are exactly those accepted by
11728 vfp3_const_double_index, but are output as floating-point numbers rather
11729 than indices.
11731 For case 19, we will change it to vmov.i32 when assembling.
11733 Variants 0-5 (inclusive) may also be used as immediates for the second
11734 operand of VORR/VBIC instructions.
11736 The INVERSE argument causes the bitwise inverse of the given operand to be
11737 recognized instead (used for recognizing legal immediates for the VAND/VORN
11738 pseudo-instructions). If INVERSE is true, the value placed in *MODCONST is
11739 *not* inverted (i.e. the pseudo-instruction forms vand/vorn should still be
11740 output, rather than the real insns vbic/vorr).
11742 INVERSE makes no difference to the recognition of float vectors.
11744 The return value is the variant of immediate as shown in the above table, or
11745 -1 if the given value doesn't match any of the listed patterns.
11747 static int
11748 neon_valid_immediate (rtx op, machine_mode mode, int inverse,
11749 rtx *modconst, int *elementwidth)
11751 #define CHECK(STRIDE, ELSIZE, CLASS, TEST) \
11752 matches = 1; \
11753 for (i = 0; i < idx; i += (STRIDE)) \
11754 if (!(TEST)) \
11755 matches = 0; \
11756 if (matches) \
11758 immtype = (CLASS); \
11759 elsize = (ELSIZE); \
11760 break; \
11763 unsigned int i, elsize = 0, idx = 0, n_elts;
11764 unsigned int innersize;
11765 unsigned char bytes[16];
11766 int immtype = -1, matches;
11767 unsigned int invmask = inverse ? 0xff : 0;
11768 bool vector = GET_CODE (op) == CONST_VECTOR;
11770 if (vector)
11771 n_elts = CONST_VECTOR_NUNITS (op);
11772 else
11774 n_elts = 1;
11775 if (mode == VOIDmode)
11776 mode = DImode;
11779 innersize = GET_MODE_UNIT_SIZE (mode);
11781 /* Vectors of float constants. */
11782 if (GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
11784 rtx el0 = CONST_VECTOR_ELT (op, 0);
11786 if (!vfp3_const_double_rtx (el0) && el0 != CONST0_RTX (GET_MODE (el0)))
11787 return -1;
11789 /* FP16 vectors cannot be represented. */
11790 if (GET_MODE_INNER (mode) == HFmode)
11791 return -1;
11793 /* All elements in the vector must be the same. Note that 0.0 and -0.0
11794 are distinct in this context. */
11795 if (!const_vec_duplicate_p (op))
11796 return -1;
11798 if (modconst)
11799 *modconst = CONST_VECTOR_ELT (op, 0);
11801 if (elementwidth)
11802 *elementwidth = 0;
11804 if (el0 == CONST0_RTX (GET_MODE (el0)))
11805 return 19;
11806 else
11807 return 18;
11810 /* The tricks done in the code below apply for little-endian vector layout.
11811 For big-endian vectors only allow vectors of the form { a, a, a..., a }.
11812 FIXME: Implement logic for big-endian vectors. */
11813 if (BYTES_BIG_ENDIAN && vector && !const_vec_duplicate_p (op))
11814 return -1;
11816 /* Splat vector constant out into a byte vector. */
11817 for (i = 0; i < n_elts; i++)
11819 rtx el = vector ? CONST_VECTOR_ELT (op, i) : op;
11820 unsigned HOST_WIDE_INT elpart;
11822 gcc_assert (CONST_INT_P (el));
11823 elpart = INTVAL (el);
11825 for (unsigned int byte = 0; byte < innersize; byte++)
11827 bytes[idx++] = (elpart & 0xff) ^ invmask;
11828 elpart >>= BITS_PER_UNIT;
11832 /* Sanity check. */
11833 gcc_assert (idx == GET_MODE_SIZE (mode));
11837 CHECK (4, 32, 0, bytes[i] == bytes[0] && bytes[i + 1] == 0
11838 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
11840 CHECK (4, 32, 1, bytes[i] == 0 && bytes[i + 1] == bytes[1]
11841 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
11843 CHECK (4, 32, 2, bytes[i] == 0 && bytes[i + 1] == 0
11844 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);
11846 CHECK (4, 32, 3, bytes[i] == 0 && bytes[i + 1] == 0
11847 && bytes[i + 2] == 0 && bytes[i + 3] == bytes[3]);
11849 CHECK (2, 16, 4, bytes[i] == bytes[0] && bytes[i + 1] == 0);
11851 CHECK (2, 16, 5, bytes[i] == 0 && bytes[i + 1] == bytes[1]);
11853 CHECK (4, 32, 6, bytes[i] == bytes[0] && bytes[i + 1] == 0xff
11854 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
11856 CHECK (4, 32, 7, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
11857 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
11859 CHECK (4, 32, 8, bytes[i] == 0xff && bytes[i + 1] == 0xff
11860 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);
11862 CHECK (4, 32, 9, bytes[i] == 0xff && bytes[i + 1] == 0xff
11863 && bytes[i + 2] == 0xff && bytes[i + 3] == bytes[3]);
11865 CHECK (2, 16, 10, bytes[i] == bytes[0] && bytes[i + 1] == 0xff);
11867 CHECK (2, 16, 11, bytes[i] == 0xff && bytes[i + 1] == bytes[1]);
11869 CHECK (4, 32, 12, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
11870 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
11872 CHECK (4, 32, 13, bytes[i] == 0 && bytes[i + 1] == bytes[1]
11873 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
11875 CHECK (4, 32, 14, bytes[i] == 0xff && bytes[i + 1] == 0xff
11876 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);
11878 CHECK (4, 32, 15, bytes[i] == 0 && bytes[i + 1] == 0
11879 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);
11881 CHECK (1, 8, 16, bytes[i] == bytes[0]);
11883 CHECK (1, 64, 17, (bytes[i] == 0 || bytes[i] == 0xff)
11884 && bytes[i] == bytes[(i + 8) % idx]);
11886 while (0);
11888 if (immtype == -1)
11889 return -1;
11891 if (elementwidth)
11892 *elementwidth = elsize;
11894 if (modconst)
11896 unsigned HOST_WIDE_INT imm = 0;
11898 /* Un-invert bytes of recognized vector, if necessary. */
11899 if (invmask != 0)
11900 for (i = 0; i < idx; i++)
11901 bytes[i] ^= invmask;
11903 if (immtype == 17)
11905 /* FIXME: Broken on 32-bit H_W_I hosts. */
11906 gcc_assert (sizeof (HOST_WIDE_INT) == 8);
11908 for (i = 0; i < 8; i++)
11909 imm |= (unsigned HOST_WIDE_INT) (bytes[i] ? 0xff : 0)
11910 << (i * BITS_PER_UNIT);
11912 *modconst = GEN_INT (imm);
11914 else
11916 unsigned HOST_WIDE_INT imm = 0;
11918 for (i = 0; i < elsize / BITS_PER_UNIT; i++)
11919 imm |= (unsigned HOST_WIDE_INT) bytes[i] << (i * BITS_PER_UNIT);
11921 *modconst = GEN_INT (imm);
11925 return immtype;
11926 #undef CHECK
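/* Illustrative sketch, not part of the original file: how the variant-0..3
   checks above classify a 32-bit value that is splatted across the vector.
   Assumes a little-endian byte layout, as the function above does; the
   helper name is invented for exposition.  */
static int ATTRIBUTE_UNUSED
neon_i32_splat_variant_sketch (unsigned int val)
{
  unsigned char b[4];
  for (int i = 0; i < 4; i++)
    b[i] = (val >> (i * BITS_PER_UNIT)) & 0xff;
  if (b[1] == 0 && b[2] == 0 && b[3] == 0)
    return 0;		/* 00000000 00000000 00000000 abcdefgh */
  if (b[0] == 0 && b[2] == 0 && b[3] == 0)
    return 1;		/* 00000000 00000000 abcdefgh 00000000 */
  if (b[0] == 0 && b[1] == 0 && b[3] == 0)
    return 2;		/* 00000000 abcdefgh 00000000 00000000 */
  if (b[0] == 0 && b[1] == 0 && b[2] == 0)
    return 3;		/* abcdefgh 00000000 00000000 00000000 */
  return -1;		/* Needs one of the other variants, or is not valid.  */
}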
11929 /* Return TRUE if rtx X is legal for use as either a Neon VMOV (or, implicitly,
11930 VMVN) immediate. Write back width per element to *ELEMENTWIDTH (or zero for
11931 float elements), and a modified constant (whatever should be output for a
11932 VMOV) in *MODCONST. */
11935 neon_immediate_valid_for_move (rtx op, machine_mode mode,
11936 rtx *modconst, int *elementwidth)
11938 rtx tmpconst;
11939 int tmpwidth;
11940 int retval = neon_valid_immediate (op, mode, 0, &tmpconst, &tmpwidth);
11942 if (retval == -1)
11943 return 0;
11945 if (modconst)
11946 *modconst = tmpconst;
11948 if (elementwidth)
11949 *elementwidth = tmpwidth;
11951 return 1;
11954 /* Return TRUE if rtx X is legal for use in a VORR or VBIC instruction. If
11955 the immediate is valid, write a constant suitable for using as an operand
11956 to VORR/VBIC/VAND/VORN to *MODCONST and the corresponding element width to
11957 *ELEMENTWIDTH. See neon_valid_immediate for description of INVERSE. */
11960 neon_immediate_valid_for_logic (rtx op, machine_mode mode, int inverse,
11961 rtx *modconst, int *elementwidth)
11963 rtx tmpconst;
11964 int tmpwidth;
11965 int retval = neon_valid_immediate (op, mode, inverse, &tmpconst, &tmpwidth);
11967 if (retval < 0 || retval > 5)
11968 return 0;
11970 if (modconst)
11971 *modconst = tmpconst;
11973 if (elementwidth)
11974 *elementwidth = tmpwidth;
11976 return 1;
11979 /* Return TRUE if rtx OP is legal for use in a VSHR or VSHL instruction. If
11980 the immediate is valid, write a constant suitable for using as an operand
11981 to VSHR/VSHL to *MODCONST and the corresponding element width to
11982 *ELEMENTWIDTH. ISLEFTSHIFT says whether this is a left or a right shift,
11983 because the two have different limitations.
11986 neon_immediate_valid_for_shift (rtx op, machine_mode mode,
11987 rtx *modconst, int *elementwidth,
11988 bool isleftshift)
11990 unsigned int innersize = GET_MODE_UNIT_SIZE (mode);
11991 unsigned int n_elts = CONST_VECTOR_NUNITS (op), i;
11992 unsigned HOST_WIDE_INT last_elt = 0;
11993 unsigned HOST_WIDE_INT maxshift;
11995 /* Extract the element values and check that they are all the same. */
11996 for (i = 0; i < n_elts; i++)
11998 rtx el = CONST_VECTOR_ELT (op, i);
11999 unsigned HOST_WIDE_INT elpart;
12001 if (CONST_INT_P (el))
12002 elpart = INTVAL (el);
12003 else if (CONST_DOUBLE_P (el))
12004 return 0;
12005 else
12006 gcc_unreachable ();
12008 if (i != 0 && elpart != last_elt)
12009 return 0;
12011 last_elt = elpart;
12014 /* Shift less than element size. */
12015 maxshift = innersize * 8;
12017 if (isleftshift)
12019 /* Left shift immediate value can be from 0 to <size>-1. */
12020 if (last_elt >= maxshift)
12021 return 0;
12023 else
12025 /* Right shift immediate value can be from 1 to <size>. */
12026 if (last_elt == 0 || last_elt > maxshift)
12027 return 0;
12030 if (elementwidth)
12031 *elementwidth = innersize * 8;
12033 if (modconst)
12034 *modconst = CONST_VECTOR_ELT (op, 0);
12036 return 1;
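/* Illustrative sketch, not part of the original file: the immediate ranges
   that the function above enforces, for a hypothetical element width in
   bits.  The helper name is invented for exposition.  */
static bool ATTRIBUTE_UNUSED
neon_shift_imm_in_range_sketch (unsigned int elt_bits,
				unsigned HOST_WIDE_INT shift,
				bool isleftshift)
{
  if (isleftshift)
    return shift < elt_bits;			/* VSHL: 0 .. size-1.  */
  return shift >= 1 && shift <= elt_bits;	/* VSHR: 1 .. size.  */
}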
12039 /* Return a string suitable for output of Neon immediate logic operation
12040 MNEM. */
12042 char *
12043 neon_output_logic_immediate (const char *mnem, rtx *op2, machine_mode mode,
12044 int inverse, int quad)
12046 int width, is_valid;
12047 static char templ[40];
12049 is_valid = neon_immediate_valid_for_logic (*op2, mode, inverse, op2, &width);
12051 gcc_assert (is_valid != 0);
12053 if (quad)
12054 sprintf (templ, "%s.i%d\t%%q0, %%2", mnem, width);
12055 else
12056 sprintf (templ, "%s.i%d\t%%P0, %%2", mnem, width);
12058 return templ;
12061 /* Return a string suitable for output of Neon immediate shift operation
12062 (VSHR or VSHL) MNEM. */
12064 char *
12065 neon_output_shift_immediate (const char *mnem, char sign, rtx *op2,
12066 machine_mode mode, int quad,
12067 bool isleftshift)
12069 int width, is_valid;
12070 static char templ[40];
12072 is_valid = neon_immediate_valid_for_shift (*op2, mode, op2, &width, isleftshift);
12073 gcc_assert (is_valid != 0);
12075 if (quad)
12076 sprintf (templ, "%s.%c%d\t%%q0, %%q1, %%2", mnem, sign, width);
12077 else
12078 sprintf (templ, "%s.%c%d\t%%P0, %%P1, %%2", mnem, sign, width);
12080 return templ;
12083 /* Output a sequence of pairwise operations to implement a reduction.
12084 NOTE: We do "too much work" here, because pairwise operations work on two
12085 registers-worth of operands in one go. Unfortunately it does not seem
12086 possible to exploit those extra calculations to do the full operation in fewer steps.
12087 Although all vector elements of the result but the first are ignored, we
12088 actually calculate the same result in each of the elements. An alternative
12089 such as initially loading a vector with zero to use as each of the second
12090 operands would use up an additional register and take an extra instruction,
12091 for no particular gain. */
12093 void
12094 neon_pairwise_reduce (rtx op0, rtx op1, machine_mode mode,
12095 rtx (*reduc) (rtx, rtx, rtx))
12097 unsigned int i, parts = GET_MODE_SIZE (mode) / GET_MODE_UNIT_SIZE (mode);
12098 rtx tmpsum = op1;
12100 for (i = parts / 2; i >= 1; i /= 2)
12102 rtx dest = (i == 1) ? op0 : gen_reg_rtx (mode);
12103 emit_insn (reduc (dest, tmpsum, tmpsum));
12104 tmpsum = dest;
12108 /* If VALS is a vector constant that can be loaded into a register
12109 using VDUP, generate instructions to do so and return an RTX to
12110 assign to the register. Otherwise return NULL_RTX. */
12112 static rtx
12113 neon_vdup_constant (rtx vals)
12115 machine_mode mode = GET_MODE (vals);
12116 machine_mode inner_mode = GET_MODE_INNER (mode);
12117 rtx x;
12119 if (GET_CODE (vals) != CONST_VECTOR || GET_MODE_SIZE (inner_mode) > 4)
12120 return NULL_RTX;
12122 if (!const_vec_duplicate_p (vals, &x))
12123 /* The elements are not all the same. We could handle repeating
12124 patterns of a mode larger than INNER_MODE here (e.g. int8x8_t
12125 {0, C, 0, C, 0, C, 0, C} which can be loaded using
12126 vdup.i16). */
12127 return NULL_RTX;
12129 /* We can load this constant by using VDUP and a constant in a
12130 single ARM register. This will be cheaper than a vector
12131 load. */
12133 x = copy_to_mode_reg (inner_mode, x);
12134 return gen_rtx_VEC_DUPLICATE (mode, x);
12137 /* Generate code to load VALS, which is a PARALLEL containing only
12138 constants (for vec_init) or CONST_VECTOR, efficiently into a
12139 register. Returns an RTX to copy into the register, or NULL_RTX
12140 for a PARALLEL that cannot be converted into a CONST_VECTOR. */
12143 neon_make_constant (rtx vals)
12145 machine_mode mode = GET_MODE (vals);
12146 rtx target;
12147 rtx const_vec = NULL_RTX;
12148 int n_elts = GET_MODE_NUNITS (mode);
12149 int n_const = 0;
12150 int i;
12152 if (GET_CODE (vals) == CONST_VECTOR)
12153 const_vec = vals;
12154 else if (GET_CODE (vals) == PARALLEL)
12156 /* A CONST_VECTOR must contain only CONST_INTs and
12157 CONST_DOUBLEs, but CONSTANT_P allows more (e.g. SYMBOL_REF).
12158 Only store valid constants in a CONST_VECTOR. */
12159 for (i = 0; i < n_elts; ++i)
12161 rtx x = XVECEXP (vals, 0, i);
12162 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
12163 n_const++;
12165 if (n_const == n_elts)
12166 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
12168 else
12169 gcc_unreachable ();
12171 if (const_vec != NULL
12172 && neon_immediate_valid_for_move (const_vec, mode, NULL, NULL))
12173 /* Load using VMOV. On Cortex-A8 this takes one cycle. */
12174 return const_vec;
12175 else if ((target = neon_vdup_constant (vals)) != NULL_RTX)
12176 /* Loaded using VDUP. On Cortex-A8 the VDUP takes one NEON
12177 pipeline cycle; creating the constant takes one or two ARM
12178 pipeline cycles. */
12179 return target;
12180 else if (const_vec != NULL_RTX)
12181 /* Load from constant pool. On Cortex-A8 this takes two cycles
12182 (for either double or quad vectors). We cannot take advantage
12183 of single-cycle VLD1 because we need a PC-relative addressing
12184 mode. */
12185 return const_vec;
12186 else
12187 /* A PARALLEL containing something not valid inside CONST_VECTOR.
12188 We cannot construct an initializer. */
12189 return NULL_RTX;
12192 /* Initialize vector TARGET to VALS. */
12194 void
12195 neon_expand_vector_init (rtx target, rtx vals)
12197 machine_mode mode = GET_MODE (target);
12198 machine_mode inner_mode = GET_MODE_INNER (mode);
12199 int n_elts = GET_MODE_NUNITS (mode);
12200 int n_var = 0, one_var = -1;
12201 bool all_same = true;
12202 rtx x, mem;
12203 int i;
12205 for (i = 0; i < n_elts; ++i)
12207 x = XVECEXP (vals, 0, i);
12208 if (!CONSTANT_P (x))
12209 ++n_var, one_var = i;
12211 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
12212 all_same = false;
12215 if (n_var == 0)
12217 rtx constant = neon_make_constant (vals);
12218 if (constant != NULL_RTX)
12220 emit_move_insn (target, constant);
12221 return;
12225 /* Splat a single non-constant element if we can. */
12226 if (all_same && GET_MODE_SIZE (inner_mode) <= 4)
12228 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
12229 emit_insn (gen_rtx_SET (target, gen_rtx_VEC_DUPLICATE (mode, x)));
12230 return;
12233 /* One field is non-constant. Load constant then overwrite varying
12234 field. This is more efficient than using the stack. */
12235 if (n_var == 1)
12237 rtx copy = copy_rtx (vals);
12238 rtx index = GEN_INT (one_var);
12240 /* Load constant part of vector, substitute neighboring value for
12241 varying element. */
12242 XVECEXP (copy, 0, one_var) = XVECEXP (vals, 0, (one_var + 1) % n_elts);
12243 neon_expand_vector_init (target, copy);
12245 /* Insert variable. */
12246 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, one_var));
12247 switch (mode)
12249 case E_V8QImode:
12250 emit_insn (gen_neon_vset_lanev8qi (target, x, target, index));
12251 break;
12252 case E_V16QImode:
12253 emit_insn (gen_neon_vset_lanev16qi (target, x, target, index));
12254 break;
12255 case E_V4HImode:
12256 emit_insn (gen_neon_vset_lanev4hi (target, x, target, index));
12257 break;
12258 case E_V8HImode:
12259 emit_insn (gen_neon_vset_lanev8hi (target, x, target, index));
12260 break;
12261 case E_V2SImode:
12262 emit_insn (gen_neon_vset_lanev2si (target, x, target, index));
12263 break;
12264 case E_V4SImode:
12265 emit_insn (gen_neon_vset_lanev4si (target, x, target, index));
12266 break;
12267 case E_V2SFmode:
12268 emit_insn (gen_neon_vset_lanev2sf (target, x, target, index));
12269 break;
12270 case E_V4SFmode:
12271 emit_insn (gen_neon_vset_lanev4sf (target, x, target, index));
12272 break;
12273 case E_V2DImode:
12274 emit_insn (gen_neon_vset_lanev2di (target, x, target, index));
12275 break;
12276 default:
12277 gcc_unreachable ();
12279 return;
12282 /* Construct the vector in memory one field at a time
12283 and load the whole vector. */
12284 mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
12285 for (i = 0; i < n_elts; i++)
12286 emit_move_insn (adjust_address_nv (mem, inner_mode,
12287 i * GET_MODE_SIZE (inner_mode)),
12288 XVECEXP (vals, 0, i));
12289 emit_move_insn (target, mem);
12292 /* Ensure OPERAND lies between LOW (inclusive) and HIGH (exclusive). Raise
12293 an error if it doesn't. EXP indicates the source location, which includes the
12294 inlining history for intrinsics. */
12296 static void
12297 bounds_check (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high,
12298 const_tree exp, const char *desc)
12300 HOST_WIDE_INT lane;
12302 gcc_assert (CONST_INT_P (operand));
12304 lane = INTVAL (operand);
12306 if (lane < low || lane >= high)
12308 if (exp)
12309 error ("%K%s %wd out of range %wd - %wd",
12310 exp, desc, lane, low, high - 1);
12311 else
12312 error ("%s %wd out of range %wd - %wd", desc, lane, low, high - 1);
12316 /* Bounds-check lanes. */
12318 void
12319 neon_lane_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high,
12320 const_tree exp)
12322 bounds_check (operand, low, high, exp, "lane");
12325 /* Bounds-check constants. */
12327 void
12328 arm_const_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
12330 bounds_check (operand, low, high, NULL_TREE, "constant");
12333 HOST_WIDE_INT
12334 neon_element_bits (machine_mode mode)
12336 return GET_MODE_UNIT_BITSIZE (mode);
12340 /* Predicates for `match_operand' and `match_operator'. */
12342 /* Return TRUE if OP is a valid coprocessor memory address pattern.
12343 WB is true if full writeback address modes are allowed and is false
12344 if limited writeback address modes (POST_INC and PRE_DEC) are
12345 allowed. */
12348 arm_coproc_mem_operand (rtx op, bool wb)
12350 rtx ind;
12352 /* Reject eliminable registers. */
12353 if (! (reload_in_progress || reload_completed || lra_in_progress)
12354 && ( reg_mentioned_p (frame_pointer_rtx, op)
12355 || reg_mentioned_p (arg_pointer_rtx, op)
12356 || reg_mentioned_p (virtual_incoming_args_rtx, op)
12357 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
12358 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
12359 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
12360 return FALSE;
12362 /* Constants are converted into offsets from labels. */
12363 if (!MEM_P (op))
12364 return FALSE;
12366 ind = XEXP (op, 0);
12368 if (reload_completed
12369 && (GET_CODE (ind) == LABEL_REF
12370 || (GET_CODE (ind) == CONST
12371 && GET_CODE (XEXP (ind, 0)) == PLUS
12372 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
12373 && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
12374 return TRUE;
12376 /* Match: (mem (reg)). */
12377 if (REG_P (ind))
12378 return arm_address_register_rtx_p (ind, 0);
12380 /* Autoincrement addressing modes. POST_INC and PRE_DEC are
12381 acceptable in any case (subject to verification by
12382 arm_address_register_rtx_p). We need WB to be true to accept
12383 PRE_INC and POST_DEC. */
12384 if (GET_CODE (ind) == POST_INC
12385 || GET_CODE (ind) == PRE_DEC
12386 || (wb
12387 && (GET_CODE (ind) == PRE_INC
12388 || GET_CODE (ind) == POST_DEC)))
12389 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
12391 if (wb
12392 && (GET_CODE (ind) == POST_MODIFY || GET_CODE (ind) == PRE_MODIFY)
12393 && arm_address_register_rtx_p (XEXP (ind, 0), 0)
12394 && GET_CODE (XEXP (ind, 1)) == PLUS
12395 && rtx_equal_p (XEXP (XEXP (ind, 1), 0), XEXP (ind, 0)))
12396 ind = XEXP (ind, 1);
12398 /* Match:
12399 (plus (reg)
12400 (const)). */
12401 if (GET_CODE (ind) == PLUS
12402 && REG_P (XEXP (ind, 0))
12403 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
12404 && CONST_INT_P (XEXP (ind, 1))
12405 && INTVAL (XEXP (ind, 1)) > -1024
12406 && INTVAL (XEXP (ind, 1)) < 1024
12407 && (INTVAL (XEXP (ind, 1)) & 3) == 0)
12408 return TRUE;
12410 return FALSE;
12413 /* Return TRUE if OP is a memory operand which we can load or store a vector
12414 to/from. TYPE is one of the following values:
12415 0 - Vector load/store (vldr)
12416 1 - Core registers (ldm)
12417 2 - Element/structure loads (vld1)
12420 neon_vector_mem_operand (rtx op, int type, bool strict)
12422 rtx ind;
12424 /* Reject eliminable registers. */
12425 if (strict && ! (reload_in_progress || reload_completed)
12426 && (reg_mentioned_p (frame_pointer_rtx, op)
12427 || reg_mentioned_p (arg_pointer_rtx, op)
12428 || reg_mentioned_p (virtual_incoming_args_rtx, op)
12429 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
12430 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
12431 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
12432 return FALSE;
12434 /* Constants are converted into offsets from labels. */
12435 if (!MEM_P (op))
12436 return FALSE;
12438 ind = XEXP (op, 0);
12440 if (reload_completed
12441 && (GET_CODE (ind) == LABEL_REF
12442 || (GET_CODE (ind) == CONST
12443 && GET_CODE (XEXP (ind, 0)) == PLUS
12444 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
12445 && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
12446 return TRUE;
12448 /* Match: (mem (reg)). */
12449 if (REG_P (ind))
12450 return arm_address_register_rtx_p (ind, 0);
12452 /* Allow post-increment with Neon registers. */
12453 if ((type != 1 && GET_CODE (ind) == POST_INC)
12454 || (type == 0 && GET_CODE (ind) == PRE_DEC))
12455 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
12457 /* Allow post-increment by register for VLDn. */
12458 if (type == 2 && GET_CODE (ind) == POST_MODIFY
12459 && GET_CODE (XEXP (ind, 1)) == PLUS
12460 && REG_P (XEXP (XEXP (ind, 1), 1)))
12461 return true;
12463 /* Match:
12464 (plus (reg)
12465 (const)). */
12466 if (type == 0
12467 && GET_CODE (ind) == PLUS
12468 && REG_P (XEXP (ind, 0))
12469 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
12470 && CONST_INT_P (XEXP (ind, 1))
12471 && INTVAL (XEXP (ind, 1)) > -1024
12472 /* For quad modes, we restrict the constant offset to be slightly less
12473 than what the instruction format permits. We have no such constraint
12474 on double mode offsets. (This must match arm_legitimate_index_p.) */
12475 && (INTVAL (XEXP (ind, 1))
12476 < (VALID_NEON_QREG_MODE (GET_MODE (op))? 1016 : 1024))
12477 && (INTVAL (XEXP (ind, 1)) & 3) == 0)
12478 return TRUE;
12480 return FALSE;
12483 /* Return TRUE if OP is a mem suitable for loading/storing a Neon struct
12484 type. */
12486 neon_struct_mem_operand (rtx op)
12488 rtx ind;
12490 /* Reject eliminable registers. */
12491 if (! (reload_in_progress || reload_completed)
12492 && ( reg_mentioned_p (frame_pointer_rtx, op)
12493 || reg_mentioned_p (arg_pointer_rtx, op)
12494 || reg_mentioned_p (virtual_incoming_args_rtx, op)
12495 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
12496 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
12497 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
12498 return FALSE;
12500 /* Constants are converted into offsets from labels. */
12501 if (!MEM_P (op))
12502 return FALSE;
12504 ind = XEXP (op, 0);
12506 if (reload_completed
12507 && (GET_CODE (ind) == LABEL_REF
12508 || (GET_CODE (ind) == CONST
12509 && GET_CODE (XEXP (ind, 0)) == PLUS
12510 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
12511 && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
12512 return TRUE;
12514 /* Match: (mem (reg)). */
12515 if (REG_P (ind))
12516 return arm_address_register_rtx_p (ind, 0);
12518 /* vldm/vstm allows POST_INC (ia) and PRE_DEC (db). */
12519 if (GET_CODE (ind) == POST_INC
12520 || GET_CODE (ind) == PRE_DEC)
12521 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
12523 return FALSE;
12526 /* Return true if X is a register that will be eliminated later on. */
12528 arm_eliminable_register (rtx x)
12530 return REG_P (x) && (REGNO (x) == FRAME_POINTER_REGNUM
12531 || REGNO (x) == ARG_POINTER_REGNUM
12532 || (REGNO (x) >= FIRST_VIRTUAL_REGISTER
12533 && REGNO (x) <= LAST_VIRTUAL_REGISTER));
12536 /* Return GENERAL_REGS if a scratch register is required to reload x to/from
12537 coprocessor registers. Otherwise return NO_REGS. */
12539 enum reg_class
12540 coproc_secondary_reload_class (machine_mode mode, rtx x, bool wb)
12542 if (mode == HFmode)
12544 if (!TARGET_NEON_FP16 && !TARGET_VFP_FP16INST)
12545 return GENERAL_REGS;
12546 if (s_register_operand (x, mode) || neon_vector_mem_operand (x, 2, true))
12547 return NO_REGS;
12548 return GENERAL_REGS;
12551 /* The neon move patterns handle all legitimate vector and struct
12552 addresses. */
12553 if (TARGET_NEON
12554 && (MEM_P (x) || GET_CODE (x) == CONST_VECTOR)
12555 && (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
12556 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT
12557 || VALID_NEON_STRUCT_MODE (mode)))
12558 return NO_REGS;
12560 if (arm_coproc_mem_operand (x, wb) || s_register_operand (x, mode))
12561 return NO_REGS;
12563 return GENERAL_REGS;
12566 /* Values which must be returned in the most-significant end of the return
12567 register. */
12569 static bool
12570 arm_return_in_msb (const_tree valtype)
12572 return (TARGET_AAPCS_BASED
12573 && BYTES_BIG_ENDIAN
12574 && (AGGREGATE_TYPE_P (valtype)
12575 || TREE_CODE (valtype) == COMPLEX_TYPE
12576 || FIXED_POINT_TYPE_P (valtype)));
12579 /* Return TRUE if X references a SYMBOL_REF. */
12581 symbol_mentioned_p (rtx x)
12583 const char * fmt;
12584 int i;
12586 if (GET_CODE (x) == SYMBOL_REF)
12587 return 1;
12589 /* UNSPEC_TLS entries for a symbol include the SYMBOL_REF, but they
12590 are constant offsets, not symbols. */
12591 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
12592 return 0;
12594 fmt = GET_RTX_FORMAT (GET_CODE (x));
12596 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
12598 if (fmt[i] == 'E')
12600 int j;
12602 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
12603 if (symbol_mentioned_p (XVECEXP (x, i, j)))
12604 return 1;
12606 else if (fmt[i] == 'e' && symbol_mentioned_p (XEXP (x, i)))
12607 return 1;
12610 return 0;
12613 /* Return TRUE if X references a LABEL_REF. */
12615 label_mentioned_p (rtx x)
12617 const char * fmt;
12618 int i;
12620 if (GET_CODE (x) == LABEL_REF)
12621 return 1;
12623 /* UNSPEC_TLS entries for a symbol include a LABEL_REF for the referencing
12624 instruction, but they are constant offsets, not symbols. */
12625 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
12626 return 0;
12628 fmt = GET_RTX_FORMAT (GET_CODE (x));
12629 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
12631 if (fmt[i] == 'E')
12633 int j;
12635 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
12636 if (label_mentioned_p (XVECEXP (x, i, j)))
12637 return 1;
12639 else if (fmt[i] == 'e' && label_mentioned_p (XEXP (x, i)))
12640 return 1;
12643 return 0;
12647 tls_mentioned_p (rtx x)
12649 switch (GET_CODE (x))
12651 case CONST:
12652 return tls_mentioned_p (XEXP (x, 0));
12654 case UNSPEC:
12655 if (XINT (x, 1) == UNSPEC_TLS)
12656 return 1;
12658 /* Fall through. */
12659 default:
12660 return 0;
12664 /* Must not copy any rtx that uses a pc-relative address.
12665 Also, disallow copying of load-exclusive instructions that
12666 may appear after splitting of compare-and-swap-style operations
12667 so as to prevent those loops from being transformed away from their
12668 canonical forms (see PR 69904). */
12670 static bool
12671 arm_cannot_copy_insn_p (rtx_insn *insn)
12673 /* The tls call insn cannot be copied, as it is paired with a data
12674 word. */
12675 if (recog_memoized (insn) == CODE_FOR_tlscall)
12676 return true;
12678 subrtx_iterator::array_type array;
12679 FOR_EACH_SUBRTX (iter, array, PATTERN (insn), ALL)
12681 const_rtx x = *iter;
12682 if (GET_CODE (x) == UNSPEC
12683 && (XINT (x, 1) == UNSPEC_PIC_BASE
12684 || XINT (x, 1) == UNSPEC_PIC_UNIFIED))
12685 return true;
12688 rtx set = single_set (insn);
12689 if (set)
12691 rtx src = SET_SRC (set);
12692 if (GET_CODE (src) == ZERO_EXTEND)
12693 src = XEXP (src, 0);
12695 /* Catch the load-exclusive and load-acquire operations. */
12696 if (GET_CODE (src) == UNSPEC_VOLATILE
12697 && (XINT (src, 1) == VUNSPEC_LL
12698 || XINT (src, 1) == VUNSPEC_LAX))
12699 return true;
12701 return false;
12704 enum rtx_code
12705 minmax_code (rtx x)
12707 enum rtx_code code = GET_CODE (x);
12709 switch (code)
12711 case SMAX:
12712 return GE;
12713 case SMIN:
12714 return LE;
12715 case UMIN:
12716 return LEU;
12717 case UMAX:
12718 return GEU;
12719 default:
12720 gcc_unreachable ();
12724 /* Match pair of min/max operators that can be implemented via usat/ssat. */
12726 bool
12727 arm_sat_operator_match (rtx lo_bound, rtx hi_bound,
12728 int *mask, bool *signed_sat)
12730 /* The high bound must be a power of two minus one. */
12731 int log = exact_log2 (INTVAL (hi_bound) + 1);
12732 if (log == -1)
12733 return false;
12735 /* The low bound is either zero (for usat) or one less than the
12736 negation of the high bound (for ssat). */
12737 if (INTVAL (lo_bound) == 0)
12739 if (mask)
12740 *mask = log;
12741 if (signed_sat)
12742 *signed_sat = false;
12744 return true;
12747 if (INTVAL (lo_bound) == -INTVAL (hi_bound) - 1)
12749 if (mask)
12750 *mask = log + 1;
12751 if (signed_sat)
12752 *signed_sat = true;
12754 return true;
12757 return false;
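/* Illustrative sketch, not part of the original file: the bound pairs the
   function above accepts, expressed directly on integer bounds.  For example
   [0, 255] maps to usat #8 and [-128, 127] maps to ssat #8.  The helper name
   is invented for exposition.  */
static int ATTRIBUTE_UNUSED
sat_bit_width_sketch (HOST_WIDE_INT lo, HOST_WIDE_INT hi, bool *is_signed)
{
  unsigned HOST_WIDE_INT uhi = hi;
  /* HI must be a power of two minus one.  */
  if (hi < 0 || (uhi & (uhi + 1)) != 0)
    return -1;
  int log = 0;
  while ((((unsigned HOST_WIDE_INT) 1 << log) - 1) != uhi)
    log++;
  if (lo == 0)
    {
      *is_signed = false;
      return log;		/* usat #log  */
    }
  if (lo == -hi - 1)
    {
      *is_signed = true;
      return log + 1;		/* ssat #(log + 1)  */
    }
  return -1;
}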
12760 /* Return 1 if memory locations are adjacent. */
12762 adjacent_mem_locations (rtx a, rtx b)
12764 /* We don't guarantee to preserve the order of these memory refs. */
12765 if (volatile_refs_p (a) || volatile_refs_p (b))
12766 return 0;
12768 if ((REG_P (XEXP (a, 0))
12769 || (GET_CODE (XEXP (a, 0)) == PLUS
12770 && CONST_INT_P (XEXP (XEXP (a, 0), 1))))
12771 && (REG_P (XEXP (b, 0))
12772 || (GET_CODE (XEXP (b, 0)) == PLUS
12773 && CONST_INT_P (XEXP (XEXP (b, 0), 1)))))
12775 HOST_WIDE_INT val0 = 0, val1 = 0;
12776 rtx reg0, reg1;
12777 int val_diff;
12779 if (GET_CODE (XEXP (a, 0)) == PLUS)
12781 reg0 = XEXP (XEXP (a, 0), 0);
12782 val0 = INTVAL (XEXP (XEXP (a, 0), 1));
12784 else
12785 reg0 = XEXP (a, 0);
12787 if (GET_CODE (XEXP (b, 0)) == PLUS)
12789 reg1 = XEXP (XEXP (b, 0), 0);
12790 val1 = INTVAL (XEXP (XEXP (b, 0), 1));
12792 else
12793 reg1 = XEXP (b, 0);
12795 /* Don't accept any offset that will require multiple
12796 instructions to handle, since this would cause the
12797 arith_adjacentmem pattern to output an overlong sequence. */
12798 if (!const_ok_for_op (val0, PLUS) || !const_ok_for_op (val1, PLUS))
12799 return 0;
12801 /* Don't allow an eliminable register: register elimination can make
12802 the offset too large. */
12803 if (arm_eliminable_register (reg0))
12804 return 0;
12806 val_diff = val1 - val0;
12808 if (arm_ld_sched)
12810 /* If the target has load delay slots, then there's no benefit
12811 to using an ldm instruction unless the offset is zero and
12812 we are optimizing for size. */
12813 return (optimize_size && (REGNO (reg0) == REGNO (reg1))
12814 && (val0 == 0 || val1 == 0 || val0 == 4 || val1 == 4)
12815 && (val_diff == 4 || val_diff == -4));
12818 return ((REGNO (reg0) == REGNO (reg1))
12819 && (val_diff == 4 || val_diff == -4));
12822 return 0;
12825 /* Return true if OP is a valid load or store multiple operation. LOAD is true
12826 for load operations, false for store operations. CONSECUTIVE is true
12827 if the register numbers in the operation must be consecutive in the register
12828 bank. RETURN_PC is true if the value is to be loaded into the PC.
12829 The pattern we are trying to match for load is:
12830 [(SET (R_d0) (MEM (PLUS (addr) (offset))))
12831 (SET (R_d1) (MEM (PLUS (addr) (offset + <reg_increment>))))
12834 (SET (R_dn) (MEM (PLUS (addr) (offset + n * <reg_increment>))))
12836 where
12837 1. If offset is 0, first insn should be (SET (R_d0) (MEM (src_addr))).
12838 2. REGNO (R_d0) < REGNO (R_d1) < ... < REGNO (R_dn).
12839 3. If consecutive is TRUE, then for kth register being loaded,
12840 REGNO (R_dk) = REGNO (R_d0) + k.
12841 The pattern for store is similar. */
12842 bool
12843 ldm_stm_operation_p (rtx op, bool load, machine_mode mode,
12844 bool consecutive, bool return_pc)
12846 HOST_WIDE_INT count = XVECLEN (op, 0);
12847 rtx reg, mem, addr;
12848 unsigned regno;
12849 unsigned first_regno;
12850 HOST_WIDE_INT i = 1, base = 0, offset = 0;
12851 rtx elt;
12852 bool addr_reg_in_reglist = false;
12853 bool update = false;
12854 int reg_increment;
12855 int offset_adj;
12856 int regs_per_val;
12858 /* If not in SImode, then registers must be consecutive
12859 (e.g., VLDM instructions for DFmode). */
12860 gcc_assert ((mode == SImode) || consecutive);
12861 /* Setting return_pc for stores is illegal. */
12862 gcc_assert (!return_pc || load);
12864 /* Set up the increments and the regs per val based on the mode. */
12865 reg_increment = GET_MODE_SIZE (mode);
12866 regs_per_val = reg_increment / 4;
12867 offset_adj = return_pc ? 1 : 0;
12869 if (count <= 1
12870 || GET_CODE (XVECEXP (op, 0, offset_adj)) != SET
12871 || (load && !REG_P (SET_DEST (XVECEXP (op, 0, offset_adj)))))
12872 return false;
12874 /* Check if this is a write-back. */
12875 elt = XVECEXP (op, 0, offset_adj);
12876 if (GET_CODE (SET_SRC (elt)) == PLUS)
12878 i++;
12879 base = 1;
12880 update = true;
12882 /* The offset adjustment must be the number of registers being
12883 popped times the size of a single register. */
12884 if (!REG_P (SET_DEST (elt))
12885 || !REG_P (XEXP (SET_SRC (elt), 0))
12886 || (REGNO (SET_DEST (elt)) != REGNO (XEXP (SET_SRC (elt), 0)))
12887 || !CONST_INT_P (XEXP (SET_SRC (elt), 1))
12888 || INTVAL (XEXP (SET_SRC (elt), 1)) !=
12889 ((count - 1 - offset_adj) * reg_increment))
12890 return false;
12893 i = i + offset_adj;
12894 base = base + offset_adj;
12895 /* Perform a quick check so we don't blow up below. If only one reg is loaded,
12896 success depends on the type: VLDM can do just one reg,
12897 LDM must do at least two. */
12898 if ((count <= i) && (mode == SImode))
12899 return false;
12901 elt = XVECEXP (op, 0, i - 1);
12902 if (GET_CODE (elt) != SET)
12903 return false;
12905 if (load)
12907 reg = SET_DEST (elt);
12908 mem = SET_SRC (elt);
12910 else
12912 reg = SET_SRC (elt);
12913 mem = SET_DEST (elt);
12916 if (!REG_P (reg) || !MEM_P (mem))
12917 return false;
12919 regno = REGNO (reg);
12920 first_regno = regno;
12921 addr = XEXP (mem, 0);
12922 if (GET_CODE (addr) == PLUS)
12924 if (!CONST_INT_P (XEXP (addr, 1)))
12925 return false;
12927 offset = INTVAL (XEXP (addr, 1));
12928 addr = XEXP (addr, 0);
12931 if (!REG_P (addr))
12932 return false;
12934 /* Don't allow SP to be loaded unless it is also the base register. It
12935 guarantees that SP is reset correctly when an LDM instruction
12936 is interrupted. Otherwise, we might end up with a corrupt stack. */
12937 if (load && (REGNO (reg) == SP_REGNUM) && (REGNO (addr) != SP_REGNUM))
12938 return false;
12940 for (; i < count; i++)
12942 elt = XVECEXP (op, 0, i);
12943 if (GET_CODE (elt) != SET)
12944 return false;
12946 if (load)
12948 reg = SET_DEST (elt);
12949 mem = SET_SRC (elt);
12951 else
12953 reg = SET_SRC (elt);
12954 mem = SET_DEST (elt);
12957 if (!REG_P (reg)
12958 || GET_MODE (reg) != mode
12959 || REGNO (reg) <= regno
12960 || (consecutive
12961 && (REGNO (reg) !=
12962 (unsigned int) (first_regno + regs_per_val * (i - base))))
12963 /* Don't allow SP to be loaded unless it is also the base register. It
12964 guarantees that SP is reset correctly when an LDM instruction
12965 is interrupted. Otherwise, we might end up with a corrupt stack. */
12966 || (load && (REGNO (reg) == SP_REGNUM) && (REGNO (addr) != SP_REGNUM))
12967 || !MEM_P (mem)
12968 || GET_MODE (mem) != mode
12969 || ((GET_CODE (XEXP (mem, 0)) != PLUS
12970 || !rtx_equal_p (XEXP (XEXP (mem, 0), 0), addr)
12971 || !CONST_INT_P (XEXP (XEXP (mem, 0), 1))
12972 || (INTVAL (XEXP (XEXP (mem, 0), 1)) !=
12973 offset + (i - base) * reg_increment))
12974 && (!REG_P (XEXP (mem, 0))
12975 || offset + (i - base) * reg_increment != 0)))
12976 return false;
12978 regno = REGNO (reg);
12979 if (regno == REGNO (addr))
12980 addr_reg_in_reglist = true;
12983 if (load)
12985 if (update && addr_reg_in_reglist)
12986 return false;
12988 /* For Thumb-1, address register is always modified - either by write-back
12989 or by explicit load. If the pattern does not describe an update,
12990 then the address register must be in the list of loaded registers. */
12991 if (TARGET_THUMB1)
12992 return update || addr_reg_in_reglist;
12995 return true;
12998 /* Return true iff it would be profitable to turn a sequence of NOPS loads
12999 or stores (depending on IS_STORE) into a load-multiple or store-multiple
13000 instruction. ADD_OFFSET is nonzero if the base address register needs
13001 to be modified with an add instruction before we can use it. */
13003 static bool
13004 multiple_operation_profitable_p (bool is_store ATTRIBUTE_UNUSED,
13005 int nops, HOST_WIDE_INT add_offset)
13007 /* For ARM8,9 & StrongARM, 2 ldr instructions are faster than an ldm
13008 if the offset isn't small enough. The reason 2 ldrs are faster
13009 is that these ARMs are able to do more than one cache access
13010 in a single cycle. The ARM9 and StrongARM have Harvard caches,
13011 whilst the ARM8 has a double bandwidth cache. This means that
13012 these cores can do both an instruction fetch and a data fetch in
13013 a single cycle, so the trick of calculating the address into a
13014 scratch register (one of the result regs) and then doing a load
13015 multiple actually becomes slower (and no smaller in code size).
13016 That is the transformation
13018 ldr rd1, [rbase + offset]
13019 ldr rd2, [rbase + offset + 4]
13023 add rd1, rbase, offset
13024 ldmia rd1, {rd1, rd2}
13026 produces worse code -- '3 cycles + any stalls on rd2' instead of
13027 '2 cycles + any stalls on rd2'. On ARMs with only one cache
13028 access per cycle, the first sequence could never complete in less
13029 than 6 cycles, whereas the ldm sequence would only take 5 and
13030 would make better use of sequential accesses if not hitting the
13031 cache.
13033 We cheat here and test 'arm_ld_sched' which we currently know to
13034 only be true for the ARM8, ARM9 and StrongARM. If this ever
13035 changes, then the test below needs to be reworked. */
13036 if (nops == 2 && arm_ld_sched && add_offset != 0)
13037 return false;
13039 /* XScale has load-store double instructions, but they have stricter
13040 alignment requirements than load-store multiple, so we cannot
13041 use them.
13043 For XScale ldm requires 2 + NREGS cycles to complete and blocks
13044 the pipeline until completion.
13046 NREGS CYCLES
13047 1 3
13048 2 4
13049 3 5
13050 4 6
13052 An ldr instruction takes 1-3 cycles, but does not block the
13053 pipeline.
13055 NREGS CYCLES
13056 1 1-3
13057 2 2-6
13058 3 3-9
13059 4 4-12
13061 Best case ldr will always win. However, the more ldr instructions
13062 we issue, the less likely we are to be able to schedule them well.
13063 Using ldr instructions also increases code size.
13065 As a compromise, we use ldr for counts of 1 or 2 regs, and ldm
13066 for counts of 3 or 4 regs. */
13067 if (nops <= 2 && arm_tune_xscale && !optimize_size)
13068 return false;
13069 return true;
13072 /* Subroutine of load_multiple_sequence and store_multiple_sequence.
13073 Given an array of UNSORTED_OFFSETS, of which there are NOPS, compute
13074 an array ORDER which describes the sequence to use when accessing the
13075 offsets that produces an ascending order. In this sequence, each
13076 offset must be larger by exactly 4 than the previous one. ORDER[0]
13077 must have been filled in with the lowest offset by the caller.
13078 If UNSORTED_REGS is nonnull, it is an array of register numbers that
13079 we use to verify that ORDER produces an ascending order of registers.
13080 Return true if it was possible to construct such an order, false if
13081 not. */
13083 static bool
13084 compute_offset_order (int nops, HOST_WIDE_INT *unsorted_offsets, int *order,
13085 int *unsorted_regs)
13087 int i;
13088 for (i = 1; i < nops; i++)
13090 int j;
13092 order[i] = order[i - 1];
13093 for (j = 0; j < nops; j++)
13094 if (unsorted_offsets[j] == unsorted_offsets[order[i - 1]] + 4)
13096 /* We must find exactly one offset that is higher than the
13097 previous one by 4. */
13098 if (order[i] != order[i - 1])
13099 return false;
13100 order[i] = j;
13102 if (order[i] == order[i - 1])
13103 return false;
13104 /* The register numbers must be ascending. */
13105 if (unsorted_regs != NULL
13106 && unsorted_regs[order[i]] <= unsorted_regs[order[i - 1]])
13107 return false;
13109 return true;
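/* Illustrative sketch, not part of the original file: a worked example of
   what the function above computes.  The offsets and the helper name are
   invented for exposition only.  */
static bool ATTRIBUTE_UNUSED
compute_offset_order_example_sketch (void)
{
  HOST_WIDE_INT offsets[4] = { 8, 0, 4, 12 };
  int order[4] = { 1, 0, 0, 0 };	/* order[0] = index of the lowest offset (0).  */
  bool ok = compute_offset_order (4, offsets, order, NULL);
  /* OK is true and order is { 1, 2, 0, 3 }, i.e. offsets 0, 4, 8, 12 in
     ascending order, each exactly 4 larger than the previous one.  */
  return ok && order[0] == 1 && order[1] == 2 && order[2] == 0 && order[3] == 3;
}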
13112 /* Used to determine in a peephole whether a sequence of load
13113 instructions can be changed into a load-multiple instruction.
13114 NOPS is the number of separate load instructions we are examining. The
13115 first NOPS entries in OPERANDS are the destination registers, the
13116 next NOPS entries are memory operands. If this function is
13117 successful, *BASE is set to the common base register of the memory
13118 accesses; *LOAD_OFFSET is set to the first memory location's offset
13119 from that base register.
13120 REGS is an array filled in with the destination register numbers.
13121 SAVED_ORDER (if nonnull) is an array filled in with an order that maps
13122 insn numbers to an ascending order of loads. If CHECK_REGS is true,
13123 the sequence of registers in REGS matches the loads from ascending memory
13124 locations, and the function verifies that the register numbers are
13125 themselves ascending. If CHECK_REGS is false, the register numbers
13126 are stored in the order they are found in the operands. */
13127 static int
13128 load_multiple_sequence (rtx *operands, int nops, int *regs, int *saved_order,
13129 int *base, HOST_WIDE_INT *load_offset, bool check_regs)
13131 int unsorted_regs[MAX_LDM_STM_OPS];
13132 HOST_WIDE_INT unsorted_offsets[MAX_LDM_STM_OPS];
13133 int order[MAX_LDM_STM_OPS];
13134 rtx base_reg_rtx = NULL;
13135 int base_reg = -1;
13136 int i, ldm_case;
13138 /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
13139 easily extended if required. */
13140 gcc_assert (nops >= 2 && nops <= MAX_LDM_STM_OPS);
13142 memset (order, 0, MAX_LDM_STM_OPS * sizeof (int));
13144 /* Loop over the operands and check that the memory references are
13145 suitable (i.e. immediate offsets from the same base register). At
13146 the same time, extract the target register, and the memory
13147 offsets. */
13148 for (i = 0; i < nops; i++)
13150 rtx reg;
13151 rtx offset;
13153 /* Convert a subreg of a mem into the mem itself. */
13154 if (GET_CODE (operands[nops + i]) == SUBREG)
13155 operands[nops + i] = alter_subreg (operands + (nops + i), true);
13157 gcc_assert (MEM_P (operands[nops + i]));
13159 /* Don't reorder volatile memory references; it doesn't seem worth
13160 looking for the case where the order is ok anyway. */
13161 if (MEM_VOLATILE_P (operands[nops + i]))
13162 return 0;
13164 offset = const0_rtx;
13166 if ((REG_P (reg = XEXP (operands[nops + i], 0))
13167 || (GET_CODE (reg) == SUBREG
13168 && REG_P (reg = SUBREG_REG (reg))))
13169 || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
13170 && ((REG_P (reg = XEXP (XEXP (operands[nops + i], 0), 0)))
13171 || (GET_CODE (reg) == SUBREG
13172 && REG_P (reg = SUBREG_REG (reg))))
13173 && (CONST_INT_P (offset
13174 = XEXP (XEXP (operands[nops + i], 0), 1)))))
13176 if (i == 0)
13178 base_reg = REGNO (reg);
13179 base_reg_rtx = reg;
13180 if (TARGET_THUMB1 && base_reg > LAST_LO_REGNUM)
13181 return 0;
13183 else if (base_reg != (int) REGNO (reg))
13184 /* Not addressed from the same base register. */
13185 return 0;
13187 unsorted_regs[i] = (REG_P (operands[i])
13188 ? REGNO (operands[i])
13189 : REGNO (SUBREG_REG (operands[i])));
13191 /* If it isn't an integer register, or if it overwrites the
13192 base register but isn't the last insn in the list, then
13193 we can't do this. */
13194 if (unsorted_regs[i] < 0
13195 || (TARGET_THUMB1 && unsorted_regs[i] > LAST_LO_REGNUM)
13196 || unsorted_regs[i] > 14
13197 || (i != nops - 1 && unsorted_regs[i] == base_reg))
13198 return 0;
13200 /* Don't allow SP to be loaded unless it is also the base
13201 register. It guarantees that SP is reset correctly when
13202 an LDM instruction is interrupted. Otherwise, we might
13203 end up with a corrupt stack. */
13204 if (unsorted_regs[i] == SP_REGNUM && base_reg != SP_REGNUM)
13205 return 0;
13207 unsorted_offsets[i] = INTVAL (offset);
13208 if (i == 0 || unsorted_offsets[i] < unsorted_offsets[order[0]])
13209 order[0] = i;
13211 else
13212 /* Not a suitable memory address. */
13213 return 0;
13216 /* All the useful information has now been extracted from the
13217 operands into unsorted_regs and unsorted_offsets; additionally,
13218 order[0] has been set to the lowest offset in the list. Sort
13219 the offsets into order, verifying that they are adjacent, and
13220 check that the register numbers are ascending. */
13221 if (!compute_offset_order (nops, unsorted_offsets, order,
13222 check_regs ? unsorted_regs : NULL))
13223 return 0;
13225 if (saved_order)
13226 memcpy (saved_order, order, sizeof order);
13228 if (base)
13230 *base = base_reg;
13232 for (i = 0; i < nops; i++)
13233 regs[i] = unsorted_regs[check_regs ? order[i] : i];
13235 *load_offset = unsorted_offsets[order[0]];
13238 if (TARGET_THUMB1
13239 && !peep2_reg_dead_p (nops, base_reg_rtx))
13240 return 0;
13242 if (unsorted_offsets[order[0]] == 0)
13243 ldm_case = 1; /* ldmia */
13244 else if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
13245 ldm_case = 2; /* ldmib */
13246 else if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
13247 ldm_case = 3; /* ldmda */
13248 else if (TARGET_32BIT && unsorted_offsets[order[nops - 1]] == -4)
13249 ldm_case = 4; /* ldmdb */
13250 else if (const_ok_for_arm (unsorted_offsets[order[0]])
13251 || const_ok_for_arm (-unsorted_offsets[order[0]]))
13252 ldm_case = 5;
13253 else
13254 return 0;
13256 if (!multiple_operation_profitable_p (false, nops,
13257 ldm_case == 5
13258 ? unsorted_offsets[order[0]] : 0))
13259 return 0;
13261 return ldm_case;
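/* As an example of the classification above (assuming ARM mode): loads from
   [rN], [rN, #4], [rN, #8], [rN, #12] give ldm_case 1 (ldmia); offsets
   4/8/12/16 give case 2 (ldmib); -12/-8/-4/0 give case 3 (ldmda); and
   -16/-12/-8/-4 give case 4 (ldmdb).  Any other run of adjacent offsets
   whose lowest offset (or its negation) is a valid ADD/SUB immediate falls
   into case 5, which callers handle by adjusting the base register first.  */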
13264 /* Used to determine in a peephole whether a sequence of store instructions can
13265 be changed into a store-multiple instruction.
13266 NOPS is the number of separate store instructions we are examining.
13267 NOPS_TOTAL is the total number of instructions recognized by the peephole
13268 pattern.
13269 The first NOPS entries in OPERANDS are the source registers, the next
13270 NOPS entries are memory operands. If this function is successful, *BASE is
13271 set to the common base register of the memory accesses; *LOAD_OFFSET is set
13272 to the first memory location's offset from that base register. REGS is an
13273 array filled in with the source register numbers, REG_RTXS (if nonnull) is
13274 likewise filled with the corresponding rtx's.
13275 SAVED_ORDER (if nonnull) is an array filled in with an order that maps insn
13276 numbers to an ascending order of stores.
13277 If CHECK_REGS is true, the sequence of registers in *REGS matches the stores
13278 from ascending memory locations, and the function verifies that the register
13279 numbers are themselves ascending. If CHECK_REGS is false, the register
13280 numbers are stored in the order they are found in the operands. */
13281 static int
13282 store_multiple_sequence (rtx *operands, int nops, int nops_total,
13283 int *regs, rtx *reg_rtxs, int *saved_order, int *base,
13284 HOST_WIDE_INT *load_offset, bool check_regs)
13286 int unsorted_regs[MAX_LDM_STM_OPS];
13287 rtx unsorted_reg_rtxs[MAX_LDM_STM_OPS];
13288 HOST_WIDE_INT unsorted_offsets[MAX_LDM_STM_OPS];
13289 int order[MAX_LDM_STM_OPS];
13290 int base_reg = -1;
13291 rtx base_reg_rtx = NULL;
13292 int i, stm_case;
13294 /* Write back of base register is currently only supported for Thumb 1. */
13295 int base_writeback = TARGET_THUMB1;
13297 /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
13298 easily extended if required. */
13299 gcc_assert (nops >= 2 && nops <= MAX_LDM_STM_OPS);
13301 memset (order, 0, MAX_LDM_STM_OPS * sizeof (int));
13303 /* Loop over the operands and check that the memory references are
13304 suitable (i.e. immediate offsets from the same base register). At
13305 the same time, extract the target register, and the memory
13306 offsets. */
13307 for (i = 0; i < nops; i++)
13309 rtx reg;
13310 rtx offset;
13312 /* Convert a subreg of a mem into the mem itself. */
13313 if (GET_CODE (operands[nops + i]) == SUBREG)
13314 operands[nops + i] = alter_subreg (operands + (nops + i), true);
13316 gcc_assert (MEM_P (operands[nops + i]));
13318 /* Don't reorder volatile memory references; it doesn't seem worth
13319 looking for the case where the order is ok anyway. */
13320 if (MEM_VOLATILE_P (operands[nops + i]))
13321 return 0;
13323 offset = const0_rtx;
13325 if ((REG_P (reg = XEXP (operands[nops + i], 0))
13326 || (GET_CODE (reg) == SUBREG
13327 && REG_P (reg = SUBREG_REG (reg))))
13328 || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
13329 && ((REG_P (reg = XEXP (XEXP (operands[nops + i], 0), 0)))
13330 || (GET_CODE (reg) == SUBREG
13331 && REG_P (reg = SUBREG_REG (reg))))
13332 && (CONST_INT_P (offset
13333 = XEXP (XEXP (operands[nops + i], 0), 1)))))
13335 unsorted_reg_rtxs[i] = (REG_P (operands[i])
13336 ? operands[i] : SUBREG_REG (operands[i]));
13337 unsorted_regs[i] = REGNO (unsorted_reg_rtxs[i]);
13339 if (i == 0)
13341 base_reg = REGNO (reg);
13342 base_reg_rtx = reg;
13343 if (TARGET_THUMB1 && base_reg > LAST_LO_REGNUM)
13344 return 0;
13346 else if (base_reg != (int) REGNO (reg))
13347 /* Not addressed from the same base register. */
13348 return 0;
13350 /* If it isn't an integer register, then we can't do this. */
13351 if (unsorted_regs[i] < 0
13352 || (TARGET_THUMB1 && unsorted_regs[i] > LAST_LO_REGNUM)
13353 /* The effects are unpredictable if the base register is
13354 both updated and stored. */
13355 || (base_writeback && unsorted_regs[i] == base_reg)
13356 || (TARGET_THUMB2 && unsorted_regs[i] == SP_REGNUM)
13357 || unsorted_regs[i] > 14)
13358 return 0;
13360 unsorted_offsets[i] = INTVAL (offset);
13361 if (i == 0 || unsorted_offsets[i] < unsorted_offsets[order[0]])
13362 order[0] = i;
13364 else
13365 /* Not a suitable memory address. */
13366 return 0;
13369 /* All the useful information has now been extracted from the
13370 operands into unsorted_regs and unsorted_offsets; additionally,
13371 order[0] has been set to the lowest offset in the list. Sort
13372 the offsets into order, verifying that they are adjacent, and
13373 check that the register numbers are ascending. */
13374 if (!compute_offset_order (nops, unsorted_offsets, order,
13375 check_regs ? unsorted_regs : NULL))
13376 return 0;
13378 if (saved_order)
13379 memcpy (saved_order, order, sizeof order);
13381 if (base)
13383 *base = base_reg;
13385 for (i = 0; i < nops; i++)
13387 regs[i] = unsorted_regs[check_regs ? order[i] : i];
13388 if (reg_rtxs)
13389 reg_rtxs[i] = unsorted_reg_rtxs[check_regs ? order[i] : i];
13392 *load_offset = unsorted_offsets[order[0]];
13395 if (TARGET_THUMB1
13396 && !peep2_reg_dead_p (nops_total, base_reg_rtx))
13397 return 0;
13399 if (unsorted_offsets[order[0]] == 0)
13400 stm_case = 1; /* stmia */
13401 else if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
13402 stm_case = 2; /* stmib */
13403 else if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
13404 stm_case = 3; /* stmda */
13405 else if (TARGET_32BIT && unsorted_offsets[order[nops - 1]] == -4)
13406 stm_case = 4; /* stmdb */
13407 else
13408 return 0;
13410 if (!multiple_operation_profitable_p (false, nops, 0))
13411 return 0;
13413 return stm_case;
13416 /* Routines for use in generating RTL. */
13418 /* Generate a load-multiple instruction. COUNT is the number of loads in
13419 the instruction; REGS and MEMS are arrays containing the operands.
13420 BASEREG is the base register to be used in addressing the memory operands.
13421 WBACK_OFFSET is nonzero if the instruction should update the base
13422 register. */
13424 static rtx
13425 arm_gen_load_multiple_1 (int count, int *regs, rtx *mems, rtx basereg,
13426 HOST_WIDE_INT wback_offset)
13428 int i = 0, j;
13429 rtx result;
13431 if (!multiple_operation_profitable_p (false, count, 0))
13433 rtx seq;
13435 start_sequence ();
13437 for (i = 0; i < count; i++)
13438 emit_move_insn (gen_rtx_REG (SImode, regs[i]), mems[i]);
13440 if (wback_offset != 0)
13441 emit_move_insn (basereg, plus_constant (Pmode, basereg, wback_offset));
13443 seq = get_insns ();
13444 end_sequence ();
13446 return seq;
13449 result = gen_rtx_PARALLEL (VOIDmode,
13450 rtvec_alloc (count + (wback_offset != 0 ? 1 : 0)));
13451 if (wback_offset != 0)
13453 XVECEXP (result, 0, 0)
13454 = gen_rtx_SET (basereg, plus_constant (Pmode, basereg, wback_offset));
13455 i = 1;
13456 count++;
13459 for (j = 0; i < count; i++, j++)
13460 XVECEXP (result, 0, i)
13461 = gen_rtx_SET (gen_rtx_REG (SImode, regs[j]), mems[j]);
13463 return result;
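/* A sketch of the RTL built above for COUNT == 2 with a writeback offset of
   8 (register numbers are illustrative):

     (parallel [(set (reg:SI base) (plus:SI (reg:SI base) (const_int 8)))
                (set (reg:SI 4) (mem:SI ...))
                (set (reg:SI 5) (mem:SI ...))])

   When the multiple operation is not considered profitable, an equivalent
   sequence of single moves is emitted instead.  */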
13466 /* Generate a store-multiple instruction. COUNT is the number of stores in
13467 the instruction; REGS and MEMS are arrays containing the operands.
13468 BASEREG is the base register to be used in addressing the memory operands.
13469 WBACK_OFFSET is nonzero if the instruction should update the base
13470 register. */
13472 static rtx
13473 arm_gen_store_multiple_1 (int count, int *regs, rtx *mems, rtx basereg,
13474 HOST_WIDE_INT wback_offset)
13476 int i = 0, j;
13477 rtx result;
13479 if (GET_CODE (basereg) == PLUS)
13480 basereg = XEXP (basereg, 0);
13482 if (!multiple_operation_profitable_p (false, count, 0))
13484 rtx seq;
13486 start_sequence ();
13488 for (i = 0; i < count; i++)
13489 emit_move_insn (mems[i], gen_rtx_REG (SImode, regs[i]));
13491 if (wback_offset != 0)
13492 emit_move_insn (basereg, plus_constant (Pmode, basereg, wback_offset));
13494 seq = get_insns ();
13495 end_sequence ();
13497 return seq;
13500 result = gen_rtx_PARALLEL (VOIDmode,
13501 rtvec_alloc (count + (wback_offset != 0 ? 1 : 0)));
13502 if (wback_offset != 0)
13504 XVECEXP (result, 0, 0)
13505 = gen_rtx_SET (basereg, plus_constant (Pmode, basereg, wback_offset));
13506 i = 1;
13507 count++;
13510 for (j = 0; i < count; i++, j++)
13511 XVECEXP (result, 0, i)
13512 = gen_rtx_SET (mems[j], gen_rtx_REG (SImode, regs[j]));
13514 return result;
13517 /* Generate either a load-multiple or a store-multiple instruction. This
13518 function can be used in situations where we can start with a single MEM
13519 rtx and adjust its address upwards.
13520 COUNT is the number of operations in the instruction, not counting a
13521 possible update of the base register. REGS is an array containing the
13522 register operands.
13523 BASEREG is the base register to be used in addressing the memory operands,
13524 which are constructed from BASEMEM.
13525 WRITE_BACK specifies whether the generated instruction should include an
13526 update of the base register.
13527 OFFSETP is used to pass an offset to and from this function; this offset
13528 is not used when constructing the address (instead BASEMEM should have an
13529 appropriate offset in its address), it is used only for setting
13530 MEM_OFFSET. It is updated only if WRITE_BACK is true. */
13532 static rtx
13533 arm_gen_multiple_op (bool is_load, int *regs, int count, rtx basereg,
13534 bool write_back, rtx basemem, HOST_WIDE_INT *offsetp)
13536 rtx mems[MAX_LDM_STM_OPS];
13537 HOST_WIDE_INT offset = *offsetp;
13538 int i;
13540 gcc_assert (count <= MAX_LDM_STM_OPS);
13542 if (GET_CODE (basereg) == PLUS)
13543 basereg = XEXP (basereg, 0);
13545 for (i = 0; i < count; i++)
13547 rtx addr = plus_constant (Pmode, basereg, i * 4);
13548 mems[i] = adjust_automodify_address_nv (basemem, SImode, addr, offset);
13549 offset += 4;
13552 if (write_back)
13553 *offsetp = offset;
13555 if (is_load)
13556 return arm_gen_load_multiple_1 (count, regs, mems, basereg,
13557 write_back ? 4 * count : 0);
13558 else
13559 return arm_gen_store_multiple_1 (count, regs, mems, basereg,
13560 write_back ? 4 * count : 0);
13563 rtx
13564 arm_gen_load_multiple (int *regs, int count, rtx basereg, int write_back,
13565 rtx basemem, HOST_WIDE_INT *offsetp)
13567 return arm_gen_multiple_op (TRUE, regs, count, basereg, write_back, basemem,
13568 offsetp);
13571 rtx
13572 arm_gen_store_multiple (int *regs, int count, rtx basereg, int write_back,
13573 rtx basemem, HOST_WIDE_INT *offsetp)
13575 return arm_gen_multiple_op (FALSE, regs, count, basereg, write_back, basemem,
13576 offsetp);
13579 /* Called from a peephole2 expander to turn a sequence of loads into an
13580 LDM instruction. OPERANDS are the operands found by the peephole matcher;
13581 NOPS indicates how many separate loads we are trying to combine. SORT_REGS
13582 is true if we can reorder the registers because they are used commutatively
13583 subsequently.
13584 Returns true iff we could generate a new instruction. */
13586 bool
13587 gen_ldm_seq (rtx *operands, int nops, bool sort_regs)
13589 int regs[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
13590 rtx mems[MAX_LDM_STM_OPS];
13591 int i, j, base_reg;
13592 rtx base_reg_rtx;
13593 HOST_WIDE_INT offset;
13594 int write_back = FALSE;
13595 int ldm_case;
13596 rtx addr;
13598 ldm_case = load_multiple_sequence (operands, nops, regs, mem_order,
13599 &base_reg, &offset, !sort_regs);
13601 if (ldm_case == 0)
13602 return false;
13604 if (sort_regs)
13605 for (i = 0; i < nops - 1; i++)
13606 for (j = i + 1; j < nops; j++)
13607 if (regs[i] > regs[j])
13609 int t = regs[i];
13610 regs[i] = regs[j];
13611 regs[j] = t;
13613 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
13615 if (TARGET_THUMB1)
13617 gcc_assert (peep2_reg_dead_p (nops, base_reg_rtx));
13618 gcc_assert (ldm_case == 1 || ldm_case == 5);
13619 write_back = TRUE;
13622 if (ldm_case == 5)
13624 rtx newbase = TARGET_THUMB1 ? base_reg_rtx : gen_rtx_REG (SImode, regs[0]);
13625 emit_insn (gen_addsi3 (newbase, base_reg_rtx, GEN_INT (offset)));
13626 offset = 0;
13627 if (!TARGET_THUMB1)
13628 base_reg_rtx = newbase;
13631 for (i = 0; i < nops; i++)
13633 addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
13634 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
13635 SImode, addr, 0);
13637 emit_insn (arm_gen_load_multiple_1 (nops, regs, mems, base_reg_rtx,
13638 write_back ? offset + i * 4 : 0));
13639 return true;
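/* For example (registers chosen arbitrarily), a peephole2 match of

     ldr r0, [r3]
     ldr r1, [r3, #4]

   reaches this point with ldm_case 1 and is replaced by a single
   load-multiple such as "ldm r3, {r0, r1}"; on Thumb-1 the base register is
   additionally written back and must be dead after the sequence.  */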
13642 /* Called from a peephole2 expander to turn a sequence of stores into an
13643 STM instruction. OPERANDS are the operands found by the peephole matcher;
13644 NOPS indicates how many separate stores we are trying to combine.
13645 Returns true iff we could generate a new instruction. */
13647 bool
13648 gen_stm_seq (rtx *operands, int nops)
13650 int i;
13651 int regs[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
13652 rtx mems[MAX_LDM_STM_OPS];
13653 int base_reg;
13654 rtx base_reg_rtx;
13655 HOST_WIDE_INT offset;
13656 int write_back = FALSE;
13657 int stm_case;
13658 rtx addr;
13659 bool base_reg_dies;
13661 stm_case = store_multiple_sequence (operands, nops, nops, regs, NULL,
13662 mem_order, &base_reg, &offset, true);
13664 if (stm_case == 0)
13665 return false;
13667 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
13669 base_reg_dies = peep2_reg_dead_p (nops, base_reg_rtx);
13670 if (TARGET_THUMB1)
13672 gcc_assert (base_reg_dies);
13673 write_back = TRUE;
13676 if (stm_case == 5)
13678 gcc_assert (base_reg_dies);
13679 emit_insn (gen_addsi3 (base_reg_rtx, base_reg_rtx, GEN_INT (offset)));
13680 offset = 0;
13683 addr = plus_constant (Pmode, base_reg_rtx, offset);
13685 for (i = 0; i < nops; i++)
13687 addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
13688 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
13689 SImode, addr, 0);
13691 emit_insn (arm_gen_store_multiple_1 (nops, regs, mems, base_reg_rtx,
13692 write_back ? offset + i * 4 : 0));
13693 return true;
13696 /* Called from a peephole2 expander to turn a sequence of stores that are
13697 preceded by constant loads into an STM instruction. OPERANDS are the
13698 operands found by the peephole matcher; NOPS indicates how many
13699 separate stores we are trying to combine; there are 2 * NOPS
13700 instructions in the peephole.
13701 Returns true iff we could generate a new instruction. */
13703 bool
13704 gen_const_stm_seq (rtx *operands, int nops)
13706 int regs[MAX_LDM_STM_OPS], sorted_regs[MAX_LDM_STM_OPS];
13707 int reg_order[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
13708 rtx reg_rtxs[MAX_LDM_STM_OPS], orig_reg_rtxs[MAX_LDM_STM_OPS];
13709 rtx mems[MAX_LDM_STM_OPS];
13710 int base_reg;
13711 rtx base_reg_rtx;
13712 HOST_WIDE_INT offset;
13713 int write_back = FALSE;
13714 int stm_case;
13715 rtx addr;
13716 bool base_reg_dies;
13717 int i, j;
13718 HARD_REG_SET allocated;
13720 stm_case = store_multiple_sequence (operands, nops, 2 * nops, regs, reg_rtxs,
13721 mem_order, &base_reg, &offset, false);
13723 if (stm_case == 0)
13724 return false;
13726 memcpy (orig_reg_rtxs, reg_rtxs, sizeof orig_reg_rtxs);
13728 /* If the same register is used more than once, try to find a free
13729 register. */
13730 CLEAR_HARD_REG_SET (allocated);
13731 for (i = 0; i < nops; i++)
13733 for (j = i + 1; j < nops; j++)
13734 if (regs[i] == regs[j])
13736 rtx t = peep2_find_free_register (0, nops * 2,
13737 TARGET_THUMB1 ? "l" : "r",
13738 SImode, &allocated);
13739 if (t == NULL_RTX)
13740 return false;
13741 reg_rtxs[i] = t;
13742 regs[i] = REGNO (t);
13746 /* Compute an ordering that maps the register numbers to an ascending
13747 sequence. */
13748 reg_order[0] = 0;
13749 for (i = 0; i < nops; i++)
13750 if (regs[i] < regs[reg_order[0]])
13751 reg_order[0] = i;
13753 for (i = 1; i < nops; i++)
13755 int this_order = reg_order[i - 1];
13756 for (j = 0; j < nops; j++)
13757 if (regs[j] > regs[reg_order[i - 1]]
13758 && (this_order == reg_order[i - 1]
13759 || regs[j] < regs[this_order]))
13760 this_order = j;
13761 reg_order[i] = this_order;
13764 /* Ensure that registers that must be live after the instruction end
13765 up with the correct value. */
13766 for (i = 0; i < nops; i++)
13768 int this_order = reg_order[i];
13769 if ((this_order != mem_order[i]
13770 || orig_reg_rtxs[this_order] != reg_rtxs[this_order])
13771 && !peep2_reg_dead_p (nops * 2, orig_reg_rtxs[this_order]))
13772 return false;
13775 /* Load the constants. */
13776 for (i = 0; i < nops; i++)
13778 rtx op = operands[2 * nops + mem_order[i]];
13779 sorted_regs[i] = regs[reg_order[i]];
13780 emit_move_insn (reg_rtxs[reg_order[i]], op);
13783 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
13785 base_reg_dies = peep2_reg_dead_p (nops * 2, base_reg_rtx);
13786 if (TARGET_THUMB1)
13788 gcc_assert (base_reg_dies);
13789 write_back = TRUE;
13792 if (stm_case == 5)
13794 gcc_assert (base_reg_dies);
13795 emit_insn (gen_addsi3 (base_reg_rtx, base_reg_rtx, GEN_INT (offset)));
13796 offset = 0;
13799 addr = plus_constant (Pmode, base_reg_rtx, offset);
13801 for (i = 0; i < nops; i++)
13803 addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
13804 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
13805 SImode, addr, 0);
13807 emit_insn (arm_gen_store_multiple_1 (nops, sorted_regs, mems, base_reg_rtx,
13808 write_back ? offset + i * 4 : 0));
13809 return true;
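/* For example, when the peephole matches two constant loads whose values are
   then stored to [r3] and [r3, #4], the code above re-emits the constant
   loads (into spare registers if a source register was reused) and replaces
   the stores with a single STM, after checking that every register that
   remains live still receives its original value.  */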
13812 /* Copy a block of memory using plain ldr/str/ldrh/strh instructions, to permit
13813 unaligned copies on processors which support unaligned semantics for those
13814 instructions. INTERLEAVE_FACTOR can be used to attempt to hide load latency
13815 (using more registers) by doing e.g. load/load/store/store for a factor of 2.
13816 An interleave factor of 1 (the minimum) will perform no interleaving.
13817 Load/store multiple are used for aligned addresses where possible. */
13819 static void
13820 arm_block_move_unaligned_straight (rtx dstbase, rtx srcbase,
13821 HOST_WIDE_INT length,
13822 unsigned int interleave_factor)
13824 rtx *regs = XALLOCAVEC (rtx, interleave_factor);
13825 int *regnos = XALLOCAVEC (int, interleave_factor);
13826 HOST_WIDE_INT block_size_bytes = interleave_factor * UNITS_PER_WORD;
13827 HOST_WIDE_INT i, j;
13828 HOST_WIDE_INT remaining = length, words;
13829 rtx halfword_tmp = NULL, byte_tmp = NULL;
13830 rtx dst, src;
13831 bool src_aligned = MEM_ALIGN (srcbase) >= BITS_PER_WORD;
13832 bool dst_aligned = MEM_ALIGN (dstbase) >= BITS_PER_WORD;
13833 HOST_WIDE_INT srcoffset, dstoffset;
13834 HOST_WIDE_INT src_autoinc, dst_autoinc;
13835 rtx mem, addr;
13837 gcc_assert (1 <= interleave_factor && interleave_factor <= 4);
13839 /* Use hard registers if we have aligned source or destination so we can use
13840 load/store multiple with contiguous registers. */
13841 if (dst_aligned || src_aligned)
13842 for (i = 0; i < interleave_factor; i++)
13843 regs[i] = gen_rtx_REG (SImode, i);
13844 else
13845 for (i = 0; i < interleave_factor; i++)
13846 regs[i] = gen_reg_rtx (SImode);
13848 dst = copy_addr_to_reg (XEXP (dstbase, 0));
13849 src = copy_addr_to_reg (XEXP (srcbase, 0));
13851 srcoffset = dstoffset = 0;
13853 /* Calls to arm_gen_load_multiple and arm_gen_store_multiple update SRC/DST.
13854 For copying the last bytes we want to subtract this offset again. */
13855 src_autoinc = dst_autoinc = 0;
13857 for (i = 0; i < interleave_factor; i++)
13858 regnos[i] = i;
13860 /* Copy BLOCK_SIZE_BYTES chunks. */
13862 for (i = 0; i + block_size_bytes <= length; i += block_size_bytes)
13864 /* Load words. */
13865 if (src_aligned && interleave_factor > 1)
13867 emit_insn (arm_gen_load_multiple (regnos, interleave_factor, src,
13868 TRUE, srcbase, &srcoffset));
13869 src_autoinc += UNITS_PER_WORD * interleave_factor;
13871 else
13873 for (j = 0; j < interleave_factor; j++)
13875 addr = plus_constant (Pmode, src, (srcoffset + j * UNITS_PER_WORD
13876 - src_autoinc));
13877 mem = adjust_automodify_address (srcbase, SImode, addr,
13878 srcoffset + j * UNITS_PER_WORD);
13879 emit_insn (gen_unaligned_loadsi (regs[j], mem));
13881 srcoffset += block_size_bytes;
13884 /* Store words. */
13885 if (dst_aligned && interleave_factor > 1)
13887 emit_insn (arm_gen_store_multiple (regnos, interleave_factor, dst,
13888 TRUE, dstbase, &dstoffset));
13889 dst_autoinc += UNITS_PER_WORD * interleave_factor;
13891 else
13893 for (j = 0; j < interleave_factor; j++)
13895 addr = plus_constant (Pmode, dst, (dstoffset + j * UNITS_PER_WORD
13896 - dst_autoinc));
13897 mem = adjust_automodify_address (dstbase, SImode, addr,
13898 dstoffset + j * UNITS_PER_WORD);
13899 emit_insn (gen_unaligned_storesi (mem, regs[j]));
13901 dstoffset += block_size_bytes;
13904 remaining -= block_size_bytes;
13907 /* Copy any whole words left (note these aren't interleaved with any
13908 subsequent halfword/byte load/stores in the interests of simplicity). */
13910 words = remaining / UNITS_PER_WORD;
13912 gcc_assert (words < interleave_factor);
13914 if (src_aligned && words > 1)
13916 emit_insn (arm_gen_load_multiple (regnos, words, src, TRUE, srcbase,
13917 &srcoffset));
13918 src_autoinc += UNITS_PER_WORD * words;
13920 else
13922 for (j = 0; j < words; j++)
13924 addr = plus_constant (Pmode, src,
13925 srcoffset + j * UNITS_PER_WORD - src_autoinc);
13926 mem = adjust_automodify_address (srcbase, SImode, addr,
13927 srcoffset + j * UNITS_PER_WORD);
13928 if (src_aligned)
13929 emit_move_insn (regs[j], mem);
13930 else
13931 emit_insn (gen_unaligned_loadsi (regs[j], mem));
13933 srcoffset += words * UNITS_PER_WORD;
13936 if (dst_aligned && words > 1)
13938 emit_insn (arm_gen_store_multiple (regnos, words, dst, TRUE, dstbase,
13939 &dstoffset));
13940 dst_autoinc += words * UNITS_PER_WORD;
13942 else
13944 for (j = 0; j < words; j++)
13946 addr = plus_constant (Pmode, dst,
13947 dstoffset + j * UNITS_PER_WORD - dst_autoinc);
13948 mem = adjust_automodify_address (dstbase, SImode, addr,
13949 dstoffset + j * UNITS_PER_WORD);
13950 if (dst_aligned)
13951 emit_move_insn (mem, regs[j]);
13952 else
13953 emit_insn (gen_unaligned_storesi (mem, regs[j]));
13955 dstoffset += words * UNITS_PER_WORD;
13958 remaining -= words * UNITS_PER_WORD;
13960 gcc_assert (remaining < 4);
13962 /* Copy a halfword if necessary. */
13964 if (remaining >= 2)
13966 halfword_tmp = gen_reg_rtx (SImode);
13968 addr = plus_constant (Pmode, src, srcoffset - src_autoinc);
13969 mem = adjust_automodify_address (srcbase, HImode, addr, srcoffset);
13970 emit_insn (gen_unaligned_loadhiu (halfword_tmp, mem));
13972 /* Either write out immediately, or delay until we've loaded the last
13973 byte, depending on interleave factor. */
13974 if (interleave_factor == 1)
13976 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
13977 mem = adjust_automodify_address (dstbase, HImode, addr, dstoffset);
13978 emit_insn (gen_unaligned_storehi (mem,
13979 gen_lowpart (HImode, halfword_tmp)));
13980 halfword_tmp = NULL;
13981 dstoffset += 2;
13984 remaining -= 2;
13985 srcoffset += 2;
13988 gcc_assert (remaining < 2);
13990 /* Copy last byte. */
13992 if ((remaining & 1) != 0)
13994 byte_tmp = gen_reg_rtx (SImode);
13996 addr = plus_constant (Pmode, src, srcoffset - src_autoinc);
13997 mem = adjust_automodify_address (srcbase, QImode, addr, srcoffset);
13998 emit_move_insn (gen_lowpart (QImode, byte_tmp), mem);
14000 if (interleave_factor == 1)
14002 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
14003 mem = adjust_automodify_address (dstbase, QImode, addr, dstoffset);
14004 emit_move_insn (mem, gen_lowpart (QImode, byte_tmp));
14005 byte_tmp = NULL;
14006 dstoffset++;
14009 remaining--;
14010 srcoffset++;
14013 /* Store last halfword if we haven't done so already. */
14015 if (halfword_tmp)
14017 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
14018 mem = adjust_automodify_address (dstbase, HImode, addr, dstoffset);
14019 emit_insn (gen_unaligned_storehi (mem,
14020 gen_lowpart (HImode, halfword_tmp)));
14021 dstoffset += 2;
14024 /* Likewise for last byte. */
14026 if (byte_tmp)
14028 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
14029 mem = adjust_automodify_address (dstbase, QImode, addr, dstoffset);
14030 emit_move_insn (mem, gen_lowpart (QImode, byte_tmp));
14031 dstoffset++;
14034 gcc_assert (remaining == 0 && srcoffset == dstoffset);
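/* A worked example: copying 23 bytes with INTERLEAVE_FACTOR == 2 emits two
   8-byte chunks (load/load/store/store, or a load/store multiple when that
   side is word-aligned), then one whole word, then a halfword and a final
   byte, ending with SRCOFFSET == DSTOFFSET == 23.  */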
14037 /* From mips_adjust_block_mem:
14039 Helper function for doing a loop-based block operation on memory
14040 reference MEM. Each iteration of the loop will operate on LENGTH
14041 bytes of MEM.
14043 Create a new base register for use within the loop and point it to
14044 the start of MEM. Create a new memory reference that uses this
14045 register. Store them in *LOOP_REG and *LOOP_MEM respectively. */
14047 static void
14048 arm_adjust_block_mem (rtx mem, HOST_WIDE_INT length, rtx *loop_reg,
14049 rtx *loop_mem)
14051 *loop_reg = copy_addr_to_reg (XEXP (mem, 0));
14053 /* Although the new mem does not refer to a known location,
14054 it does keep up to LENGTH bytes of alignment. */
14055 *loop_mem = change_address (mem, BLKmode, *loop_reg);
14056 set_mem_align (*loop_mem, MIN (MEM_ALIGN (mem), length * BITS_PER_UNIT));
14059 /* From mips_block_move_loop:
14061 Move LENGTH bytes from SRC to DEST using a loop that moves BYTES_PER_ITER
14062 bytes at a time. LENGTH must be at least BYTES_PER_ITER. Assume that
14063 the memory regions do not overlap. */
14065 static void
14066 arm_block_move_unaligned_loop (rtx dest, rtx src, HOST_WIDE_INT length,
14067 unsigned int interleave_factor,
14068 HOST_WIDE_INT bytes_per_iter)
14070 rtx src_reg, dest_reg, final_src, test;
14071 HOST_WIDE_INT leftover;
14073 leftover = length % bytes_per_iter;
14074 length -= leftover;
14076 /* Create registers and memory references for use within the loop. */
14077 arm_adjust_block_mem (src, bytes_per_iter, &src_reg, &src);
14078 arm_adjust_block_mem (dest, bytes_per_iter, &dest_reg, &dest);
14080 /* Calculate the value that SRC_REG should have after the last iteration of
14081 the loop. */
14082 final_src = expand_simple_binop (Pmode, PLUS, src_reg, GEN_INT (length),
14083 0, 0, OPTAB_WIDEN);
14085 /* Emit the start of the loop. */
14086 rtx_code_label *label = gen_label_rtx ();
14087 emit_label (label);
14089 /* Emit the loop body. */
14090 arm_block_move_unaligned_straight (dest, src, bytes_per_iter,
14091 interleave_factor);
14093 /* Move on to the next block. */
14094 emit_move_insn (src_reg, plus_constant (Pmode, src_reg, bytes_per_iter));
14095 emit_move_insn (dest_reg, plus_constant (Pmode, dest_reg, bytes_per_iter));
14097 /* Emit the loop condition. */
14098 test = gen_rtx_NE (VOIDmode, src_reg, final_src);
14099 emit_jump_insn (gen_cbranchsi4 (test, src_reg, final_src, label));
14101 /* Mop up any left-over bytes. */
14102 if (leftover)
14103 arm_block_move_unaligned_straight (dest, src, leftover, interleave_factor);
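/* The emitted structure is roughly:

     loop:
       <straight copy of BYTES_PER_ITER bytes>
       src_reg += BYTES_PER_ITER
       dest_reg += BYTES_PER_ITER
       if (src_reg != final_src) goto loop
     <straight copy of the leftover bytes, if any>  */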
14106 /* Emit a block move when either the source or destination is unaligned (not
14107 aligned to a four-byte boundary). This may need further tuning depending on
14108 core type, optimize_size setting, etc. */
14110 static int
14111 arm_movmemqi_unaligned (rtx *operands)
14113 HOST_WIDE_INT length = INTVAL (operands[2]);
14115 if (optimize_size)
14117 bool src_aligned = MEM_ALIGN (operands[1]) >= BITS_PER_WORD;
14118 bool dst_aligned = MEM_ALIGN (operands[0]) >= BITS_PER_WORD;
14119 /* Inlined memcpy using ldr/str/ldrh/strh can be quite big: try to limit
14120 size of code if optimizing for size. We'll use ldm/stm if src_aligned
14121 or dst_aligned though: allow more interleaving in those cases since the
14122 resulting code can be smaller. */
14123 unsigned int interleave_factor = (src_aligned || dst_aligned) ? 2 : 1;
14124 HOST_WIDE_INT bytes_per_iter = (src_aligned || dst_aligned) ? 8 : 4;
14126 if (length > 12)
14127 arm_block_move_unaligned_loop (operands[0], operands[1], length,
14128 interleave_factor, bytes_per_iter);
14129 else
14130 arm_block_move_unaligned_straight (operands[0], operands[1], length,
14131 interleave_factor);
14133 else
14135 /* Note that the loop created by arm_block_move_unaligned_loop may be
14136 subject to loop unrolling, which makes tuning this condition a little
14137 redundant. */
14138 if (length > 32)
14139 arm_block_move_unaligned_loop (operands[0], operands[1], length, 4, 16);
14140 else
14141 arm_block_move_unaligned_straight (operands[0], operands[1], length, 4);
14144 return 1;
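/* For instance, a 40-byte unaligned copy compiled without -Os takes the loop
   path above with two 16-byte iterations (interleave factor 4) followed by a
   straight copy of the remaining 8 bytes; with -Os and neither operand
   aligned, the loop instead works in 4-byte iterations.  */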
14147 int
14148 arm_gen_movmemqi (rtx *operands)
14150 HOST_WIDE_INT in_words_to_go, out_words_to_go, last_bytes;
14151 HOST_WIDE_INT srcoffset, dstoffset;
14152 rtx src, dst, srcbase, dstbase;
14153 rtx part_bytes_reg = NULL;
14154 rtx mem;
14156 if (!CONST_INT_P (operands[2])
14157 || !CONST_INT_P (operands[3])
14158 || INTVAL (operands[2]) > 64)
14159 return 0;
14161 if (unaligned_access && (INTVAL (operands[3]) & 3) != 0)
14162 return arm_movmemqi_unaligned (operands);
14164 if (INTVAL (operands[3]) & 3)
14165 return 0;
14167 dstbase = operands[0];
14168 srcbase = operands[1];
14170 dst = copy_to_mode_reg (SImode, XEXP (dstbase, 0));
14171 src = copy_to_mode_reg (SImode, XEXP (srcbase, 0));
14173 in_words_to_go = ARM_NUM_INTS (INTVAL (operands[2]));
14174 out_words_to_go = INTVAL (operands[2]) / 4;
14175 last_bytes = INTVAL (operands[2]) & 3;
14176 dstoffset = srcoffset = 0;
14178 if (out_words_to_go != in_words_to_go && ((in_words_to_go - 1) & 3) != 0)
14179 part_bytes_reg = gen_rtx_REG (SImode, (in_words_to_go - 1) & 3);
14181 while (in_words_to_go >= 2)
14183 if (in_words_to_go > 4)
14184 emit_insn (arm_gen_load_multiple (arm_regs_in_sequence, 4, src,
14185 TRUE, srcbase, &srcoffset));
14186 else
14187 emit_insn (arm_gen_load_multiple (arm_regs_in_sequence, in_words_to_go,
14188 src, FALSE, srcbase,
14189 &srcoffset));
14191 if (out_words_to_go)
14193 if (out_words_to_go > 4)
14194 emit_insn (arm_gen_store_multiple (arm_regs_in_sequence, 4, dst,
14195 TRUE, dstbase, &dstoffset));
14196 else if (out_words_to_go != 1)
14197 emit_insn (arm_gen_store_multiple (arm_regs_in_sequence,
14198 out_words_to_go, dst,
14199 (last_bytes == 0
14200 ? FALSE : TRUE),
14201 dstbase, &dstoffset));
14202 else
14204 mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
14205 emit_move_insn (mem, gen_rtx_REG (SImode, R0_REGNUM));
14206 if (last_bytes != 0)
14208 emit_insn (gen_addsi3 (dst, dst, GEN_INT (4)));
14209 dstoffset += 4;
14214 in_words_to_go -= in_words_to_go < 4 ? in_words_to_go : 4;
14215 out_words_to_go -= out_words_to_go < 4 ? out_words_to_go : 4;
14218 /* OUT_WORDS_TO_GO will be zero here if there are byte stores to do. */
14219 if (out_words_to_go)
14221 rtx sreg;
14223 mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
14224 sreg = copy_to_reg (mem);
14226 mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
14227 emit_move_insn (mem, sreg);
14228 in_words_to_go--;
14230 gcc_assert (!in_words_to_go); /* Sanity check */
14233 if (in_words_to_go)
14235 gcc_assert (in_words_to_go > 0);
14237 mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
14238 part_bytes_reg = copy_to_mode_reg (SImode, mem);
14241 gcc_assert (!last_bytes || part_bytes_reg);
14243 if (BYTES_BIG_ENDIAN && last_bytes)
14245 rtx tmp = gen_reg_rtx (SImode);
14247 /* The bytes we want are in the top end of the word. */
14248 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg,
14249 GEN_INT (8 * (4 - last_bytes))));
14250 part_bytes_reg = tmp;
14252 while (last_bytes)
14254 mem = adjust_automodify_address (dstbase, QImode,
14255 plus_constant (Pmode, dst,
14256 last_bytes - 1),
14257 dstoffset + last_bytes - 1);
14258 emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
14260 if (--last_bytes)
14262 tmp = gen_reg_rtx (SImode);
14263 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (8)));
14264 part_bytes_reg = tmp;
14269 else
14271 if (last_bytes > 1)
14273 mem = adjust_automodify_address (dstbase, HImode, dst, dstoffset);
14274 emit_move_insn (mem, gen_lowpart (HImode, part_bytes_reg));
14275 last_bytes -= 2;
14276 if (last_bytes)
14278 rtx tmp = gen_reg_rtx (SImode);
14279 emit_insn (gen_addsi3 (dst, dst, const2_rtx));
14280 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (16)));
14281 part_bytes_reg = tmp;
14282 dstoffset += 2;
14286 if (last_bytes)
14288 mem = adjust_automodify_address (dstbase, QImode, dst, dstoffset);
14289 emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
14293 return 1;
14296 /* Helper for gen_movmem_ldrd_strd. Increase the address of memory rtx
14297 by mode size. */
14298 inline static rtx
14299 next_consecutive_mem (rtx mem)
14301 machine_mode mode = GET_MODE (mem);
14302 HOST_WIDE_INT offset = GET_MODE_SIZE (mode);
14303 rtx addr = plus_constant (Pmode, XEXP (mem, 0), offset);
14305 return adjust_automodify_address (mem, mode, addr, offset);
14308 /* Copy using LDRD/STRD instructions whenever possible.
14309 Returns true upon success. */
14310 bool
14311 gen_movmem_ldrd_strd (rtx *operands)
14313 unsigned HOST_WIDE_INT len;
14314 HOST_WIDE_INT align;
14315 rtx src, dst, base;
14316 rtx reg0;
14317 bool src_aligned, dst_aligned;
14318 bool src_volatile, dst_volatile;
14320 gcc_assert (CONST_INT_P (operands[2]));
14321 gcc_assert (CONST_INT_P (operands[3]));
14323 len = UINTVAL (operands[2]);
14324 if (len > 64)
14325 return false;
14327 /* Maximum alignment we can assume for both src and dst buffers. */
14328 align = INTVAL (operands[3]);
14330 if ((!unaligned_access) && (len >= 4) && ((align & 3) != 0))
14331 return false;
14333 /* Place src and dst addresses in registers
14334 and update the corresponding mem rtx. */
14335 dst = operands[0];
14336 dst_volatile = MEM_VOLATILE_P (dst);
14337 dst_aligned = MEM_ALIGN (dst) >= BITS_PER_WORD;
14338 base = copy_to_mode_reg (SImode, XEXP (dst, 0));
14339 dst = adjust_automodify_address (dst, VOIDmode, base, 0);
14341 src = operands[1];
14342 src_volatile = MEM_VOLATILE_P (src);
14343 src_aligned = MEM_ALIGN (src) >= BITS_PER_WORD;
14344 base = copy_to_mode_reg (SImode, XEXP (src, 0));
14345 src = adjust_automodify_address (src, VOIDmode, base, 0);
14347 if (!unaligned_access && !(src_aligned && dst_aligned))
14348 return false;
14350 if (src_volatile || dst_volatile)
14351 return false;
14353 /* If we cannot generate any LDRD/STRD, try to generate LDM/STM. */
14354 if (!(dst_aligned || src_aligned))
14355 return arm_gen_movmemqi (operands);
14357 /* If either src or dst is unaligned we'll be accessing it as pairs
14358 of unaligned SImode accesses. Otherwise we can generate DImode
14359 ldrd/strd instructions. */
14360 src = adjust_address (src, src_aligned ? DImode : SImode, 0);
14361 dst = adjust_address (dst, dst_aligned ? DImode : SImode, 0);
14363 while (len >= 8)
14365 len -= 8;
14366 reg0 = gen_reg_rtx (DImode);
14367 rtx low_reg = NULL_RTX;
14368 rtx hi_reg = NULL_RTX;
14370 if (!src_aligned || !dst_aligned)
14372 low_reg = gen_lowpart (SImode, reg0);
14373 hi_reg = gen_highpart_mode (SImode, DImode, reg0);
14375 if (src_aligned)
14376 emit_move_insn (reg0, src);
14377 else
14379 emit_insn (gen_unaligned_loadsi (low_reg, src));
14380 src = next_consecutive_mem (src);
14381 emit_insn (gen_unaligned_loadsi (hi_reg, src));
14384 if (dst_aligned)
14385 emit_move_insn (dst, reg0);
14386 else
14388 emit_insn (gen_unaligned_storesi (dst, low_reg));
14389 dst = next_consecutive_mem (dst);
14390 emit_insn (gen_unaligned_storesi (dst, hi_reg));
14393 src = next_consecutive_mem (src);
14394 dst = next_consecutive_mem (dst);
14397 gcc_assert (len < 8);
14398 if (len >= 4)
14400 /* More than a word but less than a double-word to copy. Copy a word. */
14401 reg0 = gen_reg_rtx (SImode);
14402 src = adjust_address (src, SImode, 0);
14403 dst = adjust_address (dst, SImode, 0);
14404 if (src_aligned)
14405 emit_move_insn (reg0, src);
14406 else
14407 emit_insn (gen_unaligned_loadsi (reg0, src));
14409 if (dst_aligned)
14410 emit_move_insn (dst, reg0);
14411 else
14412 emit_insn (gen_unaligned_storesi (dst, reg0));
14414 src = next_consecutive_mem (src);
14415 dst = next_consecutive_mem (dst);
14416 len -= 4;
14419 if (len == 0)
14420 return true;
14422 /* Copy the remaining bytes. */
14423 if (len >= 2)
14425 dst = adjust_address (dst, HImode, 0);
14426 src = adjust_address (src, HImode, 0);
14427 reg0 = gen_reg_rtx (SImode);
14428 if (src_aligned)
14429 emit_insn (gen_zero_extendhisi2 (reg0, src));
14430 else
14431 emit_insn (gen_unaligned_loadhiu (reg0, src));
14433 if (dst_aligned)
14434 emit_insn (gen_movhi (dst, gen_lowpart(HImode, reg0)));
14435 else
14436 emit_insn (gen_unaligned_storehi (dst, gen_lowpart (HImode, reg0)));
14438 src = next_consecutive_mem (src);
14439 dst = next_consecutive_mem (dst);
14440 if (len == 2)
14441 return true;
14444 dst = adjust_address (dst, QImode, 0);
14445 src = adjust_address (src, QImode, 0);
14446 reg0 = gen_reg_rtx (QImode);
14447 emit_move_insn (reg0, src);
14448 emit_move_insn (dst, reg0);
14449 return true;
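/* For example, a 14-byte copy with both operands word-aligned is emitted as
   one DImode transfer (typically LDRD/STRD), one SImode word copy and one
   halfword copy; if either side is unaligned, the DImode step is replaced by
   a pair of unaligned SImode accesses.  */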
14452 /* Select a dominance comparison mode if possible for a test of the general
14453 form (OP (COND_OR (X) (Y)) (const_int 0)). We support three forms.
14454 COND_OR == DOM_CC_X_AND_Y => (X && Y)
14455 COND_OR == DOM_CC_NX_OR_Y => ((! X) || Y)
14456 COND_OR == DOM_CC_X_OR_Y => (X || Y)
14457 In all cases OP will be either EQ or NE, but we don't need to know which
14458 here. If we are unable to support a dominance comparison we return
14459 CC mode. This will then fail to match for the RTL expressions that
14460 generate this call. */
14461 machine_mode
14462 arm_select_dominance_cc_mode (rtx x, rtx y, HOST_WIDE_INT cond_or)
14464 enum rtx_code cond1, cond2;
14465 int swapped = 0;
14467 /* Currently we will probably get the wrong result if the individual
14468 comparisons are not simple. This also ensures that it is safe to
14469 reverse a comparison if necessary. */
14470 if ((arm_select_cc_mode (cond1 = GET_CODE (x), XEXP (x, 0), XEXP (x, 1))
14471 != CCmode)
14472 || (arm_select_cc_mode (cond2 = GET_CODE (y), XEXP (y, 0), XEXP (y, 1))
14473 != CCmode))
14474 return CCmode;
14476 /* The if_then_else variant of this tests the second condition if the
14477 first passes, but is true if the first fails. Reverse the first
14478 condition to get a true "inclusive-or" expression. */
14479 if (cond_or == DOM_CC_NX_OR_Y)
14480 cond1 = reverse_condition (cond1);
14482 /* If the comparisons are not equal, and one doesn't dominate the other,
14483 then we can't do this. */
14484 if (cond1 != cond2
14485 && !comparison_dominates_p (cond1, cond2)
14486 && (swapped = 1, !comparison_dominates_p (cond2, cond1)))
14487 return CCmode;
14489 if (swapped)
14490 std::swap (cond1, cond2);
14492 switch (cond1)
14494 case EQ:
14495 if (cond_or == DOM_CC_X_AND_Y)
14496 return CC_DEQmode;
14498 switch (cond2)
14500 case EQ: return CC_DEQmode;
14501 case LE: return CC_DLEmode;
14502 case LEU: return CC_DLEUmode;
14503 case GE: return CC_DGEmode;
14504 case GEU: return CC_DGEUmode;
14505 default: gcc_unreachable ();
14508 case LT:
14509 if (cond_or == DOM_CC_X_AND_Y)
14510 return CC_DLTmode;
14512 switch (cond2)
14514 case LT:
14515 return CC_DLTmode;
14516 case LE:
14517 return CC_DLEmode;
14518 case NE:
14519 return CC_DNEmode;
14520 default:
14521 gcc_unreachable ();
14524 case GT:
14525 if (cond_or == DOM_CC_X_AND_Y)
14526 return CC_DGTmode;
14528 switch (cond2)
14530 case GT:
14531 return CC_DGTmode;
14532 case GE:
14533 return CC_DGEmode;
14534 case NE:
14535 return CC_DNEmode;
14536 default:
14537 gcc_unreachable ();
14540 case LTU:
14541 if (cond_or == DOM_CC_X_AND_Y)
14542 return CC_DLTUmode;
14544 switch (cond2)
14546 case LTU:
14547 return CC_DLTUmode;
14548 case LEU:
14549 return CC_DLEUmode;
14550 case NE:
14551 return CC_DNEmode;
14552 default:
14553 gcc_unreachable ();
14556 case GTU:
14557 if (cond_or == DOM_CC_X_AND_Y)
14558 return CC_DGTUmode;
14560 switch (cond2)
14562 case GTU:
14563 return CC_DGTUmode;
14564 case GEU:
14565 return CC_DGEUmode;
14566 case NE:
14567 return CC_DNEmode;
14568 default:
14569 gcc_unreachable ();
14572 /* The remaining cases only occur when both comparisons are the
14573 same. */
14574 case NE:
14575 gcc_assert (cond1 == cond2);
14576 return CC_DNEmode;
14578 case LE:
14579 gcc_assert (cond1 == cond2);
14580 return CC_DLEmode;
14582 case GE:
14583 gcc_assert (cond1 == cond2);
14584 return CC_DGEmode;
14586 case LEU:
14587 gcc_assert (cond1 == cond2);
14588 return CC_DLEUmode;
14590 case GEU:
14591 gcc_assert (cond1 == cond2);
14592 return CC_DGEUmode;
14594 default:
14595 gcc_unreachable ();
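/* For example, a DOM_CC_X_AND_Y request for the condition pair (GE, GT)
   returns CC_DGTmode, whereas an unrelated pair such as (LT, GTU) cannot be
   combined and yields plain CCmode, so the calling pattern fails to match.  */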
14599 machine_mode
14600 arm_select_cc_mode (enum rtx_code op, rtx x, rtx y)
14602 /* All floating point compares return CCFP if it is an equality
14603 comparison, and CCFPE otherwise. */
14604 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
14606 switch (op)
14608 case EQ:
14609 case NE:
14610 case UNORDERED:
14611 case ORDERED:
14612 case UNLT:
14613 case UNLE:
14614 case UNGT:
14615 case UNGE:
14616 case UNEQ:
14617 case LTGT:
14618 return CCFPmode;
14620 case LT:
14621 case LE:
14622 case GT:
14623 case GE:
14624 return CCFPEmode;
14626 default:
14627 gcc_unreachable ();
14631 /* A compare with a shifted operand. Because of canonicalization, the
14632 comparison will have to be swapped when we emit the assembler. */
14633 if (GET_MODE (y) == SImode
14634 && (REG_P (y) || (GET_CODE (y) == SUBREG))
14635 && (GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
14636 || GET_CODE (x) == LSHIFTRT || GET_CODE (x) == ROTATE
14637 || GET_CODE (x) == ROTATERT))
14638 return CC_SWPmode;
14640 /* This operation is performed swapped, but since we only rely on the Z
14641 flag we don't need an additional mode. */
14642 if (GET_MODE (y) == SImode
14643 && (REG_P (y) || (GET_CODE (y) == SUBREG))
14644 && GET_CODE (x) == NEG
14645 && (op == EQ || op == NE))
14646 return CC_Zmode;
14648 /* This is a special case that is used by combine to allow a
14649 comparison of a shifted byte load to be split into a zero-extend
14650 followed by a comparison of the shifted integer (only valid for
14651 equalities and unsigned inequalities). */
14652 if (GET_MODE (x) == SImode
14653 && GET_CODE (x) == ASHIFT
14654 && CONST_INT_P (XEXP (x, 1)) && INTVAL (XEXP (x, 1)) == 24
14655 && GET_CODE (XEXP (x, 0)) == SUBREG
14656 && MEM_P (SUBREG_REG (XEXP (x, 0)))
14657 && GET_MODE (SUBREG_REG (XEXP (x, 0))) == QImode
14658 && (op == EQ || op == NE
14659 || op == GEU || op == GTU || op == LTU || op == LEU)
14660 && CONST_INT_P (y))
14661 return CC_Zmode;
14663 /* A construct for a conditional compare, if the false arm contains
14664 0, then both conditions must be true, otherwise either condition
14665 must be true. Not all conditions are possible, so CCmode is
14666 returned if it can't be done. */
14667 if (GET_CODE (x) == IF_THEN_ELSE
14668 && (XEXP (x, 2) == const0_rtx
14669 || XEXP (x, 2) == const1_rtx)
14670 && COMPARISON_P (XEXP (x, 0))
14671 && COMPARISON_P (XEXP (x, 1)))
14672 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
14673 INTVAL (XEXP (x, 2)));
14675 /* Alternate canonicalizations of the above. These are somewhat cleaner. */
14676 if (GET_CODE (x) == AND
14677 && (op == EQ || op == NE)
14678 && COMPARISON_P (XEXP (x, 0))
14679 && COMPARISON_P (XEXP (x, 1)))
14680 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
14681 DOM_CC_X_AND_Y);
14683 if (GET_CODE (x) == IOR
14684 && (op == EQ || op == NE)
14685 && COMPARISON_P (XEXP (x, 0))
14686 && COMPARISON_P (XEXP (x, 1)))
14687 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
14688 DOM_CC_X_OR_Y);
14690 /* An operation (on Thumb) where we want to test for a single bit.
14691 This is done by shifting that bit up into the top bit of a
14692 scratch register; we can then branch on the sign bit. */
14693 if (TARGET_THUMB1
14694 && GET_MODE (x) == SImode
14695 && (op == EQ || op == NE)
14696 && GET_CODE (x) == ZERO_EXTRACT
14697 && XEXP (x, 1) == const1_rtx)
14698 return CC_Nmode;
14700 /* For an operation that sets the condition codes as a side-effect, the
14701 V flag is not set correctly, so we can only use comparisons where
14702 this doesn't matter. (For LT and GE we can use "mi" and "pl"
14703 instead.) */
14704 /* ??? Does the ZERO_EXTRACT case really apply to thumb2? */
14705 if (GET_MODE (x) == SImode
14706 && y == const0_rtx
14707 && (op == EQ || op == NE || op == LT || op == GE)
14708 && (GET_CODE (x) == PLUS || GET_CODE (x) == MINUS
14709 || GET_CODE (x) == AND || GET_CODE (x) == IOR
14710 || GET_CODE (x) == XOR || GET_CODE (x) == MULT
14711 || GET_CODE (x) == NOT || GET_CODE (x) == NEG
14712 || GET_CODE (x) == LSHIFTRT
14713 || GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
14714 || GET_CODE (x) == ROTATERT
14715 || (TARGET_32BIT && GET_CODE (x) == ZERO_EXTRACT)))
14716 return CC_NOOVmode;
14718 if (GET_MODE (x) == QImode && (op == EQ || op == NE))
14719 return CC_Zmode;
14721 if (GET_MODE (x) == SImode && (op == LTU || op == GEU)
14722 && GET_CODE (x) == PLUS
14723 && (rtx_equal_p (XEXP (x, 0), y) || rtx_equal_p (XEXP (x, 1), y)))
14724 return CC_Cmode;
14726 if (GET_MODE (x) == DImode || GET_MODE (y) == DImode)
14728 switch (op)
14730 case EQ:
14731 case NE:
14732 /* A DImode comparison against zero can be implemented by
14733 or'ing the two halves together. */
14734 if (y == const0_rtx)
14735 return CC_Zmode;
14737 /* We can do an equality test in three Thumb instructions. */
14738 if (!TARGET_32BIT)
14739 return CC_Zmode;
14741 /* FALLTHROUGH */
14743 case LTU:
14744 case LEU:
14745 case GTU:
14746 case GEU:
14747 /* DImode unsigned comparisons can be implemented by cmp +
14748 cmpeq without a scratch register. Not worth doing in
14749 Thumb-2. */
14750 if (TARGET_32BIT)
14751 return CC_CZmode;
14753 /* FALLTHROUGH */
14755 case LT:
14756 case LE:
14757 case GT:
14758 case GE:
14759 /* DImode signed and unsigned comparisons can be implemented
14760 by cmp + sbcs with a scratch register, but that does not
14761 set the Z flag - we must reverse GT/LE/GTU/LEU. */
14762 gcc_assert (op != EQ && op != NE);
14763 return CC_NCVmode;
14765 default:
14766 gcc_unreachable ();
14770 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_CC)
14771 return GET_MODE (x);
14773 return CCmode;
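/* For instance, comparing (ashift:SI r1 (const_int 2)) against a register
   selects CC_SWPmode because the operands must be swapped when the
   comparison is output, and an EQ/NE test of (plus:SI r0 r1) against zero
   selects CC_NOOVmode since the overflow flag cannot be relied upon.  */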
14776 /* X and Y are two things to compare using CODE. Emit the compare insn and
14777 return the rtx for register 0 in the proper mode. FP means this is a
14778 floating point compare: I don't think that it is needed on the arm. */
14779 rtx
14780 arm_gen_compare_reg (enum rtx_code code, rtx x, rtx y, rtx scratch)
14782 machine_mode mode;
14783 rtx cc_reg;
14784 int dimode_comparison = GET_MODE (x) == DImode || GET_MODE (y) == DImode;
14786 /* We might have X as a constant, Y as a register because of the predicates
14787 used for cmpdi. If so, force X to a register here. */
14788 if (dimode_comparison && !REG_P (x))
14789 x = force_reg (DImode, x);
14791 mode = SELECT_CC_MODE (code, x, y);
14792 cc_reg = gen_rtx_REG (mode, CC_REGNUM);
14794 if (dimode_comparison
14795 && mode != CC_CZmode)
14797 rtx clobber, set;
14799 /* To compare two non-zero values for equality, XOR them and
14800 then compare against zero. Not used for ARM mode; there
14801 CC_CZmode is cheaper. */
14802 if (mode == CC_Zmode && y != const0_rtx)
14804 gcc_assert (!reload_completed);
14805 x = expand_binop (DImode, xor_optab, x, y, NULL_RTX, 0, OPTAB_WIDEN);
14806 y = const0_rtx;
14809 /* A scratch register is required. */
14810 if (reload_completed)
14811 gcc_assert (scratch != NULL && GET_MODE (scratch) == SImode);
14812 else
14813 scratch = gen_rtx_SCRATCH (SImode);
14815 clobber = gen_rtx_CLOBBER (VOIDmode, scratch);
14816 set = gen_rtx_SET (cc_reg, gen_rtx_COMPARE (mode, x, y));
14817 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, set, clobber)));
14819 else
14820 emit_set_insn (cc_reg, gen_rtx_COMPARE (mode, x, y));
14822 return cc_reg;
14825 /* Generate a sequence of insns that will generate the correct return
14826 address mask depending on the physical architecture that the program
14827 is running on. */
14828 rtx
14829 arm_gen_return_addr_mask (void)
14831 rtx reg = gen_reg_rtx (Pmode);
14833 emit_insn (gen_return_addr_mask (reg));
14834 return reg;
14837 void
14838 arm_reload_in_hi (rtx *operands)
14840 rtx ref = operands[1];
14841 rtx base, scratch;
14842 HOST_WIDE_INT offset = 0;
14844 if (GET_CODE (ref) == SUBREG)
14846 offset = SUBREG_BYTE (ref);
14847 ref = SUBREG_REG (ref);
14850 if (REG_P (ref))
14852 /* We have a pseudo which has been spilt onto the stack; there
14853 are two cases here: the first where there is a simple
14854 stack-slot replacement and a second where the stack-slot is
14855 out of range, or is used as a subreg. */
14856 if (reg_equiv_mem (REGNO (ref)))
14858 ref = reg_equiv_mem (REGNO (ref));
14859 base = find_replacement (&XEXP (ref, 0));
14861 else
14862 /* The slot is out of range, or was dressed up in a SUBREG. */
14863 base = reg_equiv_address (REGNO (ref));
14865 /* PR 62554: If there is no equivalent memory location then just move
14866 the value as an SImode register move. This happens when the target
14867 architecture variant does not have an HImode register move. */
14868 if (base == NULL)
14870 gcc_assert (REG_P (operands[0]));
14871 emit_insn (gen_movsi (gen_rtx_SUBREG (SImode, operands[0], 0),
14872 gen_rtx_SUBREG (SImode, ref, 0)));
14873 return;
14876 else
14877 base = find_replacement (&XEXP (ref, 0));
14879 /* Handle the case where the address is too complex to be offset by 1. */
14880 if (GET_CODE (base) == MINUS
14881 || (GET_CODE (base) == PLUS && !CONST_INT_P (XEXP (base, 1))))
14883 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
14885 emit_set_insn (base_plus, base);
14886 base = base_plus;
14888 else if (GET_CODE (base) == PLUS)
14890 /* The addend must be CONST_INT, or we would have dealt with it above. */
14891 HOST_WIDE_INT hi, lo;
14893 offset += INTVAL (XEXP (base, 1));
14894 base = XEXP (base, 0);
14896 /* Rework the address into a legal sequence of insns. */
14897 /* Valid range for lo is -4095 -> 4095 */
14898 lo = (offset >= 0
14899 ? (offset & 0xfff)
14900 : -((-offset) & 0xfff));
14902 /* Corner case: if lo is the max offset then we would be out of range
14903 once we have added the additional 1 below, so bump the msb into the
14904 pre-loading insn(s). */
14905 if (lo == 4095)
14906 lo &= 0x7ff;
14908 hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
14909 ^ (HOST_WIDE_INT) 0x80000000)
14910 - (HOST_WIDE_INT) 0x80000000);
14912 gcc_assert (hi + lo == offset);
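/* For example, an offset of 4100 is split into hi = 4096 and lo = 4, while
   an offset of 4095 (which would go out of range once the extra 1 is added
   for the second byte) is split into hi = 2048 and lo = 2047.  */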
14914 if (hi != 0)
14916 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
14918 /* Get the base address; addsi3 knows how to handle constants
14919 that require more than one insn. */
14920 emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
14921 base = base_plus;
14922 offset = lo;
14926 /* Operands[2] may overlap operands[0] (though it won't overlap
14927 operands[1]); that's why we asked for a DImode reg -- so we can
14928 use the bit that does not overlap. */
14929 if (REGNO (operands[2]) == REGNO (operands[0]))
14930 scratch = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
14931 else
14932 scratch = gen_rtx_REG (SImode, REGNO (operands[2]));
14934 emit_insn (gen_zero_extendqisi2 (scratch,
14935 gen_rtx_MEM (QImode,
14936 plus_constant (Pmode, base,
14937 offset))));
14938 emit_insn (gen_zero_extendqisi2 (gen_rtx_SUBREG (SImode, operands[0], 0),
14939 gen_rtx_MEM (QImode,
14940 plus_constant (Pmode, base,
14941 offset + 1))));
14942 if (!BYTES_BIG_ENDIAN)
14943 emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
14944 gen_rtx_IOR (SImode,
14945 gen_rtx_ASHIFT
14946 (SImode,
14947 gen_rtx_SUBREG (SImode, operands[0], 0),
14948 GEN_INT (8)),
14949 scratch));
14950 else
14951 emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
14952 gen_rtx_IOR (SImode,
14953 gen_rtx_ASHIFT (SImode, scratch,
14954 GEN_INT (8)),
14955 gen_rtx_SUBREG (SImode, operands[0], 0)));
14958 /* Handle storing a half-word to memory during reload by synthesizing it as two
14959 byte stores. Take care not to clobber the input values until after we
14960 have moved them somewhere safe. This code assumes that if the DImode
14961 scratch in operands[2] overlaps either the input value or output address
14962 in some way, then that value must die in this insn (we absolutely need
14963 two scratch registers for some corner cases). */
14964 void
14965 arm_reload_out_hi (rtx *operands)
14967 rtx ref = operands[0];
14968 rtx outval = operands[1];
14969 rtx base, scratch;
14970 HOST_WIDE_INT offset = 0;
14972 if (GET_CODE (ref) == SUBREG)
14974 offset = SUBREG_BYTE (ref);
14975 ref = SUBREG_REG (ref);
14978 if (REG_P (ref))
14980 /* We have a pseudo which has been spilt onto the stack; there
14981 are two cases here: the first where there is a simple
14982 stack-slot replacement and a second where the stack-slot is
14983 out of range, or is used as a subreg. */
14984 if (reg_equiv_mem (REGNO (ref)))
14986 ref = reg_equiv_mem (REGNO (ref));
14987 base = find_replacement (&XEXP (ref, 0));
14989 else
14990 /* The slot is out of range, or was dressed up in a SUBREG. */
14991 base = reg_equiv_address (REGNO (ref));
14993 /* PR 62254: If there is no equivalent memory location then just move
14994 the value as an SImode register move. This happens when the target
14995 architecture variant does not have an HImode register move. */
14996 if (base == NULL)
14998 gcc_assert (REG_P (outval) || SUBREG_P (outval));
15000 if (REG_P (outval))
15002 emit_insn (gen_movsi (gen_rtx_SUBREG (SImode, ref, 0),
15003 gen_rtx_SUBREG (SImode, outval, 0)));
15005 else /* SUBREG_P (outval) */
15007 if (GET_MODE (SUBREG_REG (outval)) == SImode)
15008 emit_insn (gen_movsi (gen_rtx_SUBREG (SImode, ref, 0),
15009 SUBREG_REG (outval)));
15010 else
15011 /* FIXME: Handle other cases ? */
15012 gcc_unreachable ();
15014 return;
15017 else
15018 base = find_replacement (&XEXP (ref, 0));
15020 scratch = gen_rtx_REG (SImode, REGNO (operands[2]));
15022 /* Handle the case where the address is too complex to be offset by 1. */
15023 if (GET_CODE (base) == MINUS
15024 || (GET_CODE (base) == PLUS && !CONST_INT_P (XEXP (base, 1))))
15026 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
15028 /* Be careful not to destroy OUTVAL. */
15029 if (reg_overlap_mentioned_p (base_plus, outval))
15031 /* Updating base_plus might destroy outval, see if we can
15032 swap the scratch and base_plus. */
15033 if (!reg_overlap_mentioned_p (scratch, outval))
15034 std::swap (scratch, base_plus);
15035 else
15037 rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));
15039 /* Be conservative and copy OUTVAL into the scratch now,
15040 this should only be necessary if outval is a subreg
15041 of something larger than a word. */
15042 /* XXX Might this clobber base? I can't see how it can,
15043 since scratch is known to overlap with OUTVAL, and
15044 must be wider than a word. */
15045 emit_insn (gen_movhi (scratch_hi, outval));
15046 outval = scratch_hi;
15050 emit_set_insn (base_plus, base);
15051 base = base_plus;
15053 else if (GET_CODE (base) == PLUS)
15055 /* The addend must be CONST_INT, or we would have dealt with it above. */
15056 HOST_WIDE_INT hi, lo;
15058 offset += INTVAL (XEXP (base, 1));
15059 base = XEXP (base, 0);
15061 /* Rework the address into a legal sequence of insns. */
15062 /* Valid range for lo is -4095 -> 4095 */
15063 lo = (offset >= 0
15064 ? (offset & 0xfff)
15065 : -((-offset) & 0xfff));
15067 /* Corner case, if lo is the max offset then we would be out of range
15068 once we have added the additional 1 below, so bump the msb into the
15069 pre-loading insn(s). */
15070 if (lo == 4095)
15071 lo &= 0x7ff;
15073 hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
15074 ^ (HOST_WIDE_INT) 0x80000000)
15075 - (HOST_WIDE_INT) 0x80000000);
15077 gcc_assert (hi + lo == offset);
15079 if (hi != 0)
15081 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
15083 /* Be careful not to destroy OUTVAL. */
15084 if (reg_overlap_mentioned_p (base_plus, outval))
15086 /* Updating base_plus might destroy outval, see if we
15087 can swap the scratch and base_plus. */
15088 if (!reg_overlap_mentioned_p (scratch, outval))
15089 std::swap (scratch, base_plus);
15090 else
15092 rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));
15094 /* Be conservative and copy outval into scratch now,
15095 this should only be necessary if outval is a
15096 subreg of something larger than a word. */
15097 /* XXX Might this clobber base? I can't see how it
15098 can, since scratch is known to overlap with
15099 outval. */
15100 emit_insn (gen_movhi (scratch_hi, outval));
15101 outval = scratch_hi;
15105 /* Get the base address; addsi3 knows how to handle constants
15106 that require more than one insn. */
15107 emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
15108 base = base_plus;
15109 offset = lo;
15113 if (BYTES_BIG_ENDIAN)
15115 emit_insn (gen_movqi (gen_rtx_MEM (QImode,
15116 plus_constant (Pmode, base,
15117 offset + 1)),
15118 gen_lowpart (QImode, outval)));
15119 emit_insn (gen_lshrsi3 (scratch,
15120 gen_rtx_SUBREG (SImode, outval, 0),
15121 GEN_INT (8)));
15122 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, base,
15123 offset)),
15124 gen_lowpart (QImode, scratch)));
15126 else
15128 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, base,
15129 offset)),
15130 gen_lowpart (QImode, outval)));
15131 emit_insn (gen_lshrsi3 (scratch,
15132 gen_rtx_SUBREG (SImode, outval, 0),
15133 GEN_INT (8)));
15134 emit_insn (gen_movqi (gen_rtx_MEM (QImode,
15135 plus_constant (Pmode, base,
15136 offset + 1)),
15137 gen_lowpart (QImode, scratch)));
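/* Illustrative worked example (added note, not from the original source)
   of the hi/lo offset split used in arm_reload_out_hi above.  LO keeps at
   most 12 bits of magnitude so it fits the +/-4095 addressing range, and
   HI absorbs the remainder, sign-extended so that HI + LO == OFFSET:

     offset = 0x1234  ->  lo = 0x234, hi = 0x1000
     offset = 4095    ->  lo = 2047 (after the corner-case adjustment),
                          hi = 2048
     offset = -4100   ->  lo = -4,   hi = -4096

   In each case HI can be added with addsi3 and LO stays within the
   immediate range of the byte stores emitted above.  */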
15141 /* Return true if a type must be passed in memory. For AAPCS, small aggregates
15142 (padded to the size of a word) should be passed in a register. */
15144 static bool
15145 arm_must_pass_in_stack (machine_mode mode, const_tree type)
15147 if (TARGET_AAPCS_BASED)
15148 return must_pass_in_stack_var_size (mode, type);
15149 else
15150 return must_pass_in_stack_var_size_or_pad (mode, type);
15154 /* For use by FUNCTION_ARG_PADDING (MODE, TYPE).
15155 Return true if an argument passed on the stack should be padded upwards,
15156 i.e. if the least-significant byte has useful data.
15157 For legacy APCS ABIs we use the default. For AAPCS based ABIs small
15158 aggregate types are placed in the lowest memory address. */
15160 bool
15161 arm_pad_arg_upward (machine_mode mode ATTRIBUTE_UNUSED, const_tree type)
15163 if (!TARGET_AAPCS_BASED)
15164 return DEFAULT_FUNCTION_ARG_PADDING(mode, type) == upward;
15166 if (type && BYTES_BIG_ENDIAN && INTEGRAL_TYPE_P (type))
15167 return false;
15169 return true;
15173 /* Similarly, for use by BLOCK_REG_PADDING (MODE, TYPE, FIRST).
15174 Return !BYTES_BIG_ENDIAN if the least significant byte of the
15175 register has useful data, and return the opposite if the most
15176 significant byte does. */
15178 bool
15179 arm_pad_reg_upward (machine_mode mode,
15180 tree type, int first ATTRIBUTE_UNUSED)
15182 if (TARGET_AAPCS_BASED && BYTES_BIG_ENDIAN)
15184 /* For AAPCS, small aggregates, small fixed-point types,
15185 and small complex types are always padded upwards. */
15186 if (type)
15188 if ((AGGREGATE_TYPE_P (type)
15189 || TREE_CODE (type) == COMPLEX_TYPE
15190 || FIXED_POINT_TYPE_P (type))
15191 && int_size_in_bytes (type) <= 4)
15192 return true;
15194 else
15196 if ((COMPLEX_MODE_P (mode) || ALL_FIXED_POINT_MODE_P (mode))
15197 && GET_MODE_SIZE (mode) <= 4)
15198 return true;
15202 /* Otherwise, use default padding. */
15203 return !BYTES_BIG_ENDIAN;
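/* Added worked example (not from the original source): on an AAPCS
   big-endian target a three-byte struct satisfies the aggregate test
   above (int_size_in_bytes == 3 <= 4) and is therefore padded upward,
   while a type that matches none of the tests falls through to the
   default, !BYTES_BIG_ENDIAN.  */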
15206 /* Returns true iff OFFSET is valid for use in an LDRD/STRD instruction,
15207 assuming that the address in the base register is word aligned. */
15208 bool
15209 offset_ok_for_ldrd_strd (HOST_WIDE_INT offset)
15211 HOST_WIDE_INT max_offset;
15213 /* Offset must be a multiple of 4 in Thumb mode. */
15214 if (TARGET_THUMB2 && ((offset & 3) != 0))
15215 return false;
15217 if (TARGET_THUMB2)
15218 max_offset = 1020;
15219 else if (TARGET_ARM)
15220 max_offset = 255;
15221 else
15222 return false;
15224 return ((offset <= max_offset) && (offset >= -max_offset));
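/* Added usage examples (not from the original source): in Thumb-2 state
   an offset of 1020 is accepted, 1024 is rejected (out of range) and 6
   is rejected (not a multiple of 4); in ARM state 255 is accepted, 256
   is rejected, and there is no multiple-of-4 requirement.  */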
15227 /* Checks whether the operands are valid for use in an LDRD/STRD instruction.
15228 Assumes that RT, RT2, and RN are REG. This is guaranteed by the patterns.
15229 Assumes that the address in the base register RN is word aligned. Pattern
15230 guarantees that both memory accesses use the same base register,
15231 the offsets are constants within the range, and the gap between the offsets is 4.
15232    If reload is complete then check that registers are legal.  WBACK indicates whether
15233 address is updated. LOAD indicates whether memory access is load or store. */
15234 bool
15235 operands_ok_ldrd_strd (rtx rt, rtx rt2, rtx rn, HOST_WIDE_INT offset,
15236 bool wback, bool load)
15238 unsigned int t, t2, n;
15240 if (!reload_completed)
15241 return true;
15243 if (!offset_ok_for_ldrd_strd (offset))
15244 return false;
15246 t = REGNO (rt);
15247 t2 = REGNO (rt2);
15248 n = REGNO (rn);
15250 if ((TARGET_THUMB2)
15251 && ((wback && (n == t || n == t2))
15252 || (t == SP_REGNUM)
15253 || (t == PC_REGNUM)
15254 || (t2 == SP_REGNUM)
15255 || (t2 == PC_REGNUM)
15256 || (!load && (n == PC_REGNUM))
15257 || (load && (t == t2))
15258 /* Triggers Cortex-M3 LDRD errata. */
15259 || (!wback && load && fix_cm3_ldrd && (n == t))))
15260 return false;
15262 if ((TARGET_ARM)
15263 && ((wback && (n == t || n == t2))
15264 || (t2 == PC_REGNUM)
15265 || (t % 2 != 0) /* First destination register is not even. */
15266 || (t2 != t + 1)
15267 /* PC can be used as base register (for offset addressing only),
15268        but it is deprecated.  */
15269 || (n == PC_REGNUM)))
15270 return false;
15272 return true;
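/* Added usage examples (not from the original source), assuming reload
   has completed: in ARM state an LDRD of {r0, r1} from [r2] with a small
   in-range offset and no writeback passes the checks, while {r1, r2} is
   rejected because the first destination register is odd; in Thumb-2
   state a load with RT == RT2 is rejected; and in either state a
   writeback form whose base register equals a destination is rejected.  */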
15275 /* Helper for gen_operands_ldrd_strd. Returns true iff the memory
15276 operand MEM's address contains an immediate offset from the base
15277 register and has no side effects, in which case it sets BASE and
15278 OFFSET accordingly. */
15279 static bool
15280 mem_ok_for_ldrd_strd (rtx mem, rtx *base, rtx *offset)
15282 rtx addr;
15284 gcc_assert (base != NULL && offset != NULL);
15286 /* TODO: Handle more general memory operand patterns, such as
15287 PRE_DEC and PRE_INC. */
15289 if (side_effects_p (mem))
15290 return false;
15292 /* Can't deal with subregs. */
15293 if (GET_CODE (mem) == SUBREG)
15294 return false;
15296 gcc_assert (MEM_P (mem));
15298 *offset = const0_rtx;
15300 addr = XEXP (mem, 0);
15302 /* If addr isn't valid for DImode, then we can't handle it. */
15303 if (!arm_legitimate_address_p (DImode, addr,
15304 reload_in_progress || reload_completed))
15305 return false;
15307 if (REG_P (addr))
15309 *base = addr;
15310 return true;
15312 else if (GET_CODE (addr) == PLUS || GET_CODE (addr) == MINUS)
15314 *base = XEXP (addr, 0);
15315 *offset = XEXP (addr, 1);
15316 return (REG_P (*base) && CONST_INT_P (*offset));
15319 return false;
15322 /* Called from a peephole2 to replace two word-size accesses with a
15323 single LDRD/STRD instruction. Returns true iff we can generate a
15324 new instruction sequence. That is, both accesses use the same base
15325 register and the gap between constant offsets is 4. This function
15326 may reorder its operands to match ldrd/strd RTL templates.
15327 OPERANDS are the operands found by the peephole matcher;
15328 OPERANDS[0,1] are register operands, and OPERANDS[2,3] are the
15329    corresponding memory operands.  LOAD indicates whether the access
15330 is load or store. CONST_STORE indicates a store of constant
15331 integer values held in OPERANDS[4,5] and assumes that the pattern
15332    is 4 insns long, for the purpose of checking dead registers.
15333 COMMUTE indicates that register operands may be reordered. */
15334 bool
15335 gen_operands_ldrd_strd (rtx *operands, bool load,
15336 bool const_store, bool commute)
15338 int nops = 2;
15339 HOST_WIDE_INT offsets[2], offset;
15340 rtx base = NULL_RTX;
15341 rtx cur_base, cur_offset, tmp;
15342 int i, gap;
15343 HARD_REG_SET regset;
15345 gcc_assert (!const_store || !load);
15346 /* Check that the memory references are immediate offsets from the
15347 same base register. Extract the base register, the destination
15348 registers, and the corresponding memory offsets. */
15349 for (i = 0; i < nops; i++)
15351 if (!mem_ok_for_ldrd_strd (operands[nops+i], &cur_base, &cur_offset))
15352 return false;
15354 if (i == 0)
15355 base = cur_base;
15356 else if (REGNO (base) != REGNO (cur_base))
15357 return false;
15359 offsets[i] = INTVAL (cur_offset);
15360 if (GET_CODE (operands[i]) == SUBREG)
15362 tmp = SUBREG_REG (operands[i]);
15363 gcc_assert (GET_MODE (operands[i]) == GET_MODE (tmp));
15364 operands[i] = tmp;
15368 /* Make sure there is no dependency between the individual loads. */
15369 if (load && REGNO (operands[0]) == REGNO (base))
15370 return false; /* RAW */
15372 if (load && REGNO (operands[0]) == REGNO (operands[1]))
15373 return false; /* WAW */
15375 /* If the same input register is used in both stores
15376 when storing different constants, try to find a free register.
15377 For example, the code
15378 mov r0, 0
15379 str r0, [r2]
15380 mov r0, 1
15381 str r0, [r2, #4]
15382 can be transformed into
15383 mov r1, 0
15384 mov r0, 1
15385 strd r1, r0, [r2]
15386 in Thumb mode assuming that r1 is free.
15387 For ARM mode do the same but only if the starting register
15388 can be made to be even. */
15389 if (const_store
15390 && REGNO (operands[0]) == REGNO (operands[1])
15391 && INTVAL (operands[4]) != INTVAL (operands[5]))
15393 if (TARGET_THUMB2)
15395 CLEAR_HARD_REG_SET (regset);
15396 tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
15397 if (tmp == NULL_RTX)
15398 return false;
15400 /* Use the new register in the first load to ensure that
15401 if the original input register is not dead after peephole,
15402 then it will have the correct constant value. */
15403 operands[0] = tmp;
15405 else if (TARGET_ARM)
15407 int regno = REGNO (operands[0]);
15408 if (!peep2_reg_dead_p (4, operands[0]))
15410 /* When the input register is even and is not dead after the
15411 pattern, it has to hold the second constant but we cannot
15412 form a legal STRD in ARM mode with this register as the second
15413 register. */
15414 if (regno % 2 == 0)
15415 return false;
15417 /* Is regno-1 free? */
15418 SET_HARD_REG_SET (regset);
15419 CLEAR_HARD_REG_BIT(regset, regno - 1);
15420 tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
15421 if (tmp == NULL_RTX)
15422 return false;
15424 operands[0] = tmp;
15426 else
15428 /* Find a DImode register. */
15429 CLEAR_HARD_REG_SET (regset);
15430 tmp = peep2_find_free_register (0, 4, "r", DImode, &regset);
15431 if (tmp != NULL_RTX)
15433 operands[0] = simplify_gen_subreg (SImode, tmp, DImode, 0);
15434 operands[1] = simplify_gen_subreg (SImode, tmp, DImode, 4);
15436 else
15438 /* Can we use the input register to form a DI register? */
15439 SET_HARD_REG_SET (regset);
15440 CLEAR_HARD_REG_BIT(regset,
15441 regno % 2 == 0 ? regno + 1 : regno - 1);
15442 tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
15443 if (tmp == NULL_RTX)
15444 return false;
15445 operands[regno % 2 == 1 ? 0 : 1] = tmp;
15449 gcc_assert (operands[0] != NULL_RTX);
15450 gcc_assert (operands[1] != NULL_RTX);
15451 gcc_assert (REGNO (operands[0]) % 2 == 0);
15452 gcc_assert (REGNO (operands[1]) == REGNO (operands[0]) + 1);
15456 /* Make sure the instructions are ordered with lower memory access first. */
15457 if (offsets[0] > offsets[1])
15459 gap = offsets[0] - offsets[1];
15460 offset = offsets[1];
15462 /* Swap the instructions such that lower memory is accessed first. */
15463 std::swap (operands[0], operands[1]);
15464 std::swap (operands[2], operands[3]);
15465 if (const_store)
15466 std::swap (operands[4], operands[5]);
15468 else
15470 gap = offsets[1] - offsets[0];
15471 offset = offsets[0];
15474 /* Make sure accesses are to consecutive memory locations. */
15475 if (gap != 4)
15476 return false;
15478 /* Make sure we generate legal instructions. */
15479 if (operands_ok_ldrd_strd (operands[0], operands[1], base, offset,
15480 false, load))
15481 return true;
15483 /* In Thumb state, where registers are almost unconstrained, there
15484      is little hope of fixing it.  */
15485 if (TARGET_THUMB2)
15486 return false;
15488 if (load && commute)
15490 /* Try reordering registers. */
15491 std::swap (operands[0], operands[1]);
15492 if (operands_ok_ldrd_strd (operands[0], operands[1], base, offset,
15493 false, load))
15494 return true;
15497 if (const_store)
15499 /* If input registers are dead after this pattern, they can be
15500 reordered or replaced by other registers that are free in the
15501 current pattern. */
15502 if (!peep2_reg_dead_p (4, operands[0])
15503 || !peep2_reg_dead_p (4, operands[1]))
15504 return false;
15506 /* Try to reorder the input registers. */
15507 /* For example, the code
15508 mov r0, 0
15509 mov r1, 1
15510 str r1, [r2]
15511 str r0, [r2, #4]
15512 can be transformed into
15513 mov r1, 0
15514 mov r0, 1
15515 strd r0, [r2]
15517 if (operands_ok_ldrd_strd (operands[1], operands[0], base, offset,
15518 false, false))
15520 std::swap (operands[0], operands[1]);
15521 return true;
15524 /* Try to find a free DI register. */
15525 CLEAR_HARD_REG_SET (regset);
15526 add_to_hard_reg_set (&regset, SImode, REGNO (operands[0]));
15527 add_to_hard_reg_set (&regset, SImode, REGNO (operands[1]));
15528 while (true)
15530 tmp = peep2_find_free_register (0, 4, "r", DImode, &regset);
15531 if (tmp == NULL_RTX)
15532 return false;
15534 /* DREG must be an even-numbered register in DImode.
15535 Split it into SI registers. */
15536 operands[0] = simplify_gen_subreg (SImode, tmp, DImode, 0);
15537 operands[1] = simplify_gen_subreg (SImode, tmp, DImode, 4);
15538 gcc_assert (operands[0] != NULL_RTX);
15539 gcc_assert (operands[1] != NULL_RTX);
15540 gcc_assert (REGNO (operands[0]) % 2 == 0);
15541 gcc_assert (REGNO (operands[0]) + 1 == REGNO (operands[1]));
15543 return (operands_ok_ldrd_strd (operands[0], operands[1],
15544 base, offset,
15545 false, load));
15549 return false;
15555 /* Print a symbolic form of X to the debug file, F. */
15556 static void
15557 arm_print_value (FILE *f, rtx x)
15559 switch (GET_CODE (x))
15561 case CONST_INT:
15562 fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (x));
15563 return;
15565 case CONST_DOUBLE:
15566 fprintf (f, "<0x%lx,0x%lx>", (long)XWINT (x, 2), (long)XWINT (x, 3));
15567 return;
15569 case CONST_VECTOR:
15571 int i;
15573 fprintf (f, "<");
15574 for (i = 0; i < CONST_VECTOR_NUNITS (x); i++)
15576 fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (CONST_VECTOR_ELT (x, i)));
15577 if (i < (CONST_VECTOR_NUNITS (x) - 1))
15578 fputc (',', f);
15580 fprintf (f, ">");
15582 return;
15584 case CONST_STRING:
15585 fprintf (f, "\"%s\"", XSTR (x, 0));
15586 return;
15588 case SYMBOL_REF:
15589 fprintf (f, "`%s'", XSTR (x, 0));
15590 return;
15592 case LABEL_REF:
15593 fprintf (f, "L%d", INSN_UID (XEXP (x, 0)));
15594 return;
15596 case CONST:
15597 arm_print_value (f, XEXP (x, 0));
15598 return;
15600 case PLUS:
15601 arm_print_value (f, XEXP (x, 0));
15602 fprintf (f, "+");
15603 arm_print_value (f, XEXP (x, 1));
15604 return;
15606 case PC:
15607 fprintf (f, "pc");
15608 return;
15610 default:
15611 fprintf (f, "????");
15612 return;
15616 /* Routines for manipulation of the constant pool. */
15618 /* Arm instructions cannot load a large constant directly into a
15619 register; they have to come from a pc relative load. The constant
15620 must therefore be placed in the addressable range of the pc
15621 relative load. Depending on the precise pc relative load
15622 instruction the range is somewhere between 256 bytes and 4k. This
15623 means that we often have to dump a constant inside a function, and
15624 generate code to branch around it.
15626 It is important to minimize this, since the branches will slow
15627 things down and make the code larger.
15629 Normally we can hide the table after an existing unconditional
15630 branch so that there is no interruption of the flow, but in the
15631 worst case the code looks like this:
15633 ldr rn, L1
15635 b L2
15636 align
15637 L1: .long value
15641 ldr rn, L3
15643 b L4
15644 align
15645 L3: .long value
15649 We fix this by performing a scan after scheduling, which notices
15650 which instructions need to have their operands fetched from the
15651 constant table and builds the table.
15653 The algorithm starts by building a table of all the constants that
15654 need fixing up and all the natural barriers in the function (places
15655 where a constant table can be dropped without breaking the flow).
15656 For each fixup we note how far the pc-relative replacement will be
15657 able to reach and the offset of the instruction into the function.
15659 Having built the table we then group the fixes together to form
15660 tables that are as large as possible (subject to addressing
15661 constraints) and emit each table of constants after the last
15662 barrier that is within range of all the instructions in the group.
15663 If a group does not contain a barrier, then we forcibly create one
15664 by inserting a jump instruction into the flow. Once the table has
15665 been inserted, the insns are then modified to reference the
15666 relevant entry in the pool.
15668 Possible enhancements to the algorithm (not implemented) are:
15670 1) For some processors and object formats, there may be benefit in
15671 aligning the pools to the start of cache lines; this alignment
15672 would need to be taken into account when calculating addressability
15673 of a pool. */
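/* The following is a minimal, self-contained sketch of the grouping step
   described above; it is illustrative only (added, not part of the
   original file) and uses simplified types.  Each fix can reach only a
   limited distance past its own address, so the pool serving a group
   must be emitted before the tightest such limit in the group.  */
#if 0
struct toy_fix { long address; long reach; };

/* Return the address before which a pool serving fixes START..*END
   (inclusive) must be placed.  The group is grown greedily while the
   next fix still falls before the current limit; fixes are assumed to
   be sorted by increasing address.  */
static long
toy_group_limit (const struct toy_fix *fixes, int n, int start, int *end)
{
  long limit = fixes[start].address + fixes[start].reach;
  int i = start;

  while (i + 1 < n && fixes[i + 1].address < limit)
    {
      long this_limit;

      i++;
      this_limit = fixes[i].address + fixes[i].reach;
      if (this_limit < limit)
	limit = this_limit;	/* The tightest fix constrains the group.  */
    }

  *end = i;
  return limit;
}
#endif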
15675 /* These typedefs are located at the start of this file, so that
15676 they can be used in the prototypes there. This comment is to
15677 remind readers of that fact so that the following structures
15678 can be understood more easily.
15680 typedef struct minipool_node Mnode;
15681 typedef struct minipool_fixup Mfix; */
15683 struct minipool_node
15685 /* Doubly linked chain of entries. */
15686 Mnode * next;
15687 Mnode * prev;
15688 /* The maximum offset into the code that this entry can be placed. While
15689 pushing fixes for forward references, all entries are sorted in order
15690 of increasing max_address. */
15691 HOST_WIDE_INT max_address;
15692 /* Similarly for an entry inserted for a backwards ref. */
15693 HOST_WIDE_INT min_address;
15694 /* The number of fixes referencing this entry. This can become zero
15695 if we "unpush" an entry. In this case we ignore the entry when we
15696 come to emit the code. */
15697 int refcount;
15698 /* The offset from the start of the minipool. */
15699 HOST_WIDE_INT offset;
15700 /* The value in table. */
15701 rtx value;
15702 /* The mode of value. */
15703 machine_mode mode;
15704 /* The size of the value. With iWMMXt enabled
15705      sizes > 4 also imply an alignment of 8 bytes.  */
15706 int fix_size;
15709 struct minipool_fixup
15711 Mfix * next;
15712 rtx_insn * insn;
15713 HOST_WIDE_INT address;
15714 rtx * loc;
15715 machine_mode mode;
15716 int fix_size;
15717 rtx value;
15718 Mnode * minipool;
15719 HOST_WIDE_INT forwards;
15720 HOST_WIDE_INT backwards;
15723 /* Fixes less than a word need padding out to a word boundary. */
15724 #define MINIPOOL_FIX_SIZE(mode) \
15725 (GET_MODE_SIZE ((mode)) >= 4 ? GET_MODE_SIZE ((mode)) : 4)
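/* Added example (not from the original source): MINIPOOL_FIX_SIZE (HImode)
   is 4, since the 2-byte value is padded out to a word, whereas
   MINIPOOL_FIX_SIZE (DImode) is simply 8.  */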
15727 static Mnode * minipool_vector_head;
15728 static Mnode * minipool_vector_tail;
15729 static rtx_code_label *minipool_vector_label;
15730 static int minipool_pad;
15732 /* The linked list of all minipool fixes required for this function. */
15733 Mfix * minipool_fix_head;
15734 Mfix * minipool_fix_tail;
15735 /* The fix entry for the current minipool, once it has been placed. */
15736 Mfix * minipool_barrier;
15738 #ifndef JUMP_TABLES_IN_TEXT_SECTION
15739 #define JUMP_TABLES_IN_TEXT_SECTION 0
15740 #endif
15742 static HOST_WIDE_INT
15743 get_jump_table_size (rtx_jump_table_data *insn)
15745   /* ADDR_VECs only take room if read-only data goes into the text
15746 section. */
15747 if (JUMP_TABLES_IN_TEXT_SECTION || readonly_data_section == text_section)
15749 rtx body = PATTERN (insn);
15750 int elt = GET_CODE (body) == ADDR_DIFF_VEC ? 1 : 0;
15751 HOST_WIDE_INT size;
15752 HOST_WIDE_INT modesize;
15754 modesize = GET_MODE_SIZE (GET_MODE (body));
15755 size = modesize * XVECLEN (body, elt);
15756 switch (modesize)
15758 case 1:
15759 /* Round up size of TBB table to a halfword boundary. */
15760 size = (size + 1) & ~HOST_WIDE_INT_1;
15761 break;
15762 case 2:
15763 /* No padding necessary for TBH. */
15764 break;
15765 case 4:
15766 /* Add two bytes for alignment on Thumb. */
15767 if (TARGET_THUMB)
15768 size += 2;
15769 break;
15770 default:
15771 gcc_unreachable ();
15773 return size;
15776 return 0;
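/* Added worked example (not from the original source): a QImode
   ADDR_DIFF_VEC (a Thumb-2 TBB table) with 5 entries occupies 5 bytes,
   rounded up to 6 for the halfword boundary; an HImode (TBH) table with
   5 entries needs exactly 10 bytes; an SImode table with 5 entries on
   Thumb takes 20 bytes plus 2 bytes of alignment.  */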
15779 /* Return the maximum amount of padding that will be inserted before
15780 label LABEL. */
15782 static HOST_WIDE_INT
15783 get_label_padding (rtx label)
15785 HOST_WIDE_INT align, min_insn_size;
15787 align = 1 << label_to_alignment (label);
15788 min_insn_size = TARGET_THUMB ? 2 : 4;
15789 return align > min_insn_size ? align - min_insn_size : 0;
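/* Added worked example (not from the original source): for a label
   aligned to 1 << 3 = 8 bytes, the maximum padding is 8 - 2 = 6 bytes on
   Thumb and 8 - 4 = 4 bytes on ARM; a label whose alignment does not
   exceed the minimum insn size needs no padding.  */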
15792 /* Move a minipool fix MP from its current location to before MAX_MP.
15793 If MAX_MP is NULL, then MP doesn't need moving, but the addressing
15794 constraints may need updating. */
15795 static Mnode *
15796 move_minipool_fix_forward_ref (Mnode *mp, Mnode *max_mp,
15797 HOST_WIDE_INT max_address)
15799 /* The code below assumes these are different. */
15800 gcc_assert (mp != max_mp);
15802 if (max_mp == NULL)
15804 if (max_address < mp->max_address)
15805 mp->max_address = max_address;
15807 else
15809 if (max_address > max_mp->max_address - mp->fix_size)
15810 mp->max_address = max_mp->max_address - mp->fix_size;
15811 else
15812 mp->max_address = max_address;
15814 /* Unlink MP from its current position. Since max_mp is non-null,
15815 mp->prev must be non-null. */
15816 mp->prev->next = mp->next;
15817 if (mp->next != NULL)
15818 mp->next->prev = mp->prev;
15819 else
15820 minipool_vector_tail = mp->prev;
15822 /* Re-insert it before MAX_MP. */
15823 mp->next = max_mp;
15824 mp->prev = max_mp->prev;
15825 max_mp->prev = mp;
15827 if (mp->prev != NULL)
15828 mp->prev->next = mp;
15829 else
15830 minipool_vector_head = mp;
15833 /* Save the new entry. */
15834 max_mp = mp;
15836 /* Scan over the preceding entries and adjust their addresses as
15837 required. */
15838 while (mp->prev != NULL
15839 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
15841 mp->prev->max_address = mp->max_address - mp->prev->fix_size;
15842 mp = mp->prev;
15845 return max_mp;
15848 /* Add a constant to the minipool for a forward reference. Returns the
15849 node added or NULL if the constant will not fit in this pool. */
15850 static Mnode *
15851 add_minipool_forward_ref (Mfix *fix)
15853 /* If set, max_mp is the first pool_entry that has a lower
15854 constraint than the one we are trying to add. */
15855 Mnode * max_mp = NULL;
15856 HOST_WIDE_INT max_address = fix->address + fix->forwards - minipool_pad;
15857 Mnode * mp;
15859 /* If the minipool starts before the end of FIX->INSN then this FIX
15860 can not be placed into the current pool. Furthermore, adding the
15861 new constant pool entry may cause the pool to start FIX_SIZE bytes
15862 earlier. */
15863 if (minipool_vector_head &&
15864 (fix->address + get_attr_length (fix->insn)
15865 >= minipool_vector_head->max_address - fix->fix_size))
15866 return NULL;
15868 /* Scan the pool to see if a constant with the same value has
15869 already been added. While we are doing this, also note the
15870 location where we must insert the constant if it doesn't already
15871 exist. */
15872 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
15874 if (GET_CODE (fix->value) == GET_CODE (mp->value)
15875 && fix->mode == mp->mode
15876 && (!LABEL_P (fix->value)
15877 || (CODE_LABEL_NUMBER (fix->value)
15878 == CODE_LABEL_NUMBER (mp->value)))
15879 && rtx_equal_p (fix->value, mp->value))
15881 /* More than one fix references this entry. */
15882 mp->refcount++;
15883 return move_minipool_fix_forward_ref (mp, max_mp, max_address);
15886 /* Note the insertion point if necessary. */
15887 if (max_mp == NULL
15888 && mp->max_address > max_address)
15889 max_mp = mp;
15891       /* If we are inserting an 8-byte aligned quantity and
15892 we have not already found an insertion point, then
15893 make sure that all such 8-byte aligned quantities are
15894 placed at the start of the pool. */
15895 if (ARM_DOUBLEWORD_ALIGN
15896 && max_mp == NULL
15897 && fix->fix_size >= 8
15898 && mp->fix_size < 8)
15900 max_mp = mp;
15901 max_address = mp->max_address;
15905 /* The value is not currently in the minipool, so we need to create
15906 a new entry for it. If MAX_MP is NULL, the entry will be put on
15907 the end of the list since the placement is less constrained than
15908 any existing entry. Otherwise, we insert the new fix before
15909 MAX_MP and, if necessary, adjust the constraints on the other
15910 entries. */
15911 mp = XNEW (Mnode);
15912 mp->fix_size = fix->fix_size;
15913 mp->mode = fix->mode;
15914 mp->value = fix->value;
15915 mp->refcount = 1;
15916 /* Not yet required for a backwards ref. */
15917 mp->min_address = -65536;
15919 if (max_mp == NULL)
15921 mp->max_address = max_address;
15922 mp->next = NULL;
15923 mp->prev = minipool_vector_tail;
15925 if (mp->prev == NULL)
15927 minipool_vector_head = mp;
15928 minipool_vector_label = gen_label_rtx ();
15930 else
15931 mp->prev->next = mp;
15933 minipool_vector_tail = mp;
15935 else
15937 if (max_address > max_mp->max_address - mp->fix_size)
15938 mp->max_address = max_mp->max_address - mp->fix_size;
15939 else
15940 mp->max_address = max_address;
15942 mp->next = max_mp;
15943 mp->prev = max_mp->prev;
15944 max_mp->prev = mp;
15945 if (mp->prev != NULL)
15946 mp->prev->next = mp;
15947 else
15948 minipool_vector_head = mp;
15951 /* Save the new entry. */
15952 max_mp = mp;
15954 /* Scan over the preceding entries and adjust their addresses as
15955 required. */
15956 while (mp->prev != NULL
15957 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
15959 mp->prev->max_address = mp->max_address - mp->prev->fix_size;
15960 mp = mp->prev;
15963 return max_mp;
15966 static Mnode *
15967 move_minipool_fix_backward_ref (Mnode *mp, Mnode *min_mp,
15968 HOST_WIDE_INT min_address)
15970 HOST_WIDE_INT offset;
15972 /* The code below assumes these are different. */
15973 gcc_assert (mp != min_mp);
15975 if (min_mp == NULL)
15977 if (min_address > mp->min_address)
15978 mp->min_address = min_address;
15980 else
15982 /* We will adjust this below if it is too loose. */
15983 mp->min_address = min_address;
15985 /* Unlink MP from its current position. Since min_mp is non-null,
15986 mp->next must be non-null. */
15987 mp->next->prev = mp->prev;
15988 if (mp->prev != NULL)
15989 mp->prev->next = mp->next;
15990 else
15991 minipool_vector_head = mp->next;
15993 /* Reinsert it after MIN_MP. */
15994 mp->prev = min_mp;
15995 mp->next = min_mp->next;
15996 min_mp->next = mp;
15997 if (mp->next != NULL)
15998 mp->next->prev = mp;
15999 else
16000 minipool_vector_tail = mp;
16003 min_mp = mp;
16005 offset = 0;
16006 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
16008 mp->offset = offset;
16009 if (mp->refcount > 0)
16010 offset += mp->fix_size;
16012 if (mp->next && mp->next->min_address < mp->min_address + mp->fix_size)
16013 mp->next->min_address = mp->min_address + mp->fix_size;
16016 return min_mp;
16019 /* Add a constant to the minipool for a backward reference. Returns the
16020 node added or NULL if the constant will not fit in this pool.
16022 Note that the code for insertion for a backwards reference can be
16023 somewhat confusing because the calculated offsets for each fix do
16024 not take into account the size of the pool (which is still under
16025    construction).  */
16026 static Mnode *
16027 add_minipool_backward_ref (Mfix *fix)
16029 /* If set, min_mp is the last pool_entry that has a lower constraint
16030 than the one we are trying to add. */
16031 Mnode *min_mp = NULL;
16032 /* This can be negative, since it is only a constraint. */
16033 HOST_WIDE_INT min_address = fix->address - fix->backwards;
16034 Mnode *mp;
16036 /* If we can't reach the current pool from this insn, or if we can't
16037 insert this entry at the end of the pool without pushing other
16038 fixes out of range, then we don't try. This ensures that we
16039 can't fail later on. */
16040 if (min_address >= minipool_barrier->address
16041 || (minipool_vector_tail->min_address + fix->fix_size
16042 >= minipool_barrier->address))
16043 return NULL;
16045 /* Scan the pool to see if a constant with the same value has
16046 already been added. While we are doing this, also note the
16047 location where we must insert the constant if it doesn't already
16048 exist. */
16049 for (mp = minipool_vector_tail; mp != NULL; mp = mp->prev)
16051 if (GET_CODE (fix->value) == GET_CODE (mp->value)
16052 && fix->mode == mp->mode
16053 && (!LABEL_P (fix->value)
16054 || (CODE_LABEL_NUMBER (fix->value)
16055 == CODE_LABEL_NUMBER (mp->value)))
16056 && rtx_equal_p (fix->value, mp->value)
16057 /* Check that there is enough slack to move this entry to the
16058 end of the table (this is conservative). */
16059 && (mp->max_address
16060 > (minipool_barrier->address
16061 + minipool_vector_tail->offset
16062 + minipool_vector_tail->fix_size)))
16064 mp->refcount++;
16065 return move_minipool_fix_backward_ref (mp, min_mp, min_address);
16068 if (min_mp != NULL)
16069 mp->min_address += fix->fix_size;
16070 else
16072 /* Note the insertion point if necessary. */
16073 if (mp->min_address < min_address)
16075 	  /* For now, we do not allow the insertion of nodes that require
16076 	     8-byte alignment anywhere but at the start of the pool.  */
16077 if (ARM_DOUBLEWORD_ALIGN
16078 && fix->fix_size >= 8 && mp->fix_size < 8)
16079 return NULL;
16080 else
16081 min_mp = mp;
16083 else if (mp->max_address
16084 < minipool_barrier->address + mp->offset + fix->fix_size)
16086 /* Inserting before this entry would push the fix beyond
16087 its maximum address (which can happen if we have
16088 re-located a forwards fix); force the new fix to come
16089 after it. */
16090 if (ARM_DOUBLEWORD_ALIGN
16091 && fix->fix_size >= 8 && mp->fix_size < 8)
16092 return NULL;
16093 else
16095 min_mp = mp;
16096 min_address = mp->min_address + fix->fix_size;
16099 /* Do not insert a non-8-byte aligned quantity before 8-byte
16100 aligned quantities. */
16101 else if (ARM_DOUBLEWORD_ALIGN
16102 && fix->fix_size < 8
16103 && mp->fix_size >= 8)
16105 min_mp = mp;
16106 min_address = mp->min_address + fix->fix_size;
16111 /* We need to create a new entry. */
16112 mp = XNEW (Mnode);
16113 mp->fix_size = fix->fix_size;
16114 mp->mode = fix->mode;
16115 mp->value = fix->value;
16116 mp->refcount = 1;
16117 mp->max_address = minipool_barrier->address + 65536;
16119 mp->min_address = min_address;
16121 if (min_mp == NULL)
16123 mp->prev = NULL;
16124 mp->next = minipool_vector_head;
16126 if (mp->next == NULL)
16128 minipool_vector_tail = mp;
16129 minipool_vector_label = gen_label_rtx ();
16131 else
16132 mp->next->prev = mp;
16134 minipool_vector_head = mp;
16136 else
16138 mp->next = min_mp->next;
16139 mp->prev = min_mp;
16140 min_mp->next = mp;
16142 if (mp->next != NULL)
16143 mp->next->prev = mp;
16144 else
16145 minipool_vector_tail = mp;
16148 /* Save the new entry. */
16149 min_mp = mp;
16151 if (mp->prev)
16152 mp = mp->prev;
16153 else
16154 mp->offset = 0;
16156 /* Scan over the following entries and adjust their offsets. */
16157 while (mp->next != NULL)
16159 if (mp->next->min_address < mp->min_address + mp->fix_size)
16160 mp->next->min_address = mp->min_address + mp->fix_size;
16162 if (mp->refcount)
16163 mp->next->offset = mp->offset + mp->fix_size;
16164 else
16165 mp->next->offset = mp->offset;
16167 mp = mp->next;
16170 return min_mp;
16173 static void
16174 assign_minipool_offsets (Mfix *barrier)
16176 HOST_WIDE_INT offset = 0;
16177 Mnode *mp;
16179 minipool_barrier = barrier;
16181 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
16183 mp->offset = offset;
16185 if (mp->refcount > 0)
16186 offset += mp->fix_size;
16190 /* Output the literal table */
16191 static void
16192 dump_minipool (rtx_insn *scan)
16194 Mnode * mp;
16195 Mnode * nmp;
16196 int align64 = 0;
16198 if (ARM_DOUBLEWORD_ALIGN)
16199 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
16200 if (mp->refcount > 0 && mp->fix_size >= 8)
16202 align64 = 1;
16203 break;
16206 if (dump_file)
16207 fprintf (dump_file,
16208 ";; Emitting minipool after insn %u; address %ld; align %d (bytes)\n",
16209 INSN_UID (scan), (unsigned long) minipool_barrier->address, align64 ? 8 : 4);
16211 scan = emit_label_after (gen_label_rtx (), scan);
16212 scan = emit_insn_after (align64 ? gen_align_8 () : gen_align_4 (), scan);
16213 scan = emit_label_after (minipool_vector_label, scan);
16215 for (mp = minipool_vector_head; mp != NULL; mp = nmp)
16217 if (mp->refcount > 0)
16219 if (dump_file)
16221 fprintf (dump_file,
16222 ";; Offset %u, min %ld, max %ld ",
16223 (unsigned) mp->offset, (unsigned long) mp->min_address,
16224 (unsigned long) mp->max_address);
16225 arm_print_value (dump_file, mp->value);
16226 fputc ('\n', dump_file);
16229 rtx val = copy_rtx (mp->value);
16231 switch (GET_MODE_SIZE (mp->mode))
16233 #ifdef HAVE_consttable_1
16234 case 1:
16235 scan = emit_insn_after (gen_consttable_1 (val), scan);
16236 break;
16238 #endif
16239 #ifdef HAVE_consttable_2
16240 case 2:
16241 scan = emit_insn_after (gen_consttable_2 (val), scan);
16242 break;
16244 #endif
16245 #ifdef HAVE_consttable_4
16246 case 4:
16247 scan = emit_insn_after (gen_consttable_4 (val), scan);
16248 break;
16250 #endif
16251 #ifdef HAVE_consttable_8
16252 case 8:
16253 scan = emit_insn_after (gen_consttable_8 (val), scan);
16254 break;
16256 #endif
16257 #ifdef HAVE_consttable_16
16258 case 16:
16259 scan = emit_insn_after (gen_consttable_16 (val), scan);
16260 break;
16262 #endif
16263 default:
16264 gcc_unreachable ();
16268 nmp = mp->next;
16269 free (mp);
16272 minipool_vector_head = minipool_vector_tail = NULL;
16273 scan = emit_insn_after (gen_consttable_end (), scan);
16274 scan = emit_barrier_after (scan);
16277 /* Return the cost of forcibly inserting a barrier after INSN. */
16278 static int
16279 arm_barrier_cost (rtx_insn *insn)
16281 /* Basing the location of the pool on the loop depth is preferable,
16282 but at the moment, the basic block information seems to be
16283 corrupt by this stage of the compilation. */
16284 int base_cost = 50;
16285 rtx_insn *next = next_nonnote_insn (insn);
16287 if (next != NULL && LABEL_P (next))
16288 base_cost -= 20;
16290 switch (GET_CODE (insn))
16292 case CODE_LABEL:
16293 /* It will always be better to place the table before the label, rather
16294 than after it. */
16295 return 50;
16297 case INSN:
16298 case CALL_INSN:
16299 return base_cost;
16301 case JUMP_INSN:
16302 return base_cost - 10;
16304 default:
16305 return base_cost + 10;
16309 /* Find the best place in the insn stream in the range
16310 (FIX->address,MAX_ADDRESS) to forcibly insert a minipool barrier.
16311 Create the barrier by inserting a jump and add a new fix entry for
16312 it. */
16313 static Mfix *
16314 create_fix_barrier (Mfix *fix, HOST_WIDE_INT max_address)
16316 HOST_WIDE_INT count = 0;
16317 rtx_barrier *barrier;
16318 rtx_insn *from = fix->insn;
16319 /* The instruction after which we will insert the jump. */
16320 rtx_insn *selected = NULL;
16321 int selected_cost;
16322 /* The address at which the jump instruction will be placed. */
16323 HOST_WIDE_INT selected_address;
16324 Mfix * new_fix;
16325 HOST_WIDE_INT max_count = max_address - fix->address;
16326 rtx_code_label *label = gen_label_rtx ();
16328 selected_cost = arm_barrier_cost (from);
16329 selected_address = fix->address;
16331 while (from && count < max_count)
16333 rtx_jump_table_data *tmp;
16334 int new_cost;
16336 /* This code shouldn't have been called if there was a natural barrier
16337 within range. */
16338 gcc_assert (!BARRIER_P (from));
16340 /* Count the length of this insn. This must stay in sync with the
16341 code that pushes minipool fixes. */
16342 if (LABEL_P (from))
16343 count += get_label_padding (from);
16344 else
16345 count += get_attr_length (from);
16347 /* If there is a jump table, add its length. */
16348 if (tablejump_p (from, NULL, &tmp))
16350 count += get_jump_table_size (tmp);
16352 /* Jump tables aren't in a basic block, so base the cost on
16353 the dispatch insn. If we select this location, we will
16354 still put the pool after the table. */
16355 new_cost = arm_barrier_cost (from);
16357 if (count < max_count
16358 && (!selected || new_cost <= selected_cost))
16360 selected = tmp;
16361 selected_cost = new_cost;
16362 selected_address = fix->address + count;
16365 /* Continue after the dispatch table. */
16366 from = NEXT_INSN (tmp);
16367 continue;
16370 new_cost = arm_barrier_cost (from);
16372 if (count < max_count
16373 && (!selected || new_cost <= selected_cost))
16375 selected = from;
16376 selected_cost = new_cost;
16377 selected_address = fix->address + count;
16380 from = NEXT_INSN (from);
16383 /* Make sure that we found a place to insert the jump. */
16384 gcc_assert (selected);
16386 /* Make sure we do not split a call and its corresponding
16387 CALL_ARG_LOCATION note. */
16388 if (CALL_P (selected))
16390 rtx_insn *next = NEXT_INSN (selected);
16391 if (next && NOTE_P (next)
16392 && NOTE_KIND (next) == NOTE_INSN_CALL_ARG_LOCATION)
16393 selected = next;
16396 /* Create a new JUMP_INSN that branches around a barrier. */
16397 from = emit_jump_insn_after (gen_jump (label), selected);
16398 JUMP_LABEL (from) = label;
16399 barrier = emit_barrier_after (from);
16400 emit_label_after (label, barrier);
16402 /* Create a minipool barrier entry for the new barrier. */
16403 new_fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* new_fix));
16404 new_fix->insn = barrier;
16405 new_fix->address = selected_address;
16406 new_fix->next = fix->next;
16407 fix->next = new_fix;
16409 return new_fix;
16412 /* Record that there is a natural barrier in the insn stream at
16413 ADDRESS. */
16414 static void
16415 push_minipool_barrier (rtx_insn *insn, HOST_WIDE_INT address)
16417 Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));
16419 fix->insn = insn;
16420 fix->address = address;
16422 fix->next = NULL;
16423 if (minipool_fix_head != NULL)
16424 minipool_fix_tail->next = fix;
16425 else
16426 minipool_fix_head = fix;
16428 minipool_fix_tail = fix;
16431 /* Record INSN, which will need fixing up to load a value from the
16432 minipool. ADDRESS is the offset of the insn since the start of the
16433 function; LOC is a pointer to the part of the insn which requires
16434 fixing; VALUE is the constant that must be loaded, which is of type
16435 MODE. */
16436 static void
16437 push_minipool_fix (rtx_insn *insn, HOST_WIDE_INT address, rtx *loc,
16438 machine_mode mode, rtx value)
16440 gcc_assert (!arm_disable_literal_pool);
16441 Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));
16443 fix->insn = insn;
16444 fix->address = address;
16445 fix->loc = loc;
16446 fix->mode = mode;
16447 fix->fix_size = MINIPOOL_FIX_SIZE (mode);
16448 fix->value = value;
16449 fix->forwards = get_attr_pool_range (insn);
16450 fix->backwards = get_attr_neg_pool_range (insn);
16451 fix->minipool = NULL;
16453 /* If an insn doesn't have a range defined for it, then it isn't
16454 expecting to be reworked by this code. Better to stop now than
16455 to generate duff assembly code. */
16456 gcc_assert (fix->forwards || fix->backwards);
16458 /* If an entry requires 8-byte alignment then assume all constant pools
16459 require 4 bytes of padding. Trying to do this later on a per-pool
16460 basis is awkward because existing pool entries have to be modified. */
16461 if (ARM_DOUBLEWORD_ALIGN && fix->fix_size >= 8)
16462 minipool_pad = 4;
16464 if (dump_file)
16466 fprintf (dump_file,
16467 ";; %smode fixup for i%d; addr %lu, range (%ld,%ld): ",
16468 GET_MODE_NAME (mode),
16469 INSN_UID (insn), (unsigned long) address,
16470 -1 * (long)fix->backwards, (long)fix->forwards);
16471 arm_print_value (dump_file, fix->value);
16472 fprintf (dump_file, "\n");
16475 /* Add it to the chain of fixes. */
16476 fix->next = NULL;
16478 if (minipool_fix_head != NULL)
16479 minipool_fix_tail->next = fix;
16480 else
16481 minipool_fix_head = fix;
16483 minipool_fix_tail = fix;
16486 /* Return maximum allowed cost of synthesizing a 64-bit constant VAL inline.
16487 Returns the number of insns needed, or 99 if we always want to synthesize
16488 the value. */
16490 arm_max_const_double_inline_cost ()
16492 return ((optimize_size || arm_ld_sched) ? 3 : 4);
16495 /* Return the cost of synthesizing a 64-bit constant VAL inline.
16496 Returns the number of insns needed, or 99 if we don't know how to
16497 do it. */
16499 arm_const_double_inline_cost (rtx val)
16501 rtx lowpart, highpart;
16502 machine_mode mode;
16504 mode = GET_MODE (val);
16506 if (mode == VOIDmode)
16507 mode = DImode;
16509 gcc_assert (GET_MODE_SIZE (mode) == 8);
16511 lowpart = gen_lowpart (SImode, val);
16512 highpart = gen_highpart_mode (SImode, mode, val);
16514 gcc_assert (CONST_INT_P (lowpart));
16515 gcc_assert (CONST_INT_P (highpart));
16517 return (arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (lowpart),
16518 NULL_RTX, NULL_RTX, 0, 0)
16519 + arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (highpart),
16520 NULL_RTX, NULL_RTX, 0, 0));
16523 /* Cost of loading a SImode constant. */
16524 static inline int
16525 arm_const_inline_cost (enum rtx_code code, rtx val)
16527 return arm_gen_constant (code, SImode, NULL_RTX, INTVAL (val),
16528 NULL_RTX, NULL_RTX, 1, 0);
16531 /* Return true if it is worthwhile to split a 64-bit constant into two
16532 32-bit operations. This is the case if optimizing for size, or
16533 if we have load delay slots, or if one 32-bit part can be done with
16534 a single data operation. */
16535 bool
16536 arm_const_double_by_parts (rtx val)
16538 machine_mode mode = GET_MODE (val);
16539 rtx part;
16541 if (optimize_size || arm_ld_sched)
16542 return true;
16544 if (mode == VOIDmode)
16545 mode = DImode;
16547 part = gen_highpart_mode (SImode, mode, val);
16549 gcc_assert (CONST_INT_P (part));
16551 if (const_ok_for_arm (INTVAL (part))
16552 || const_ok_for_arm (~INTVAL (part)))
16553 return true;
16555 part = gen_lowpart (SImode, val);
16557 gcc_assert (CONST_INT_P (part));
16559 if (const_ok_for_arm (INTVAL (part))
16560 || const_ok_for_arm (~INTVAL (part)))
16561 return true;
16563 return false;
16566 /* Return true if it is possible to inline both the high and low parts
16567 of a 64-bit constant into 32-bit data processing instructions. */
16568 bool
16569 arm_const_double_by_immediates (rtx val)
16571 machine_mode mode = GET_MODE (val);
16572 rtx part;
16574 if (mode == VOIDmode)
16575 mode = DImode;
16577 part = gen_highpart_mode (SImode, mode, val);
16579 gcc_assert (CONST_INT_P (part));
16581 if (!const_ok_for_arm (INTVAL (part)))
16582 return false;
16584 part = gen_lowpart (SImode, val);
16586 gcc_assert (CONST_INT_P (part));
16588 if (!const_ok_for_arm (INTVAL (part)))
16589 return false;
16591 return true;
16594 /* Scan INSN and note any of its operands that need fixing.
16595 If DO_PUSHES is false we do not actually push any of the fixups
16596 needed. */
16597 static void
16598 note_invalid_constants (rtx_insn *insn, HOST_WIDE_INT address, int do_pushes)
16600 int opno;
16602 extract_constrain_insn (insn);
16604 if (recog_data.n_alternatives == 0)
16605 return;
16607 /* Fill in recog_op_alt with information about the constraints of
16608 this insn. */
16609 preprocess_constraints (insn);
16611 const operand_alternative *op_alt = which_op_alt ();
16612 for (opno = 0; opno < recog_data.n_operands; opno++)
16614 /* Things we need to fix can only occur in inputs. */
16615 if (recog_data.operand_type[opno] != OP_IN)
16616 continue;
16618 /* If this alternative is a memory reference, then any mention
16619 of constants in this alternative is really to fool reload
16620 into allowing us to accept one there. We need to fix them up
16621 now so that we output the right code. */
16622 if (op_alt[opno].memory_ok)
16624 rtx op = recog_data.operand[opno];
16626 if (CONSTANT_P (op))
16628 if (do_pushes)
16629 push_minipool_fix (insn, address, recog_data.operand_loc[opno],
16630 recog_data.operand_mode[opno], op);
16632 else if (MEM_P (op)
16633 && GET_CODE (XEXP (op, 0)) == SYMBOL_REF
16634 && CONSTANT_POOL_ADDRESS_P (XEXP (op, 0)))
16636 if (do_pushes)
16638 rtx cop = avoid_constant_pool_reference (op);
16640 /* Casting the address of something to a mode narrower
16641 than a word can cause avoid_constant_pool_reference()
16642 to return the pool reference itself. That's no good to
16643 		     us here.  Let's just hope that we can use the
16644 constant pool value directly. */
16645 if (op == cop)
16646 cop = get_pool_constant (XEXP (op, 0));
16648 push_minipool_fix (insn, address,
16649 recog_data.operand_loc[opno],
16650 recog_data.operand_mode[opno], cop);
16657 return;
16660 /* This function computes the clear mask and PADDING_BITS_TO_CLEAR for structs
16661 and unions in the context of ARMv8-M Security Extensions. It is used as a
16662 helper function for both 'cmse_nonsecure_call' and 'cmse_nonsecure_entry'
16663 functions. The PADDING_BITS_TO_CLEAR pointer can be the base to either one
16664 or four masks, depending on whether it is being computed for a
16665 'cmse_nonsecure_entry' return value or a 'cmse_nonsecure_call' argument
16666 respectively. The tree for the type of the argument or a field within an
16667 argument is passed in ARG_TYPE, the current register this argument or field
16668 starts in is kept in the pointer REGNO and updated accordingly, the bit this
16669 argument or field starts at is passed in STARTING_BIT and the last used bit
16670 is kept in LAST_USED_BIT which is also updated accordingly. */
16672 static unsigned HOST_WIDE_INT
16673 comp_not_to_clear_mask_str_un (tree arg_type, int * regno,
16674 uint32_t * padding_bits_to_clear,
16675 unsigned starting_bit, int * last_used_bit)
16678 unsigned HOST_WIDE_INT not_to_clear_reg_mask = 0;
16680 if (TREE_CODE (arg_type) == RECORD_TYPE)
16682 unsigned current_bit = starting_bit;
16683 tree field;
16684 long int offset, size;
16687 field = TYPE_FIELDS (arg_type);
16688 while (field)
16690 /* The offset within a structure is always an offset from
16691 the start of that structure. Make sure we take that into the
16692 calculation of the register based offset that we use here. */
16693 offset = starting_bit;
16694 offset += TREE_INT_CST_ELT (DECL_FIELD_BIT_OFFSET (field), 0);
16695 offset %= 32;
16697 /* This is the actual size of the field, for bitfields this is the
16698 bitfield width and not the container size. */
16699 size = TREE_INT_CST_ELT (DECL_SIZE (field), 0);
16701 if (*last_used_bit != offset)
16703 if (offset < *last_used_bit)
16705 /* This field's offset is before the 'last_used_bit', that
16706 means this field goes on the next register. So we need to
16707 pad the rest of the current register and increase the
16708 register number. */
16709 uint32_t mask;
16710 mask = ((uint32_t)-1) - ((uint32_t) 1 << *last_used_bit);
16711 mask++;
16713 padding_bits_to_clear[*regno] |= mask;
16714 not_to_clear_reg_mask |= HOST_WIDE_INT_1U << *regno;
16715 (*regno)++;
16717 else
16719 /* Otherwise we pad the bits between the last field's end and
16720 the start of the new field. */
16721 uint32_t mask;
16723 mask = ((uint32_t)-1) >> (32 - offset);
16724 mask -= ((uint32_t) 1 << *last_used_bit) - 1;
16725 padding_bits_to_clear[*regno] |= mask;
16727 current_bit = offset;
16730 /* Calculate further padding bits for inner structs/unions too. */
16731 if (RECORD_OR_UNION_TYPE_P (TREE_TYPE (field)))
16733 *last_used_bit = current_bit;
16734 not_to_clear_reg_mask
16735 |= comp_not_to_clear_mask_str_un (TREE_TYPE (field), regno,
16736 padding_bits_to_clear, offset,
16737 last_used_bit);
16739 else
16741 /* Update 'current_bit' with this field's size. If the
16742 'current_bit' lies in a subsequent register, update 'regno' and
16743 reset 'current_bit' to point to the current bit in that new
16744 register. */
16745 current_bit += size;
16746 while (current_bit >= 32)
16748 current_bit-=32;
16749 not_to_clear_reg_mask |= HOST_WIDE_INT_1U << *regno;
16750 (*regno)++;
16752 *last_used_bit = current_bit;
16755 field = TREE_CHAIN (field);
16757 not_to_clear_reg_mask |= HOST_WIDE_INT_1U << *regno;
16759 else if (TREE_CODE (arg_type) == UNION_TYPE)
16761 tree field, field_t;
16762 int i, regno_t, field_size;
16763 int max_reg = -1;
16764 int max_bit = -1;
16765 uint32_t mask;
16766 uint32_t padding_bits_to_clear_res[NUM_ARG_REGS]
16767 = {-1, -1, -1, -1};
16769 /* To compute the padding bits in a union we only consider bits as
16770 padding bits if they are always either a padding bit or fall outside a
16771 	 field's size for all fields in the union.  */
16772 field = TYPE_FIELDS (arg_type);
16773 while (field)
16775 uint32_t padding_bits_to_clear_t[NUM_ARG_REGS]
16776 = {0U, 0U, 0U, 0U};
16777 int last_used_bit_t = *last_used_bit;
16778 regno_t = *regno;
16779 field_t = TREE_TYPE (field);
16781 /* If the field's type is either a record or a union make sure to
16782 compute their padding bits too. */
16783 if (RECORD_OR_UNION_TYPE_P (field_t))
16784 not_to_clear_reg_mask
16785 |= comp_not_to_clear_mask_str_un (field_t, &regno_t,
16786 &padding_bits_to_clear_t[0],
16787 starting_bit, &last_used_bit_t);
16788 else
16790 field_size = TREE_INT_CST_ELT (DECL_SIZE (field), 0);
16791 regno_t = (field_size / 32) + *regno;
16792 last_used_bit_t = (starting_bit + field_size) % 32;
16795 for (i = *regno; i < regno_t; i++)
16797 /* For all but the last register used by this field only keep the
16798 padding bits that were padding bits in this field. */
16799 padding_bits_to_clear_res[i] &= padding_bits_to_clear_t[i];
16802 /* For the last register, keep all padding bits that were padding
16803 bits in this field and any padding bits that are still valid
16804 as padding bits but fall outside of this field's size. */
16805 mask = (((uint32_t) -1) - ((uint32_t) 1 << last_used_bit_t)) + 1;
16806 padding_bits_to_clear_res[regno_t]
16807 &= padding_bits_to_clear_t[regno_t] | mask;
16809 /* Update the maximum size of the fields in terms of registers used
16810 ('max_reg') and the 'last_used_bit' in said register. */
16811 if (max_reg < regno_t)
16813 max_reg = regno_t;
16814 max_bit = last_used_bit_t;
16816 else if (max_reg == regno_t && max_bit < last_used_bit_t)
16817 max_bit = last_used_bit_t;
16819 field = TREE_CHAIN (field);
16822 /* Update the current padding_bits_to_clear using the intersection of the
16823 padding bits of all the fields. */
16824 for (i=*regno; i < max_reg; i++)
16825 padding_bits_to_clear[i] |= padding_bits_to_clear_res[i];
16827       /* Do not keep trailing padding bits; we do not know yet whether this
16828 is the end of the argument. */
16829 mask = ((uint32_t) 1 << max_bit) - 1;
16830 padding_bits_to_clear[max_reg]
16831 |= padding_bits_to_clear_res[max_reg] & mask;
16833 *regno = max_reg;
16834 *last_used_bit = max_bit;
16836 else
16837 /* This function should only be used for structs and unions. */
16838 gcc_unreachable ();
16840 return not_to_clear_reg_mask;
16843 /* In the context of ARMv8-M Security Extensions, this function is used for both
16844 'cmse_nonsecure_call' and 'cmse_nonsecure_entry' functions to compute what
16845 registers are used when returning or passing arguments, which is then
16846 returned as a mask. It will also compute a mask to indicate padding/unused
16847 bits for each of these registers, and passes this through the
16848 PADDING_BITS_TO_CLEAR pointer. The tree of the argument type is passed in
16849 ARG_TYPE, the rtl representation of the argument is passed in ARG_RTX and
16850 the starting register used to pass this argument or return value is passed
16851 in REGNO. It makes use of 'comp_not_to_clear_mask_str_un' to compute these
16852 for struct and union types. */
16854 static unsigned HOST_WIDE_INT
16855 compute_not_to_clear_mask (tree arg_type, rtx arg_rtx, int regno,
16856 uint32_t * padding_bits_to_clear)
16859 int last_used_bit = 0;
16860 unsigned HOST_WIDE_INT not_to_clear_mask;
16862 if (RECORD_OR_UNION_TYPE_P (arg_type))
16864 not_to_clear_mask
16865 = comp_not_to_clear_mask_str_un (arg_type, &regno,
16866 padding_bits_to_clear, 0,
16867 &last_used_bit);
16870 /* If the 'last_used_bit' is not zero, that means we are still using a
16871 part of the last 'regno'. In such cases we must clear the trailing
16872 bits. Otherwise we are not using regno and we should mark it as to
16873 clear. */
16874 if (last_used_bit != 0)
16875 padding_bits_to_clear[regno]
16876 |= ((uint32_t)-1) - ((uint32_t) 1 << last_used_bit) + 1;
16877 else
16878 not_to_clear_mask &= ~(HOST_WIDE_INT_1U << regno);
16880 else
16882 not_to_clear_mask = 0;
16883       /* We are not dealing with structs or unions, so these arguments may be
16884 passed in floating point registers too. In some cases a BLKmode is
16885 used when returning or passing arguments in multiple VFP registers. */
16886 if (GET_MODE (arg_rtx) == BLKmode)
16888 int i, arg_regs;
16889 rtx reg;
16891 /* This should really only occur when dealing with the hard-float
16892 ABI. */
16893 gcc_assert (TARGET_HARD_FLOAT_ABI);
16895 for (i = 0; i < XVECLEN (arg_rtx, 0); i++)
16897 reg = XEXP (XVECEXP (arg_rtx, 0, i), 0);
16898 gcc_assert (REG_P (reg));
16900 not_to_clear_mask |= HOST_WIDE_INT_1U << REGNO (reg);
16902 /* If we are dealing with DF mode, make sure we don't
16903 clear either of the registers it addresses. */
16904 arg_regs = ARM_NUM_REGS (GET_MODE (reg));
16905 if (arg_regs > 1)
16907 unsigned HOST_WIDE_INT mask;
16908 mask = HOST_WIDE_INT_1U << (REGNO (reg) + arg_regs);
16909 mask -= HOST_WIDE_INT_1U << REGNO (reg);
16910 not_to_clear_mask |= mask;
16914 else
16916 /* Otherwise we can rely on the MODE to determine how many registers
16917 are being used by this argument. */
16918 int arg_regs = ARM_NUM_REGS (GET_MODE (arg_rtx));
16919 not_to_clear_mask |= HOST_WIDE_INT_1U << REGNO (arg_rtx);
16920 if (arg_regs > 1)
16922 unsigned HOST_WIDE_INT
16923 mask = HOST_WIDE_INT_1U << (REGNO (arg_rtx) + arg_regs);
16924 mask -= HOST_WIDE_INT_1U << REGNO (arg_rtx);
16925 not_to_clear_mask |= mask;
16930 return not_to_clear_mask;
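/* Added worked example (not from the original source), assuming the usual
   AAPCS layout: for an argument of type struct { char a; short b; }
   passed in r0, 'a' occupies bits 0-7 and 'b' bits 16-31, so bits 8-15
   are padding; the helpers above record 0x0000ff00 in
   padding_bits_to_clear[0] and return a mask with only the r0 bit set,
   i.e. r0 holds live argument data and every other argument register may
   be cleared.  */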
16933 /* Clears caller saved registers not used to pass arguments before a
16934 cmse_nonsecure_call. Saving, clearing and restoring of callee saved
16935 registers is done in __gnu_cmse_nonsecure_call libcall.
16936 See libgcc/config/arm/cmse_nonsecure_call.S. */
16938 static void
16939 cmse_nonsecure_call_clear_caller_saved (void)
16941 basic_block bb;
16943 FOR_EACH_BB_FN (bb, cfun)
16945 rtx_insn *insn;
16947 FOR_BB_INSNS (bb, insn)
16949 uint64_t to_clear_mask, float_mask;
16950 rtx_insn *seq;
16951 rtx pat, call, unspec, reg, cleared_reg, tmp;
16952 unsigned int regno, maxregno;
16953 rtx address;
16954 CUMULATIVE_ARGS args_so_far_v;
16955 cumulative_args_t args_so_far;
16956 tree arg_type, fntype;
16957 bool using_r4, first_param = true;
16958 function_args_iterator args_iter;
16959 uint32_t padding_bits_to_clear[4] = {0U, 0U, 0U, 0U};
16960 uint32_t * padding_bits_to_clear_ptr = &padding_bits_to_clear[0];
16962 if (!NONDEBUG_INSN_P (insn))
16963 continue;
16965 if (!CALL_P (insn))
16966 continue;
16968 pat = PATTERN (insn);
16969 gcc_assert (GET_CODE (pat) == PARALLEL && XVECLEN (pat, 0) > 0);
16970 call = XVECEXP (pat, 0, 0);
16972 /* Get the real call RTX if the insn sets a value, i.e. returns. */
16973 if (GET_CODE (call) == SET)
16974 call = SET_SRC (call);
16976 /* Check if it is a cmse_nonsecure_call. */
16977 unspec = XEXP (call, 0);
16978 if (GET_CODE (unspec) != UNSPEC
16979 || XINT (unspec, 1) != UNSPEC_NONSECURE_MEM)
16980 continue;
16982 /* Determine the caller-saved registers we need to clear. */
16983 to_clear_mask = (1LL << (NUM_ARG_REGS)) - 1;
16984 maxregno = NUM_ARG_REGS - 1;
16985 /* Only look at the caller-saved floating point registers in case of
16986 -mfloat-abi=hard. For -mfloat-abi=softfp we will be using the
16987 lazy store and loads which clear both caller- and callee-saved
16988 registers. */
16989 if (TARGET_HARD_FLOAT_ABI)
16991 float_mask = (1LL << (D7_VFP_REGNUM + 1)) - 1;
16992 float_mask &= ~((1LL << FIRST_VFP_REGNUM) - 1);
16993 to_clear_mask |= float_mask;
16994 maxregno = D7_VFP_REGNUM;
16997 /* Make sure the register used to hold the function address is not
16998 cleared. */
16999 address = RTVEC_ELT (XVEC (unspec, 0), 0);
17000 gcc_assert (MEM_P (address));
17001 gcc_assert (REG_P (XEXP (address, 0)));
17002 to_clear_mask &= ~(1LL << REGNO (XEXP (address, 0)));
17004 /* Set basic block of call insn so that df rescan is performed on
17005 insns inserted here. */
17006 set_block_for_insn (insn, bb);
17007 df_set_flags (DF_DEFER_INSN_RESCAN);
17008 start_sequence ();
17010 /* Make sure the scheduler doesn't schedule other insns beyond
17011 here. */
17012 emit_insn (gen_blockage ());
17014 /* Walk through all arguments and clear registers appropriately. */
17016 fntype = TREE_TYPE (MEM_EXPR (address));
17017 arm_init_cumulative_args (&args_so_far_v, fntype, NULL_RTX,
17018 NULL_TREE);
17019 args_so_far = pack_cumulative_args (&args_so_far_v);
17020 FOREACH_FUNCTION_ARGS (fntype, arg_type, args_iter)
17022 rtx arg_rtx;
17023 machine_mode arg_mode = TYPE_MODE (arg_type);
17025 if (VOID_TYPE_P (arg_type))
17026 continue;
17028 if (!first_param)
17029 arm_function_arg_advance (args_so_far, arg_mode, arg_type,
17030 true);
17032 arg_rtx = arm_function_arg (args_so_far, arg_mode, arg_type,
17033 true);
17034 gcc_assert (REG_P (arg_rtx));
17035 to_clear_mask
17036 &= ~compute_not_to_clear_mask (arg_type, arg_rtx,
17037 REGNO (arg_rtx),
17038 padding_bits_to_clear_ptr);
17040 first_param = false;
17043 /* Clear padding bits where needed. */
17044 cleared_reg = XEXP (address, 0);
17045 reg = gen_rtx_REG (SImode, IP_REGNUM);
17046 using_r4 = false;
17047 for (regno = R0_REGNUM; regno < NUM_ARG_REGS; regno++)
17049 if (padding_bits_to_clear[regno] == 0)
17050 continue;
17052 /* If this is a Thumb-1 target, copy the address of the function
17053 we are calling from 'r4' into 'ip' such that we can use r4 to
17054 clear the unused bits in the arguments. */
17055 if (TARGET_THUMB1 && !using_r4)
17057 using_r4 = true;
17058 reg = cleared_reg;
17059 emit_move_insn (gen_rtx_REG (SImode, IP_REGNUM),
17060 reg);
17063 tmp = GEN_INT ((((~padding_bits_to_clear[regno]) << 16u) >> 16u));
17064 emit_move_insn (reg, tmp);
17065 /* Also fill the top half of the negated
17066 padding_bits_to_clear. */
17067 if (((~padding_bits_to_clear[regno]) >> 16) > 0)
17069 tmp = GEN_INT ((~padding_bits_to_clear[regno]) >> 16);
17070 emit_insn (gen_rtx_SET (gen_rtx_ZERO_EXTRACT (SImode, reg,
17071 GEN_INT (16),
17072 GEN_INT (16)),
17073 tmp));
17076 emit_insn (gen_andsi3 (gen_rtx_REG (SImode, regno),
17077 gen_rtx_REG (SImode, regno),
17078 reg));
17081 if (using_r4)
17082 emit_move_insn (cleared_reg,
17083 gen_rtx_REG (SImode, IP_REGNUM));
17085 /* We use right shift and left shift to clear the LSB of the address
17086 we jump to instead of using bic, to avoid having to use an extra
17087 register on Thumb-1. */
17088 tmp = gen_rtx_LSHIFTRT (SImode, cleared_reg, const1_rtx);
17089 emit_insn (gen_rtx_SET (cleared_reg, tmp));
17090 tmp = gen_rtx_ASHIFT (SImode, cleared_reg, const1_rtx);
17091 emit_insn (gen_rtx_SET (cleared_reg, tmp));
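      /* Illustrative example: with a function address of 0x08000205 (the
         Thumb bit set), the right shift leaves 0x04000102 and the left
         shift restores 0x08000204, i.e. only bit 0 has been cleared.  */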
17093 /* Clear all registers that could leak before doing a non-secure
17094 call. */
17095 for (regno = R0_REGNUM; regno <= maxregno; regno++)
17097 if (!(to_clear_mask & (1LL << regno)))
17098 continue;
17100 /* If regno is an even vfp register and its successor is also to
17101 be cleared, use vmov. */
17102 if (IS_VFP_REGNUM (regno))
17104 if (TARGET_VFP_DOUBLE
17105 && VFP_REGNO_OK_FOR_DOUBLE (regno)
17106 && to_clear_mask & (1LL << (regno + 1)))
17107 emit_move_insn (gen_rtx_REG (DFmode, regno++),
17108 CONST0_RTX (DFmode));
17109 else
17110 emit_move_insn (gen_rtx_REG (SFmode, regno),
17111 CONST0_RTX (SFmode));
17113 else
17114 emit_move_insn (gen_rtx_REG (SImode, regno), cleared_reg);
17117 seq = get_insns ();
17118 end_sequence ();
17119 emit_insn_before (seq, insn);
17125 /* Rewrite move insn into subtract of 0 if the condition codes will
17126 be useful in next conditional jump insn. */
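/* Illustrative example of the transformation: a Thumb-1 sequence such as

       movs    r1, r2
       cmp     r1, #0
       beq     .L1

   can be rewritten so that the move becomes

       subs    r1, r2, #0

   after which the flags already reflect the tested value and the separate
   compare can normally be omitted.  The register names here are purely
   illustrative.  */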
17128 static void
17129 thumb1_reorg (void)
17131 basic_block bb;
17133 FOR_EACH_BB_FN (bb, cfun)
17135 rtx dest, src;
17136 rtx cmp, op0, op1, set = NULL;
17137 rtx_insn *prev, *insn = BB_END (bb);
17138 bool insn_clobbered = false;
17140 while (insn != BB_HEAD (bb) && !NONDEBUG_INSN_P (insn))
17141 insn = PREV_INSN (insn);
17143 /* Find the last cbranchsi4_insn in basic block BB. */
17144 if (insn == BB_HEAD (bb)
17145 || INSN_CODE (insn) != CODE_FOR_cbranchsi4_insn)
17146 continue;
17148 /* Get the register with which we are comparing. */
17149 cmp = XEXP (SET_SRC (PATTERN (insn)), 0);
17150 op0 = XEXP (cmp, 0);
17151 op1 = XEXP (cmp, 1);
17153 /* Check that comparison is against ZERO. */
17154 if (!CONST_INT_P (op1) || INTVAL (op1) != 0)
17155 continue;
17157 /* Find the first flag setting insn before INSN in basic block BB. */
17158 gcc_assert (insn != BB_HEAD (bb));
17159 for (prev = PREV_INSN (insn);
17160 (!insn_clobbered
17161 && prev != BB_HEAD (bb)
17162 && (NOTE_P (prev)
17163 || DEBUG_INSN_P (prev)
17164 || ((set = single_set (prev)) != NULL
17165 && get_attr_conds (prev) == CONDS_NOCOND)));
17166 prev = PREV_INSN (prev))
17168 if (reg_set_p (op0, prev))
17169 insn_clobbered = true;
17172 /* Skip if op0 is clobbered by insn other than prev. */
17173 if (insn_clobbered)
17174 continue;
17176 if (!set)
17177 continue;
17179 dest = SET_DEST (set);
17180 src = SET_SRC (set);
17181 if (!low_register_operand (dest, SImode)
17182 || !low_register_operand (src, SImode))
17183 continue;
17185 /* Rewrite move into subtract of 0 if its operand is compared with ZERO
17186 in INSN. Both src and dest of the move insn are checked. */
17187 if (REGNO (op0) == REGNO (src) || REGNO (op0) == REGNO (dest))
17189 dest = copy_rtx (dest);
17190 src = copy_rtx (src);
17191 src = gen_rtx_MINUS (SImode, src, const0_rtx);
17192 PATTERN (prev) = gen_rtx_SET (dest, src);
17193 INSN_CODE (prev) = -1;
17194 /* Set test register in INSN to dest. */
17195 XEXP (cmp, 0) = copy_rtx (dest);
17196 INSN_CODE (insn) = -1;
17201 /* Convert instructions to their cc-clobbering variant if possible, since
17202 that allows us to use smaller encodings. */
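/* Illustrative example: when the condition codes are dead, a 32-bit Thumb-2
   instruction such as

       add     r0, r1, r2

   can be replaced by the flag-setting form

       adds    r0, r1, r2

   which has a 16-bit encoding.  The register names are illustrative only.  */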
17204 static void
17205 thumb2_reorg (void)
17207 basic_block bb;
17208 regset_head live;
17210 INIT_REG_SET (&live);
17212 /* We are freeing block_for_insn in the toplev to keep compatibility
17213 with old MDEP_REORGS that are not CFG based. Recompute it now. */
17214 compute_bb_for_insn ();
17215 df_analyze ();
17217 enum Convert_Action {SKIP, CONV, SWAP_CONV};
17219 FOR_EACH_BB_FN (bb, cfun)
17221 if ((current_tune->disparage_flag_setting_t16_encodings
17222 == tune_params::DISPARAGE_FLAGS_ALL)
17223 && optimize_bb_for_speed_p (bb))
17224 continue;
17226 rtx_insn *insn;
17227 Convert_Action action = SKIP;
17228 Convert_Action action_for_partial_flag_setting
17229 = ((current_tune->disparage_flag_setting_t16_encodings
17230 != tune_params::DISPARAGE_FLAGS_NEITHER)
17231 && optimize_bb_for_speed_p (bb))
17232 ? SKIP : CONV;
17234 COPY_REG_SET (&live, DF_LR_OUT (bb));
17235 df_simulate_initialize_backwards (bb, &live);
17236 FOR_BB_INSNS_REVERSE (bb, insn)
17238 if (NONJUMP_INSN_P (insn)
17239 && !REGNO_REG_SET_P (&live, CC_REGNUM)
17240 && GET_CODE (PATTERN (insn)) == SET)
17242 action = SKIP;
17243 rtx pat = PATTERN (insn);
17244 rtx dst = XEXP (pat, 0);
17245 rtx src = XEXP (pat, 1);
17246 rtx op0 = NULL_RTX, op1 = NULL_RTX;
17248 if (UNARY_P (src) || BINARY_P (src))
17249 op0 = XEXP (src, 0);
17251 if (BINARY_P (src))
17252 op1 = XEXP (src, 1);
17254 if (low_register_operand (dst, SImode))
17256 switch (GET_CODE (src))
17258 case PLUS:
17259 /* Adding two registers and storing the result
17260 in the first source is already a 16-bit
17261 operation. */
17262 if (rtx_equal_p (dst, op0)
17263 && register_operand (op1, SImode))
17264 break;
17266 if (low_register_operand (op0, SImode))
17268 /* ADDS <Rd>,<Rn>,<Rm> */
17269 if (low_register_operand (op1, SImode))
17270 action = CONV;
17271 /* ADDS <Rdn>,#<imm8> */
17272 /* SUBS <Rdn>,#<imm8> */
17273 else if (rtx_equal_p (dst, op0)
17274 && CONST_INT_P (op1)
17275 && IN_RANGE (INTVAL (op1), -255, 255))
17276 action = CONV;
17277 /* ADDS <Rd>,<Rn>,#<imm3> */
17278 /* SUBS <Rd>,<Rn>,#<imm3> */
17279 else if (CONST_INT_P (op1)
17280 && IN_RANGE (INTVAL (op1), -7, 7))
17281 action = CONV;
17283 /* ADCS <Rd>, <Rn> */
17284 else if (GET_CODE (XEXP (src, 0)) == PLUS
17285 && rtx_equal_p (XEXP (XEXP (src, 0), 0), dst)
17286 && low_register_operand (XEXP (XEXP (src, 0), 1),
17287 SImode)
17288 && COMPARISON_P (op1)
17289 && cc_register (XEXP (op1, 0), VOIDmode)
17290 && maybe_get_arm_condition_code (op1) == ARM_CS
17291 && XEXP (op1, 1) == const0_rtx)
17292 action = CONV;
17293 break;
17295 case MINUS:
17296 /* RSBS <Rd>,<Rn>,#0
17297 Not handled here: see NEG below. */
17298 /* SUBS <Rd>,<Rn>,#<imm3>
17299 SUBS <Rdn>,#<imm8>
17300 Not handled here: see PLUS above. */
17301 /* SUBS <Rd>,<Rn>,<Rm> */
17302 if (low_register_operand (op0, SImode)
17303 && low_register_operand (op1, SImode))
17304 action = CONV;
17305 break;
17307 case MULT:
17308 /* MULS <Rdm>,<Rn>,<Rdm>
17309 As an exception to the rule, this is only used
17310 when optimizing for size since MULS is slow on all
17311 known implementations. We do not even want to use
17312 MULS in cold code, if optimizing for speed, so we
17313 test the global flag here. */
17314 if (!optimize_size)
17315 break;
17316 /* Fall through. */
17317 case AND:
17318 case IOR:
17319 case XOR:
17320 /* ANDS <Rdn>,<Rm> */
17321 if (rtx_equal_p (dst, op0)
17322 && low_register_operand (op1, SImode))
17323 action = action_for_partial_flag_setting;
17324 else if (rtx_equal_p (dst, op1)
17325 && low_register_operand (op0, SImode))
17326 action = action_for_partial_flag_setting == SKIP
17327 ? SKIP : SWAP_CONV;
17328 break;
17330 case ASHIFTRT:
17331 case ASHIFT:
17332 case LSHIFTRT:
17333 /* ASRS <Rdn>,<Rm> */
17334 /* LSRS <Rdn>,<Rm> */
17335 /* LSLS <Rdn>,<Rm> */
17336 if (rtx_equal_p (dst, op0)
17337 && low_register_operand (op1, SImode))
17338 action = action_for_partial_flag_setting;
17339 /* ASRS <Rd>,<Rm>,#<imm5> */
17340 /* LSRS <Rd>,<Rm>,#<imm5> */
17341 /* LSLS <Rd>,<Rm>,#<imm5> */
17342 else if (low_register_operand (op0, SImode)
17343 && CONST_INT_P (op1)
17344 && IN_RANGE (INTVAL (op1), 0, 31))
17345 action = action_for_partial_flag_setting;
17346 break;
17348 case ROTATERT:
17349 /* RORS <Rdn>,<Rm> */
17350 if (rtx_equal_p (dst, op0)
17351 && low_register_operand (op1, SImode))
17352 action = action_for_partial_flag_setting;
17353 break;
17355 case NOT:
17356 /* MVNS <Rd>,<Rm> */
17357 if (low_register_operand (op0, SImode))
17358 action = action_for_partial_flag_setting;
17359 break;
17361 case NEG:
17362 /* NEGS <Rd>,<Rm> (a.k.a RSBS) */
17363 if (low_register_operand (op0, SImode))
17364 action = CONV;
17365 break;
17367 case CONST_INT:
17368 /* MOVS <Rd>,#<imm8> */
17369 if (CONST_INT_P (src)
17370 && IN_RANGE (INTVAL (src), 0, 255))
17371 action = action_for_partial_flag_setting;
17372 break;
17374 case REG:
17375 /* MOVS and MOV<c> with registers have different
17376 encodings, so are not relevant here. */
17377 break;
17379 default:
17380 break;
17384 if (action != SKIP)
17386 rtx ccreg = gen_rtx_REG (CCmode, CC_REGNUM);
17387 rtx clobber = gen_rtx_CLOBBER (VOIDmode, ccreg);
17388 rtvec vec;
17390 if (action == SWAP_CONV)
17392 src = copy_rtx (src);
17393 XEXP (src, 0) = op1;
17394 XEXP (src, 1) = op0;
17395 pat = gen_rtx_SET (dst, src);
17396 vec = gen_rtvec (2, pat, clobber);
17398 else /* action == CONV */
17399 vec = gen_rtvec (2, pat, clobber);
17401 PATTERN (insn) = gen_rtx_PARALLEL (VOIDmode, vec);
17402 INSN_CODE (insn) = -1;
17406 if (NONDEBUG_INSN_P (insn))
17407 df_simulate_one_insn_backwards (bb, insn, &live);
17411 CLEAR_REG_SET (&live);
17414 /* GCC puts the pool in the wrong place for ARM, since we can only
17415 load addresses a limited distance around the pc. We do some
17416 special munging to move the constant pool values to the correct
17417 point in the code. */
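/* Illustrative example: a constant load written as "ldr r0, =0x12345678" is
   emitted as a pc-relative load, roughly "ldr r0, [pc, #offset]", and the
   word 0x12345678 must be placed in a pool near enough for the offset to be
   encodable (about 4K bytes in ARM state, considerably less in Thumb state).
   This pass finds those placement points.  */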
17418 static void
17419 arm_reorg (void)
17421 rtx_insn *insn;
17422 HOST_WIDE_INT address = 0;
17423 Mfix * fix;
17425 if (use_cmse)
17426 cmse_nonsecure_call_clear_caller_saved ();
17427 if (TARGET_THUMB1)
17428 thumb1_reorg ();
17429 else if (TARGET_THUMB2)
17430 thumb2_reorg ();
17432 /* Ensure all insns that must be split have been split at this point.
17433 Otherwise, the pool placement code below may compute incorrect
17434 insn lengths. Note that when optimizing, all insns have already
17435 been split at this point. */
17436 if (!optimize)
17437 split_all_insns_noflow ();
17439 /* Make sure we do not attempt to create a literal pool even though it should
17440 no longer be necessary to create any. */
17441 if (arm_disable_literal_pool)
17442 return ;
17444 minipool_fix_head = minipool_fix_tail = NULL;
17446 /* The first insn must always be a note, or the code below won't
17447 scan it properly. */
17448 insn = get_insns ();
17449 gcc_assert (NOTE_P (insn));
17450 minipool_pad = 0;
17452 /* Scan all the insns and record the operands that will need fixing. */
17453 for (insn = next_nonnote_insn (insn); insn; insn = next_nonnote_insn (insn))
17455 if (BARRIER_P (insn))
17456 push_minipool_barrier (insn, address);
17457 else if (INSN_P (insn))
17459 rtx_jump_table_data *table;
17461 note_invalid_constants (insn, address, true);
17462 address += get_attr_length (insn);
17464 /* If the insn is a vector jump, add the size of the table
17465 and skip the table. */
17466 if (tablejump_p (insn, NULL, &table))
17468 address += get_jump_table_size (table);
17469 insn = table;
17472 else if (LABEL_P (insn))
17473 /* Add the worst-case padding due to alignment. We don't add
17474 the _current_ padding because the minipool insertions
17475 themselves might change it. */
17476 address += get_label_padding (insn);
17479 fix = minipool_fix_head;
17481 /* Now scan the fixups and perform the required changes. */
17482 while (fix)
17484 Mfix * ftmp;
17485 Mfix * fdel;
17486 Mfix * last_added_fix;
17487 Mfix * last_barrier = NULL;
17488 Mfix * this_fix;
17490 /* Skip any further barriers before the next fix. */
17491 while (fix && BARRIER_P (fix->insn))
17492 fix = fix->next;
17494 /* No more fixes. */
17495 if (fix == NULL)
17496 break;
17498 last_added_fix = NULL;
17500 for (ftmp = fix; ftmp; ftmp = ftmp->next)
17502 if (BARRIER_P (ftmp->insn))
17504 if (ftmp->address >= minipool_vector_head->max_address)
17505 break;
17507 last_barrier = ftmp;
17509 else if ((ftmp->minipool = add_minipool_forward_ref (ftmp)) == NULL)
17510 break;
17512 last_added_fix = ftmp; /* Keep track of the last fix added. */
17515 /* If we found a barrier, drop back to that; any fixes that we
17516 could have reached but come after the barrier will now go in
17517 the next mini-pool. */
17518 if (last_barrier != NULL)
17520 /* Reduce the refcount for those fixes that won't go into this
17521 pool after all. */
17522 for (fdel = last_barrier->next;
17523 fdel && fdel != ftmp;
17524 fdel = fdel->next)
17526 fdel->minipool->refcount--;
17527 fdel->minipool = NULL;
17530 ftmp = last_barrier;
17532 else
17534 /* ftmp is the first fix that we can't fit into this pool and
17535 there are no natural barriers that we could use. Insert a
17536 new barrier in the code somewhere between the previous
17537 fix and this one, and arrange to jump around it. */
17538 HOST_WIDE_INT max_address;
17540 /* The last item on the list of fixes must be a barrier, so
17541 we can never run off the end of the list of fixes without
17542 last_barrier being set. */
17543 gcc_assert (ftmp);
17545 max_address = minipool_vector_head->max_address;
17546 /* Check that there isn't another fix that is in range that
17547 we couldn't fit into this pool because the pool was
17548 already too large: we need to put the pool before such an
17549 instruction. The pool itself may come just after the
17550 fix because create_fix_barrier also allows space for a
17551 jump instruction. */
17552 if (ftmp->address < max_address)
17553 max_address = ftmp->address + 1;
17555 last_barrier = create_fix_barrier (last_added_fix, max_address);
17558 assign_minipool_offsets (last_barrier);
17560 while (ftmp)
17562 if (!BARRIER_P (ftmp->insn)
17563 && ((ftmp->minipool = add_minipool_backward_ref (ftmp))
17564 == NULL))
17565 break;
17567 ftmp = ftmp->next;
17570 /* Scan over the fixes we have identified for this pool, fixing them
17571 up and adding the constants to the pool itself. */
17572 for (this_fix = fix; this_fix && ftmp != this_fix;
17573 this_fix = this_fix->next)
17574 if (!BARRIER_P (this_fix->insn))
17576 rtx addr
17577 = plus_constant (Pmode,
17578 gen_rtx_LABEL_REF (VOIDmode,
17579 minipool_vector_label),
17580 this_fix->minipool->offset);
17581 *this_fix->loc = gen_rtx_MEM (this_fix->mode, addr);
17584 dump_minipool (last_barrier->insn);
17585 fix = ftmp;
17588 /* From now on we must synthesize any constants that we can't handle
17589 directly. This can happen if the RTL gets split during final
17590 instruction generation. */
17591 cfun->machine->after_arm_reorg = 1;
17593 /* Free the minipool memory. */
17594 obstack_free (&minipool_obstack, minipool_startobj);
17597 /* Routines to output assembly language. */
17599 /* Return string representation of passed in real value. */
17600 static const char *
17601 fp_const_from_val (REAL_VALUE_TYPE *r)
17603 if (!fp_consts_inited)
17604 init_fp_table ();
17606 gcc_assert (real_equal (r, &value_fp0));
17607 return "0";
17610 /* OPERANDS[0] is the entire list of insns that constitute pop,
17611 OPERANDS[1] is the base register, RETURN_PC is true iff return insn
17612 is in the list, UPDATE is true iff the list contains explicit
17613 update of base register. */
17614 void
17615 arm_output_multireg_pop (rtx *operands, bool return_pc, rtx cond, bool reverse,
17616 bool update)
17618 int i;
17619 char pattern[100];
17620 int offset;
17621 const char *conditional;
17622 int num_saves = XVECLEN (operands[0], 0);
17623 unsigned int regno;
17624 unsigned int regno_base = REGNO (operands[1]);
17625 bool interrupt_p = IS_INTERRUPT (arm_current_func_type ());
17627 offset = 0;
17628 offset += update ? 1 : 0;
17629 offset += return_pc ? 1 : 0;
17631 /* Is the base register in the list? */
17632 for (i = offset; i < num_saves; i++)
17634 regno = REGNO (XEXP (XVECEXP (operands[0], 0, i), 0));
17635 /* If SP is in the list, then the base register must be SP. */
17636 gcc_assert ((regno != SP_REGNUM) || (regno_base == SP_REGNUM));
17637 /* If base register is in the list, there must be no explicit update. */
17638 if (regno == regno_base)
17639 gcc_assert (!update);
17642 conditional = reverse ? "%?%D0" : "%?%d0";
17643 /* Can't use POP if returning from an interrupt. */
17644 if ((regno_base == SP_REGNUM) && update && !(interrupt_p && return_pc))
17645 sprintf (pattern, "pop%s\t{", conditional);
17646 else
17648 /* Output ldmfd when the base register is SP, otherwise output ldmia.
17649 It's just a convention, their semantics are identical. */
17650 if (regno_base == SP_REGNUM)
17651 sprintf (pattern, "ldmfd%s\t", conditional);
17652 else if (update)
17653 sprintf (pattern, "ldmia%s\t", conditional);
17654 else
17655 sprintf (pattern, "ldm%s\t", conditional);
17657 strcat (pattern, reg_names[regno_base]);
17658 if (update)
17659 strcat (pattern, "!, {");
17660 else
17661 strcat (pattern, ", {");
17664 /* Output the first destination register. */
17665 strcat (pattern,
17666 reg_names[REGNO (XEXP (XVECEXP (operands[0], 0, offset), 0))]);
17668 /* Output the rest of the destination registers. */
17669 for (i = offset + 1; i < num_saves; i++)
17671 strcat (pattern, ", ");
17672 strcat (pattern,
17673 reg_names[REGNO (XEXP (XVECEXP (operands[0], 0, i), 0))]);
17676 strcat (pattern, "}");
17678 if (interrupt_p && return_pc)
17679 strcat (pattern, "^");
17681 output_asm_insn (pattern, &cond);
17685 /* Output the assembly for a store multiple. */
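/* For a push of three double registers starting at d8, for example, the
   string built below comes out roughly as "vpush.64 {d8, d9, d10}"; the
   register numbers are illustrative only.  */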
17687 const char *
17688 vfp_output_vstmd (rtx * operands)
17690 char pattern[100];
17691 int p;
17692 int base;
17693 int i;
17694 rtx addr_reg = REG_P (XEXP (operands[0], 0))
17695 ? XEXP (operands[0], 0)
17696 : XEXP (XEXP (operands[0], 0), 0);
17697 bool push_p = REGNO (addr_reg) == SP_REGNUM;
17699 if (push_p)
17700 strcpy (pattern, "vpush%?.64\t{%P1");
17701 else
17702 strcpy (pattern, "vstmdb%?.64\t%m0!, {%P1");
17704 p = strlen (pattern);
17706 gcc_assert (REG_P (operands[1]));
17708 base = (REGNO (operands[1]) - FIRST_VFP_REGNUM) / 2;
17709 for (i = 1; i < XVECLEN (operands[2], 0); i++)
17711 p += sprintf (&pattern[p], ", d%d", base + i);
17713 strcpy (&pattern[p], "}");
17715 output_asm_insn (pattern, operands);
17716 return "";
17720 /* Emit RTL to save block of VFP register pairs to the stack. Returns the
17721 number of bytes pushed. */
17723 static int
17724 vfp_emit_fstmd (int base_reg, int count)
17726 rtx par;
17727 rtx dwarf;
17728 rtx tmp, reg;
17729 int i;
17731 /* Workaround ARM10 VFPr1 bug. Data corruption can occur when exactly two
17732 register pairs are stored by a store multiple insn. We avoid this
17733 by pushing an extra pair. */
17734 if (count == 2 && !arm_arch6)
17736 if (base_reg == LAST_VFP_REGNUM - 3)
17737 base_reg -= 2;
17738 count++;
17741 /* FSTMD may not store more than 16 doubleword registers at once. Split
17742 larger stores into multiple parts (up to a maximum of two, in
17743 practice). */
17744 if (count > 16)
17746 int saved;
17747 /* NOTE: base_reg is an internal register number, so each D register
17748 counts as 2. */
17749 saved = vfp_emit_fstmd (base_reg + 32, count - 16);
17750 saved += vfp_emit_fstmd (base_reg, 16);
17751 return saved;
17754 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (count));
17755 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (count + 1));
17757 reg = gen_rtx_REG (DFmode, base_reg);
17758 base_reg += 2;
17760 XVECEXP (par, 0, 0)
17761 = gen_rtx_SET (gen_frame_mem
17762 (BLKmode,
17763 gen_rtx_PRE_MODIFY (Pmode,
17764 stack_pointer_rtx,
17765 plus_constant
17766 (Pmode, stack_pointer_rtx,
17767 - (count * 8)))
17769 gen_rtx_UNSPEC (BLKmode,
17770 gen_rtvec (1, reg),
17771 UNSPEC_PUSH_MULT));
17773 tmp = gen_rtx_SET (stack_pointer_rtx,
17774 plus_constant (Pmode, stack_pointer_rtx, -(count * 8)));
17775 RTX_FRAME_RELATED_P (tmp) = 1;
17776 XVECEXP (dwarf, 0, 0) = tmp;
17778 tmp = gen_rtx_SET (gen_frame_mem (DFmode, stack_pointer_rtx), reg);
17779 RTX_FRAME_RELATED_P (tmp) = 1;
17780 XVECEXP (dwarf, 0, 1) = tmp;
17782 for (i = 1; i < count; i++)
17784 reg = gen_rtx_REG (DFmode, base_reg);
17785 base_reg += 2;
17786 XVECEXP (par, 0, i) = gen_rtx_USE (VOIDmode, reg);
17788 tmp = gen_rtx_SET (gen_frame_mem (DFmode,
17789 plus_constant (Pmode,
17790 stack_pointer_rtx,
17791 i * 8)),
17792 reg);
17793 RTX_FRAME_RELATED_P (tmp) = 1;
17794 XVECEXP (dwarf, 0, i + 1) = tmp;
17797 par = emit_insn (par);
17798 add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
17799 RTX_FRAME_RELATED_P (par) = 1;
17801 return count * 8;
17804 /* Returns true if -mcmse has been passed and the function pointed to by 'addr'
17805 has the cmse_nonsecure_call attribute and returns false otherwise. */
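/* For instance (illustrative), a call made through a pointer whose type was
   declared as

     typedef void __attribute__ ((cmse_nonsecure_call)) nsfunc (void);

   is detected here when compiling with -mcmse.  */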
17807 bool
17808 detect_cmse_nonsecure_call (tree addr)
17810 if (!addr)
17811 return FALSE;
17813 tree fntype = TREE_TYPE (addr);
17814 if (use_cmse && lookup_attribute ("cmse_nonsecure_call",
17815 TYPE_ATTRIBUTES (fntype)))
17816 return TRUE;
17817 return FALSE;
17821 /* Emit a call instruction with pattern PAT. ADDR is the address of
17822 the call target. */
17824 void
17825 arm_emit_call_insn (rtx pat, rtx addr, bool sibcall)
17827 rtx insn;
17829 insn = emit_call_insn (pat);
17831 /* The PIC register is live on entry to VxWorks PIC PLT entries.
17832 If the call might use such an entry, add a use of the PIC register
17833 to the instruction's CALL_INSN_FUNCTION_USAGE. */
17834 if (TARGET_VXWORKS_RTP
17835 && flag_pic
17836 && !sibcall
17837 && GET_CODE (addr) == SYMBOL_REF
17838 && (SYMBOL_REF_DECL (addr)
17839 ? !targetm.binds_local_p (SYMBOL_REF_DECL (addr))
17840 : !SYMBOL_REF_LOCAL_P (addr)))
17842 require_pic_register ();
17843 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), cfun->machine->pic_reg);
17846 if (TARGET_AAPCS_BASED)
17848 /* For AAPCS, IP and CC can be clobbered by veneers inserted by the
17849 linker. We need to add an IP clobber to allow setting
17850 TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS to true. A CC clobber
17851 is not needed since it's a fixed register. */
17852 rtx *fusage = &CALL_INSN_FUNCTION_USAGE (insn);
17853 clobber_reg (fusage, gen_rtx_REG (word_mode, IP_REGNUM));
17857 /* Output a 'call' insn. */
17858 const char *
17859 output_call (rtx *operands)
17861 gcc_assert (!arm_arch5); /* Patterns should call blx <reg> directly. */
17863 /* Handle calls to lr using ip (which may be clobbered in subr anyway). */
17864 if (REGNO (operands[0]) == LR_REGNUM)
17866 operands[0] = gen_rtx_REG (SImode, IP_REGNUM);
17867 output_asm_insn ("mov%?\t%0, %|lr", operands);
17870 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
17872 if (TARGET_INTERWORK || arm_arch4t)
17873 output_asm_insn ("bx%?\t%0", operands);
17874 else
17875 output_asm_insn ("mov%?\t%|pc, %0", operands);
17877 return "";
17880 /* Output a move from arm registers to arm registers of a long double
17881 OPERANDS[0] is the destination.
17882 OPERANDS[1] is the source. */
17883 const char *
17884 output_mov_long_double_arm_from_arm (rtx *operands)
17886 /* We have to be careful here because the two might overlap. */
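  /* Illustrative example: copying {r0, r1, r2} into {r1, r2, r3} must move
     r3 <- r2 first and r1 <- r0 last; copying in ascending order would
     overwrite r1 before it had been read.  */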
17887 int dest_start = REGNO (operands[0]);
17888 int src_start = REGNO (operands[1]);
17889 rtx ops[2];
17890 int i;
17892 if (dest_start < src_start)
17894 for (i = 0; i < 3; i++)
17896 ops[0] = gen_rtx_REG (SImode, dest_start + i);
17897 ops[1] = gen_rtx_REG (SImode, src_start + i);
17898 output_asm_insn ("mov%?\t%0, %1", ops);
17901 else
17903 for (i = 2; i >= 0; i--)
17905 ops[0] = gen_rtx_REG (SImode, dest_start + i);
17906 ops[1] = gen_rtx_REG (SImode, src_start + i);
17907 output_asm_insn ("mov%?\t%0, %1", ops);
17911 return "";
17914 void
17915 arm_emit_movpair (rtx dest, rtx src)
17917 /* If the src is an immediate, simplify it. */
17918 if (CONST_INT_P (src))
17920 HOST_WIDE_INT val = INTVAL (src);
17921 emit_set_insn (dest, GEN_INT (val & 0x0000ffff));
17922 if ((val >> 16) & 0x0000ffff)
17924 emit_set_insn (gen_rtx_ZERO_EXTRACT (SImode, dest, GEN_INT (16),
17925 GEN_INT (16)),
17926 GEN_INT ((val >> 16) & 0x0000ffff));
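          /* Illustrative example: for val == 0x12345678 the code above first
             sets the low halfword to 0x5678 and then the high halfword to
             0x1234, which typically assembles to a movw/movt pair.  */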
17927 rtx_insn *insn = get_last_insn ();
17928 set_unique_reg_note (insn, REG_EQUAL, copy_rtx (src));
17930 return;
17932 emit_set_insn (dest, gen_rtx_HIGH (SImode, src));
17933 emit_set_insn (dest, gen_rtx_LO_SUM (SImode, dest, src));
17934 rtx_insn *insn = get_last_insn ();
17935 set_unique_reg_note (insn, REG_EQUAL, copy_rtx (src));
17938 /* Output a move between double words. It must be REG<-MEM
17939 or MEM<-REG. */
17940 const char *
17941 output_move_double (rtx *operands, bool emit, int *count)
17943 enum rtx_code code0 = GET_CODE (operands[0]);
17944 enum rtx_code code1 = GET_CODE (operands[1]);
17945 rtx otherops[3];
17946 if (count)
17947 *count = 1;
17949 /* The only case when this might happen is when
17950 you are looking at the length of a DImode instruction
17951 that has an invalid constant in it. */
17952 if (code0 == REG && code1 != MEM)
17954 gcc_assert (!emit);
17955 *count = 2;
17956 return "";
17959 if (code0 == REG)
17961 unsigned int reg0 = REGNO (operands[0]);
17963 otherops[0] = gen_rtx_REG (SImode, 1 + reg0);
17965 gcc_assert (code1 == MEM); /* Constraints should ensure this. */
17967 switch (GET_CODE (XEXP (operands[1], 0)))
17969 case REG:
17971 if (emit)
17973 if (TARGET_LDRD
17974 && !(fix_cm3_ldrd && reg0 == REGNO(XEXP (operands[1], 0))))
17975 output_asm_insn ("ldrd%?\t%0, [%m1]", operands);
17976 else
17977 output_asm_insn ("ldmia%?\t%m1, %M0", operands);
17979 break;
17981 case PRE_INC:
17982 gcc_assert (TARGET_LDRD);
17983 if (emit)
17984 output_asm_insn ("ldrd%?\t%0, [%m1, #8]!", operands);
17985 break;
17987 case PRE_DEC:
17988 if (emit)
17990 if (TARGET_LDRD)
17991 output_asm_insn ("ldrd%?\t%0, [%m1, #-8]!", operands);
17992 else
17993 output_asm_insn ("ldmdb%?\t%m1!, %M0", operands);
17995 break;
17997 case POST_INC:
17998 if (emit)
18000 if (TARGET_LDRD)
18001 output_asm_insn ("ldrd%?\t%0, [%m1], #8", operands);
18002 else
18003 output_asm_insn ("ldmia%?\t%m1!, %M0", operands);
18005 break;
18007 case POST_DEC:
18008 gcc_assert (TARGET_LDRD);
18009 if (emit)
18010 output_asm_insn ("ldrd%?\t%0, [%m1], #-8", operands);
18011 break;
18013 case PRE_MODIFY:
18014 case POST_MODIFY:
18015 /* Autoincrement addressing modes should never have overlapping
18016 base and destination registers, and overlapping index registers
18017 are already prohibited, so this doesn't need to worry about
18018 fix_cm3_ldrd. */
18019 otherops[0] = operands[0];
18020 otherops[1] = XEXP (XEXP (XEXP (operands[1], 0), 1), 0);
18021 otherops[2] = XEXP (XEXP (XEXP (operands[1], 0), 1), 1);
18023 if (GET_CODE (XEXP (operands[1], 0)) == PRE_MODIFY)
18025 if (reg_overlap_mentioned_p (otherops[0], otherops[2]))
18027 /* Registers overlap so split out the increment. */
18028 if (emit)
18030 output_asm_insn ("add%?\t%1, %1, %2", otherops);
18031 output_asm_insn ("ldrd%?\t%0, [%1] @split", otherops);
18033 if (count)
18034 *count = 2;
18036 else
18038 /* Use a single insn if we can.
18039 FIXME: IWMMXT allows offsets larger than ldrd can
18040 handle, fix these up with a pair of ldr. */
18041 if (TARGET_THUMB2
18042 || !CONST_INT_P (otherops[2])
18043 || (INTVAL (otherops[2]) > -256
18044 && INTVAL (otherops[2]) < 256))
18046 if (emit)
18047 output_asm_insn ("ldrd%?\t%0, [%1, %2]!", otherops);
18049 else
18051 if (emit)
18053 output_asm_insn ("ldr%?\t%0, [%1, %2]!", otherops);
18054 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
18056 if (count)
18057 *count = 2;
18062 else
18064 /* Use a single insn if we can.
18065 FIXME: IWMMXT allows offsets larger than ldrd can handle,
18066 fix these up with a pair of ldr. */
18067 if (TARGET_THUMB2
18068 || !CONST_INT_P (otherops[2])
18069 || (INTVAL (otherops[2]) > -256
18070 && INTVAL (otherops[2]) < 256))
18072 if (emit)
18073 output_asm_insn ("ldrd%?\t%0, [%1], %2", otherops);
18075 else
18077 if (emit)
18079 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
18080 output_asm_insn ("ldr%?\t%0, [%1], %2", otherops);
18082 if (count)
18083 *count = 2;
18086 break;
18088 case LABEL_REF:
18089 case CONST:
18090 /* We might be able to use ldrd %0, %1 here. However the range is
18091 different to ldr/adr, and it is broken on some ARMv7-M
18092 implementations. */
18093 /* Use the second register of the pair to avoid problematic
18094 overlap. */
18095 otherops[1] = operands[1];
18096 if (emit)
18097 output_asm_insn ("adr%?\t%0, %1", otherops);
18098 operands[1] = otherops[0];
18099 if (emit)
18101 if (TARGET_LDRD)
18102 output_asm_insn ("ldrd%?\t%0, [%1]", operands);
18103 else
18104 output_asm_insn ("ldmia%?\t%1, %M0", operands);
18107 if (count)
18108 *count = 2;
18109 break;
18111 /* ??? This needs checking for thumb2. */
18112 default:
18113 if (arm_add_operand (XEXP (XEXP (operands[1], 0), 1),
18114 GET_MODE (XEXP (XEXP (operands[1], 0), 1))))
18116 otherops[0] = operands[0];
18117 otherops[1] = XEXP (XEXP (operands[1], 0), 0);
18118 otherops[2] = XEXP (XEXP (operands[1], 0), 1);
18120 if (GET_CODE (XEXP (operands[1], 0)) == PLUS)
18122 if (CONST_INT_P (otherops[2]) && !TARGET_LDRD)
18124 switch ((int) INTVAL (otherops[2]))
18126 case -8:
18127 if (emit)
18128 output_asm_insn ("ldmdb%?\t%1, %M0", otherops);
18129 return "";
18130 case -4:
18131 if (TARGET_THUMB2)
18132 break;
18133 if (emit)
18134 output_asm_insn ("ldmda%?\t%1, %M0", otherops);
18135 return "";
18136 case 4:
18137 if (TARGET_THUMB2)
18138 break;
18139 if (emit)
18140 output_asm_insn ("ldmib%?\t%1, %M0", otherops);
18141 return "";
18144 otherops[0] = gen_rtx_REG(SImode, REGNO(operands[0]) + 1);
18145 operands[1] = otherops[0];
18146 if (TARGET_LDRD
18147 && (REG_P (otherops[2])
18148 || TARGET_THUMB2
18149 || (CONST_INT_P (otherops[2])
18150 && INTVAL (otherops[2]) > -256
18151 && INTVAL (otherops[2]) < 256)))
18153 if (reg_overlap_mentioned_p (operands[0],
18154 otherops[2]))
18156 /* Swap base and index registers over to
18157 avoid a conflict. */
18158 std::swap (otherops[1], otherops[2]);
18160 /* If both registers conflict, it will usually
18161 have been fixed by a splitter. */
18162 if (reg_overlap_mentioned_p (operands[0], otherops[2])
18163 || (fix_cm3_ldrd && reg0 == REGNO (otherops[1])))
18165 if (emit)
18167 output_asm_insn ("add%?\t%0, %1, %2", otherops);
18168 output_asm_insn ("ldrd%?\t%0, [%1]", operands);
18170 if (count)
18171 *count = 2;
18173 else
18175 otherops[0] = operands[0];
18176 if (emit)
18177 output_asm_insn ("ldrd%?\t%0, [%1, %2]", otherops);
18179 return "";
18182 if (CONST_INT_P (otherops[2]))
18184 if (emit)
18186 if (!(const_ok_for_arm (INTVAL (otherops[2]))))
18187 output_asm_insn ("sub%?\t%0, %1, #%n2", otherops);
18188 else
18189 output_asm_insn ("add%?\t%0, %1, %2", otherops);
18192 else
18194 if (emit)
18195 output_asm_insn ("add%?\t%0, %1, %2", otherops);
18198 else
18200 if (emit)
18201 output_asm_insn ("sub%?\t%0, %1, %2", otherops);
18204 if (count)
18205 *count = 2;
18207 if (TARGET_LDRD)
18208 return "ldrd%?\t%0, [%1]";
18210 return "ldmia%?\t%1, %M0";
18212 else
18214 otherops[1] = adjust_address (operands[1], SImode, 4);
18215 /* Take care of overlapping base/data reg. */
18216 if (reg_mentioned_p (operands[0], operands[1]))
18218 if (emit)
18220 output_asm_insn ("ldr%?\t%0, %1", otherops);
18221 output_asm_insn ("ldr%?\t%0, %1", operands);
18223 if (count)
18224 *count = 2;
18227 else
18229 if (emit)
18231 output_asm_insn ("ldr%?\t%0, %1", operands);
18232 output_asm_insn ("ldr%?\t%0, %1", otherops);
18234 if (count)
18235 *count = 2;
18240 else
18242 /* Constraints should ensure this. */
18243 gcc_assert (code0 == MEM && code1 == REG);
18244 gcc_assert ((REGNO (operands[1]) != IP_REGNUM)
18245 || (TARGET_ARM && TARGET_LDRD));
18247 switch (GET_CODE (XEXP (operands[0], 0)))
18249 case REG:
18250 if (emit)
18252 if (TARGET_LDRD)
18253 output_asm_insn ("strd%?\t%1, [%m0]", operands);
18254 else
18255 output_asm_insn ("stm%?\t%m0, %M1", operands);
18257 break;
18259 case PRE_INC:
18260 gcc_assert (TARGET_LDRD);
18261 if (emit)
18262 output_asm_insn ("strd%?\t%1, [%m0, #8]!", operands);
18263 break;
18265 case PRE_DEC:
18266 if (emit)
18268 if (TARGET_LDRD)
18269 output_asm_insn ("strd%?\t%1, [%m0, #-8]!", operands);
18270 else
18271 output_asm_insn ("stmdb%?\t%m0!, %M1", operands);
18273 break;
18275 case POST_INC:
18276 if (emit)
18278 if (TARGET_LDRD)
18279 output_asm_insn ("strd%?\t%1, [%m0], #8", operands);
18280 else
18281 output_asm_insn ("stm%?\t%m0!, %M1", operands);
18283 break;
18285 case POST_DEC:
18286 gcc_assert (TARGET_LDRD);
18287 if (emit)
18288 output_asm_insn ("strd%?\t%1, [%m0], #-8", operands);
18289 break;
18291 case PRE_MODIFY:
18292 case POST_MODIFY:
18293 otherops[0] = operands[1];
18294 otherops[1] = XEXP (XEXP (XEXP (operands[0], 0), 1), 0);
18295 otherops[2] = XEXP (XEXP (XEXP (operands[0], 0), 1), 1);
18297 /* IWMMXT allows offsets larger than ldrd can handle,
18298 fix these up with a pair of ldr. */
18299 if (!TARGET_THUMB2
18300 && CONST_INT_P (otherops[2])
18301 && (INTVAL(otherops[2]) <= -256
18302 || INTVAL(otherops[2]) >= 256))
18304 if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
18306 if (emit)
18308 output_asm_insn ("str%?\t%0, [%1, %2]!", otherops);
18309 output_asm_insn ("str%?\t%H0, [%1, #4]", otherops);
18311 if (count)
18312 *count = 2;
18314 else
18316 if (emit)
18318 output_asm_insn ("str%?\t%H0, [%1, #4]", otherops);
18319 output_asm_insn ("str%?\t%0, [%1], %2", otherops);
18321 if (count)
18322 *count = 2;
18325 else if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
18327 if (emit)
18328 output_asm_insn ("strd%?\t%0, [%1, %2]!", otherops);
18330 else
18332 if (emit)
18333 output_asm_insn ("strd%?\t%0, [%1], %2", otherops);
18335 break;
18337 case PLUS:
18338 otherops[2] = XEXP (XEXP (operands[0], 0), 1);
18339 if (CONST_INT_P (otherops[2]) && !TARGET_LDRD)
18341 switch ((int) INTVAL (XEXP (XEXP (operands[0], 0), 1)))
18343 case -8:
18344 if (emit)
18345 output_asm_insn ("stmdb%?\t%m0, %M1", operands);
18346 return "";
18348 case -4:
18349 if (TARGET_THUMB2)
18350 break;
18351 if (emit)
18352 output_asm_insn ("stmda%?\t%m0, %M1", operands);
18353 return "";
18355 case 4:
18356 if (TARGET_THUMB2)
18357 break;
18358 if (emit)
18359 output_asm_insn ("stmib%?\t%m0, %M1", operands);
18360 return "";
18363 if (TARGET_LDRD
18364 && (REG_P (otherops[2])
18365 || TARGET_THUMB2
18366 || (CONST_INT_P (otherops[2])
18367 && INTVAL (otherops[2]) > -256
18368 && INTVAL (otherops[2]) < 256)))
18370 otherops[0] = operands[1];
18371 otherops[1] = XEXP (XEXP (operands[0], 0), 0);
18372 if (emit)
18373 output_asm_insn ("strd%?\t%0, [%1, %2]", otherops);
18374 return "";
18376 /* Fall through */
18378 default:
18379 otherops[0] = adjust_address (operands[0], SImode, 4);
18380 otherops[1] = operands[1];
18381 if (emit)
18383 output_asm_insn ("str%?\t%1, %0", operands);
18384 output_asm_insn ("str%?\t%H1, %0", otherops);
18386 if (count)
18387 *count = 2;
18391 return "";
18394 /* Output a move, load or store for quad-word vectors in ARM registers. Only
18395 handles MEMs accepted by neon_vector_mem_operand with TYPE=1. */
18397 const char *
18398 output_move_quad (rtx *operands)
18400 if (REG_P (operands[0]))
18402 /* Load, or reg->reg move. */
18404 if (MEM_P (operands[1]))
18406 switch (GET_CODE (XEXP (operands[1], 0)))
18408 case REG:
18409 output_asm_insn ("ldmia%?\t%m1, %M0", operands);
18410 break;
18412 case LABEL_REF:
18413 case CONST:
18414 output_asm_insn ("adr%?\t%0, %1", operands);
18415 output_asm_insn ("ldmia%?\t%0, %M0", operands);
18416 break;
18418 default:
18419 gcc_unreachable ();
18422 else
18424 rtx ops[2];
18425 int dest, src, i;
18427 gcc_assert (REG_P (operands[1]));
18429 dest = REGNO (operands[0]);
18430 src = REGNO (operands[1]);
18432 /* This seems pretty dumb, but hopefully GCC won't try to do it
18433 very often. */
18434 if (dest < src)
18435 for (i = 0; i < 4; i++)
18437 ops[0] = gen_rtx_REG (SImode, dest + i);
18438 ops[1] = gen_rtx_REG (SImode, src + i);
18439 output_asm_insn ("mov%?\t%0, %1", ops);
18441 else
18442 for (i = 3; i >= 0; i--)
18444 ops[0] = gen_rtx_REG (SImode, dest + i);
18445 ops[1] = gen_rtx_REG (SImode, src + i);
18446 output_asm_insn ("mov%?\t%0, %1", ops);
18450 else
18452 gcc_assert (MEM_P (operands[0]));
18453 gcc_assert (REG_P (operands[1]));
18454 gcc_assert (!reg_overlap_mentioned_p (operands[1], operands[0]));
18456 switch (GET_CODE (XEXP (operands[0], 0)))
18458 case REG:
18459 output_asm_insn ("stm%?\t%m0, %M1", operands);
18460 break;
18462 default:
18463 gcc_unreachable ();
18467 return "";
18470 /* Output a VFP load or store instruction. */
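/* Illustrative example: a DFmode load from a plain register address is
   emitted roughly as "vldr.64 d1, [r0]"; the register numbers are
   illustrative only.  */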
18472 const char *
18473 output_move_vfp (rtx *operands)
18475 rtx reg, mem, addr, ops[2];
18476 int load = REG_P (operands[0]);
18477 int dp = GET_MODE_SIZE (GET_MODE (operands[0])) == 8;
18478 int sp = (!TARGET_VFP_FP16INST
18479 || GET_MODE_SIZE (GET_MODE (operands[0])) == 4);
18480 int integer_p = GET_MODE_CLASS (GET_MODE (operands[0])) == MODE_INT;
18481 const char *templ;
18482 char buff[50];
18483 machine_mode mode;
18485 reg = operands[!load];
18486 mem = operands[load];
18488 mode = GET_MODE (reg);
18490 gcc_assert (REG_P (reg));
18491 gcc_assert (IS_VFP_REGNUM (REGNO (reg)));
18492 gcc_assert ((mode == HFmode && TARGET_HARD_FLOAT)
18493 || mode == SFmode
18494 || mode == DFmode
18495 || mode == HImode
18496 || mode == SImode
18497 || mode == DImode
18498 || (TARGET_NEON && VALID_NEON_DREG_MODE (mode)));
18499 gcc_assert (MEM_P (mem));
18501 addr = XEXP (mem, 0);
18503 switch (GET_CODE (addr))
18505 case PRE_DEC:
18506 templ = "v%smdb%%?.%s\t%%0!, {%%%s1}%s";
18507 ops[0] = XEXP (addr, 0);
18508 ops[1] = reg;
18509 break;
18511 case POST_INC:
18512 templ = "v%smia%%?.%s\t%%0!, {%%%s1}%s";
18513 ops[0] = XEXP (addr, 0);
18514 ops[1] = reg;
18515 break;
18517 default:
18518 templ = "v%sr%%?.%s\t%%%s0, %%1%s";
18519 ops[0] = reg;
18520 ops[1] = mem;
18521 break;
18524 sprintf (buff, templ,
18525 load ? "ld" : "st",
18526 dp ? "64" : sp ? "32" : "16",
18527 dp ? "P" : "",
18528 integer_p ? "\t%@ int" : "");
18529 output_asm_insn (buff, ops);
18531 return "";
18534 /* Output a Neon double-word or quad-word load or store, or a load
18535 or store for larger structure modes.
18537 WARNING: The ordering of elements is weird in big-endian mode,
18538 because the EABI requires that vectors stored in memory appear
18539 as though they were stored by a VSTM instruction.
18540 GCC RTL defines element ordering based on in-memory order.
18541 This can be different from the architectural ordering of elements
18542 within a NEON register. The intrinsics defined in arm_neon.h use the
18543 NEON register element ordering, not the GCC RTL element ordering.
18545 For example, the in-memory ordering of a big-endian quadword
18546 vector with 16-bit elements when stored from register pair {d0,d1}
18547 will be (lowest address first, d0[N] is NEON register element N):
18549 [d0[3], d0[2], d0[1], d0[0], d1[7], d1[6], d1[5], d1[4]]
18551 When necessary, quadword registers (dN, dN+1) are moved to ARM
18552 registers from rN in the order:
18554 dN -> (rN+1, rN), dN+1 -> (rN+3, rN+2)
18556 So that STM/LDM can be used on vectors in ARM registers, and the
18557 same memory layout will result as if VSTM/VLDM were used.
18559 Instead of VSTM/VLDM we prefer to use VST1.64/VLD1.64 where
18560 possible, which allows use of appropriate alignment tags.
18561 Note that the choice of "64" is independent of the actual vector
18562 element size; this size simply ensures that the behavior is
18563 equivalent to VSTM/VLDM in both little-endian and big-endian mode.
18565 Due to limitations of those instructions, use of VST1.64/VLD1.64
18566 is not possible if:
18567 - the address contains PRE_DEC, or
18568 - the mode refers to more than 4 double-word registers
18570 In those cases, it would be possible to replace VSTM/VLDM by a
18571 sequence of instructions; this is not currently implemented since
18572 this is not certain to actually improve performance. */
18574 const char *
18575 output_move_neon (rtx *operands)
18577 rtx reg, mem, addr, ops[2];
18578 int regno, nregs, load = REG_P (operands[0]);
18579 const char *templ;
18580 char buff[50];
18581 machine_mode mode;
18583 reg = operands[!load];
18584 mem = operands[load];
18586 mode = GET_MODE (reg);
18588 gcc_assert (REG_P (reg));
18589 regno = REGNO (reg);
18590 nregs = HARD_REGNO_NREGS (regno, mode) / 2;
18591 gcc_assert (VFP_REGNO_OK_FOR_DOUBLE (regno)
18592 || NEON_REGNO_OK_FOR_QUAD (regno));
18593 gcc_assert (VALID_NEON_DREG_MODE (mode)
18594 || VALID_NEON_QREG_MODE (mode)
18595 || VALID_NEON_STRUCT_MODE (mode));
18596 gcc_assert (MEM_P (mem));
18598 addr = XEXP (mem, 0);
18600 /* Strip off const from addresses like (const (plus (...))). */
18601 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS)
18602 addr = XEXP (addr, 0);
18604 switch (GET_CODE (addr))
18606 case POST_INC:
18607 /* We have to use vldm / vstm for too-large modes. */
18608 if (nregs > 4)
18610 templ = "v%smia%%?\t%%0!, %%h1";
18611 ops[0] = XEXP (addr, 0);
18613 else
18615 templ = "v%s1.64\t%%h1, %%A0";
18616 ops[0] = mem;
18618 ops[1] = reg;
18619 break;
18621 case PRE_DEC:
18622 /* We have to use vldm / vstm in this case, since there is no
18623 pre-decrement form of the vld1 / vst1 instructions. */
18624 templ = "v%smdb%%?\t%%0!, %%h1";
18625 ops[0] = XEXP (addr, 0);
18626 ops[1] = reg;
18627 break;
18629 case POST_MODIFY:
18630 /* FIXME: Not currently enabled in neon_vector_mem_operand. */
18631 gcc_unreachable ();
18633 case REG:
18634 /* We have to use vldm / vstm for too-large modes. */
18635 if (nregs > 1)
18637 if (nregs > 4)
18638 templ = "v%smia%%?\t%%m0, %%h1";
18639 else
18640 templ = "v%s1.64\t%%h1, %%A0";
18642 ops[0] = mem;
18643 ops[1] = reg;
18644 break;
18646 /* Fall through. */
18647 case LABEL_REF:
18648 case PLUS:
18650 int i;
18651 int overlap = -1;
18652 for (i = 0; i < nregs; i++)
18654 /* We're only using DImode here because it's a convenient size. */
18655 ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * i);
18656 ops[1] = adjust_address (mem, DImode, 8 * i);
18657 if (reg_overlap_mentioned_p (ops[0], mem))
18659 gcc_assert (overlap == -1);
18660 overlap = i;
18662 else
18664 sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
18665 output_asm_insn (buff, ops);
18668 if (overlap != -1)
18670 ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * overlap);
18671 ops[1] = adjust_address (mem, SImode, 8 * overlap);
18672 sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
18673 output_asm_insn (buff, ops);
18676 return "";
18679 default:
18680 gcc_unreachable ();
18683 sprintf (buff, templ, load ? "ld" : "st");
18684 output_asm_insn (buff, ops);
18686 return "";
18689 /* Compute and return the length of neon_mov<mode>, where <mode> is
18690 one of VSTRUCT modes: EI, OI, CI or XI. */
18691 int
18692 arm_attr_length_move_neon (rtx_insn *insn)
18694 rtx reg, mem, addr;
18695 int load;
18696 machine_mode mode;
18698 extract_insn_cached (insn);
18700 if (REG_P (recog_data.operand[0]) && REG_P (recog_data.operand[1]))
18702 mode = GET_MODE (recog_data.operand[0]);
18703 switch (mode)
18705 case E_EImode:
18706 case E_OImode:
18707 return 8;
18708 case E_CImode:
18709 return 12;
18710 case E_XImode:
18711 return 16;
18712 default:
18713 gcc_unreachable ();
18717 load = REG_P (recog_data.operand[0]);
18718 reg = recog_data.operand[!load];
18719 mem = recog_data.operand[load];
18721 gcc_assert (MEM_P (mem));
18723 mode = GET_MODE (reg);
18724 addr = XEXP (mem, 0);
18726 /* Strip off const from addresses like (const (plus (...))). */
18727 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS)
18728 addr = XEXP (addr, 0);
18730 if (GET_CODE (addr) == LABEL_REF || GET_CODE (addr) == PLUS)
18732 int insns = HARD_REGNO_NREGS (REGNO (reg), mode) / 2;
18733 return insns * 4;
18735 else
18736 return 4;
18739 /* Return nonzero if the offset in the address is an immediate. Otherwise,
18740 return zero. */
18742 int
18743 arm_address_offset_is_imm (rtx_insn *insn)
18745 rtx mem, addr;
18747 extract_insn_cached (insn);
18749 if (REG_P (recog_data.operand[0]))
18750 return 0;
18752 mem = recog_data.operand[0];
18754 gcc_assert (MEM_P (mem));
18756 addr = XEXP (mem, 0);
18758 if (REG_P (addr)
18759 || (GET_CODE (addr) == PLUS
18760 && REG_P (XEXP (addr, 0))
18761 && CONST_INT_P (XEXP (addr, 1))))
18762 return 1;
18763 else
18764 return 0;
18767 /* Output an ADD r, s, #n where n may be too big for one instruction.
18768 If adding zero to one register, output nothing. */
18769 const char *
18770 output_add_immediate (rtx *operands)
18772 HOST_WIDE_INT n = INTVAL (operands[2]);
18774 if (n != 0 || REGNO (operands[0]) != REGNO (operands[1]))
18776 if (n < 0)
18777 output_multi_immediate (operands,
18778 "sub%?\t%0, %1, %2", "sub%?\t%0, %0, %2", 2,
18779 -n);
18780 else
18781 output_multi_immediate (operands,
18782 "add%?\t%0, %1, %2", "add%?\t%0, %0, %2", 2,
18786 return "";
18789 /* Output a multiple immediate operation.
18790 OPERANDS is the vector of operands referred to in the output patterns.
18791 INSTR1 is the output pattern to use for the first constant.
18792 INSTR2 is the output pattern to use for subsequent constants.
18793 IMMED_OP is the index of the constant slot in OPERANDS.
18794 N is the constant value. */
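/* A worked example (illustrative): an addition of #0x1f004 cannot be encoded
   as a single ARM immediate, so it is split into two valid chunks and emitted
   roughly as

       add     r0, r1, #4
       add     r0, r0, #126976    @ 0x1f000

   each chunk being an 8-bit value rotated by an even amount.  */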
18795 static const char *
18796 output_multi_immediate (rtx *operands, const char *instr1, const char *instr2,
18797 int immed_op, HOST_WIDE_INT n)
18799 #if HOST_BITS_PER_WIDE_INT > 32
18800 n &= 0xffffffff;
18801 #endif
18803 if (n == 0)
18805 /* Quick and easy output. */
18806 operands[immed_op] = const0_rtx;
18807 output_asm_insn (instr1, operands);
18809 else
18811 int i;
18812 const char * instr = instr1;
18814 /* Note that n is never zero here (which would give no output). */
18815 for (i = 0; i < 32; i += 2)
18817 if (n & (3 << i))
18819 operands[immed_op] = GEN_INT (n & (255 << i));
18820 output_asm_insn (instr, operands);
18821 instr = instr2;
18822 i += 6;
18827 return "";
18830 /* Return the name of a shifter operation. */
18831 static const char *
18832 arm_shift_nmem(enum rtx_code code)
18834 switch (code)
18836 case ASHIFT:
18837 return ARM_LSL_NAME;
18839 case ASHIFTRT:
18840 return "asr";
18842 case LSHIFTRT:
18843 return "lsr";
18845 case ROTATERT:
18846 return "ror";
18848 default:
18849 abort();
18853 /* Return the appropriate ARM instruction for the operation code.
18854 The returned result should not be overwritten. OP is the rtx of the
18855 operation. SHIFT_FIRST_ARG is TRUE if the first argument of the operator
18856 was shifted. */
18857 const char *
18858 arithmetic_instr (rtx op, int shift_first_arg)
18860 switch (GET_CODE (op))
18862 case PLUS:
18863 return "add";
18865 case MINUS:
18866 return shift_first_arg ? "rsb" : "sub";
18868 case IOR:
18869 return "orr";
18871 case XOR:
18872 return "eor";
18874 case AND:
18875 return "and";
18877 case ASHIFT:
18878 case ASHIFTRT:
18879 case LSHIFTRT:
18880 case ROTATERT:
18881 return arm_shift_nmem(GET_CODE(op));
18883 default:
18884 gcc_unreachable ();
18888 /* Ensure valid constant shifts and return the appropriate shift mnemonic
18889 for the operation code. The returned result should not be overwritten.
18890 OP is the rtx code of the shift.
18891 On exit, *AMOUNTP will be -1 if the shift is by a register, or a constant
18892 shift. */
18893 static const char *
18894 shift_op (rtx op, HOST_WIDE_INT *amountp)
18896 const char * mnem;
18897 enum rtx_code code = GET_CODE (op);
18899 switch (code)
18901 case ROTATE:
18902 if (!CONST_INT_P (XEXP (op, 1)))
18904 output_operand_lossage ("invalid shift operand");
18905 return NULL;
18908 code = ROTATERT;
18909 *amountp = 32 - INTVAL (XEXP (op, 1));
18910 mnem = "ror";
18911 break;
18913 case ASHIFT:
18914 case ASHIFTRT:
18915 case LSHIFTRT:
18916 case ROTATERT:
18917 mnem = arm_shift_nmem(code);
18918 if (CONST_INT_P (XEXP (op, 1)))
18920 *amountp = INTVAL (XEXP (op, 1));
18922 else if (REG_P (XEXP (op, 1)))
18924 *amountp = -1;
18925 return mnem;
18927 else
18929 output_operand_lossage ("invalid shift operand");
18930 return NULL;
18932 break;
18934 case MULT:
18935 /* We never have to worry about the amount being other than a
18936 power of 2, since this case can never be reloaded from a reg. */
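      /* For example (illustrative), a multiplication by 8 reaches this point
         and ends up being emitted as an "lsl #3" shift.  */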
18937 if (!CONST_INT_P (XEXP (op, 1)))
18939 output_operand_lossage ("invalid shift operand");
18940 return NULL;
18943 *amountp = INTVAL (XEXP (op, 1)) & 0xFFFFFFFF;
18945 /* Amount must be a power of two. */
18946 if (*amountp & (*amountp - 1))
18948 output_operand_lossage ("invalid shift operand");
18949 return NULL;
18952 *amountp = exact_log2 (*amountp);
18953 gcc_assert (IN_RANGE (*amountp, 0, 31));
18954 return ARM_LSL_NAME;
18956 default:
18957 output_operand_lossage ("invalid shift operand");
18958 return NULL;
18961 /* This is not 100% correct, but follows from the desire to merge
18962 multiplication by a power of 2 with the recognizer for a
18963 shift. >=32 is not a valid shift for "lsl", so we must try and
18964 output a shift that produces the correct arithmetical result.
18965 Using lsr #32 is identical except for the fact that the carry bit
18966 is not set correctly if we set the flags; but we never use the
18967 carry bit from such an operation, so we can ignore that. */
18968 if (code == ROTATERT)
18969 /* Rotate is just modulo 32. */
18970 *amountp &= 31;
18971 else if (*amountp != (*amountp & 31))
18973 if (code == ASHIFT)
18974 mnem = "lsr";
18975 *amountp = 32;
18978 /* Shifts of 0 are no-ops. */
18979 if (*amountp == 0)
18980 return NULL;
18982 return mnem;
18985 /* Output a .ascii pseudo-op, keeping track of lengths. This is
18986 because /bin/as is horribly restrictive. The judgement about
18987 whether or not each character is 'printable' (and can be output as
18988 is) or not (and must be printed with an octal escape) must be made
18989 with reference to the *host* character set -- the situation is
18990 similar to that discussed in the comments above pp_c_char in
18991 c-pretty-print.c. */
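/* For example (illustrative), the three bytes 'a', '"' and newline would be
   emitted as:  .ascii "a\"\012"  */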
18993 #define MAX_ASCII_LEN 51
18995 void
18996 output_ascii_pseudo_op (FILE *stream, const unsigned char *p, int len)
18998 int i;
18999 int len_so_far = 0;
19001 fputs ("\t.ascii\t\"", stream);
19003 for (i = 0; i < len; i++)
19005 int c = p[i];
19007 if (len_so_far >= MAX_ASCII_LEN)
19009 fputs ("\"\n\t.ascii\t\"", stream);
19010 len_so_far = 0;
19013 if (ISPRINT (c))
19015 if (c == '\\' || c == '\"')
19017 putc ('\\', stream);
19018 len_so_far++;
19020 putc (c, stream);
19021 len_so_far++;
19023 else
19025 fprintf (stream, "\\%03o", c);
19026 len_so_far += 4;
19030 fputs ("\"\n", stream);
19033 /* Whether a register is callee saved or not. This is necessary because high
19034 registers are marked as caller saved when optimizing for size on Thumb-1
19035 targets, despite being callee saved, in order to avoid using them. */
19036 #define callee_saved_reg_p(reg) \
19037 (!call_used_regs[reg] \
19038 || (TARGET_THUMB1 && optimize_size \
19039 && reg >= FIRST_HI_REGNUM && reg <= LAST_HI_REGNUM))
19041 /* Compute the register save mask for registers 0 through 12
19042 inclusive. This code is used by arm_compute_save_core_reg_mask (). */
19044 static unsigned long
19045 arm_compute_save_reg0_reg12_mask (void)
19047 unsigned long func_type = arm_current_func_type ();
19048 unsigned long save_reg_mask = 0;
19049 unsigned int reg;
19051 if (IS_INTERRUPT (func_type))
19053 unsigned int max_reg;
19054 /* Interrupt functions must not corrupt any registers,
19055 even call clobbered ones. If this is a leaf function
19056 we can just examine the registers used by the RTL, but
19057 otherwise we have to assume that whatever function is
19058 called might clobber anything, and so we have to save
19059 all the call-clobbered registers as well. */
19060 if (ARM_FUNC_TYPE (func_type) == ARM_FT_FIQ)
19061 /* FIQ handlers have registers r8 - r12 banked, so
19062 we only need to check r0 - r7; normal ISRs only
19063 bank r14 and r15, so we must check up to r12.
19064 r13 is the stack pointer which is always preserved,
19065 so we do not need to consider it here. */
19066 max_reg = 7;
19067 else
19068 max_reg = 12;
19070 for (reg = 0; reg <= max_reg; reg++)
19071 if (df_regs_ever_live_p (reg)
19072 || (! crtl->is_leaf && call_used_regs[reg]))
19073 save_reg_mask |= (1 << reg);
19075 /* Also save the pic base register if necessary. */
19076 if (flag_pic
19077 && !TARGET_SINGLE_PIC_BASE
19078 && arm_pic_register != INVALID_REGNUM
19079 && crtl->uses_pic_offset_table)
19080 save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
19082 else if (IS_VOLATILE(func_type))
19084 /* For noreturn functions we historically omitted register saves
19085 altogether. However this really messes up debugging. As a
19086 compromise save just the frame pointers. Combined with the link
19087 register saved elsewhere this should be sufficient to get
19088 a backtrace. */
19089 if (frame_pointer_needed)
19090 save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;
19091 if (df_regs_ever_live_p (ARM_HARD_FRAME_POINTER_REGNUM))
19092 save_reg_mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;
19093 if (df_regs_ever_live_p (THUMB_HARD_FRAME_POINTER_REGNUM))
19094 save_reg_mask |= 1 << THUMB_HARD_FRAME_POINTER_REGNUM;
19096 else
19098 /* In the normal case we only need to save those registers
19099 which are call saved and which are used by this function. */
19100 for (reg = 0; reg <= 11; reg++)
19101 if (df_regs_ever_live_p (reg) && callee_saved_reg_p (reg))
19102 save_reg_mask |= (1 << reg);
19104 /* Handle the frame pointer as a special case. */
19105 if (frame_pointer_needed)
19106 save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;
19108 /* If we aren't loading the PIC register,
19109 don't stack it even though it may be live. */
19110 if (flag_pic
19111 && !TARGET_SINGLE_PIC_BASE
19112 && arm_pic_register != INVALID_REGNUM
19113 && (df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM)
19114 || crtl->uses_pic_offset_table))
19115 save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
19117 /* The prologue will copy SP into R0, so save it. */
19118 if (IS_STACKALIGN (func_type))
19119 save_reg_mask |= 1;
19122 /* Save registers so the exception handler can modify them. */
19123 if (crtl->calls_eh_return)
19125 unsigned int i;
19127 for (i = 0; ; i++)
19129 reg = EH_RETURN_DATA_REGNO (i);
19130 if (reg == INVALID_REGNUM)
19131 break;
19132 save_reg_mask |= 1 << reg;
19136 return save_reg_mask;
19139 /* Return true if r3 is live at the start of the function. */
19141 static bool
19142 arm_r3_live_at_start_p (void)
19144 /* Just look at cfg info, which is still close enough to correct at this
19145 point. This gives false positives for broken functions that might use
19146 uninitialized data that happens to be allocated in r3, but who cares? */
19147 return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun)), 3);
19150 /* Compute the number of bytes used to store the static chain register on the
19151 stack, above the stack frame. We need to know this accurately to get the
19152 alignment of the rest of the stack frame correct. */
19154 static int
19155 arm_compute_static_chain_stack_bytes (void)
19157 /* See the defining assertion in arm_expand_prologue. */
19158 if (IS_NESTED (arm_current_func_type ())
19159 && ((TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
19160 || (flag_stack_check == STATIC_BUILTIN_STACK_CHECK
19161 && !df_regs_ever_live_p (LR_REGNUM)))
19162 && arm_r3_live_at_start_p ()
19163 && crtl->args.pretend_args_size == 0)
19164 return 4;
19166 return 0;
19169 /* Compute a bit mask of which core registers need to be
19170 saved on the stack for the current function.
19171 This is used by arm_compute_frame_layout, which may add extra registers. */
19173 static unsigned long
19174 arm_compute_save_core_reg_mask (void)
19176 unsigned int save_reg_mask = 0;
19177 unsigned long func_type = arm_current_func_type ();
19178 unsigned int reg;
19180 if (IS_NAKED (func_type))
19181 /* This should never really happen. */
19182 return 0;
19184 /* If we are creating a stack frame, then we must save the frame pointer,
19185 IP (which will hold the old stack pointer), LR and the PC. */
19186 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
19187 save_reg_mask |=
19188 (1 << ARM_HARD_FRAME_POINTER_REGNUM)
19189 | (1 << IP_REGNUM)
19190 | (1 << LR_REGNUM)
19191 | (1 << PC_REGNUM);
19193 save_reg_mask |= arm_compute_save_reg0_reg12_mask ();
19195 /* Decide if we need to save the link register.
19196 Interrupt routines have their own banked link register,
19197 so they never need to save it.
19198 Otherwise if we do not use the link register we do not need to save
19199 it. If we are pushing other registers onto the stack however, we
19200 can save an instruction in the epilogue by pushing the link register
19201 now and then popping it back into the PC. This incurs extra memory
19202 accesses though, so we only do it when optimizing for size, and only
19203 if we know that we will not need a fancy return sequence. */
19204 if (df_regs_ever_live_p (LR_REGNUM)
19205 || (save_reg_mask
19206 && optimize_size
19207 && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
19208 && !crtl->tail_call_emit
19209 && !crtl->calls_eh_return))
19210 save_reg_mask |= 1 << LR_REGNUM;
19212 if (cfun->machine->lr_save_eliminated)
19213 save_reg_mask &= ~ (1 << LR_REGNUM);
19215 if (TARGET_REALLY_IWMMXT
19216 && ((bit_count (save_reg_mask)
19217 + ARM_NUM_INTS (crtl->args.pretend_args_size +
19218 arm_compute_static_chain_stack_bytes())
19219 ) % 2) != 0)
19221 /* The total number of registers that are going to be pushed
19222 onto the stack is odd. We need to ensure that the stack
19223 is 64-bit aligned before we start to save iWMMXt registers,
19224 and also before we start to create locals. (A local variable
19225 might be a double or long long which we will load/store using
19226 an iWMMXt instruction). Therefore we need to push another
19227 ARM register, so that the stack will be 64-bit aligned. We
19228 try to avoid using the arg registers (r0 - r3) as they might be
19229 used to pass values in a tail call. */
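/* For instance, with {r4, r5, lr} already in the mask and no pretend
arguments, the count is odd, so r6 (the first free register in the
r4-r12 range) is added to the mask as padding.  */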
19230 for (reg = 4; reg <= 12; reg++)
19231 if ((save_reg_mask & (1 << reg)) == 0)
19232 break;
19234 if (reg <= 12)
19235 save_reg_mask |= (1 << reg);
19236 else
19238 cfun->machine->sibcall_blocked = 1;
19239 save_reg_mask |= (1 << 3);
19243 /* We may need to push an additional register for use initializing the
19244 PIC base register. */
19245 if (TARGET_THUMB2 && IS_NESTED (func_type) && flag_pic
19246 && (save_reg_mask & THUMB2_WORK_REGS) == 0)
19248 reg = thumb_find_work_register (1 << 4);
19249 if (!call_used_regs[reg])
19250 save_reg_mask |= (1 << reg);
19253 return save_reg_mask;
19256 /* Compute a bit mask of which core registers need to be
19257 saved on the stack for the current function. */
19258 static unsigned long
19259 thumb1_compute_save_core_reg_mask (void)
19261 unsigned long mask;
19262 unsigned reg;
19264 mask = 0;
19265 for (reg = 0; reg < 12; reg ++)
19266 if (df_regs_ever_live_p (reg) && callee_saved_reg_p (reg))
19267 mask |= 1 << reg;
19269 /* Handle the frame pointer as a special case. */
19270 if (frame_pointer_needed)
19271 mask |= 1 << HARD_FRAME_POINTER_REGNUM;
19273 if (flag_pic
19274 && !TARGET_SINGLE_PIC_BASE
19275 && arm_pic_register != INVALID_REGNUM
19276 && crtl->uses_pic_offset_table)
19277 mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
19279 /* See if we might need r11 for calls to _interwork_r11_call_via_rN(). */
19280 if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
19281 mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;
19283 /* LR will also be pushed if any lo regs are pushed. */
19284 if (mask & 0xff || thumb_force_lr_save ())
19285 mask |= (1 << LR_REGNUM);
19287 /* Make sure we have a low work register if we need one.
19288 We will need one if we are going to push a high register,
19289 but we are not currently intending to push a low register. */
19290 if ((mask & 0xff) == 0
19291 && ((mask & 0x0f00) || TARGET_BACKTRACE))
19293 /* Use thumb_find_work_register to choose which register
19294 we will use. If the register is live then we will
19295 have to push it. Use LAST_LO_REGNUM as our fallback
19296 choice for the register to select. */
19297 reg = thumb_find_work_register (1 << LAST_LO_REGNUM);
19298 /* Make sure the register returned by thumb_find_work_register is
19299 not part of the return value. */
19300 if (reg * UNITS_PER_WORD <= (unsigned) arm_size_return_regs ())
19301 reg = LAST_LO_REGNUM;
19303 if (callee_saved_reg_p (reg))
19304 mask |= 1 << reg;
19307 /* The 504 below is 8 bytes less than 512 because there are two possible
19308 alignment words.  We can't tell here if they will be present or not, so we
19309 have to play it safe and assume that they are. */
19310 if ((CALLER_INTERWORKING_SLOT_SIZE +
19311 ROUND_UP_WORD (get_frame_size ()) +
19312 crtl->outgoing_args_size) >= 504)
19314 /* This is the same as the code in thumb1_expand_prologue() which
19315 determines which register to use for stack decrement. */
19316 for (reg = LAST_ARG_REGNUM + 1; reg <= LAST_LO_REGNUM; reg++)
19317 if (mask & (1 << reg))
19318 break;
19320 if (reg > LAST_LO_REGNUM)
19322 /* Make sure we have a register available for stack decrement. */
19323 mask |= 1 << LAST_LO_REGNUM;
19327 return mask;
19331 /* Return the number of bytes required to save VFP registers. */
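/* As an illustration of the ARM10 VFPr1 workaround applied below: on a
pre-ARMv6 core, a contiguous run of exactly two call-saved D registers is
padded to three, so 24 rather than 16 bytes are reserved for it.  */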
19332 static int
19333 arm_get_vfp_saved_size (void)
19335 unsigned int regno;
19336 int count;
19337 int saved;
19339 saved = 0;
19340 /* Space for saved VFP registers. */
19341 if (TARGET_HARD_FLOAT)
19343 count = 0;
19344 for (regno = FIRST_VFP_REGNUM;
19345 regno < LAST_VFP_REGNUM;
19346 regno += 2)
19348 if ((!df_regs_ever_live_p (regno) || call_used_regs[regno])
19349 && (!df_regs_ever_live_p (regno + 1) || call_used_regs[regno + 1]))
19351 if (count > 0)
19353 /* Workaround ARM10 VFPr1 bug. */
19354 if (count == 2 && !arm_arch6)
19355 count++;
19356 saved += count * 8;
19358 count = 0;
19360 else
19361 count++;
19363 if (count > 0)
19365 if (count == 2 && !arm_arch6)
19366 count++;
19367 saved += count * 8;
19370 return saved;
19374 /* Generate a function exit sequence. If REALLY_RETURN is false, then do
19375 everything bar the final return instruction. If simple_return is true,
19376 then do not output epilogue, because it has already been emitted in RTL. */
19377 const char *
19378 output_return_instruction (rtx operand, bool really_return, bool reverse,
19379 bool simple_return)
19381 char conditional[10];
19382 char instr[100];
19383 unsigned reg;
19384 unsigned long live_regs_mask;
19385 unsigned long func_type;
19386 arm_stack_offsets *offsets;
19388 func_type = arm_current_func_type ();
19390 if (IS_NAKED (func_type))
19391 return "";
19393 if (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN)
19395 /* If this function was declared non-returning, and we have
19396 found a tail call, then we have to trust that the called
19397 function won't return. */
19398 if (really_return)
19400 rtx ops[2];
19402 /* Otherwise, trap an attempted return by aborting. */
19403 ops[0] = operand;
19404 ops[1] = gen_rtx_SYMBOL_REF (Pmode, NEED_PLT_RELOC ? "abort(PLT)"
19405 : "abort");
19406 assemble_external_libcall (ops[1]);
19407 output_asm_insn (reverse ? "bl%D0\t%a1" : "bl%d0\t%a1", ops);
19410 return "";
19413 gcc_assert (!cfun->calls_alloca || really_return);
19415 sprintf (conditional, "%%?%%%c0", reverse ? 'D' : 'd');
19417 cfun->machine->return_used_this_function = 1;
19419 offsets = arm_get_frame_offsets ();
19420 live_regs_mask = offsets->saved_regs_mask;
19422 if (!simple_return && live_regs_mask)
19424 const char * return_reg;
19426 /* If we do not have any special requirements for function exit
19427 (e.g. interworking) then we can load the return address
19428 directly into the PC. Otherwise we must load it into LR. */
19429 if (really_return
19430 && !IS_CMSE_ENTRY (func_type)
19431 && (IS_INTERRUPT (func_type) || !TARGET_INTERWORK))
19432 return_reg = reg_names[PC_REGNUM];
19433 else
19434 return_reg = reg_names[LR_REGNUM];
19436 if ((live_regs_mask & (1 << IP_REGNUM)) == (1 << IP_REGNUM))
19438 /* There are three possible reasons for the IP register
19439 being saved. 1) a stack frame was created, in which case
19440 IP contains the old stack pointer, or 2) an ISR routine
19441 corrupted it, or 3) it was saved to align the stack on
19442 iWMMXt. In case 1, restore IP into SP, otherwise just
19443 restore IP. */
19444 if (frame_pointer_needed)
19446 live_regs_mask &= ~ (1 << IP_REGNUM);
19447 live_regs_mask |= (1 << SP_REGNUM);
19449 else
19450 gcc_assert (IS_INTERRUPT (func_type) || TARGET_REALLY_IWMMXT);
19453 /* On some ARM architectures it is faster to use LDR rather than
19454 LDM to load a single register. On other architectures, the
19455 cost is the same. In 26 bit mode, or for exception handlers,
19456 we have to use LDM to load the PC so that the CPSR is also
19457 restored. */
19458 for (reg = 0; reg <= LAST_ARM_REGNUM; reg++)
19459 if (live_regs_mask == (1U << reg))
19460 break;
19462 if (reg <= LAST_ARM_REGNUM
19463 && (reg != LR_REGNUM
19464 || ! really_return
19465 || ! IS_INTERRUPT (func_type)))
19467 sprintf (instr, "ldr%s\t%%|%s, [%%|sp], #4", conditional,
19468 (reg == LR_REGNUM) ? return_reg : reg_names[reg]);
19470 else
19472 char *p;
19473 int first = 1;
19475 /* Generate the load multiple instruction to restore the
19476 registers. Note we can get here, even if
19477 frame_pointer_needed is true, but only if sp already
19478 points to the base of the saved core registers. */
19479 if (live_regs_mask & (1 << SP_REGNUM))
19481 unsigned HOST_WIDE_INT stack_adjust;
19483 stack_adjust = offsets->outgoing_args - offsets->saved_regs;
19484 gcc_assert (stack_adjust == 0 || stack_adjust == 4);
19486 if (stack_adjust && arm_arch5 && TARGET_ARM)
19487 sprintf (instr, "ldmib%s\t%%|sp, {", conditional);
19488 else
19490 /* If we can't use ldmib (SA110 bug),
19491 then try to pop r3 instead. */
19492 if (stack_adjust)
19493 live_regs_mask |= 1 << 3;
19495 sprintf (instr, "ldmfd%s\t%%|sp, {", conditional);
19498 /* For interrupt returns we have to use an LDM rather than
19499 a POP so that we can use the exception return variant. */
19500 else if (IS_INTERRUPT (func_type))
19501 sprintf (instr, "ldmfd%s\t%%|sp!, {", conditional);
19502 else
19503 sprintf (instr, "pop%s\t{", conditional);
19505 p = instr + strlen (instr);
19507 for (reg = 0; reg <= SP_REGNUM; reg++)
19508 if (live_regs_mask & (1 << reg))
19510 int l = strlen (reg_names[reg]);
19512 if (first)
19513 first = 0;
19514 else
19516 memcpy (p, ", ", 2);
19517 p += 2;
19520 memcpy (p, "%|", 2);
19521 memcpy (p + 2, reg_names[reg], l);
19522 p += l + 2;
19525 if (live_regs_mask & (1 << LR_REGNUM))
19527 sprintf (p, "%s%%|%s}", first ? "" : ", ", return_reg);
19528 /* If returning from an interrupt, restore the CPSR. */
19529 if (IS_INTERRUPT (func_type))
19530 strcat (p, "^");
19532 else
19533 strcpy (p, "}");
19536 output_asm_insn (instr, & operand);
19538 /* See if we need to generate an extra instruction to
19539 perform the actual function return. */
19540 if (really_return
19541 && func_type != ARM_FT_INTERWORKED
19542 && (live_regs_mask & (1 << LR_REGNUM)) != 0)
19544 /* The return has already been handled
19545 by loading the LR into the PC. */
19546 return "";
19550 if (really_return)
19552 switch ((int) ARM_FUNC_TYPE (func_type))
19554 case ARM_FT_ISR:
19555 case ARM_FT_FIQ:
19556 /* ??? This is wrong for unified assembly syntax. */
19557 sprintf (instr, "sub%ss\t%%|pc, %%|lr, #4", conditional);
19558 break;
19560 case ARM_FT_INTERWORKED:
19561 gcc_assert (arm_arch5 || arm_arch4t);
19562 sprintf (instr, "bx%s\t%%|lr", conditional);
19563 break;
19565 case ARM_FT_EXCEPTION:
19566 /* ??? This is wrong for unified assembly syntax. */
19567 sprintf (instr, "mov%ss\t%%|pc, %%|lr", conditional);
19568 break;
19570 default:
19571 if (IS_CMSE_ENTRY (func_type))
19573 /* Check if we have to clear the 'GE bits', which are only used if
19574 parallel add and subtraction instructions are available.  */
19575 if (TARGET_INT_SIMD)
19576 snprintf (instr, sizeof (instr),
19577 "msr%s\tAPSR_nzcvqg, %%|lr", conditional);
19578 else
19579 snprintf (instr, sizeof (instr),
19580 "msr%s\tAPSR_nzcvq, %%|lr", conditional);
19582 output_asm_insn (instr, & operand);
19583 if (TARGET_HARD_FLOAT && !TARGET_THUMB1)
19585 /* Clear the cumulative exception-status bits (0-4,7) and the
19586 condition code bits (28-31) of the FPSCR. We need to
19587 remember to clear the first scratch register used (IP) and
19588 save and restore the second (r4). */
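/* The constant built below is 0x0FFFFF60: movw #65376 gives the low half
0xFF60 and movt #4095 gives the high half 0x0FFF.  ANDing the FPSCR copy
held in IP with it clears exactly bits 0-4, 7 and 28-31 while leaving the
other bits untouched.  */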
19589 snprintf (instr, sizeof (instr), "push\t{%%|r4}");
19590 output_asm_insn (instr, & operand);
19591 snprintf (instr, sizeof (instr), "vmrs\t%%|ip, fpscr");
19592 output_asm_insn (instr, & operand);
19593 snprintf (instr, sizeof (instr), "movw\t%%|r4, #65376");
19594 output_asm_insn (instr, & operand);
19595 snprintf (instr, sizeof (instr), "movt\t%%|r4, #4095");
19596 output_asm_insn (instr, & operand);
19597 snprintf (instr, sizeof (instr), "and\t%%|ip, %%|r4");
19598 output_asm_insn (instr, & operand);
19599 snprintf (instr, sizeof (instr), "vmsr\tfpscr, %%|ip");
19600 output_asm_insn (instr, & operand);
19601 snprintf (instr, sizeof (instr), "pop\t{%%|r4}");
19602 output_asm_insn (instr, & operand);
19603 snprintf (instr, sizeof (instr), "mov\t%%|ip, %%|lr");
19604 output_asm_insn (instr, & operand);
19606 snprintf (instr, sizeof (instr), "bxns\t%%|lr");
19608 /* Use bx if it's available. */
19609 else if (arm_arch5 || arm_arch4t)
19610 sprintf (instr, "bx%s\t%%|lr", conditional);
19611 else
19612 sprintf (instr, "mov%s\t%%|pc, %%|lr", conditional);
19613 break;
19616 output_asm_insn (instr, & operand);
19619 return "";
19622 /* Output in FILE asm statements needed to declare the NAME of the function
19623 defined by its DECL node. */
19625 void
19626 arm_asm_declare_function_name (FILE *file, const char *name, tree decl)
19628 size_t cmse_name_len;
19629 char *cmse_name = 0;
19630 char cmse_prefix[] = "__acle_se_";
19632 /* When compiling with ARMv8-M Security Extensions enabled, we should print an
19633 extra function label for each function with the 'cmse_nonsecure_entry'
19634 attribute. This extra function label should be prepended with
19635 '__acle_se_', telling the linker that it needs to create secure gateway
19636 veneers for this function. */
19637 if (use_cmse && lookup_attribute ("cmse_nonsecure_entry",
19638 DECL_ATTRIBUTES (decl)))
19640 cmse_name_len = sizeof (cmse_prefix) + strlen (name);
19641 cmse_name = XALLOCAVEC (char, cmse_name_len);
19642 snprintf (cmse_name, cmse_name_len, "%s%s", cmse_prefix, name);
19643 targetm.asm_out.globalize_label (file, cmse_name);
19645 ARM_DECLARE_FUNCTION_NAME (file, cmse_name, decl);
19646 ASM_OUTPUT_TYPE_DIRECTIVE (file, cmse_name, "function");
19649 ARM_DECLARE_FUNCTION_NAME (file, name, decl);
19650 ASM_OUTPUT_TYPE_DIRECTIVE (file, name, "function");
19651 ASM_DECLARE_RESULT (file, DECL_RESULT (decl));
19652 ASM_OUTPUT_LABEL (file, name);
19654 if (cmse_name)
19655 ASM_OUTPUT_LABEL (file, cmse_name);
19657 ARM_OUTPUT_FN_UNWIND (file, TRUE);
19660 /* Write the function name into the code section, directly preceding
19661 the function prologue.
19663 Code will be output similar to this:
19665 .ascii "arm_poke_function_name", 0
19666 .align
19668 .word 0xff000000 + (t1 - t0)
19669 arm_poke_function_name
19670 mov ip, sp
19671 stmfd sp!, {fp, ip, lr, pc}
19672 sub fp, ip, #4
19674 When performing a stack backtrace, code can inspect the value
19675 of 'pc' stored at 'fp' + 0. If the trace function then looks
19676 at location pc - 12 and the top 8 bits are set, then we know
19677 that there is a function name embedded immediately preceding this
19678 location, and that its length is ((pc[-3]) & ~0xff000000).
19680 We assume that pc is declared as a pointer to an unsigned long.
19682 It is of no benefit to output the function name if we are assembling
19683 a leaf function. These function types will not contain a stack
19684 backtrace structure, so it is not possible to determine the
19685 function name. */
19686 void
19687 arm_poke_function_name (FILE *stream, const char *name)
19689 unsigned long alignlength;
19690 unsigned long length;
19691 rtx x;
19693 length = strlen (name) + 1;
19694 alignlength = ROUND_UP_WORD (length);
19696 ASM_OUTPUT_ASCII (stream, name, length);
19697 ASM_OUTPUT_ALIGN (stream, 2);
19698 x = GEN_INT ((unsigned HOST_WIDE_INT) 0xff000000 + alignlength);
19699 assemble_aligned_integer (UNITS_PER_WORD, x);
19702 /* Place some comments into the assembler stream
19703 describing the current function. */
19704 static void
19705 arm_output_function_prologue (FILE *f)
19707 unsigned long func_type;
19709 /* Sanity check. */
19710 gcc_assert (!arm_ccfsm_state && !arm_target_insn);
19712 func_type = arm_current_func_type ();
19714 switch ((int) ARM_FUNC_TYPE (func_type))
19716 default:
19717 case ARM_FT_NORMAL:
19718 break;
19719 case ARM_FT_INTERWORKED:
19720 asm_fprintf (f, "\t%@ Function supports interworking.\n");
19721 break;
19722 case ARM_FT_ISR:
19723 asm_fprintf (f, "\t%@ Interrupt Service Routine.\n");
19724 break;
19725 case ARM_FT_FIQ:
19726 asm_fprintf (f, "\t%@ Fast Interrupt Service Routine.\n");
19727 break;
19728 case ARM_FT_EXCEPTION:
19729 asm_fprintf (f, "\t%@ ARM Exception Handler.\n");
19730 break;
19733 if (IS_NAKED (func_type))
19734 asm_fprintf (f, "\t%@ Naked Function: prologue and epilogue provided by programmer.\n");
19736 if (IS_VOLATILE (func_type))
19737 asm_fprintf (f, "\t%@ Volatile: function does not return.\n");
19739 if (IS_NESTED (func_type))
19740 asm_fprintf (f, "\t%@ Nested: function declared inside another function.\n");
19741 if (IS_STACKALIGN (func_type))
19742 asm_fprintf (f, "\t%@ Stack Align: May be called with mis-aligned SP.\n");
19743 if (IS_CMSE_ENTRY (func_type))
19744 asm_fprintf (f, "\t%@ Non-secure entry function: called from non-secure code.\n");
19746 asm_fprintf (f, "\t%@ args = %d, pretend = %d, frame = %wd\n",
19747 crtl->args.size,
19748 crtl->args.pretend_args_size,
19749 (HOST_WIDE_INT) get_frame_size ());
19751 asm_fprintf (f, "\t%@ frame_needed = %d, uses_anonymous_args = %d\n",
19752 frame_pointer_needed,
19753 cfun->machine->uses_anonymous_args);
19755 if (cfun->machine->lr_save_eliminated)
19756 asm_fprintf (f, "\t%@ link register save eliminated.\n");
19758 if (crtl->calls_eh_return)
19759 asm_fprintf (f, "\t@ Calls __builtin_eh_return.\n");
19763 static void
19764 arm_output_function_epilogue (FILE *)
19766 arm_stack_offsets *offsets;
19768 if (TARGET_THUMB1)
19770 int regno;
19772 /* Emit any call-via-reg trampolines that are needed for v4t support
19773 of call_reg and call_value_reg type insns. */
19774 for (regno = 0; regno < LR_REGNUM; regno++)
19776 rtx label = cfun->machine->call_via[regno];
19778 if (label != NULL)
19780 switch_to_section (function_section (current_function_decl));
19781 targetm.asm_out.internal_label (asm_out_file, "L",
19782 CODE_LABEL_NUMBER (label));
19783 asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
19787 /* ??? Probably not safe to set this here, since it assumes that a
19788 function will be emitted as assembly immediately after we generate
19789 RTL for it. This does not happen for inline functions. */
19790 cfun->machine->return_used_this_function = 0;
19792 else /* TARGET_32BIT */
19794 /* We need to take into account any stack-frame rounding. */
19795 offsets = arm_get_frame_offsets ();
19797 gcc_assert (!use_return_insn (FALSE, NULL)
19798 || (cfun->machine->return_used_this_function != 0)
19799 || offsets->saved_regs == offsets->outgoing_args
19800 || frame_pointer_needed);
19804 /* Generate and emit a sequence of insns equivalent to PUSH, but using
19805 STR and STRD.  If an even number of registers is being pushed, one
19806 STRD pattern is created for each register pair.  If an
19807 odd number of registers is pushed, an initial STR is emitted, followed by
19808 as many STRD instructions as are needed.  This works best when the
19809 stack is initially 64-bit aligned (the normal case), since it
19810 ensures that each STRD is also 64-bit aligned. */
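/* For example, pushing {r4, r5, r6} (an odd count) produces, in effect,

	str	r4, [sp, #-12]!
	strd	r5, r6, [sp, #4]

so the doubleword store stays 64-bit aligned.  */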
19811 static void
19812 thumb2_emit_strd_push (unsigned long saved_regs_mask)
19814 int num_regs = 0;
19815 int i;
19816 int regno;
19817 rtx par = NULL_RTX;
19818 rtx dwarf = NULL_RTX;
19819 rtx tmp;
19820 bool first = true;
19822 num_regs = bit_count (saved_regs_mask);
19824 /* Must be at least one register to save, and can't save SP or PC. */
19825 gcc_assert (num_regs > 0 && num_regs <= 14);
19826 gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
19827 gcc_assert (!(saved_regs_mask & (1 << PC_REGNUM)));
19829 /* Create sequence for DWARF info. All the frame-related data for
19830 debugging is held in this wrapper. */
19831 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_regs + 1));
19833 /* Describe the stack adjustment. */
19834 tmp = gen_rtx_SET (stack_pointer_rtx,
19835 plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
19836 RTX_FRAME_RELATED_P (tmp) = 1;
19837 XVECEXP (dwarf, 0, 0) = tmp;
19839 /* Find the first register. */
19840 for (regno = 0; (saved_regs_mask & (1 << regno)) == 0; regno++)
19843 i = 0;
19845 /* If there's an odd number of registers to push, start off by
19846 pushing a single register. This ensures that subsequent strd
19847 operations are dword aligned (assuming that SP was originally
19848 64-bit aligned). */
19849 if ((num_regs & 1) != 0)
19851 rtx reg, mem, insn;
19853 reg = gen_rtx_REG (SImode, regno);
19854 if (num_regs == 1)
19855 mem = gen_frame_mem (Pmode, gen_rtx_PRE_DEC (Pmode,
19856 stack_pointer_rtx));
19857 else
19858 mem = gen_frame_mem (Pmode,
19859 gen_rtx_PRE_MODIFY
19860 (Pmode, stack_pointer_rtx,
19861 plus_constant (Pmode, stack_pointer_rtx,
19862 -4 * num_regs)));
19864 tmp = gen_rtx_SET (mem, reg);
19865 RTX_FRAME_RELATED_P (tmp) = 1;
19866 insn = emit_insn (tmp);
19867 RTX_FRAME_RELATED_P (insn) = 1;
19868 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
19869 tmp = gen_rtx_SET (gen_frame_mem (Pmode, stack_pointer_rtx), reg);
19870 RTX_FRAME_RELATED_P (tmp) = 1;
19871 i++;
19872 regno++;
19873 XVECEXP (dwarf, 0, i) = tmp;
19874 first = false;
19877 while (i < num_regs)
19878 if (saved_regs_mask & (1 << regno))
19880 rtx reg1, reg2, mem1, mem2;
19881 rtx tmp0, tmp1, tmp2;
19882 int regno2;
19884 /* Find the register to pair with this one. */
19885 for (regno2 = regno + 1; (saved_regs_mask & (1 << regno2)) == 0;
19886 regno2++)
19889 reg1 = gen_rtx_REG (SImode, regno);
19890 reg2 = gen_rtx_REG (SImode, regno2);
19892 if (first)
19894 rtx insn;
19896 first = false;
19897 mem1 = gen_frame_mem (Pmode, plus_constant (Pmode,
19898 stack_pointer_rtx,
19899 -4 * num_regs));
19900 mem2 = gen_frame_mem (Pmode, plus_constant (Pmode,
19901 stack_pointer_rtx,
19902 -4 * (num_regs - 1)));
19903 tmp0 = gen_rtx_SET (stack_pointer_rtx,
19904 plus_constant (Pmode, stack_pointer_rtx,
19905 -4 * (num_regs)));
19906 tmp1 = gen_rtx_SET (mem1, reg1);
19907 tmp2 = gen_rtx_SET (mem2, reg2);
19908 RTX_FRAME_RELATED_P (tmp0) = 1;
19909 RTX_FRAME_RELATED_P (tmp1) = 1;
19910 RTX_FRAME_RELATED_P (tmp2) = 1;
19911 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (3));
19912 XVECEXP (par, 0, 0) = tmp0;
19913 XVECEXP (par, 0, 1) = tmp1;
19914 XVECEXP (par, 0, 2) = tmp2;
19915 insn = emit_insn (par);
19916 RTX_FRAME_RELATED_P (insn) = 1;
19917 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
19919 else
19921 mem1 = gen_frame_mem (Pmode, plus_constant (Pmode,
19922 stack_pointer_rtx,
19923 4 * i));
19924 mem2 = gen_frame_mem (Pmode, plus_constant (Pmode,
19925 stack_pointer_rtx,
19926 4 * (i + 1)));
19927 tmp1 = gen_rtx_SET (mem1, reg1);
19928 tmp2 = gen_rtx_SET (mem2, reg2);
19929 RTX_FRAME_RELATED_P (tmp1) = 1;
19930 RTX_FRAME_RELATED_P (tmp2) = 1;
19931 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
19932 XVECEXP (par, 0, 0) = tmp1;
19933 XVECEXP (par, 0, 1) = tmp2;
19934 emit_insn (par);
19937 /* Create unwind information. This is an approximation. */
19938 tmp1 = gen_rtx_SET (gen_frame_mem (Pmode,
19939 plus_constant (Pmode,
19940 stack_pointer_rtx,
19941 4 * i)),
19942 reg1);
19943 tmp2 = gen_rtx_SET (gen_frame_mem (Pmode,
19944 plus_constant (Pmode,
19945 stack_pointer_rtx,
19946 4 * (i + 1))),
19947 reg2);
19949 RTX_FRAME_RELATED_P (tmp1) = 1;
19950 RTX_FRAME_RELATED_P (tmp2) = 1;
19951 XVECEXP (dwarf, 0, i + 1) = tmp1;
19952 XVECEXP (dwarf, 0, i + 2) = tmp2;
19953 i += 2;
19954 regno = regno2 + 1;
19956 else
19957 regno++;
19959 return;
19962 /* STRD in ARM mode requires consecutive registers. This function emits STRD
19963 whenever possible; otherwise it emits single-word stores.  The first store
19964 also allocates stack space for all saved registers, using writeback with
19965 post-addressing mode. All other stores use offset addressing. If no STRD
19966 can be emitted, this function emits a sequence of single-word stores,
19967 and not an STM as before, because single-word stores provide more freedom
19968 in scheduling and can be turned into an STM by peephole optimizations. */
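/* For example, pushing {r4, r5, r7} produces, in effect,

	strd	r4, r5, [sp, #-12]!
	str	r7, [sp, #8]

with the first store performing the whole stack allocation.  */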
19969 static void
19970 arm_emit_strd_push (unsigned long saved_regs_mask)
19972 int num_regs = 0;
19973 int i, j, dwarf_index = 0;
19974 int offset = 0;
19975 rtx dwarf = NULL_RTX;
19976 rtx insn = NULL_RTX;
19977 rtx tmp, mem;
19979 /* TODO: More efficient code can be emitted by changing the
19980 layout, e.g., first push all pairs that can use STRD to keep the
19981 stack aligned, and then push all other registers. */
19982 for (i = 0; i <= LAST_ARM_REGNUM; i++)
19983 if (saved_regs_mask & (1 << i))
19984 num_regs++;
19986 gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
19987 gcc_assert (!(saved_regs_mask & (1 << PC_REGNUM)));
19988 gcc_assert (num_regs > 0);
19990 /* Create sequence for DWARF info. */
19991 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_regs + 1));
19993 /* For dwarf info, we generate explicit stack update. */
19994 tmp = gen_rtx_SET (stack_pointer_rtx,
19995 plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
19996 RTX_FRAME_RELATED_P (tmp) = 1;
19997 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
19999 /* Save registers. */
20000 offset = - 4 * num_regs;
20001 j = 0;
20002 while (j <= LAST_ARM_REGNUM)
20003 if (saved_regs_mask & (1 << j))
20005 if ((j % 2 == 0)
20006 && (saved_regs_mask & (1 << (j + 1))))
20008 /* The current register and the next register form a register pair
20009 for which STRD can be generated. */
20010 if (offset < 0)
20012 /* Allocate stack space for all saved registers. */
20013 tmp = plus_constant (Pmode, stack_pointer_rtx, offset);
20014 tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
20015 mem = gen_frame_mem (DImode, tmp);
20016 offset = 0;
20018 else if (offset > 0)
20019 mem = gen_frame_mem (DImode,
20020 plus_constant (Pmode,
20021 stack_pointer_rtx,
20022 offset));
20023 else
20024 mem = gen_frame_mem (DImode, stack_pointer_rtx);
20026 tmp = gen_rtx_SET (mem, gen_rtx_REG (DImode, j));
20027 RTX_FRAME_RELATED_P (tmp) = 1;
20028 tmp = emit_insn (tmp);
20030 /* Record the first store insn. */
20031 if (dwarf_index == 1)
20032 insn = tmp;
20034 /* Generate dwarf info. */
20035 mem = gen_frame_mem (SImode,
20036 plus_constant (Pmode,
20037 stack_pointer_rtx,
20038 offset));
20039 tmp = gen_rtx_SET (mem, gen_rtx_REG (SImode, j));
20040 RTX_FRAME_RELATED_P (tmp) = 1;
20041 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
20043 mem = gen_frame_mem (SImode,
20044 plus_constant (Pmode,
20045 stack_pointer_rtx,
20046 offset + 4));
20047 tmp = gen_rtx_SET (mem, gen_rtx_REG (SImode, j + 1));
20048 RTX_FRAME_RELATED_P (tmp) = 1;
20049 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
20051 offset += 8;
20052 j += 2;
20054 else
20056 /* Emit a single word store. */
20057 if (offset < 0)
20059 /* Allocate stack space for all saved registers. */
20060 tmp = plus_constant (Pmode, stack_pointer_rtx, offset);
20061 tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
20062 mem = gen_frame_mem (SImode, tmp);
20063 offset = 0;
20065 else if (offset > 0)
20066 mem = gen_frame_mem (SImode,
20067 plus_constant (Pmode,
20068 stack_pointer_rtx,
20069 offset));
20070 else
20071 mem = gen_frame_mem (SImode, stack_pointer_rtx);
20073 tmp = gen_rtx_SET (mem, gen_rtx_REG (SImode, j));
20074 RTX_FRAME_RELATED_P (tmp) = 1;
20075 tmp = emit_insn (tmp);
20077 /* Record the first store insn. */
20078 if (dwarf_index == 1)
20079 insn = tmp;
20081 /* Generate dwarf info. */
20082 mem = gen_frame_mem (SImode,
20083 plus_constant(Pmode,
20084 stack_pointer_rtx,
20085 offset));
20086 tmp = gen_rtx_SET (mem, gen_rtx_REG (SImode, j));
20087 RTX_FRAME_RELATED_P (tmp) = 1;
20088 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
20090 offset += 4;
20091 j += 1;
20094 else
20095 j++;
20097 /* Attach dwarf info to the first insn we generate. */
20098 gcc_assert (insn != NULL_RTX);
20099 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
20100 RTX_FRAME_RELATED_P (insn) = 1;
20103 /* Generate and emit an insn that we will recognize as a push_multi.
20104 Unfortunately, since this insn does not reflect very well the actual
20105 semantics of the operation, we need to annotate the insn for the benefit
20106 of DWARF2 frame unwind information. DWARF_REGS_MASK is a subset of
20107 MASK for registers that should be annotated for DWARF2 frame unwind
20108 information. */
20109 static rtx
20110 emit_multi_reg_push (unsigned long mask, unsigned long dwarf_regs_mask)
20112 int num_regs = 0;
20113 int num_dwarf_regs = 0;
20114 int i, j;
20115 rtx par;
20116 rtx dwarf;
20117 int dwarf_par_index;
20118 rtx tmp, reg;
20120 /* We don't record the PC in the dwarf frame information. */
20121 dwarf_regs_mask &= ~(1 << PC_REGNUM);
20123 for (i = 0; i <= LAST_ARM_REGNUM; i++)
20125 if (mask & (1 << i))
20126 num_regs++;
20127 if (dwarf_regs_mask & (1 << i))
20128 num_dwarf_regs++;
20131 gcc_assert (num_regs && num_regs <= 16);
20132 gcc_assert ((dwarf_regs_mask & ~mask) == 0);
20134 /* For the body of the insn we are going to generate an UNSPEC in
20135 parallel with several USEs. This allows the insn to be recognized
20136 by the push_multi pattern in the arm.md file.
20138 The body of the insn looks something like this:
20140 (parallel [
20141 (set (mem:BLK (pre_modify:SI (reg:SI sp)
20142 (const_int:SI <num>)))
20143 (unspec:BLK [(reg:SI r4)] UNSPEC_PUSH_MULT))
20144 (use (reg:SI XX))
20145 (use (reg:SI YY))
20149 For the frame note however, we try to be more explicit and actually
20150 show each register being stored into the stack frame, plus a (single)
20151 decrement of the stack pointer. We do it this way in order to be
20152 friendly to the stack unwinding code, which only wants to see a single
20153 stack decrement per instruction. The RTL we generate for the note looks
20154 something like this:
20156 (sequence [
20157 (set (reg:SI sp) (plus:SI (reg:SI sp) (const_int -20)))
20158 (set (mem:SI (reg:SI sp)) (reg:SI r4))
20159 (set (mem:SI (plus:SI (reg:SI sp) (const_int 4))) (reg:SI XX))
20160 (set (mem:SI (plus:SI (reg:SI sp) (const_int 8))) (reg:SI YY))
20164 FIXME:: In an ideal world the PRE_MODIFY would not exist and
20165 instead we'd have a parallel expression detailing all
20166 the stores to the various memory addresses so that debug
20167 information is more up-to-date. Remember however while writing
20168 this to take care of the constraints with the push instruction.
20170 Note also that this has to be taken care of for the VFP registers.
20172 For more see PR43399. */
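/* As a concrete example, a push of {r4, r5, lr} yields one push_multi insn
whose body stores through a pre_modify of SP by -12 and carries USEs of r5
and lr, while the attached note describes the SP adjustment plus the three
individual word stores at [sp], [sp, #4] and [sp, #8].  */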
20174 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs));
20175 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_dwarf_regs + 1));
20176 dwarf_par_index = 1;
20178 for (i = 0; i <= LAST_ARM_REGNUM; i++)
20180 if (mask & (1 << i))
20182 reg = gen_rtx_REG (SImode, i);
20184 XVECEXP (par, 0, 0)
20185 = gen_rtx_SET (gen_frame_mem
20186 (BLKmode,
20187 gen_rtx_PRE_MODIFY (Pmode,
20188 stack_pointer_rtx,
20189 plus_constant
20190 (Pmode, stack_pointer_rtx,
20191 -4 * num_regs))
20193 gen_rtx_UNSPEC (BLKmode,
20194 gen_rtvec (1, reg),
20195 UNSPEC_PUSH_MULT));
20197 if (dwarf_regs_mask & (1 << i))
20199 tmp = gen_rtx_SET (gen_frame_mem (SImode, stack_pointer_rtx),
20200 reg);
20201 RTX_FRAME_RELATED_P (tmp) = 1;
20202 XVECEXP (dwarf, 0, dwarf_par_index++) = tmp;
20205 break;
20209 for (j = 1, i++; j < num_regs; i++)
20211 if (mask & (1 << i))
20213 reg = gen_rtx_REG (SImode, i);
20215 XVECEXP (par, 0, j) = gen_rtx_USE (VOIDmode, reg);
20217 if (dwarf_regs_mask & (1 << i))
20220 = gen_rtx_SET (gen_frame_mem
20221 (SImode,
20222 plus_constant (Pmode, stack_pointer_rtx,
20223 4 * j)),
20224 reg);
20225 RTX_FRAME_RELATED_P (tmp) = 1;
20226 XVECEXP (dwarf, 0, dwarf_par_index++) = tmp;
20229 j++;
20233 par = emit_insn (par);
20235 tmp = gen_rtx_SET (stack_pointer_rtx,
20236 plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
20237 RTX_FRAME_RELATED_P (tmp) = 1;
20238 XVECEXP (dwarf, 0, 0) = tmp;
20240 add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
20242 return par;
20245 /* Add a REG_CFA_ADJUST_CFA REG note to INSN.
20246 SIZE is the offset to be adjusted.
20247 DEST and SRC might be stack_pointer_rtx or hard_frame_pointer_rtx. */
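/* For example, when arm_emit_multi_reg_pop restores three core registers it
calls this with SIZE == 12 and DEST == SRC == stack_pointer_rtx, so the
attached note reads (set sp (plus sp 12)).  */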
20248 static void
20249 arm_add_cfa_adjust_cfa_note (rtx insn, int size, rtx dest, rtx src)
20251 rtx dwarf;
20253 RTX_FRAME_RELATED_P (insn) = 1;
20254 dwarf = gen_rtx_SET (dest, plus_constant (Pmode, src, size));
20255 add_reg_note (insn, REG_CFA_ADJUST_CFA, dwarf);
20258 /* Generate and emit an insn pattern that we will recognize as a pop_multi.
20259 SAVED_REGS_MASK shows which registers need to be restored.
20261 Unfortunately, since this insn does not reflect very well the actual
20262 semantics of the operation, we need to annotate the insn for the benefit
20263 of DWARF2 frame unwind information. */
20264 static void
20265 arm_emit_multi_reg_pop (unsigned long saved_regs_mask)
20267 int num_regs = 0;
20268 int i, j;
20269 rtx par;
20270 rtx dwarf = NULL_RTX;
20271 rtx tmp, reg;
20272 bool return_in_pc = saved_regs_mask & (1 << PC_REGNUM);
20273 int offset_adj;
20274 int emit_update;
20276 offset_adj = return_in_pc ? 1 : 0;
20277 for (i = 0; i <= LAST_ARM_REGNUM; i++)
20278 if (saved_regs_mask & (1 << i))
20279 num_regs++;
20281 gcc_assert (num_regs && num_regs <= 16);
20283 /* If SP is in reglist, then we don't emit SP update insn. */
20284 emit_update = (saved_regs_mask & (1 << SP_REGNUM)) ? 0 : 1;
20286 /* The parallel needs to hold num_regs SETs
20287 and one SET for the stack update. */
20288 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs + emit_update + offset_adj));
20290 if (return_in_pc)
20291 XVECEXP (par, 0, 0) = ret_rtx;
20293 if (emit_update)
20295 /* Increment the stack pointer, based on there being
20296 num_regs 4-byte registers to restore. */
20297 tmp = gen_rtx_SET (stack_pointer_rtx,
20298 plus_constant (Pmode,
20299 stack_pointer_rtx,
20300 4 * num_regs));
20301 RTX_FRAME_RELATED_P (tmp) = 1;
20302 XVECEXP (par, 0, offset_adj) = tmp;
20305 /* Now restore every reg, which may include PC. */
20306 for (j = 0, i = 0; j < num_regs; i++)
20307 if (saved_regs_mask & (1 << i))
20309 reg = gen_rtx_REG (SImode, i);
20310 if ((num_regs == 1) && emit_update && !return_in_pc)
20312 /* Emit single load with writeback. */
20313 tmp = gen_frame_mem (SImode,
20314 gen_rtx_POST_INC (Pmode,
20315 stack_pointer_rtx));
20316 tmp = emit_insn (gen_rtx_SET (reg, tmp));
20317 REG_NOTES (tmp) = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20318 return;
20321 tmp = gen_rtx_SET (reg,
20322 gen_frame_mem
20323 (SImode,
20324 plus_constant (Pmode, stack_pointer_rtx, 4 * j)));
20325 RTX_FRAME_RELATED_P (tmp) = 1;
20326 XVECEXP (par, 0, j + emit_update + offset_adj) = tmp;
20328 /* We need to maintain a sequence for DWARF info too. As dwarf info
20329 should not have PC, skip PC. */
20330 if (i != PC_REGNUM)
20331 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20333 j++;
20336 if (return_in_pc)
20337 par = emit_jump_insn (par);
20338 else
20339 par = emit_insn (par);
20341 REG_NOTES (par) = dwarf;
20342 if (!return_in_pc)
20343 arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD * num_regs,
20344 stack_pointer_rtx, stack_pointer_rtx);
20347 /* Generate and emit an insn pattern that we will recognize as a pop_multi
20348 of NUM_REGS consecutive VFP regs, starting at FIRST_REG.
20350 Unfortunately, since this insn does not reflect very well the actual
20351 semantics of the operation, we need to annotate the insn for the benefit
20352 of DWARF2 frame unwind information. */
20353 static void
20354 arm_emit_vfp_multi_reg_pop (int first_reg, int num_regs, rtx base_reg)
20356 int i, j;
20357 rtx par;
20358 rtx dwarf = NULL_RTX;
20359 rtx tmp, reg;
20361 gcc_assert (num_regs && num_regs <= 32);
20363 /* Workaround ARM10 VFPr1 bug. */
20364 if (num_regs == 2 && !arm_arch6)
20366 if (first_reg == 15)
20367 first_reg--;
20369 num_regs++;
20372 /* We can emit at most 16 D-registers in a single pop_multi instruction, and
20373 there could be up to 32 D-registers to restore.
20374 If there are more than 16 D-registers, make two recursive calls,
20375 each of which emits one pop_multi instruction. */
20376 if (num_regs > 16)
20378 arm_emit_vfp_multi_reg_pop (first_reg, 16, base_reg);
20379 arm_emit_vfp_multi_reg_pop (first_reg + 16, num_regs - 16, base_reg);
20380 return;
20383 /* The parallel needs to hold num_regs SETs
20384 and one SET for the stack update. */
20385 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs + 1));
20387 /* Increment the stack pointer, based on there being
20388 num_regs 8-byte registers to restore. */
20389 tmp = gen_rtx_SET (base_reg, plus_constant (Pmode, base_reg, 8 * num_regs));
20390 RTX_FRAME_RELATED_P (tmp) = 1;
20391 XVECEXP (par, 0, 0) = tmp;
20393 /* Now show every reg that will be restored, using a SET for each. */
20394 for (j = 0, i=first_reg; j < num_regs; i += 2)
20396 reg = gen_rtx_REG (DFmode, i);
20398 tmp = gen_rtx_SET (reg,
20399 gen_frame_mem
20400 (DFmode,
20401 plus_constant (Pmode, base_reg, 8 * j)));
20402 RTX_FRAME_RELATED_P (tmp) = 1;
20403 XVECEXP (par, 0, j + 1) = tmp;
20405 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20407 j++;
20410 par = emit_insn (par);
20411 REG_NOTES (par) = dwarf;
20413 /* Make sure the CFA doesn't remain based on IP_REGNUM, to allow unwinding from FP. */
20414 if (REGNO (base_reg) == IP_REGNUM)
20416 RTX_FRAME_RELATED_P (par) = 1;
20417 add_reg_note (par, REG_CFA_DEF_CFA, hard_frame_pointer_rtx);
20419 else
20420 arm_add_cfa_adjust_cfa_note (par, 2 * UNITS_PER_WORD * num_regs,
20421 base_reg, base_reg);
20424 /* Generate and emit a pattern that will be recognized as an LDRD pattern.  If an
20425 even number of registers is being popped, LDRD patterns are created for
20426 all register pairs.  If an odd number of registers is popped, the last register
20427 is loaded using an LDR pattern. */
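/* For example, popping {r4, r5, r6, pc} produces, in effect,

	ldrd	r4, r5, [sp]
	add	sp, sp, #8
	pop	{r6, pc}

with PC handled by the final pop_multi rather than by an LDRD.  */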
20428 static void
20429 thumb2_emit_ldrd_pop (unsigned long saved_regs_mask)
20431 int num_regs = 0;
20432 int i, j;
20433 rtx par = NULL_RTX;
20434 rtx dwarf = NULL_RTX;
20435 rtx tmp, reg, tmp1;
20436 bool return_in_pc = saved_regs_mask & (1 << PC_REGNUM);
20438 for (i = 0; i <= LAST_ARM_REGNUM; i++)
20439 if (saved_regs_mask & (1 << i))
20440 num_regs++;
20442 gcc_assert (num_regs && num_regs <= 16);
20444 /* We cannot generate ldrd for PC. Hence, reduce the count if PC is
20445 to be popped. So, if num_regs is even, now it will become odd,
20446 and we can generate pop with PC. If num_regs is odd, it will be
20447 even now, and ldr with return can be generated for PC. */
20448 if (return_in_pc)
20449 num_regs--;
20451 gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
20453 /* Var j iterates over all the registers to collect those set in
20454 saved_regs_mask.  Var i gives the index of a saved register in the stack frame.
20455 A PARALLEL RTX of register-pair is created here, so that pattern for
20456 LDRD can be matched. As PC is always last register to be popped, and
20457 we have already decremented num_regs if PC, we don't have to worry
20458 about PC in this loop. */
20459 for (i = 0, j = 0; i < (num_regs - (num_regs % 2)); j++)
20460 if (saved_regs_mask & (1 << j))
20462 /* Create RTX for memory load. */
20463 reg = gen_rtx_REG (SImode, j);
20464 tmp = gen_rtx_SET (reg,
20465 gen_frame_mem (SImode,
20466 plus_constant (Pmode,
20467 stack_pointer_rtx, 4 * i)));
20468 RTX_FRAME_RELATED_P (tmp) = 1;
20470 if (i % 2 == 0)
20472 /* When saved-register index (i) is even, the RTX to be emitted is
20473 yet to be created. Hence create it first. The LDRD pattern we
20474 are generating is :
20475 [ (SET (reg_t0) (MEM (PLUS (SP) (NUM))))
20476 (SET (reg_t1) (MEM (PLUS (SP) (NUM + 4)))) ]
20477 where target registers need not be consecutive. */
20478 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
20479 dwarf = NULL_RTX;
20482 /* ith register is added in PARALLEL RTX. If i is even, the reg_i is
20483 added as 0th element and if i is odd, reg_i is added as 1st element
20484 of LDRD pattern shown above. */
20485 XVECEXP (par, 0, (i % 2)) = tmp;
20486 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20488 if ((i % 2) == 1)
20490 /* When saved-register index (i) is odd, RTXs for both the registers
20491 to be loaded are generated in above given LDRD pattern, and the
20492 pattern can be emitted now. */
20493 par = emit_insn (par);
20494 REG_NOTES (par) = dwarf;
20495 RTX_FRAME_RELATED_P (par) = 1;
20498 i++;
20501 /* If num_regs (as adjusted above) is odd and return_in_pc is false, or
20502 num_regs is even and return_in_pc is true, the last register is
20503 popped using LDR.  It can be PC as well.  Hence, adjust the stack first and
20504 then use LDR with post increment. */
20506 /* Increment the stack pointer, based on there being
20507 num_regs 4-byte registers to restore. */
20508 tmp = gen_rtx_SET (stack_pointer_rtx,
20509 plus_constant (Pmode, stack_pointer_rtx, 4 * i));
20510 RTX_FRAME_RELATED_P (tmp) = 1;
20511 tmp = emit_insn (tmp);
20512 if (!return_in_pc)
20514 arm_add_cfa_adjust_cfa_note (tmp, UNITS_PER_WORD * i,
20515 stack_pointer_rtx, stack_pointer_rtx);
20518 dwarf = NULL_RTX;
20520 if (((num_regs % 2) == 1 && !return_in_pc)
20521 || ((num_regs % 2) == 0 && return_in_pc))
20523 /* Scan for the single register to be popped. Skip until the saved
20524 register is found. */
20525 for (; (saved_regs_mask & (1 << j)) == 0; j++);
20527 /* Gen LDR with post increment here. */
20528 tmp1 = gen_rtx_MEM (SImode,
20529 gen_rtx_POST_INC (SImode,
20530 stack_pointer_rtx));
20531 set_mem_alias_set (tmp1, get_frame_alias_set ());
20533 reg = gen_rtx_REG (SImode, j);
20534 tmp = gen_rtx_SET (reg, tmp1);
20535 RTX_FRAME_RELATED_P (tmp) = 1;
20536 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20538 if (return_in_pc)
20540 /* If return_in_pc, j must be PC_REGNUM. */
20541 gcc_assert (j == PC_REGNUM);
20542 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
20543 XVECEXP (par, 0, 0) = ret_rtx;
20544 XVECEXP (par, 0, 1) = tmp;
20545 par = emit_jump_insn (par);
20547 else
20549 par = emit_insn (tmp);
20550 REG_NOTES (par) = dwarf;
20551 arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD,
20552 stack_pointer_rtx, stack_pointer_rtx);
20556 else if ((num_regs % 2) == 1 && return_in_pc)
20558 /* There are 2 registers to be popped. So, generate the pattern
20559 pop_multiple_with_stack_update_and_return to pop in PC. */
20560 arm_emit_multi_reg_pop (saved_regs_mask & (~((1 << j) - 1)));
20563 return;
20566 /* LDRD in ARM mode needs consecutive registers as operands. This function
20567 emits LDRD whenever possible; otherwise it emits single-word loads.  It uses
20568 offset addressing and then generates one separate stack update.  This provides
20569 more scheduling freedom, compared to writeback on every load. However,
20570 if the function returns using load into PC directly
20571 (i.e., if PC is in SAVED_REGS_MASK), the stack needs to be updated
20572 before the last load. TODO: Add a peephole optimization to recognize
20573 the new epilogue sequence as an LDM instruction whenever possible. TODO: Add
20574 peephole optimization to merge the load at stack-offset zero
20575 with the stack update instruction using load with writeback
20576 in post-index addressing mode. */
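/* For example, popping {r4, r5, r6} produces, in effect,

	ldrd	r4, r5, [sp]
	ldr	r6, [sp, #8]
	add	sp, sp, #12

with a single stack update after all the loads.  */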
20577 static void
20578 arm_emit_ldrd_pop (unsigned long saved_regs_mask)
20580 int j = 0;
20581 int offset = 0;
20582 rtx par = NULL_RTX;
20583 rtx dwarf = NULL_RTX;
20584 rtx tmp, mem;
20586 /* Restore saved registers. */
20587 gcc_assert (!((saved_regs_mask & (1 << SP_REGNUM))));
20588 j = 0;
20589 while (j <= LAST_ARM_REGNUM)
20590 if (saved_regs_mask & (1 << j))
20592 if ((j % 2) == 0
20593 && (saved_regs_mask & (1 << (j + 1)))
20594 && (j + 1) != PC_REGNUM)
20596 /* The current register and the next register form a register pair for which
20597 LDRD can be generated. PC is always the last register popped, and
20598 we handle it separately. */
20599 if (offset > 0)
20600 mem = gen_frame_mem (DImode,
20601 plus_constant (Pmode,
20602 stack_pointer_rtx,
20603 offset));
20604 else
20605 mem = gen_frame_mem (DImode, stack_pointer_rtx);
20607 tmp = gen_rtx_SET (gen_rtx_REG (DImode, j), mem);
20608 tmp = emit_insn (tmp);
20609 RTX_FRAME_RELATED_P (tmp) = 1;
20611 /* Generate dwarf info. */
20613 dwarf = alloc_reg_note (REG_CFA_RESTORE,
20614 gen_rtx_REG (SImode, j),
20615 NULL_RTX);
20616 dwarf = alloc_reg_note (REG_CFA_RESTORE,
20617 gen_rtx_REG (SImode, j + 1),
20618 dwarf);
20620 REG_NOTES (tmp) = dwarf;
20622 offset += 8;
20623 j += 2;
20625 else if (j != PC_REGNUM)
20627 /* Emit a single word load. */
20628 if (offset > 0)
20629 mem = gen_frame_mem (SImode,
20630 plus_constant (Pmode,
20631 stack_pointer_rtx,
20632 offset));
20633 else
20634 mem = gen_frame_mem (SImode, stack_pointer_rtx);
20636 tmp = gen_rtx_SET (gen_rtx_REG (SImode, j), mem);
20637 tmp = emit_insn (tmp);
20638 RTX_FRAME_RELATED_P (tmp) = 1;
20640 /* Generate dwarf info. */
20641 REG_NOTES (tmp) = alloc_reg_note (REG_CFA_RESTORE,
20642 gen_rtx_REG (SImode, j),
20643 NULL_RTX);
20645 offset += 4;
20646 j += 1;
20648 else /* j == PC_REGNUM */
20649 j++;
20651 else
20652 j++;
20654 /* Update the stack. */
20655 if (offset > 0)
20657 tmp = gen_rtx_SET (stack_pointer_rtx,
20658 plus_constant (Pmode,
20659 stack_pointer_rtx,
20660 offset));
20661 tmp = emit_insn (tmp);
20662 arm_add_cfa_adjust_cfa_note (tmp, offset,
20663 stack_pointer_rtx, stack_pointer_rtx);
20664 offset = 0;
20667 if (saved_regs_mask & (1 << PC_REGNUM))
20669 /* Only PC is to be popped. */
20670 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
20671 XVECEXP (par, 0, 0) = ret_rtx;
20672 tmp = gen_rtx_SET (gen_rtx_REG (SImode, PC_REGNUM),
20673 gen_frame_mem (SImode,
20674 gen_rtx_POST_INC (SImode,
20675 stack_pointer_rtx)));
20676 RTX_FRAME_RELATED_P (tmp) = 1;
20677 XVECEXP (par, 0, 1) = tmp;
20678 par = emit_jump_insn (par);
20680 /* Generate dwarf info. */
20681 dwarf = alloc_reg_note (REG_CFA_RESTORE,
20682 gen_rtx_REG (SImode, PC_REGNUM),
20683 NULL_RTX);
20684 REG_NOTES (par) = dwarf;
20685 arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD,
20686 stack_pointer_rtx, stack_pointer_rtx);
20690 /* Calculate the size of the return value that is passed in registers. */
20691 static unsigned
20692 arm_size_return_regs (void)
20694 machine_mode mode;
20696 if (crtl->return_rtx != 0)
20697 mode = GET_MODE (crtl->return_rtx);
20698 else
20699 mode = DECL_MODE (DECL_RESULT (current_function_decl));
20701 return GET_MODE_SIZE (mode);
20704 /* Return true if the current function needs to save/restore LR. */
20705 static bool
20706 thumb_force_lr_save (void)
20708 return !cfun->machine->lr_save_eliminated
20709 && (!crtl->is_leaf
20710 || thumb_far_jump_used_p ()
20711 || df_regs_ever_live_p (LR_REGNUM));
20714 /* Return true if CALL is an indirect tail call, in which case
20715 we do not know whether r3 will be available. */
20717 static bool
20718 is_indirect_tailcall_p (rtx call)
20720 rtx pat = PATTERN (call);
20722 /* Indirect tail call. */
20723 pat = XVECEXP (pat, 0, 0);
20724 if (GET_CODE (pat) == SET)
20725 pat = SET_SRC (pat);
20727 pat = XEXP (XEXP (pat, 0), 0);
20728 return REG_P (pat);
20731 /* Return true if r3 is used by any of the tail call insns in the
20732 current function. */
20733 static bool
20734 any_sibcall_could_use_r3 (void)
20736 edge_iterator ei;
20737 edge e;
20739 if (!crtl->tail_call_emit)
20740 return false;
20741 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
20742 if (e->flags & EDGE_SIBCALL)
20744 rtx_insn *call = BB_END (e->src);
20745 if (!CALL_P (call))
20746 call = prev_nonnote_nondebug_insn (call);
20747 gcc_assert (CALL_P (call) && SIBLING_CALL_P (call));
20748 if (find_regno_fusage (call, USE, 3)
20749 || is_indirect_tailcall_p (call))
20750 return true;
20752 return false;
20756 /* Compute the distance from register FROM to register TO.
20757 These can be the arg pointer (26), the soft frame pointer (25),
20758 the stack pointer (13) or the hard frame pointer (11).
20759 In thumb mode r7 is used as the soft frame pointer, if needed.
20760 Typical stack layout looks like this:
20762 old stack pointer -> | |
20763 ----
20764 | | \
20765 | | saved arguments for
20766 | | vararg functions
20767 | | /
20769 hard FP & arg pointer -> | | \
20770 | | stack
20771 | | frame
20772 | | /
20774 | | \
20775 | | call saved
20776 | | registers
20777 soft frame pointer -> | | /
20779 | | \
20780 | | local
20781 | | variables
20782 locals base pointer -> | | /
20784 | | \
20785 | | outgoing
20786 | | arguments
20787 current stack pointer -> | | /
20790 For a given function some or all of these stack components
20791 may not be needed, giving rise to the possibility of
20792 eliminating some of the registers.
20794 The values returned by this function must reflect the behavior
20795 of arm_expand_prologue () and arm_compute_save_core_reg_mask ().
20797 The sign of the number returned reflects the direction of stack
20798 growth, so the values are positive for all eliminations except
20799 from the soft frame pointer to the hard frame pointer.
20801 SFP may point just inside the local variables block to ensure correct
20802 alignment. */
20805 /* Return cached stack offsets. */
20807 static arm_stack_offsets *
20808 arm_get_frame_offsets (void)
20810 struct arm_stack_offsets *offsets;
20812 offsets = &cfun->machine->stack_offsets;
20814 return offsets;
20818 /* Calculate stack offsets. These are used to calculate register elimination
20819 offsets and in prologue/epilogue code. Also calculates which registers
20820 should be saved. */
20822 static void
20823 arm_compute_frame_layout (void)
20825 struct arm_stack_offsets *offsets;
20826 unsigned long func_type;
20827 int saved;
20828 int core_saved;
20829 HOST_WIDE_INT frame_size;
20830 int i;
20832 offsets = &cfun->machine->stack_offsets;
20834 /* Initially this is the size of the local variables.  It will be translated
20835 into an offset once we have determined the size of preceding data. */
20836 frame_size = ROUND_UP_WORD (get_frame_size ());
20838 /* Space for variadic functions. */
20839 offsets->saved_args = crtl->args.pretend_args_size;
20841 /* In Thumb mode this is incorrect, but never used. */
20842 offsets->frame
20843 = (offsets->saved_args
20844 + arm_compute_static_chain_stack_bytes ()
20845 + (frame_pointer_needed ? 4 : 0));
20847 if (TARGET_32BIT)
20849 unsigned int regno;
20851 offsets->saved_regs_mask = arm_compute_save_core_reg_mask ();
20852 core_saved = bit_count (offsets->saved_regs_mask) * 4;
20853 saved = core_saved;
20855 /* We know that SP will be doubleword aligned on entry, and we must
20856 preserve that condition at any subroutine call. We also require the
20857 soft frame pointer to be doubleword aligned. */
20859 if (TARGET_REALLY_IWMMXT)
20861 /* Check for the call-saved iWMMXt registers. */
20862 for (regno = FIRST_IWMMXT_REGNUM;
20863 regno <= LAST_IWMMXT_REGNUM;
20864 regno++)
20865 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
20866 saved += 8;
20869 func_type = arm_current_func_type ();
20870 /* Space for saved VFP registers. */
20871 if (! IS_VOLATILE (func_type)
20872 && TARGET_HARD_FLOAT)
20873 saved += arm_get_vfp_saved_size ();
20875 else /* TARGET_THUMB1 */
20877 offsets->saved_regs_mask = thumb1_compute_save_core_reg_mask ();
20878 core_saved = bit_count (offsets->saved_regs_mask) * 4;
20879 saved = core_saved;
20880 if (TARGET_BACKTRACE)
20881 saved += 16;
20884 /* Saved registers include the stack frame. */
20885 offsets->saved_regs
20886 = offsets->saved_args + arm_compute_static_chain_stack_bytes () + saved;
20887 offsets->soft_frame = offsets->saved_regs + CALLER_INTERWORKING_SLOT_SIZE;
20889 /* A leaf function does not need any stack alignment if it has nothing
20890 on the stack. */
20891 if (crtl->is_leaf && frame_size == 0
20892 /* However if it calls alloca(), we have a dynamically allocated
20893 block of BIGGEST_ALIGNMENT on stack, so still do stack alignment. */
20894 && ! cfun->calls_alloca)
20896 offsets->outgoing_args = offsets->soft_frame;
20897 offsets->locals_base = offsets->soft_frame;
20898 return;
20901 /* Ensure SFP has the correct alignment. */
20902 if (ARM_DOUBLEWORD_ALIGN
20903 && (offsets->soft_frame & 7))
20905 offsets->soft_frame += 4;
20906 /* Try to align stack by pushing an extra reg. Don't bother doing this
20907 when there is a stack frame as the alignment will be rolled into
20908 the normal stack adjustment. */
20909 if (frame_size + crtl->outgoing_args_size == 0)
20911 int reg = -1;
20913 /* Register r3 is caller-saved. Normally it does not need to be
20914 saved on entry by the prologue.  However, if we choose to save
20915 it for padding, then we may confuse the compiler into thinking
20916 a prologue sequence is required when in fact it is not. This
20917 will occur when shrink-wrapping if r3 is used as a scratch
20918 register and there are no other callee-saved writes.
20920 This situation can be avoided when other callee-saved registers
20921 are available and r3 is not mandatory if we choose a callee-saved
20922 register for padding. */
20923 bool prefer_callee_reg_p = false;
20925 /* If it is safe to use r3, then do so. This sometimes
20926 generates better code on Thumb-2 by avoiding the need to
20927 use 32-bit push/pop instructions. */
20928 if (! any_sibcall_could_use_r3 ()
20929 && arm_size_return_regs () <= 12
20930 && (offsets->saved_regs_mask & (1 << 3)) == 0
20931 && (TARGET_THUMB2
20932 || !(TARGET_LDRD && current_tune->prefer_ldrd_strd)))
20934 reg = 3;
20935 if (!TARGET_THUMB2)
20936 prefer_callee_reg_p = true;
20938 if (reg == -1
20939 || prefer_callee_reg_p)
20941 for (i = 4; i <= (TARGET_THUMB1 ? LAST_LO_REGNUM : 11); i++)
20943 /* Avoid fixed registers; they may be changed at
20944 arbitrary times so it's unsafe to restore them
20945 during the epilogue. */
20946 if (!fixed_regs[i]
20947 && (offsets->saved_regs_mask & (1 << i)) == 0)
20949 reg = i;
20950 break;
20955 if (reg != -1)
20957 offsets->saved_regs += 4;
20958 offsets->saved_regs_mask |= (1 << reg);
20963 offsets->locals_base = offsets->soft_frame + frame_size;
20964 offsets->outgoing_args = (offsets->locals_base
20965 + crtl->outgoing_args_size);
20967 if (ARM_DOUBLEWORD_ALIGN)
20969 /* Ensure SP remains doubleword aligned. */
20970 if (offsets->outgoing_args & 7)
20971 offsets->outgoing_args += 4;
20972 gcc_assert (!(offsets->outgoing_args & 7));
20977 /* Calculate the relative offsets for the different stack pointers. Positive
20978 offsets are in the direction of stack growth. */
20980 HOST_WIDE_INT
20981 arm_compute_initial_elimination_offset (unsigned int from, unsigned int to)
20983 arm_stack_offsets *offsets;
20985 offsets = arm_get_frame_offsets ();
20987 /* OK, now we have enough information to compute the distances.
20988 There must be an entry in these switch tables for each pair
20989 of registers in ELIMINABLE_REGS, even if some of the entries
20990 seem to be redundant or useless. */
20991 switch (from)
20993 case ARG_POINTER_REGNUM:
20994 switch (to)
20996 case THUMB_HARD_FRAME_POINTER_REGNUM:
20997 return 0;
20999 case FRAME_POINTER_REGNUM:
21000 /* This is the reverse of the soft frame pointer
21001 to hard frame pointer elimination below. */
21002 return offsets->soft_frame - offsets->saved_args;
21004 case ARM_HARD_FRAME_POINTER_REGNUM:
21005 /* This is only non-zero in the case where the static chain register
21006 is stored above the frame. */
21007 return offsets->frame - offsets->saved_args - 4;
21009 case STACK_POINTER_REGNUM:
21010 /* If nothing has been pushed on the stack at all
21011 then this will return -4. This *is* correct! */
21012 return offsets->outgoing_args - (offsets->saved_args + 4);
21014 default:
21015 gcc_unreachable ();
21017 gcc_unreachable ();
21019 case FRAME_POINTER_REGNUM:
21020 switch (to)
21022 case THUMB_HARD_FRAME_POINTER_REGNUM:
21023 return 0;
21025 case ARM_HARD_FRAME_POINTER_REGNUM:
21026 /* The hard frame pointer points to the top entry in the
21027 stack frame. The soft frame pointer points to the bottom entry
21028 in the stack frame. If there is no stack frame at all,
21029 then they are identical. */
21031 return offsets->frame - offsets->soft_frame;
21033 case STACK_POINTER_REGNUM:
21034 return offsets->outgoing_args - offsets->soft_frame;
21036 default:
21037 gcc_unreachable ();
21039 gcc_unreachable ();
21041 default:
21042 /* You cannot eliminate from the stack pointer.
21043 In theory you could eliminate from the hard frame
21044 pointer to the stack pointer, but this will never
21045 happen, since if a stack frame is not needed the
21046 hard frame pointer will never be used. */
21047 gcc_unreachable ();
21051 /* Given FROM and TO register numbers, say whether this elimination is
21052 allowed. Frame pointer elimination is automatically handled.
21054 All eliminations are permissible. Note that ARG_POINTER_REGNUM and
21055 HARD_FRAME_POINTER_REGNUM are in fact the same thing. If we need a frame
21056 pointer, we must eliminate FRAME_POINTER_REGNUM into
21057 HARD_FRAME_POINTER_REGNUM and not into STACK_POINTER_REGNUM or
21058 ARG_POINTER_REGNUM. */
21060 bool
21061 arm_can_eliminate (const int from, const int to)
21063 return ((to == FRAME_POINTER_REGNUM && from == ARG_POINTER_REGNUM) ? false :
21064 (to == STACK_POINTER_REGNUM && frame_pointer_needed) ? false :
21065 (to == ARM_HARD_FRAME_POINTER_REGNUM && TARGET_THUMB) ? false :
21066 (to == THUMB_HARD_FRAME_POINTER_REGNUM && TARGET_ARM) ? false :
21067 true);
21070 /* Emit RTL to save coprocessor registers on function entry. Returns the
21071 number of bytes pushed. */
21073 static int
21074 arm_save_coproc_regs(void)
21076 int saved_size = 0;
21077 unsigned reg;
21078 unsigned start_reg;
21079 rtx insn;
21081 for (reg = LAST_IWMMXT_REGNUM; reg >= FIRST_IWMMXT_REGNUM; reg--)
21082 if (df_regs_ever_live_p (reg) && ! call_used_regs[reg])
21084 insn = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
21085 insn = gen_rtx_MEM (V2SImode, insn);
21086 insn = emit_set_insn (insn, gen_rtx_REG (V2SImode, reg));
21087 RTX_FRAME_RELATED_P (insn) = 1;
21088 saved_size += 8;
21091 if (TARGET_HARD_FLOAT)
21093 start_reg = FIRST_VFP_REGNUM;
21095 for (reg = FIRST_VFP_REGNUM; reg < LAST_VFP_REGNUM; reg += 2)
21097 if ((!df_regs_ever_live_p (reg) || call_used_regs[reg])
21098 && (!df_regs_ever_live_p (reg + 1) || call_used_regs[reg + 1]))
21100 if (start_reg != reg)
21101 saved_size += vfp_emit_fstmd (start_reg,
21102 (reg - start_reg) / 2);
21103 start_reg = reg + 2;
21106 if (start_reg != reg)
21107 saved_size += vfp_emit_fstmd (start_reg,
21108 (reg - start_reg) / 2);
21110 return saved_size;
21114 /* Set the Thumb frame pointer from the stack pointer. */
21116 static void
21117 thumb_set_frame_pointer (arm_stack_offsets *offsets)
21119 HOST_WIDE_INT amount;
21120 rtx insn, dwarf;
21122 amount = offsets->outgoing_args - offsets->locals_base;
21123 if (amount < 1024)
21124 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
21125 stack_pointer_rtx, GEN_INT (amount)));
21126 else
21128 emit_insn (gen_movsi (hard_frame_pointer_rtx, GEN_INT (amount)));
21129 /* Thumb-2 RTL patterns expect sp as the first input. Thumb-1
21130 expects the first two operands to be the same. */
21131 if (TARGET_THUMB2)
21133 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
21134 stack_pointer_rtx,
21135 hard_frame_pointer_rtx));
21137 else
21139 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
21140 hard_frame_pointer_rtx,
21141 stack_pointer_rtx));
21143 dwarf = gen_rtx_SET (hard_frame_pointer_rtx,
21144 plus_constant (Pmode, stack_pointer_rtx, amount));
21145 RTX_FRAME_RELATED_P (dwarf) = 1;
21146 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
21149 RTX_FRAME_RELATED_P (insn) = 1;
21152 struct scratch_reg {
21153 rtx reg;
21154 bool saved;
21157 /* Return a short-lived scratch register for use as a 2nd scratch register on
21158 function entry after the registers are saved in the prologue. This register
21159 must be released by means of release_scratch_register_on_entry. IP is not
21160 considered since it is always used as the 1st scratch register if available.
21162 REGNO1 is the index number of the 1st scratch register and LIVE_REGS is the
21163 mask of live registers. */
21165 static void
21166 get_scratch_register_on_entry (struct scratch_reg *sr, unsigned int regno1,
21167 unsigned long live_regs)
21169 int regno = -1;
21171 sr->saved = false;
21173 if (regno1 != LR_REGNUM && (live_regs & (1 << LR_REGNUM)) != 0)
21174 regno = LR_REGNUM;
21175 else
21177 unsigned int i;
21179 for (i = 4; i < 11; i++)
21180 if (regno1 != i && (live_regs & (1 << i)) != 0)
21182 regno = i;
21183 break;
21186 if (regno < 0)
21188 /* If IP is used as the 1st scratch register for a nested function,
21189 then either r3 wasn't available or is used to preserve IP. */
21190 if (regno1 == IP_REGNUM && IS_NESTED (arm_current_func_type ()))
21191 regno1 = 3;
21192 regno = (regno1 == 3 ? 2 : 3);
21193 sr->saved
21194 = REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun)),
21195 regno);
21199 sr->reg = gen_rtx_REG (SImode, regno);
21200 if (sr->saved)
21202 rtx addr = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
21203 rtx insn = emit_set_insn (gen_frame_mem (SImode, addr), sr->reg);
21204 rtx x = gen_rtx_SET (stack_pointer_rtx,
21205 plus_constant (Pmode, stack_pointer_rtx, -4));
21206 RTX_FRAME_RELATED_P (insn) = 1;
21207 add_reg_note (insn, REG_FRAME_RELATED_EXPR, x);
21211 /* Release a scratch register obtained from the preceding function. */
21213 static void
21214 release_scratch_register_on_entry (struct scratch_reg *sr)
21216 if (sr->saved)
21218 rtx addr = gen_rtx_POST_INC (Pmode, stack_pointer_rtx);
21219 rtx insn = emit_set_insn (sr->reg, gen_frame_mem (SImode, addr));
21220 rtx x = gen_rtx_SET (stack_pointer_rtx,
21221 plus_constant (Pmode, stack_pointer_rtx, 4));
21222 RTX_FRAME_RELATED_P (insn) = 1;
21223 add_reg_note (insn, REG_FRAME_RELATED_EXPR, x);
21227 #define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)
21229 #if PROBE_INTERVAL > 4096
21230 #error Cannot use indexed addressing mode for stack probing
21231 #endif
21233 /* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
21234 inclusive. These are offsets from the current stack pointer. REGNO1
21235 is the index number of the 1st scratch register and LIVE_REGS is the
21236 mask of live registers. */
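/* An illustrative worked example (not from the original sources): assuming
   PROBE_INTERVAL == 4096, a call with FIRST == 12288 and SIZE == 1024 takes
   the small-size path below: reg1 is set to SP - 16384 and the single probe
   lands at reg1 + (4096 - 1024), i.e. at SP - (FIRST + SIZE) == SP - 13312.  */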
21238 static void
21239 arm_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size,
21240 unsigned int regno1, unsigned long live_regs)
21242 rtx reg1 = gen_rtx_REG (Pmode, regno1);
21244 /* See if we have a constant small number of probes to generate. If so,
21245 that's the easy case. */
21246 if (size <= PROBE_INTERVAL)
21248 emit_move_insn (reg1, GEN_INT (first + PROBE_INTERVAL));
21249 emit_set_insn (reg1, gen_rtx_MINUS (Pmode, stack_pointer_rtx, reg1));
21250 emit_stack_probe (plus_constant (Pmode, reg1, PROBE_INTERVAL - size));
21253 /* The run-time loop is made up of 10 insns in the generic case while the
21254 compile-time loop is made up of 4+2*(n-2) insns for n intervals. */
21255 else if (size <= 5 * PROBE_INTERVAL)
21257 HOST_WIDE_INT i, rem;
21259 emit_move_insn (reg1, GEN_INT (first + PROBE_INTERVAL));
21260 emit_set_insn (reg1, gen_rtx_MINUS (Pmode, stack_pointer_rtx, reg1));
21261 emit_stack_probe (reg1);
21263 /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 2 until
21264 it exceeds SIZE. If only two probes are needed, this will not
21265 generate any code. Then probe at FIRST + SIZE. */
21266 for (i = 2 * PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
21268 emit_set_insn (reg1, plus_constant (Pmode, reg1, -PROBE_INTERVAL));
21269 emit_stack_probe (reg1);
21272 rem = size - (i - PROBE_INTERVAL);
21273 if (rem > 4095 || (TARGET_THUMB2 && rem > 255))
21275 emit_set_insn (reg1, plus_constant (Pmode, reg1, -PROBE_INTERVAL));
21276 emit_stack_probe (plus_constant (Pmode, reg1, PROBE_INTERVAL - rem));
21278 else
21279 emit_stack_probe (plus_constant (Pmode, reg1, -rem));
21282 /* Otherwise, do the same as above, but in a loop. Note that we must be
21283 extra careful with variables wrapping around because we might be at
21284 the very top (or the very bottom) of the address space and we have
21285 to be able to handle this case properly; in particular, we use an
21286 equality test for the loop condition. */
21287 else
21289 HOST_WIDE_INT rounded_size;
21290 struct scratch_reg sr;
21292 get_scratch_register_on_entry (&sr, regno1, live_regs);
21294 emit_move_insn (reg1, GEN_INT (first));
21297 /* Step 1: round SIZE to the previous multiple of the interval. */
21299 rounded_size = size & -PROBE_INTERVAL;
21300 emit_move_insn (sr.reg, GEN_INT (rounded_size));
21303 /* Step 2: compute initial and final value of the loop counter. */
21305 /* TEST_ADDR = SP + FIRST. */
21306 emit_set_insn (reg1, gen_rtx_MINUS (Pmode, stack_pointer_rtx, reg1));
21308 /* LAST_ADDR = SP + FIRST + ROUNDED_SIZE. */
21309 emit_set_insn (sr.reg, gen_rtx_MINUS (Pmode, reg1, sr.reg));
21312 /* Step 3: the loop
21316 TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
21317 probe at TEST_ADDR
21319 while (TEST_ADDR != LAST_ADDR)
21321 probes at FIRST + N * PROBE_INTERVAL for values of N from 1
21322 until it is equal to ROUNDED_SIZE. */
21324 emit_insn (gen_probe_stack_range (reg1, reg1, sr.reg));
21327 /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
21328 that SIZE is equal to ROUNDED_SIZE. */
21330 if (size != rounded_size)
21332 HOST_WIDE_INT rem = size - rounded_size;
21334 if (rem > 4095 || (TARGET_THUMB2 && rem > 255))
21336 emit_set_insn (sr.reg,
21337 plus_constant (Pmode, sr.reg, -PROBE_INTERVAL));
21338 emit_stack_probe (plus_constant (Pmode, sr.reg,
21339 PROBE_INTERVAL - rem));
21341 else
21342 emit_stack_probe (plus_constant (Pmode, sr.reg, -rem));
21345 release_scratch_register_on_entry (&sr);
21348 /* Make sure nothing is scheduled before we are done. */
21349 emit_insn (gen_blockage ());
21352 /* Probe a range of stack addresses from REG1 to REG2 inclusive. These are
21353 absolute addresses. */
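/* For reference, a sketch of the loop this routine prints, assuming reg1 is
   r4, reg2 is r5 and PROBE_INTERVAL is 4096 (the register names and label are
   illustrative only):

	.LPSRL0:
		sub	r4, r4, #4096
		str	r0, [r4, #0]
		cmp	r4, r5
		bne	.LPSRL0
 */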
21355 const char *
21356 output_probe_stack_range (rtx reg1, rtx reg2)
21358 static int labelno = 0;
21359 char loop_lab[32];
21360 rtx xops[2];
21362 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno++);
21364 /* Loop. */
21365 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
21367 /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL. */
21368 xops[0] = reg1;
21369 xops[1] = GEN_INT (PROBE_INTERVAL);
21370 output_asm_insn ("sub\t%0, %0, %1", xops);
21372 /* Probe at TEST_ADDR. */
21373 output_asm_insn ("str\tr0, [%0, #0]", xops);
21375 /* Test if TEST_ADDR == LAST_ADDR. */
21376 xops[1] = reg2;
21377 output_asm_insn ("cmp\t%0, %1", xops);
21379 /* Branch. */
21380 fputs ("\tbne\t", asm_out_file);
21381 assemble_name_raw (asm_out_file, loop_lab);
21382 fputc ('\n', asm_out_file);
21384 return "";
21387 /* Generate the prologue instructions for entry into an ARM or Thumb-2
21388 function. */
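/* A rough sketch of the kind of code this expands to for a simple ARM
   function with a small frame and no frame pointer (illustrative only, not
   taken from the sources):

	push	{r4, r5, lr}	@ save the live call-saved core registers
	sub	sp, sp, #16	@ allocate locals and outgoing argument space
 */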
21389 void
21390 arm_expand_prologue (void)
21392 rtx amount;
21393 rtx insn;
21394 rtx ip_rtx;
21395 unsigned long live_regs_mask;
21396 unsigned long func_type;
21397 int fp_offset = 0;
21398 int saved_pretend_args = 0;
21399 int saved_regs = 0;
21400 unsigned HOST_WIDE_INT args_to_push;
21401 HOST_WIDE_INT size;
21402 arm_stack_offsets *offsets;
21403 bool clobber_ip;
21405 func_type = arm_current_func_type ();
21407 /* Naked functions don't have prologues. */
21408 if (IS_NAKED (func_type))
21410 if (flag_stack_usage_info)
21411 current_function_static_stack_size = 0;
21412 return;
21415 /* Make a copy of c_f_p_a_s as we may need to modify it locally. */
21416 args_to_push = crtl->args.pretend_args_size;
21418 /* Compute which registers we will have to save onto the stack. */
21419 offsets = arm_get_frame_offsets ();
21420 live_regs_mask = offsets->saved_regs_mask;
21422 ip_rtx = gen_rtx_REG (SImode, IP_REGNUM);
21424 if (IS_STACKALIGN (func_type))
21426 rtx r0, r1;
21428 /* Handle a word-aligned stack pointer. We generate the following:
21430 mov r0, sp
21431 bic r1, r0, #7
21432 mov sp, r1
21433 <save and restore r0 in normal prologue/epilogue>
21434 mov sp, r0
21435 bx lr
21437 The unwinder doesn't need to know about the stack realignment.
21438 Just tell it we saved SP in r0. */
21439 gcc_assert (TARGET_THUMB2 && !arm_arch_notm && args_to_push == 0);
21441 r0 = gen_rtx_REG (SImode, R0_REGNUM);
21442 r1 = gen_rtx_REG (SImode, R1_REGNUM);
21444 insn = emit_insn (gen_movsi (r0, stack_pointer_rtx));
21445 RTX_FRAME_RELATED_P (insn) = 1;
21446 add_reg_note (insn, REG_CFA_REGISTER, NULL);
21448 emit_insn (gen_andsi3 (r1, r0, GEN_INT (~(HOST_WIDE_INT)7)));
21450 /* ??? The CFA changes here, which may cause GDB to conclude that it
21451 has entered a different function. That said, the unwind info is
21452 correct, individually, before and after this instruction because
21453 we've described the save of SP, which will override the default
21454 handling of SP as restoring from the CFA. */
21455 emit_insn (gen_movsi (stack_pointer_rtx, r1));
21458 /* The static chain register is the same as the IP register. If it is
21459 clobbered when creating the frame, we need to save and restore it. */
21460 clobber_ip = IS_NESTED (func_type)
21461 && ((TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
21462 || (flag_stack_check == STATIC_BUILTIN_STACK_CHECK
21463 && !df_regs_ever_live_p (LR_REGNUM)
21464 && arm_r3_live_at_start_p ()));
21466 /* Find somewhere to store IP whilst the frame is being created.
21467 We try the following places in order:
21469 1. The last argument register r3 if it is available.
21470 2. A slot on the stack above the frame if there are no
21471 arguments to push onto the stack.
21472 3. Register r3 again, after pushing the argument registers
21473 onto the stack, if this is a varargs function.
21474 4. The last slot on the stack created for the arguments to
21475 push, if this isn't a varargs function.
21477 Note - we only need to tell the dwarf2 backend about the SP
21478 adjustment in the second variant; the static chain register
21479 doesn't need to be unwound, as it doesn't contain a value
21480 inherited from the caller. */
21481 if (clobber_ip)
21483 if (!arm_r3_live_at_start_p ())
21484 insn = emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
21485 else if (args_to_push == 0)
21487 rtx addr, dwarf;
21489 gcc_assert(arm_compute_static_chain_stack_bytes() == 4);
21490 saved_regs += 4;
21492 addr = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
21493 insn = emit_set_insn (gen_frame_mem (SImode, addr), ip_rtx);
21494 fp_offset = 4;
21496 /* Just tell the dwarf backend that we adjusted SP. */
21497 dwarf = gen_rtx_SET (stack_pointer_rtx,
21498 plus_constant (Pmode, stack_pointer_rtx,
21499 -fp_offset));
21500 RTX_FRAME_RELATED_P (insn) = 1;
21501 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
21503 else
21505 /* Store the args on the stack. */
21506 if (cfun->machine->uses_anonymous_args)
21508 insn = emit_multi_reg_push ((0xf0 >> (args_to_push / 4)) & 0xf,
21509 (0xf0 >> (args_to_push / 4)) & 0xf);
21510 emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
21511 saved_pretend_args = 1;
21513 else
21515 rtx addr, dwarf;
21517 if (args_to_push == 4)
21518 addr = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
21519 else
21520 addr = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx,
21521 plus_constant (Pmode,
21522 stack_pointer_rtx,
21523 -args_to_push));
21525 insn = emit_set_insn (gen_frame_mem (SImode, addr), ip_rtx);
21527 /* Just tell the dwarf backend that we adjusted SP. */
21528 dwarf = gen_rtx_SET (stack_pointer_rtx,
21529 plus_constant (Pmode, stack_pointer_rtx,
21530 -args_to_push));
21531 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
21534 RTX_FRAME_RELATED_P (insn) = 1;
21535 fp_offset = args_to_push;
21536 args_to_push = 0;
21540 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
21542 if (IS_INTERRUPT (func_type))
21544 /* Interrupt functions must not corrupt any registers.
21545 Creating a frame pointer, however, corrupts the IP
21546 register, so we must push it first. */
21547 emit_multi_reg_push (1 << IP_REGNUM, 1 << IP_REGNUM);
21549 /* Do not set RTX_FRAME_RELATED_P on this insn.
21550 The dwarf stack unwinding code only wants to see one
21551 stack decrement per function, and this is not it. If
21552 this instruction is labeled as being part of the frame
21553 creation sequence then dwarf2out_frame_debug_expr will
21554 die when it encounters the assignment of IP to FP
21555 later on, since the use of SP here establishes SP as
21556 the CFA register and not IP.
21558 Anyway this instruction is not really part of the stack
21559 frame creation although it is part of the prologue. */
21562 insn = emit_set_insn (ip_rtx,
21563 plus_constant (Pmode, stack_pointer_rtx,
21564 fp_offset));
21565 RTX_FRAME_RELATED_P (insn) = 1;
21568 if (args_to_push)
21570 /* Push the argument registers, or reserve space for them. */
21571 if (cfun->machine->uses_anonymous_args)
21572 insn = emit_multi_reg_push
21573 ((0xf0 >> (args_to_push / 4)) & 0xf,
21574 (0xf0 >> (args_to_push / 4)) & 0xf);
21575 else
21576 insn = emit_insn
21577 (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
21578 GEN_INT (- args_to_push)));
21579 RTX_FRAME_RELATED_P (insn) = 1;
21582 /* If this is an interrupt service routine, and the link register
21583 is going to be pushed, and we're not generating extra
21584 push of IP (needed when a frame pointer is required and the APCS frame layout is in use),
21585 subtracting four from LR now will mean that the function return
21586 can be done with a single instruction. */
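  /* Illustrative only: with LR pre-adjusted here, the saved value already
     points at the instruction to resume, so the epilogue can return with a
     single "ldmfd sp!, {..., pc}^" instead of first correcting LR by 4 and
     then branching through it.  */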
21587 if ((func_type == ARM_FT_ISR || func_type == ARM_FT_FIQ)
21588 && (live_regs_mask & (1 << LR_REGNUM)) != 0
21589 && !(frame_pointer_needed && TARGET_APCS_FRAME)
21590 && TARGET_ARM)
21592 rtx lr = gen_rtx_REG (SImode, LR_REGNUM);
21594 emit_set_insn (lr, plus_constant (SImode, lr, -4));
21597 if (live_regs_mask)
21599 unsigned long dwarf_regs_mask = live_regs_mask;
21601 saved_regs += bit_count (live_regs_mask) * 4;
21602 if (optimize_size && !frame_pointer_needed
21603 && saved_regs == offsets->saved_regs - offsets->saved_args)
21605 /* If no coprocessor registers are being pushed and we don't have
21606 to worry about a frame pointer then push extra registers to
21607 create the stack frame. This is done in a way that does not
21608 alter the frame layout, so is independent of the epilogue. */
21609 int n;
21610 int frame;
21611 n = 0;
21612 while (n < 8 && (live_regs_mask & (1 << n)) == 0)
21613 n++;
21614 frame = offsets->outgoing_args - (offsets->saved_args + saved_regs);
21615 if (frame && n * 4 >= frame)
21617 n = frame / 4;
21618 live_regs_mask |= (1 << n) - 1;
21619 saved_regs += frame;
21623 if (TARGET_LDRD
21624 && current_tune->prefer_ldrd_strd
21625 && !optimize_function_for_size_p (cfun))
21627 gcc_checking_assert (live_regs_mask == dwarf_regs_mask);
21628 if (TARGET_THUMB2)
21629 thumb2_emit_strd_push (live_regs_mask);
21630 else if (TARGET_ARM
21631 && !TARGET_APCS_FRAME
21632 && !IS_INTERRUPT (func_type))
21633 arm_emit_strd_push (live_regs_mask);
21634 else
21636 insn = emit_multi_reg_push (live_regs_mask, live_regs_mask);
21637 RTX_FRAME_RELATED_P (insn) = 1;
21640 else
21642 insn = emit_multi_reg_push (live_regs_mask, dwarf_regs_mask);
21643 RTX_FRAME_RELATED_P (insn) = 1;
21647 if (! IS_VOLATILE (func_type))
21648 saved_regs += arm_save_coproc_regs ();
21650 if (frame_pointer_needed && TARGET_ARM)
21652 /* Create the new frame pointer. */
21653 if (TARGET_APCS_FRAME)
21655 insn = GEN_INT (-(4 + args_to_push + fp_offset));
21656 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx, ip_rtx, insn));
21657 RTX_FRAME_RELATED_P (insn) = 1;
21659 else
21661 insn = GEN_INT (saved_regs - (4 + fp_offset));
21662 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
21663 stack_pointer_rtx, insn));
21664 RTX_FRAME_RELATED_P (insn) = 1;
21668 size = offsets->outgoing_args - offsets->saved_args;
21669 if (flag_stack_usage_info)
21670 current_function_static_stack_size = size;
21672 /* If this isn't an interrupt service routine and we have a frame, then do
21673 stack checking. We use IP as the first scratch register, except for the
21674 non-APCS nested functions if LR or r3 are available (see clobber_ip). */
21675 if (!IS_INTERRUPT (func_type)
21676 && flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
21678 unsigned int regno;
21680 if (!IS_NESTED (func_type) || clobber_ip)
21681 regno = IP_REGNUM;
21682 else if (df_regs_ever_live_p (LR_REGNUM))
21683 regno = LR_REGNUM;
21684 else
21685 regno = 3;
21687 if (crtl->is_leaf && !cfun->calls_alloca)
21689 if (size > PROBE_INTERVAL && size > STACK_CHECK_PROTECT)
21690 arm_emit_probe_stack_range (STACK_CHECK_PROTECT,
21691 size - STACK_CHECK_PROTECT,
21692 regno, live_regs_mask);
21694 else if (size > 0)
21695 arm_emit_probe_stack_range (STACK_CHECK_PROTECT, size,
21696 regno, live_regs_mask);
21699 /* Recover the static chain register. */
21700 if (clobber_ip)
21702 if (!arm_r3_live_at_start_p () || saved_pretend_args)
21703 insn = gen_rtx_REG (SImode, 3);
21704 else
21706 insn = plus_constant (Pmode, hard_frame_pointer_rtx, 4);
21707 insn = gen_frame_mem (SImode, insn);
21709 emit_set_insn (ip_rtx, insn);
21710 emit_insn (gen_force_register_use (ip_rtx));
21713 if (offsets->outgoing_args != offsets->saved_args + saved_regs)
21715 /* This add can produce multiple insns for a large constant, so we
21716 need to get tricky. */
21717 rtx_insn *last = get_last_insn ();
21719 amount = GEN_INT (offsets->saved_args + saved_regs
21720 - offsets->outgoing_args);
21722 insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
21723 amount));
21726 last = last ? NEXT_INSN (last) : get_insns ();
21727 RTX_FRAME_RELATED_P (last) = 1;
21729 while (last != insn);
21731 /* If the frame pointer is needed, emit a special barrier that
21732 will prevent the scheduler from moving stores to the frame
21733 before the stack adjustment. */
21734 if (frame_pointer_needed)
21735 emit_insn (gen_stack_tie (stack_pointer_rtx,
21736 hard_frame_pointer_rtx));
21740 if (frame_pointer_needed && TARGET_THUMB2)
21741 thumb_set_frame_pointer (offsets);
21743 if (flag_pic && arm_pic_register != INVALID_REGNUM)
21745 unsigned long mask;
21747 mask = live_regs_mask;
21748 mask &= THUMB2_WORK_REGS;
21749 if (!IS_NESTED (func_type))
21750 mask |= (1 << IP_REGNUM);
21751 arm_load_pic_register (mask);
21754 /* If we are profiling, make sure no instructions are scheduled before
21755 the call to mcount. Similarly if the user has requested no
21756 scheduling in the prologue. Similarly if we want non-call exceptions
21757 using the EABI unwinder, to prevent faulting instructions from being
21758 swapped with a stack adjustment. */
21759 if (crtl->profile || !TARGET_SCHED_PROLOG
21760 || (arm_except_unwind_info (&global_options) == UI_TARGET
21761 && cfun->can_throw_non_call_exceptions))
21762 emit_insn (gen_blockage ());
21764 /* If the link register is being kept alive, with the return address in it,
21765 then make sure that it does not get reused by the ce2 pass. */
21766 if ((live_regs_mask & (1 << LR_REGNUM)) == 0)
21767 cfun->machine->lr_save_eliminated = 1;
21770 /* Print condition code to STREAM. Helper function for arm_print_operand. */
21771 static void
21772 arm_print_condition (FILE *stream)
21774 if (arm_ccfsm_state == 3 || arm_ccfsm_state == 4)
21776 /* Branch conversion is not implemented for Thumb-2. */
21777 if (TARGET_THUMB)
21779 output_operand_lossage ("predicated Thumb instruction");
21780 return;
21782 if (current_insn_predicate != NULL)
21784 output_operand_lossage
21785 ("predicated instruction in conditional sequence");
21786 return;
21789 fputs (arm_condition_codes[arm_current_cc], stream);
21791 else if (current_insn_predicate)
21793 enum arm_cond_code code;
21795 if (TARGET_THUMB1)
21797 output_operand_lossage ("predicated Thumb instruction");
21798 return;
21801 code = get_arm_condition_code (current_insn_predicate);
21802 fputs (arm_condition_codes[code], stream);
21807 /* Globally reserved letters: acln
21808 Punctuation letters currently used: @_|?().!#
21809 Lower case letters currently used: bcdefhimpqtvwxyz
21810 Upper case letters currently used: ABCDFGHJKLMNOPQRSTU
21811 Letters previously used, but now deprecated/obsolete: sVWXYZ.
21813 Note that the global reservation for 'c' is only for CONSTANT_ADDRESS_P.
21815 If CODE is 'd', then X is a condition operand and the instruction
21816 should only be executed if the condition is true.
21817 If CODE is 'D', then X is a condition operand and the instruction
21818 should only be executed if the condition is false: however, if the mode
21819 of the comparison is CCFPEmode, then always execute the instruction -- we
21820 do this because in these circumstances !GE does not necessarily imply LT;
21821 in these cases the instruction pattern will take care to make sure that
21822 an instruction containing %d will follow, thereby undoing the effects of
21823 doing this instruction unconditionally.
21824 If CODE is 'N' then X is a floating point operand that must be negated
21825 before output.
21826 If CODE is 'B' then output a bitwise inverted value of X (a const int).
21827 If X is a REG and CODE is `M', output a ldm/stm style multi-reg. */
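/* A couple of illustrative examples of the codes handled below (not from the
   original sources): with operand 0 being (const_int 5), "%B0" prints -6
   (the bitwise inverse, sign-extended); with operand 0 being r4 in DImode,
   "%M0" prints "{r4-r5}".  */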
21828 static void
21829 arm_print_operand (FILE *stream, rtx x, int code)
21831 switch (code)
21833 case '@':
21834 fputs (ASM_COMMENT_START, stream);
21835 return;
21837 case '_':
21838 fputs (user_label_prefix, stream);
21839 return;
21841 case '|':
21842 fputs (REGISTER_PREFIX, stream);
21843 return;
21845 case '?':
21846 arm_print_condition (stream);
21847 return;
21849 case '.':
21850 /* The current condition code for a condition code setting instruction.
21851 Preceded by 's' in unified syntax, otherwise followed by 's'. */
21852 fputc('s', stream);
21853 arm_print_condition (stream);
21854 return;
21856 case '!':
21857 /* If the instruction is conditionally executed then print
21858 the current condition code, otherwise print 's'. */
21859 gcc_assert (TARGET_THUMB2);
21860 if (current_insn_predicate)
21861 arm_print_condition (stream);
21862 else
21863 fputc('s', stream);
21864 break;
21866 /* %# is a "break" sequence. It doesn't output anything, but is used to
21867 separate e.g. operand numbers from following text, if that text consists
21868 of further digits which we don't want to be part of the operand
21869 number. */
21870 case '#':
21871 return;
21873 case 'N':
21875 REAL_VALUE_TYPE r;
21876 r = real_value_negate (CONST_DOUBLE_REAL_VALUE (x));
21877 fprintf (stream, "%s", fp_const_from_val (&r));
21879 return;
21881 /* An integer or symbol address without a preceding # sign. */
21882 case 'c':
21883 switch (GET_CODE (x))
21885 case CONST_INT:
21886 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
21887 break;
21889 case SYMBOL_REF:
21890 output_addr_const (stream, x);
21891 break;
21893 case CONST:
21894 if (GET_CODE (XEXP (x, 0)) == PLUS
21895 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF)
21897 output_addr_const (stream, x);
21898 break;
21900 /* Fall through. */
21902 default:
21903 output_operand_lossage ("Unsupported operand for code '%c'", code);
21905 return;
21907 /* An integer that we want to print in HEX. */
21908 case 'x':
21909 switch (GET_CODE (x))
21911 case CONST_INT:
21912 fprintf (stream, "#" HOST_WIDE_INT_PRINT_HEX, INTVAL (x));
21913 break;
21915 default:
21916 output_operand_lossage ("Unsupported operand for code '%c'", code);
21918 return;
21920 case 'B':
21921 if (CONST_INT_P (x))
21923 HOST_WIDE_INT val;
21924 val = ARM_SIGN_EXTEND (~INTVAL (x));
21925 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, val);
21927 else
21929 putc ('~', stream);
21930 output_addr_const (stream, x);
21932 return;
21934 case 'b':
21935 /* Print the log2 of a CONST_INT. */
21937 HOST_WIDE_INT val;
21939 if (!CONST_INT_P (x)
21940 || (val = exact_log2 (INTVAL (x) & 0xffffffff)) < 0)
21941 output_operand_lossage ("Unsupported operand for code '%c'", code);
21942 else
21943 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, val);
21945 return;
21947 case 'L':
21948 /* The low 16 bits of an immediate constant. */
21949 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL(x) & 0xffff);
21950 return;
21952 case 'i':
21953 fprintf (stream, "%s", arithmetic_instr (x, 1));
21954 return;
21956 case 'I':
21957 fprintf (stream, "%s", arithmetic_instr (x, 0));
21958 return;
21960 case 'S':
21962 HOST_WIDE_INT val;
21963 const char *shift;
21965 shift = shift_op (x, &val);
21967 if (shift)
21969 fprintf (stream, ", %s ", shift);
21970 if (val == -1)
21971 arm_print_operand (stream, XEXP (x, 1), 0);
21972 else
21973 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, val);
21976 return;
21978 /* An explanation of the 'Q', 'R' and 'H' register operands:
21980 In a pair of registers containing a DI or DF value the 'Q'
21981 operand returns the register number of the register containing
21982 the least significant part of the value. The 'R' operand returns
21983 the register number of the register containing the most
21984 significant part of the value.
21986 The 'H' operand returns the higher of the two register numbers.
21987 On a run where WORDS_BIG_ENDIAN is true the 'H' operand is the
21988 same as the 'Q' operand, since the most significant part of the
21989 value is held in the lower number register. The reverse is true
21990 on systems where WORDS_BIG_ENDIAN is false.
21992 The purpose of these operands is to distinguish between cases
21993 where the endian-ness of the values is important (for example
21994 when they are added together), and cases where the endian-ness
21995 is irrelevant, but the order of register operations is important.
21996 For example when loading a value from memory into a register
21997 pair, the endian-ness does not matter. Provided that the value
21998 from the lower memory address is put into the lower numbered
21999 register, and the value from the higher address is put into the
22000 higher numbered register, the load will work regardless of whether
22001 the value being loaded is big-wordian or little-wordian. The
22002 order of the two register loads can matter however, if the address
22003 of the memory location is actually held in one of the registers
22004 being overwritten by the load.
22006 The 'Q' and 'R' constraints are also available for 64-bit
22007 constants. */
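      /* A worked example (illustrative, not from the sources): for a DImode
	 value held in {r4, r5} on a little-endian target where
	 WORDS_BIG_ENDIAN is false, 'Q' prints r4 (the least significant
	 word), 'R' prints r5 (the most significant word) and 'H' also prints
	 r5 (the higher-numbered register).  */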
22008 case 'Q':
22009 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
22011 rtx part = gen_lowpart (SImode, x);
22012 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, INTVAL (part));
22013 return;
22016 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
22018 output_operand_lossage ("invalid operand for code '%c'", code);
22019 return;
22022 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 1 : 0));
22023 return;
22025 case 'R':
22026 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
22028 machine_mode mode = GET_MODE (x);
22029 rtx part;
22031 if (mode == VOIDmode)
22032 mode = DImode;
22033 part = gen_highpart_mode (SImode, mode, x);
22034 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, INTVAL (part));
22035 return;
22038 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
22040 output_operand_lossage ("invalid operand for code '%c'", code);
22041 return;
22044 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 0 : 1));
22045 return;
22047 case 'H':
22048 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
22050 output_operand_lossage ("invalid operand for code '%c'", code);
22051 return;
22054 asm_fprintf (stream, "%r", REGNO (x) + 1);
22055 return;
22057 case 'J':
22058 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
22060 output_operand_lossage ("invalid operand for code '%c'", code);
22061 return;
22064 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 3 : 2));
22065 return;
22067 case 'K':
22068 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
22070 output_operand_lossage ("invalid operand for code '%c'", code);
22071 return;
22074 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 2 : 3));
22075 return;
22077 case 'm':
22078 asm_fprintf (stream, "%r",
22079 REG_P (XEXP (x, 0))
22080 ? REGNO (XEXP (x, 0)) : REGNO (XEXP (XEXP (x, 0), 0)));
22081 return;
22083 case 'M':
22084 asm_fprintf (stream, "{%r-%r}",
22085 REGNO (x),
22086 REGNO (x) + ARM_NUM_REGS (GET_MODE (x)) - 1);
22087 return;
22089 /* Like 'M', but writing doubleword vector registers, for use by Neon
22090 insns. */
22091 case 'h':
22093 int regno = (REGNO (x) - FIRST_VFP_REGNUM) / 2;
22094 int numregs = ARM_NUM_REGS (GET_MODE (x)) / 2;
22095 if (numregs == 1)
22096 asm_fprintf (stream, "{d%d}", regno);
22097 else
22098 asm_fprintf (stream, "{d%d-d%d}", regno, regno + numregs - 1);
22100 return;
22102 case 'd':
22103 /* CONST_TRUE_RTX means always -- that's the default. */
22104 if (x == const_true_rtx)
22105 return;
22107 if (!COMPARISON_P (x))
22109 output_operand_lossage ("invalid operand for code '%c'", code);
22110 return;
22113 fputs (arm_condition_codes[get_arm_condition_code (x)],
22114 stream);
22115 return;
22117 case 'D':
22118 /* CONST_TRUE_RTX means not always -- i.e. never. We shouldn't ever
22119 want to do that. */
22120 if (x == const_true_rtx)
22122 output_operand_lossage ("instruction never executed");
22123 return;
22125 if (!COMPARISON_P (x))
22127 output_operand_lossage ("invalid operand for code '%c'", code);
22128 return;
22131 fputs (arm_condition_codes[ARM_INVERSE_CONDITION_CODE
22132 (get_arm_condition_code (x))],
22133 stream);
22134 return;
22136 case 's':
22137 case 'V':
22138 case 'W':
22139 case 'X':
22140 case 'Y':
22141 case 'Z':
22142 /* Former Maverick support, removed after GCC-4.7. */
22143 output_operand_lossage ("obsolete Maverick format code '%c'", code);
22144 return;
22146 case 'U':
22147 if (!REG_P (x)
22148 || REGNO (x) < FIRST_IWMMXT_GR_REGNUM
22149 || REGNO (x) > LAST_IWMMXT_GR_REGNUM)
22150 /* Bad value for wCG register number. */
22152 output_operand_lossage ("invalid operand for code '%c'", code);
22153 return;
22156 else
22157 fprintf (stream, "%d", REGNO (x) - FIRST_IWMMXT_GR_REGNUM);
22158 return;
22160 /* Print an iWMMXt control register name. */
22161 case 'w':
22162 if (!CONST_INT_P (x)
22163 || INTVAL (x) < 0
22164 || INTVAL (x) >= 16)
22165 /* Bad value for wC register number. */
22167 output_operand_lossage ("invalid operand for code '%c'", code);
22168 return;
22171 else
22173 static const char * wc_reg_names [16] =
22175 "wCID", "wCon", "wCSSF", "wCASF",
22176 "wC4", "wC5", "wC6", "wC7",
22177 "wCGR0", "wCGR1", "wCGR2", "wCGR3",
22178 "wC12", "wC13", "wC14", "wC15"
22181 fputs (wc_reg_names [INTVAL (x)], stream);
22183 return;
22185 /* Print the high single-precision register of a VFP double-precision
22186 register. */
22187 case 'p':
22189 machine_mode mode = GET_MODE (x);
22190 int regno;
22192 if (GET_MODE_SIZE (mode) != 8 || !REG_P (x))
22194 output_operand_lossage ("invalid operand for code '%c'", code);
22195 return;
22198 regno = REGNO (x);
22199 if (!VFP_REGNO_OK_FOR_DOUBLE (regno))
22201 output_operand_lossage ("invalid operand for code '%c'", code);
22202 return;
22205 fprintf (stream, "s%d", regno - FIRST_VFP_REGNUM + 1);
22207 return;
22209 /* Print a VFP/Neon double precision or quad precision register name. */
22210 case 'P':
22211 case 'q':
22213 machine_mode mode = GET_MODE (x);
22214 int is_quad = (code == 'q');
22215 int regno;
22217 if (GET_MODE_SIZE (mode) != (is_quad ? 16 : 8))
22219 output_operand_lossage ("invalid operand for code '%c'", code);
22220 return;
22223 if (!REG_P (x)
22224 || !IS_VFP_REGNUM (REGNO (x)))
22226 output_operand_lossage ("invalid operand for code '%c'", code);
22227 return;
22230 regno = REGNO (x);
22231 if ((is_quad && !NEON_REGNO_OK_FOR_QUAD (regno))
22232 || (!is_quad && !VFP_REGNO_OK_FOR_DOUBLE (regno)))
22234 output_operand_lossage ("invalid operand for code '%c'", code);
22235 return;
22238 fprintf (stream, "%c%d", is_quad ? 'q' : 'd',
22239 (regno - FIRST_VFP_REGNUM) >> (is_quad ? 2 : 1));
22241 return;
22243 /* These two codes print the low/high doubleword register of a Neon quad
22244 register, respectively. For pair-structure types, can also print
22245 low/high quadword registers. */
22246 case 'e':
22247 case 'f':
22249 machine_mode mode = GET_MODE (x);
22250 int regno;
22252 if ((GET_MODE_SIZE (mode) != 16
22253 && GET_MODE_SIZE (mode) != 32) || !REG_P (x))
22255 output_operand_lossage ("invalid operand for code '%c'", code);
22256 return;
22259 regno = REGNO (x);
22260 if (!NEON_REGNO_OK_FOR_QUAD (regno))
22262 output_operand_lossage ("invalid operand for code '%c'", code);
22263 return;
22266 if (GET_MODE_SIZE (mode) == 16)
22267 fprintf (stream, "d%d", ((regno - FIRST_VFP_REGNUM) >> 1)
22268 + (code == 'f' ? 1 : 0));
22269 else
22270 fprintf (stream, "q%d", ((regno - FIRST_VFP_REGNUM) >> 2)
22271 + (code == 'f' ? 1 : 0));
22273 return;
22275 /* Print a VFPv3 floating-point constant, represented as an integer
22276 index. */
22277 case 'G':
22279 int index = vfp3_const_double_index (x);
22280 gcc_assert (index != -1);
22281 fprintf (stream, "%d", index);
22283 return;
22285 /* Print bits representing opcode features for Neon.
22287 Bit 0 is 1 for signed, 0 for unsigned. Floats count as signed
22288 and polynomials as unsigned.
22290 Bit 1 is 1 for floats and polynomials, 0 for ordinary integers.
22292 Bit 2 is 1 for rounding functions, 0 otherwise. */
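    /* A worked example of the three codes below (illustrative only): for
       bits == 3 (signed, float) 'T', 'F' and 't' all print 'f'; for
       bits == 0 (unsigned, integer) 'T' prints 'u', 'F' prints 'i' and
       't' prints 'u'.  */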
22294 /* Identify the type as 's', 'u', 'p' or 'f'. */
22295 case 'T':
22297 HOST_WIDE_INT bits = INTVAL (x);
22298 fputc ("uspf"[bits & 3], stream);
22300 return;
22302 /* Likewise, but signed and unsigned integers are both 'i'. */
22303 case 'F':
22305 HOST_WIDE_INT bits = INTVAL (x);
22306 fputc ("iipf"[bits & 3], stream);
22308 return;
22310 /* As for 'T', but emit 'u' instead of 'p'. */
22311 case 't':
22313 HOST_WIDE_INT bits = INTVAL (x);
22314 fputc ("usuf"[bits & 3], stream);
22316 return;
22318 /* Bit 2: rounding (vs none). */
22319 case 'O':
22321 HOST_WIDE_INT bits = INTVAL (x);
22322 fputs ((bits & 4) != 0 ? "r" : "", stream);
22324 return;
22326 /* Memory operand for vld1/vst1 instruction. */
22327 case 'A':
22329 rtx addr;
22330 bool postinc = FALSE;
22331 rtx postinc_reg = NULL;
22332 unsigned align, memsize, align_bits;
22334 gcc_assert (MEM_P (x));
22335 addr = XEXP (x, 0);
22336 if (GET_CODE (addr) == POST_INC)
22338 postinc = 1;
22339 addr = XEXP (addr, 0);
22341 if (GET_CODE (addr) == POST_MODIFY)
22343 postinc_reg = XEXP( XEXP (addr, 1), 1);
22344 addr = XEXP (addr, 0);
22346 asm_fprintf (stream, "[%r", REGNO (addr));
22348 /* We know the alignment of this access, so we can emit a hint in the
22349 instruction (for some alignments) as an aid to the memory subsystem
22350 of the target. */
22351 align = MEM_ALIGN (x) >> 3;
22352 memsize = MEM_SIZE (x);
22354 /* Only certain alignment specifiers are supported by the hardware. */
22355 if (memsize == 32 && (align % 32) == 0)
22356 align_bits = 256;
22357 else if ((memsize == 16 || memsize == 32) && (align % 16) == 0)
22358 align_bits = 128;
22359 else if (memsize >= 8 && (align % 8) == 0)
22360 align_bits = 64;
22361 else
22362 align_bits = 0;
22364 if (align_bits != 0)
22365 asm_fprintf (stream, ":%d", align_bits);
22367 asm_fprintf (stream, "]");
22369 if (postinc)
22370 fputs("!", stream);
22371 if (postinc_reg)
22372 asm_fprintf (stream, ", %r", REGNO (postinc_reg));
22374 return;
22376 case 'C':
22378 rtx addr;
22380 gcc_assert (MEM_P (x));
22381 addr = XEXP (x, 0);
22382 gcc_assert (REG_P (addr));
22383 asm_fprintf (stream, "[%r]", REGNO (addr));
22385 return;
22387 /* Translate an S register number into a D register number and element index. */
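    /* For example (illustrative only): s1 is printed as d0[1] and s6 as
       d3[0].  */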
22388 case 'y':
22390 machine_mode mode = GET_MODE (x);
22391 int regno;
22393 if (GET_MODE_SIZE (mode) != 4 || !REG_P (x))
22395 output_operand_lossage ("invalid operand for code '%c'", code);
22396 return;
22399 regno = REGNO (x);
22400 if (!VFP_REGNO_OK_FOR_SINGLE (regno))
22402 output_operand_lossage ("invalid operand for code '%c'", code);
22403 return;
22406 regno = regno - FIRST_VFP_REGNUM;
22407 fprintf (stream, "d%d[%d]", regno / 2, regno % 2);
22409 return;
22411 case 'v':
22412 gcc_assert (CONST_DOUBLE_P (x));
22413 int result;
22414 result = vfp3_const_double_for_fract_bits (x);
22415 if (result == 0)
22416 result = vfp3_const_double_for_bits (x);
22417 fprintf (stream, "#%d", result);
22418 return;
22420 /* Register specifier for vld1.16/vst1.16. Translate the S register
22421 number into a D register number and element index. */
22422 case 'z':
22424 machine_mode mode = GET_MODE (x);
22425 int regno;
22427 if (GET_MODE_SIZE (mode) != 2 || !REG_P (x))
22429 output_operand_lossage ("invalid operand for code '%c'", code);
22430 return;
22433 regno = REGNO (x);
22434 if (!VFP_REGNO_OK_FOR_SINGLE (regno))
22436 output_operand_lossage ("invalid operand for code '%c'", code);
22437 return;
22440 regno = regno - FIRST_VFP_REGNUM;
22441 fprintf (stream, "d%d[%d]", regno/2, ((regno % 2) ? 2 : 0));
22443 return;
22445 default:
22446 if (x == 0)
22448 output_operand_lossage ("missing operand");
22449 return;
22452 switch (GET_CODE (x))
22454 case REG:
22455 asm_fprintf (stream, "%r", REGNO (x));
22456 break;
22458 case MEM:
22459 output_address (GET_MODE (x), XEXP (x, 0));
22460 break;
22462 case CONST_DOUBLE:
22464 char fpstr[20];
22465 real_to_decimal (fpstr, CONST_DOUBLE_REAL_VALUE (x),
22466 sizeof (fpstr), 0, 1);
22467 fprintf (stream, "#%s", fpstr);
22469 break;
22471 default:
22472 gcc_assert (GET_CODE (x) != NEG);
22473 fputc ('#', stream);
22474 if (GET_CODE (x) == HIGH)
22476 fputs (":lower16:", stream);
22477 x = XEXP (x, 0);
22480 output_addr_const (stream, x);
22481 break;
22486 /* Target hook for printing a memory address. */
22487 static void
22488 arm_print_operand_address (FILE *stream, machine_mode mode, rtx x)
22490 if (TARGET_32BIT)
22492 int is_minus = GET_CODE (x) == MINUS;
22494 if (REG_P (x))
22495 asm_fprintf (stream, "[%r]", REGNO (x));
22496 else if (GET_CODE (x) == PLUS || is_minus)
22498 rtx base = XEXP (x, 0);
22499 rtx index = XEXP (x, 1);
22500 HOST_WIDE_INT offset = 0;
22501 if (!REG_P (base)
22502 || (REG_P (index) && REGNO (index) == SP_REGNUM))
22504 /* Ensure that BASE is a register. */
22505 /* (one of them must be). */
22506 /* Also ensure the SP is not used as an index register. */
22507 std::swap (base, index);
22509 switch (GET_CODE (index))
22511 case CONST_INT:
22512 offset = INTVAL (index);
22513 if (is_minus)
22514 offset = -offset;
22515 asm_fprintf (stream, "[%r, #%wd]",
22516 REGNO (base), offset);
22517 break;
22519 case REG:
22520 asm_fprintf (stream, "[%r, %s%r]",
22521 REGNO (base), is_minus ? "-" : "",
22522 REGNO (index));
22523 break;
22525 case MULT:
22526 case ASHIFTRT:
22527 case LSHIFTRT:
22528 case ASHIFT:
22529 case ROTATERT:
22531 asm_fprintf (stream, "[%r, %s%r",
22532 REGNO (base), is_minus ? "-" : "",
22533 REGNO (XEXP (index, 0)));
22534 arm_print_operand (stream, index, 'S');
22535 fputs ("]", stream);
22536 break;
22539 default:
22540 gcc_unreachable ();
22543 else if (GET_CODE (x) == PRE_INC || GET_CODE (x) == POST_INC
22544 || GET_CODE (x) == PRE_DEC || GET_CODE (x) == POST_DEC)
22546 gcc_assert (REG_P (XEXP (x, 0)));
22548 if (GET_CODE (x) == PRE_DEC || GET_CODE (x) == PRE_INC)
22549 asm_fprintf (stream, "[%r, #%s%d]!",
22550 REGNO (XEXP (x, 0)),
22551 GET_CODE (x) == PRE_DEC ? "-" : "",
22552 GET_MODE_SIZE (mode));
22553 else
22554 asm_fprintf (stream, "[%r], #%s%d",
22555 REGNO (XEXP (x, 0)),
22556 GET_CODE (x) == POST_DEC ? "-" : "",
22557 GET_MODE_SIZE (mode));
22559 else if (GET_CODE (x) == PRE_MODIFY)
22561 asm_fprintf (stream, "[%r, ", REGNO (XEXP (x, 0)));
22562 if (CONST_INT_P (XEXP (XEXP (x, 1), 1)))
22563 asm_fprintf (stream, "#%wd]!",
22564 INTVAL (XEXP (XEXP (x, 1), 1)));
22565 else
22566 asm_fprintf (stream, "%r]!",
22567 REGNO (XEXP (XEXP (x, 1), 1)));
22569 else if (GET_CODE (x) == POST_MODIFY)
22571 asm_fprintf (stream, "[%r], ", REGNO (XEXP (x, 0)));
22572 if (CONST_INT_P (XEXP (XEXP (x, 1), 1)))
22573 asm_fprintf (stream, "#%wd",
22574 INTVAL (XEXP (XEXP (x, 1), 1)));
22575 else
22576 asm_fprintf (stream, "%r",
22577 REGNO (XEXP (XEXP (x, 1), 1)));
22579 else output_addr_const (stream, x);
22581 else
22583 if (REG_P (x))
22584 asm_fprintf (stream, "[%r]", REGNO (x));
22585 else if (GET_CODE (x) == POST_INC)
22586 asm_fprintf (stream, "%r!", REGNO (XEXP (x, 0)));
22587 else if (GET_CODE (x) == PLUS)
22589 gcc_assert (REG_P (XEXP (x, 0)));
22590 if (CONST_INT_P (XEXP (x, 1)))
22591 asm_fprintf (stream, "[%r, #%wd]",
22592 REGNO (XEXP (x, 0)),
22593 INTVAL (XEXP (x, 1)));
22594 else
22595 asm_fprintf (stream, "[%r, %r]",
22596 REGNO (XEXP (x, 0)),
22597 REGNO (XEXP (x, 1)));
22599 else
22600 output_addr_const (stream, x);
22604 /* Target hook for indicating whether a punctuation character for
22605 TARGET_PRINT_OPERAND is valid. */
22606 static bool
22607 arm_print_operand_punct_valid_p (unsigned char code)
22609 return (code == '@' || code == '|' || code == '.'
22610 || code == '(' || code == ')' || code == '#'
22611 || (TARGET_32BIT && (code == '?'))
22612 || (TARGET_THUMB2 && (code == '!'))
22613 || (TARGET_THUMB && (code == '_')));
22616 /* Target hook for assembling integer objects. The ARM version needs to
22617 handle word-sized values specially. */
22618 static bool
22619 arm_assemble_integer (rtx x, unsigned int size, int aligned_p)
22621 machine_mode mode;
22623 if (size == UNITS_PER_WORD && aligned_p)
22625 fputs ("\t.word\t", asm_out_file);
22626 output_addr_const (asm_out_file, x);
22628 /* Mark symbols as position independent. We only do this in the
22629 .text segment, not in the .data segment. */
22630 if (NEED_GOT_RELOC && flag_pic && making_const_table &&
22631 (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF))
22633 /* See legitimize_pic_address for an explanation of the
22634 TARGET_VXWORKS_RTP check. */
22635 /* References to weak symbols cannot be resolved locally:
22636 they may be overridden by a non-weak definition at link
22637 time. */
22638 if (!arm_pic_data_is_text_relative
22639 || (GET_CODE (x) == SYMBOL_REF
22640 && (!SYMBOL_REF_LOCAL_P (x)
22641 || (SYMBOL_REF_DECL (x)
22642 ? DECL_WEAK (SYMBOL_REF_DECL (x)) : 0))))
22643 fputs ("(GOT)", asm_out_file);
22644 else
22645 fputs ("(GOTOFF)", asm_out_file);
22647 fputc ('\n', asm_out_file);
22648 return true;
22651 mode = GET_MODE (x);
22653 if (arm_vector_mode_supported_p (mode))
22655 int i, units;
22657 gcc_assert (GET_CODE (x) == CONST_VECTOR);
22659 units = CONST_VECTOR_NUNITS (x);
22660 size = GET_MODE_UNIT_SIZE (mode);
22662 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
22663 for (i = 0; i < units; i++)
22665 rtx elt = CONST_VECTOR_ELT (x, i);
22666 assemble_integer
22667 (elt, size, i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT, 1);
22669 else
22670 for (i = 0; i < units; i++)
22672 rtx elt = CONST_VECTOR_ELT (x, i);
22673 assemble_real
22674 (*CONST_DOUBLE_REAL_VALUE (elt),
22675 as_a <scalar_float_mode> (GET_MODE_INNER (mode)),
22676 i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT);
22679 return true;
22682 return default_assemble_integer (x, size, aligned_p);
22685 static void
22686 arm_elf_asm_cdtor (rtx symbol, int priority, bool is_ctor)
22688 section *s;
22690 if (!TARGET_AAPCS_BASED)
22692 (is_ctor ?
22693 default_named_section_asm_out_constructor
22694 : default_named_section_asm_out_destructor) (symbol, priority);
22695 return;
22698 /* Put these in the .init_array section, using a special relocation. */
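  /* For example (illustrative only): a constructor with priority 101 ends up
     in a section named ".init_array.00101", and its entry is emitted below as
     "\t.word\tsymbol(target1)".  */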
22699 if (priority != DEFAULT_INIT_PRIORITY)
22701 char buf[18];
22702 sprintf (buf, "%s.%.5u",
22703 is_ctor ? ".init_array" : ".fini_array",
22704 priority);
22705 s = get_section (buf, SECTION_WRITE | SECTION_NOTYPE, NULL_TREE);
22707 else if (is_ctor)
22708 s = ctors_section;
22709 else
22710 s = dtors_section;
22712 switch_to_section (s);
22713 assemble_align (POINTER_SIZE);
22714 fputs ("\t.word\t", asm_out_file);
22715 output_addr_const (asm_out_file, symbol);
22716 fputs ("(target1)\n", asm_out_file);
22719 /* Add a function to the list of static constructors. */
22721 static void
22722 arm_elf_asm_constructor (rtx symbol, int priority)
22724 arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/true);
22727 /* Add a function to the list of static destructors. */
22729 static void
22730 arm_elf_asm_destructor (rtx symbol, int priority)
22732 arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/false);
22735 /* A finite state machine takes care of noticing whether or not instructions
22736 can be conditionally executed, and thus decreases execution time and code
22737 size by deleting branch instructions. The fsm is controlled by
22738 final_prescan_insn, and controls the actions of ASM_OUTPUT_OPCODE. */
22740 /* The state of the fsm controlling condition codes is:
22741 0: normal, do nothing special
22742 1: make ASM_OUTPUT_OPCODE not output this instruction
22743 2: make ASM_OUTPUT_OPCODE not output this instruction
22744 3: make instructions conditional
22745 4: make instructions conditional
22747 State transitions (state->state by whom under condition):
22748 0 -> 1 final_prescan_insn if the `target' is a label
22749 0 -> 2 final_prescan_insn if the `target' is an unconditional branch
22750 1 -> 3 ASM_OUTPUT_OPCODE after not having output the conditional branch
22751 2 -> 4 ASM_OUTPUT_OPCODE after not having output the conditional branch
22752 3 -> 0 (*targetm.asm_out.internal_label) if the `target' label is reached
22753 (the target label has CODE_LABEL_NUMBER equal to arm_target_label).
22754 4 -> 0 final_prescan_insn if the `target' unconditional branch is reached
22755 (the target insn is arm_target_insn).
22757 If the jump clobbers the conditions then we use states 2 and 4.
22759 A similar thing can be done with conditional return insns.
22761 XXX In case the `target' is an unconditional branch, this conditionalising
22762 of the instructions always reduces code size, but not always execution
22763 time. But then, I want to reduce the code size to somewhere near what
22764 /bin/cc produces. */
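/* A sketch of the effect (illustrative, not from the sources): given

	cmp	r0, #0
	beq	.L1
	add	r1, r1, #1
   .L1:

   final_prescan_insn notices that the beq targets a nearby label, enters
   state 1, and ASM_OUTPUT_OPCODE then drops the branch and emits the
   addition as the conditional "addne r1, r1, #1".  */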
22766 /* In addition to this, state is maintained for Thumb-2 COND_EXEC
22767 instructions. When a COND_EXEC instruction is seen the subsequent
22768 instructions are scanned so that multiple conditional instructions can be
22769 combined into a single IT block. arm_condexec_count and arm_condexec_mask
22770 specify the length and true/false mask for the IT block. These will be
22771 decremented/zeroed by arm_asm_output_opcode as the insns are output. */
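/* For instance (illustrative only): two consecutive COND_EXEC insns
   predicated on EQ followed by one predicated on NE can be covered by a
   single "itte eq" block, with arm_condexec_mask recording the then/else
   pattern and arm_condexec_count the number of insns still to be emitted
   under it.  */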
22773 /* Returns the index of the ARM condition code string in
22774 `arm_condition_codes', or ARM_NV if the comparison is invalid.
22775 COMPARISON should be an rtx like `(eq (...) (...))'. */
22777 enum arm_cond_code
22778 maybe_get_arm_condition_code (rtx comparison)
22780 machine_mode mode = GET_MODE (XEXP (comparison, 0));
22781 enum arm_cond_code code;
22782 enum rtx_code comp_code = GET_CODE (comparison);
22784 if (GET_MODE_CLASS (mode) != MODE_CC)
22785 mode = SELECT_CC_MODE (comp_code, XEXP (comparison, 0),
22786 XEXP (comparison, 1));
22788 switch (mode)
22790 case E_CC_DNEmode: code = ARM_NE; goto dominance;
22791 case E_CC_DEQmode: code = ARM_EQ; goto dominance;
22792 case E_CC_DGEmode: code = ARM_GE; goto dominance;
22793 case E_CC_DGTmode: code = ARM_GT; goto dominance;
22794 case E_CC_DLEmode: code = ARM_LE; goto dominance;
22795 case E_CC_DLTmode: code = ARM_LT; goto dominance;
22796 case E_CC_DGEUmode: code = ARM_CS; goto dominance;
22797 case E_CC_DGTUmode: code = ARM_HI; goto dominance;
22798 case E_CC_DLEUmode: code = ARM_LS; goto dominance;
22799 case E_CC_DLTUmode: code = ARM_CC;
22801 dominance:
22802 if (comp_code == EQ)
22803 return ARM_INVERSE_CONDITION_CODE (code);
22804 if (comp_code == NE)
22805 return code;
22806 return ARM_NV;
22808 case E_CC_NOOVmode:
22809 switch (comp_code)
22811 case NE: return ARM_NE;
22812 case EQ: return ARM_EQ;
22813 case GE: return ARM_PL;
22814 case LT: return ARM_MI;
22815 default: return ARM_NV;
22818 case E_CC_Zmode:
22819 switch (comp_code)
22821 case NE: return ARM_NE;
22822 case EQ: return ARM_EQ;
22823 default: return ARM_NV;
22826 case E_CC_Nmode:
22827 switch (comp_code)
22829 case NE: return ARM_MI;
22830 case EQ: return ARM_PL;
22831 default: return ARM_NV;
22834 case E_CCFPEmode:
22835 case E_CCFPmode:
22836 /* We can handle all cases except UNEQ and LTGT. */
22837 switch (comp_code)
22839 case GE: return ARM_GE;
22840 case GT: return ARM_GT;
22841 case LE: return ARM_LS;
22842 case LT: return ARM_MI;
22843 case NE: return ARM_NE;
22844 case EQ: return ARM_EQ;
22845 case ORDERED: return ARM_VC;
22846 case UNORDERED: return ARM_VS;
22847 case UNLT: return ARM_LT;
22848 case UNLE: return ARM_LE;
22849 case UNGT: return ARM_HI;
22850 case UNGE: return ARM_PL;
22851 /* UNEQ and LTGT do not have a representation. */
22852 case UNEQ: /* Fall through. */
22853 case LTGT: /* Fall through. */
22854 default: return ARM_NV;
22857 case E_CC_SWPmode:
22858 switch (comp_code)
22860 case NE: return ARM_NE;
22861 case EQ: return ARM_EQ;
22862 case GE: return ARM_LE;
22863 case GT: return ARM_LT;
22864 case LE: return ARM_GE;
22865 case LT: return ARM_GT;
22866 case GEU: return ARM_LS;
22867 case GTU: return ARM_CC;
22868 case LEU: return ARM_CS;
22869 case LTU: return ARM_HI;
22870 default: return ARM_NV;
22873 case E_CC_Cmode:
22874 switch (comp_code)
22876 case LTU: return ARM_CS;
22877 case GEU: return ARM_CC;
22878 case NE: return ARM_CS;
22879 case EQ: return ARM_CC;
22880 default: return ARM_NV;
22883 case E_CC_CZmode:
22884 switch (comp_code)
22886 case NE: return ARM_NE;
22887 case EQ: return ARM_EQ;
22888 case GEU: return ARM_CS;
22889 case GTU: return ARM_HI;
22890 case LEU: return ARM_LS;
22891 case LTU: return ARM_CC;
22892 default: return ARM_NV;
22895 case E_CC_NCVmode:
22896 switch (comp_code)
22898 case GE: return ARM_GE;
22899 case LT: return ARM_LT;
22900 case GEU: return ARM_CS;
22901 case LTU: return ARM_CC;
22902 default: return ARM_NV;
22905 case E_CC_Vmode:
22906 switch (comp_code)
22908 case NE: return ARM_VS;
22909 case EQ: return ARM_VC;
22910 default: return ARM_NV;
22913 case E_CCmode:
22914 switch (comp_code)
22916 case NE: return ARM_NE;
22917 case EQ: return ARM_EQ;
22918 case GE: return ARM_GE;
22919 case GT: return ARM_GT;
22920 case LE: return ARM_LE;
22921 case LT: return ARM_LT;
22922 case GEU: return ARM_CS;
22923 case GTU: return ARM_HI;
22924 case LEU: return ARM_LS;
22925 case LTU: return ARM_CC;
22926 default: return ARM_NV;
22929 default: gcc_unreachable ();
22933 /* Like maybe_get_arm_condition_code, but never return ARM_NV. */
22934 static enum arm_cond_code
22935 get_arm_condition_code (rtx comparison)
22937 enum arm_cond_code code = maybe_get_arm_condition_code (comparison);
22938 gcc_assert (code != ARM_NV);
22939 return code;
22942 /* Implement TARGET_FIXED_CONDITION_CODE_REGS. We only have condition
22943    code registers when not targeting Thumb1.  The VFP condition register
22944 only exists when generating hard-float code. */
22945 static bool
22946 arm_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
22948 if (!TARGET_32BIT)
22949 return false;
22951 *p1 = CC_REGNUM;
22952 *p2 = TARGET_HARD_FLOAT ? VFPCC_REGNUM : INVALID_REGNUM;
22953 return true;
22956 /* Tell arm_asm_output_opcode to output IT blocks for conditionally executed
22957 instructions. */
22958 void
22959 thumb2_final_prescan_insn (rtx_insn *insn)
22961 rtx_insn *first_insn = insn;
22962 rtx body = PATTERN (insn);
22963 rtx predicate;
22964 enum arm_cond_code code;
22965 int n;
22966 int mask;
22967 int max;
22969 /* max_insns_skipped in the tune was already taken into account in the
22970 cost model of ifcvt pass when generating COND_EXEC insns. At this stage
22971    just emit the largest IT blocks we can.  It does not make sense to split
22972 the IT blocks. */
22973 max = MAX_INSN_PER_IT_BLOCK;
22975 /* Remove the previous insn from the count of insns to be output. */
22976 if (arm_condexec_count)
22977 arm_condexec_count--;
22979 /* Nothing to do if we are already inside a conditional block. */
22980 if (arm_condexec_count)
22981 return;
22983 if (GET_CODE (body) != COND_EXEC)
22984 return;
22986 /* Conditional jumps are implemented directly. */
22987 if (JUMP_P (insn))
22988 return;
22990 predicate = COND_EXEC_TEST (body);
22991 arm_current_cc = get_arm_condition_code (predicate);
22993 n = get_attr_ce_count (insn);
22994 arm_condexec_count = 1;
22995 arm_condexec_mask = (1 << n) - 1;
22996 arm_condexec_masklen = n;
22997 /* See if subsequent instructions can be combined into the same block. */
22998 for (;;)
23000 insn = next_nonnote_insn (insn);
23002 /* Jumping into the middle of an IT block is illegal, so a label or
23003 barrier terminates the block. */
23004 if (!NONJUMP_INSN_P (insn) && !JUMP_P (insn))
23005 break;
23007 body = PATTERN (insn);
23008 /* USE and CLOBBER aren't really insns, so just skip them. */
23009 if (GET_CODE (body) == USE
23010 || GET_CODE (body) == CLOBBER)
23011 continue;
23013 /* ??? Recognize conditional jumps, and combine them with IT blocks. */
23014 if (GET_CODE (body) != COND_EXEC)
23015 break;
23016 /* Maximum number of conditionally executed instructions in a block. */
23017 n = get_attr_ce_count (insn);
23018 if (arm_condexec_masklen + n > max)
23019 break;
23021 predicate = COND_EXEC_TEST (body);
23022 code = get_arm_condition_code (predicate);
23023 mask = (1 << n) - 1;
23024 if (arm_current_cc == code)
23025 arm_condexec_mask |= (mask << arm_condexec_masklen);
23026 else if (arm_current_cc != ARM_INVERSE_CONDITION_CODE(code))
23027 break;
23029 arm_condexec_count++;
23030 arm_condexec_masklen += n;
23032 /* A jump must be the last instruction in a conditional block. */
23033 if (JUMP_P (insn))
23034 break;
23036 /* Restore recog_data (getting the attributes of other insns can
23037 destroy this array, but final.c assumes that it remains intact
23038 across this call). */
23039 extract_constrain_insn_cached (first_insn);
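/* Editorial illustration: with three single-instruction COND_EXEC insns
   whose tests are EQ, EQ and NE, the loop above leaves arm_condexec_mask
   == 0b011 and arm_condexec_masklen == 3, which thumb2_asm_output_opcode
   below prints as a single "itte eq" covering all three instructions.  */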
23042 void
23043 arm_final_prescan_insn (rtx_insn *insn)
23045 /* BODY will hold the body of INSN. */
23046 rtx body = PATTERN (insn);
23048 /* This will be 1 if trying to repeat the trick, and things need to be
23049 reversed if it appears to fail. */
23050 int reverse = 0;
23052 /* If we start with a return insn, we only succeed if we find another one. */
23053 int seeking_return = 0;
23054 enum rtx_code return_code = UNKNOWN;
23056 /* START_INSN will hold the insn from where we start looking. This is the
23057 first insn after the following code_label if REVERSE is true. */
23058 rtx_insn *start_insn = insn;
23060 /* If in state 4, check if the target branch is reached, in order to
23061 change back to state 0. */
23062 if (arm_ccfsm_state == 4)
23064 if (insn == arm_target_insn)
23066 arm_target_insn = NULL;
23067 arm_ccfsm_state = 0;
23069 return;
23072 /* If in state 3, it is possible to repeat the trick, if this insn is an
23073 unconditional branch to a label, and immediately following this branch
23074 is the previous target label which is only used once, and the label this
23075 branch jumps to is not too far off. */
23076 if (arm_ccfsm_state == 3)
23078 if (simplejump_p (insn))
23080 start_insn = next_nonnote_insn (start_insn);
23081 if (BARRIER_P (start_insn))
23083 /* XXX Isn't this always a barrier? */
23084 start_insn = next_nonnote_insn (start_insn);
23086 if (LABEL_P (start_insn)
23087 && CODE_LABEL_NUMBER (start_insn) == arm_target_label
23088 && LABEL_NUSES (start_insn) == 1)
23089 reverse = TRUE;
23090 else
23091 return;
23093 else if (ANY_RETURN_P (body))
23095 start_insn = next_nonnote_insn (start_insn);
23096 if (BARRIER_P (start_insn))
23097 start_insn = next_nonnote_insn (start_insn);
23098 if (LABEL_P (start_insn)
23099 && CODE_LABEL_NUMBER (start_insn) == arm_target_label
23100 && LABEL_NUSES (start_insn) == 1)
23102 reverse = TRUE;
23103 seeking_return = 1;
23104 return_code = GET_CODE (body);
23106 else
23107 return;
23109 else
23110 return;
23113 gcc_assert (!arm_ccfsm_state || reverse);
23114 if (!JUMP_P (insn))
23115 return;
23117   /* This jump might be paralleled with a clobber of the condition codes;
23118      the jump should always come first.  */
23119 if (GET_CODE (body) == PARALLEL && XVECLEN (body, 0) > 0)
23120 body = XVECEXP (body, 0, 0);
23122 if (reverse
23123 || (GET_CODE (body) == SET && GET_CODE (SET_DEST (body)) == PC
23124 && GET_CODE (SET_SRC (body)) == IF_THEN_ELSE))
23126 int insns_skipped;
23127 int fail = FALSE, succeed = FALSE;
23128 /* Flag which part of the IF_THEN_ELSE is the LABEL_REF. */
23129 int then_not_else = TRUE;
23130 rtx_insn *this_insn = start_insn;
23131 rtx label = 0;
23133 /* Register the insn jumped to. */
23134 if (reverse)
23136 if (!seeking_return)
23137 label = XEXP (SET_SRC (body), 0);
23139 else if (GET_CODE (XEXP (SET_SRC (body), 1)) == LABEL_REF)
23140 label = XEXP (XEXP (SET_SRC (body), 1), 0);
23141 else if (GET_CODE (XEXP (SET_SRC (body), 2)) == LABEL_REF)
23143 label = XEXP (XEXP (SET_SRC (body), 2), 0);
23144 then_not_else = FALSE;
23146 else if (ANY_RETURN_P (XEXP (SET_SRC (body), 1)))
23148 seeking_return = 1;
23149 return_code = GET_CODE (XEXP (SET_SRC (body), 1));
23151 else if (ANY_RETURN_P (XEXP (SET_SRC (body), 2)))
23153 seeking_return = 1;
23154 return_code = GET_CODE (XEXP (SET_SRC (body), 2));
23155 then_not_else = FALSE;
23157 else
23158 gcc_unreachable ();
23160 /* See how many insns this branch skips, and what kind of insns. If all
23161 insns are okay, and the label or unconditional branch to the same
23162 label is not too far away, succeed. */
23163 for (insns_skipped = 0;
23164 !fail && !succeed && insns_skipped++ < max_insns_skipped;)
23166 rtx scanbody;
23168 this_insn = next_nonnote_insn (this_insn);
23169 if (!this_insn)
23170 break;
23172 switch (GET_CODE (this_insn))
23174 case CODE_LABEL:
23175 /* Succeed if it is the target label, otherwise fail since
23176 control falls in from somewhere else. */
23177 if (this_insn == label)
23179 arm_ccfsm_state = 1;
23180 succeed = TRUE;
23182 else
23183 fail = TRUE;
23184 break;
23186 case BARRIER:
23187 /* Succeed if the following insn is the target label.
23188 Otherwise fail.
23189 If return insns are used then the last insn in a function
23190 will be a barrier. */
23191 this_insn = next_nonnote_insn (this_insn);
23192 if (this_insn && this_insn == label)
23194 arm_ccfsm_state = 1;
23195 succeed = TRUE;
23197 else
23198 fail = TRUE;
23199 break;
23201 case CALL_INSN:
23202 /* The AAPCS says that conditional calls should not be
23203 used since they make interworking inefficient (the
23204 linker can't transform BL<cond> into BLX). That's
23205 only a problem if the machine has BLX. */
23206 if (arm_arch5)
23208 fail = TRUE;
23209 break;
23212 /* Succeed if the following insn is the target label, or
23213 if the following two insns are a barrier and the
23214 target label. */
23215 this_insn = next_nonnote_insn (this_insn);
23216 if (this_insn && BARRIER_P (this_insn))
23217 this_insn = next_nonnote_insn (this_insn);
23219 if (this_insn && this_insn == label
23220 && insns_skipped < max_insns_skipped)
23222 arm_ccfsm_state = 1;
23223 succeed = TRUE;
23225 else
23226 fail = TRUE;
23227 break;
23229 case JUMP_INSN:
23230 /* If this is an unconditional branch to the same label, succeed.
23231 If it is to another label, do nothing. If it is conditional,
23232 fail. */
23233 /* XXX Probably, the tests for SET and the PC are
23234 unnecessary. */
23236 scanbody = PATTERN (this_insn);
23237 if (GET_CODE (scanbody) == SET
23238 && GET_CODE (SET_DEST (scanbody)) == PC)
23240 if (GET_CODE (SET_SRC (scanbody)) == LABEL_REF
23241 && XEXP (SET_SRC (scanbody), 0) == label && !reverse)
23243 arm_ccfsm_state = 2;
23244 succeed = TRUE;
23246 else if (GET_CODE (SET_SRC (scanbody)) == IF_THEN_ELSE)
23247 fail = TRUE;
23249 /* Fail if a conditional return is undesirable (e.g. on a
23250 StrongARM), but still allow this if optimizing for size. */
23251 else if (GET_CODE (scanbody) == return_code
23252 && !use_return_insn (TRUE, NULL)
23253 && !optimize_size)
23254 fail = TRUE;
23255 else if (GET_CODE (scanbody) == return_code)
23257 arm_ccfsm_state = 2;
23258 succeed = TRUE;
23260 else if (GET_CODE (scanbody) == PARALLEL)
23262 switch (get_attr_conds (this_insn))
23264 case CONDS_NOCOND:
23265 break;
23266 default:
23267 fail = TRUE;
23268 break;
23271 else
23272 fail = TRUE; /* Unrecognized jump (e.g. epilogue). */
23274 break;
23276 case INSN:
23277 /* Instructions using or affecting the condition codes make it
23278 fail. */
23279 scanbody = PATTERN (this_insn);
23280 if (!(GET_CODE (scanbody) == SET
23281 || GET_CODE (scanbody) == PARALLEL)
23282 || get_attr_conds (this_insn) != CONDS_NOCOND)
23283 fail = TRUE;
23284 break;
23286 default:
23287 break;
23290 if (succeed)
23292 if ((!seeking_return) && (arm_ccfsm_state == 1 || reverse))
23293 arm_target_label = CODE_LABEL_NUMBER (label);
23294 else
23296 gcc_assert (seeking_return || arm_ccfsm_state == 2);
23298 while (this_insn && GET_CODE (PATTERN (this_insn)) == USE)
23300 this_insn = next_nonnote_insn (this_insn);
23301 gcc_assert (!this_insn
23302 || (!BARRIER_P (this_insn)
23303 && !LABEL_P (this_insn)));
23305 if (!this_insn)
23307 		  /* Oh, dear!  We ran off the end... give up.  */
23308 extract_constrain_insn_cached (insn);
23309 arm_ccfsm_state = 0;
23310 arm_target_insn = NULL;
23311 return;
23313 arm_target_insn = this_insn;
23316 /* If REVERSE is true, ARM_CURRENT_CC needs to be inverted from
23317 what it was. */
23318 if (!reverse)
23319 arm_current_cc = get_arm_condition_code (XEXP (SET_SRC (body), 0));
23321 if (reverse || then_not_else)
23322 arm_current_cc = ARM_INVERSE_CONDITION_CODE (arm_current_cc);
23325 /* Restore recog_data (getting the attributes of other insns can
23326 destroy this array, but final.c assumes that it remains intact
23327      across this call).  */
23328 extract_constrain_insn_cached (insn);
23332 /* Output IT instructions. */
23333 void
23334 thumb2_asm_output_opcode (FILE * stream)
23336 char buff[5];
23337 int n;
23339 if (arm_condexec_mask)
23341 for (n = 0; n < arm_condexec_masklen; n++)
23342 buff[n] = (arm_condexec_mask & (1 << n)) ? 't' : 'e';
23343 buff[n] = 0;
23344 asm_fprintf(stream, "i%s\t%s\n\t", buff,
23345 arm_condition_codes[arm_current_cc]);
23346 arm_condexec_mask = 0;
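/* Editorial sketch (standalone, not part of arm.c): how a condexec mask and
   length map onto an IT-block mnemonic, using the same encoding as
   arm_condexec_mask/arm_condexec_masklen above (bit N set means insn N runs
   on the "then" condition, clear means "else").  show_it_mnemonic is a
   hypothetical helper that exists only for this illustration.  */
#include <stdio.h>

static void
show_it_mnemonic (unsigned mask, int len, const char *cond)
{
  char buff[5];
  int n;

  for (n = 0; n < len && n < 4; n++)
    buff[n] = (mask & (1u << n)) ? 't' : 'e';
  buff[n] = 0;
  printf ("i%s\t%s\n", buff, cond);
}

int
main (void)
{
  show_it_mnemonic (0x3, 2, "eq");	/* prints "itt	eq"  */
  show_it_mnemonic (0x3, 3, "eq");	/* prints "itte	eq" */
  return 0;
}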
23350 /* Implement TARGET_HARD_REGNO_MODE_OK. */
23351 static bool
23352 arm_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
23354 if (GET_MODE_CLASS (mode) == MODE_CC)
23355 return (regno == CC_REGNUM
23356 || (TARGET_HARD_FLOAT
23357 && regno == VFPCC_REGNUM));
23359 if (regno == CC_REGNUM && GET_MODE_CLASS (mode) != MODE_CC)
23360 return false;
23362 if (TARGET_THUMB1)
23363 /* For the Thumb we only allow values bigger than SImode in
23364 registers 0 - 6, so that there is always a second low
23365 register available to hold the upper part of the value.
23366        We probably ought to ensure that the register is the
23367 start of an even numbered register pair. */
23368 return (ARM_NUM_REGS (mode) < 2) || (regno < LAST_LO_REGNUM);
23370 if (TARGET_HARD_FLOAT && IS_VFP_REGNUM (regno))
23372 if (mode == SFmode || mode == SImode)
23373 return VFP_REGNO_OK_FOR_SINGLE (regno);
23375 if (mode == DFmode)
23376 return VFP_REGNO_OK_FOR_DOUBLE (regno);
23378 if (mode == HFmode)
23379 return VFP_REGNO_OK_FOR_SINGLE (regno);
23381 /* VFP registers can hold HImode values. */
23382 if (mode == HImode)
23383 return VFP_REGNO_OK_FOR_SINGLE (regno);
23385 if (TARGET_NEON)
23386 return (VALID_NEON_DREG_MODE (mode) && VFP_REGNO_OK_FOR_DOUBLE (regno))
23387 || (VALID_NEON_QREG_MODE (mode)
23388 && NEON_REGNO_OK_FOR_QUAD (regno))
23389 || (mode == TImode && NEON_REGNO_OK_FOR_NREGS (regno, 2))
23390 || (mode == EImode && NEON_REGNO_OK_FOR_NREGS (regno, 3))
23391 || (mode == OImode && NEON_REGNO_OK_FOR_NREGS (regno, 4))
23392 || (mode == CImode && NEON_REGNO_OK_FOR_NREGS (regno, 6))
23393 || (mode == XImode && NEON_REGNO_OK_FOR_NREGS (regno, 8));
23395 return false;
23398 if (TARGET_REALLY_IWMMXT)
23400 if (IS_IWMMXT_GR_REGNUM (regno))
23401 return mode == SImode;
23403 if (IS_IWMMXT_REGNUM (regno))
23404 return VALID_IWMMXT_REG_MODE (mode);
23407 /* We allow almost any value to be stored in the general registers.
23408 Restrict doubleword quantities to even register pairs in ARM state
23409 so that we can use ldrd. Do not allow very large Neon structure
23410 opaque modes in general registers; they would use too many. */
23411 if (regno <= LAST_ARM_REGNUM)
23413 if (ARM_NUM_REGS (mode) > 4)
23414 return false;
23416 if (TARGET_THUMB2)
23417 return true;
23419 return !(TARGET_LDRD && GET_MODE_SIZE (mode) > 4 && (regno & 1) != 0);
23422 if (regno == FRAME_POINTER_REGNUM
23423 || regno == ARG_POINTER_REGNUM)
23424 /* We only allow integers in the fake hard registers. */
23425 return GET_MODE_CLASS (mode) == MODE_INT;
23427 return false;
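/* Editorial sketch (standalone, for illustration only): the two core-register
   rules above written out with plain integers.  NUM_REGS stands in for
   ARM_NUM_REGS (mode), MODE_SIZE for GET_MODE_SIZE (mode), and 7 for
   LAST_LO_REGNUM; the helpers themselves are hypothetical.  */

/* Thumb-1: a multi-register value must start below r7 so that at least one
   further low register is free to hold the upper part.  */
static int
thumb1_low_reg_ok_p (unsigned int regno, unsigned int num_regs)
{
  return num_regs < 2 || regno < 7;
}

/* ARM state when LDRD is available: a doubleword value must start on an
   even-numbered register so that an LDRD/STRD pair can be used.  */
static int
arm_ldrd_pair_ok_p (unsigned int regno, unsigned int mode_size)
{
  return !(mode_size > 4 && (regno & 1) != 0);
}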
23430 /* Implement MODES_TIEABLE_P. */
23432 bool
23433 arm_modes_tieable_p (machine_mode mode1, machine_mode mode2)
23435 if (GET_MODE_CLASS (mode1) == GET_MODE_CLASS (mode2))
23436 return true;
23438 /* We specifically want to allow elements of "structure" modes to
23439 be tieable to the structure. This more general condition allows
23440 other rarer situations too. */
23441 if (TARGET_NEON
23442 && (VALID_NEON_DREG_MODE (mode1)
23443 || VALID_NEON_QREG_MODE (mode1)
23444 || VALID_NEON_STRUCT_MODE (mode1))
23445 && (VALID_NEON_DREG_MODE (mode2)
23446 || VALID_NEON_QREG_MODE (mode2)
23447 || VALID_NEON_STRUCT_MODE (mode2)))
23448 return true;
23450 return false;
23453 /* For efficiency and historical reasons LO_REGS, HI_REGS and CC_REGS are
23454 not used in arm mode. */
23456 enum reg_class
23457 arm_regno_class (int regno)
23459 if (regno == PC_REGNUM)
23460 return NO_REGS;
23462 if (TARGET_THUMB1)
23464 if (regno == STACK_POINTER_REGNUM)
23465 return STACK_REG;
23466 if (regno == CC_REGNUM)
23467 return CC_REG;
23468 if (regno < 8)
23469 return LO_REGS;
23470 return HI_REGS;
23473 if (TARGET_THUMB2 && regno < 8)
23474 return LO_REGS;
23476 if ( regno <= LAST_ARM_REGNUM
23477 || regno == FRAME_POINTER_REGNUM
23478 || regno == ARG_POINTER_REGNUM)
23479 return TARGET_THUMB2 ? HI_REGS : GENERAL_REGS;
23481 if (regno == CC_REGNUM || regno == VFPCC_REGNUM)
23482 return TARGET_THUMB2 ? CC_REG : NO_REGS;
23484 if (IS_VFP_REGNUM (regno))
23486 if (regno <= D7_VFP_REGNUM)
23487 return VFP_D0_D7_REGS;
23488 else if (regno <= LAST_LO_VFP_REGNUM)
23489 return VFP_LO_REGS;
23490 else
23491 return VFP_HI_REGS;
23494 if (IS_IWMMXT_REGNUM (regno))
23495 return IWMMXT_REGS;
23497 if (IS_IWMMXT_GR_REGNUM (regno))
23498 return IWMMXT_GR_REGS;
23500 return NO_REGS;
23503 /* Handle a special case when computing the offset
23504 of an argument from the frame pointer. */
23506 arm_debugger_arg_offset (int value, rtx addr)
23508 rtx_insn *insn;
23510 /* We are only interested if dbxout_parms() failed to compute the offset. */
23511 if (value != 0)
23512 return 0;
23514 /* We can only cope with the case where the address is held in a register. */
23515 if (!REG_P (addr))
23516 return 0;
23518 /* If we are using the frame pointer to point at the argument, then
23519 an offset of 0 is correct. */
23520 if (REGNO (addr) == (unsigned) HARD_FRAME_POINTER_REGNUM)
23521 return 0;
23523 /* If we are using the stack pointer to point at the
23524 argument, then an offset of 0 is correct. */
23525 /* ??? Check this is consistent with thumb2 frame layout. */
23526 if ((TARGET_THUMB || !frame_pointer_needed)
23527 && REGNO (addr) == SP_REGNUM)
23528 return 0;
23530 /* Oh dear. The argument is pointed to by a register rather
23531 than being held in a register, or being stored at a known
23532 offset from the frame pointer. Since GDB only understands
23533 those two kinds of argument we must translate the address
23534 held in the register into an offset from the frame pointer.
23535 We do this by searching through the insns for the function
23536 looking to see where this register gets its value. If the
23537 register is initialized from the frame pointer plus an offset
23538 then we are in luck and we can continue, otherwise we give up.
23540 This code is exercised by producing debugging information
23541 for a function with arguments like this:
23543 double func (double a, double b, int c, double d) {return d;}
23545 Without this code the stab for parameter 'd' will be set to
23546 an offset of 0 from the frame pointer, rather than 8. */
23548 /* The if() statement says:
23550 If the insn is a normal instruction
23551 and if the insn is setting the value in a register
23552 and if the register being set is the register holding the address of the argument
23553         and if the address is computed by an addition
23554 that involves adding to a register
23555 which is the frame pointer
23556 a constant integer
23558 then... */
23560 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
23562 if ( NONJUMP_INSN_P (insn)
23563 && GET_CODE (PATTERN (insn)) == SET
23564 && REGNO (XEXP (PATTERN (insn), 0)) == REGNO (addr)
23565 && GET_CODE (XEXP (PATTERN (insn), 1)) == PLUS
23566 && REG_P (XEXP (XEXP (PATTERN (insn), 1), 0))
23567 && REGNO (XEXP (XEXP (PATTERN (insn), 1), 0)) == (unsigned) HARD_FRAME_POINTER_REGNUM
23568 && CONST_INT_P (XEXP (XEXP (PATTERN (insn), 1), 1))
23571 value = INTVAL (XEXP (XEXP (PATTERN (insn), 1), 1));
23573 break;
23577 if (value == 0)
23579 debug_rtx (addr);
23580 warning (0, "unable to compute real location of stacked parameter");
23581 value = 8; /* XXX magic hack */
23584 return value;
23587 /* Implement TARGET_PROMOTED_TYPE. */
23589 static tree
23590 arm_promoted_type (const_tree t)
23592 if (SCALAR_FLOAT_TYPE_P (t)
23593 && TYPE_PRECISION (t) == 16
23594 && TYPE_MAIN_VARIANT (t) == arm_fp16_type_node)
23595 return float_type_node;
23596 return NULL_TREE;
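/* Editorial illustration of the promotion above from the user's point of
   view (assumes __fp16 is enabled via -mfp16-format; add_half is a
   hypothetical example function).  __fp16 values are promoted to float, so
   the addition is performed in single precision and only the stored result
   is narrowed back to half precision.  */
void
add_half (const __fp16 *a, const __fp16 *b, __fp16 *out)
{
  *out = *a + *b;	/* evaluated as (float) *a + (float) *b */
}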
23599 /* Implement TARGET_SCALAR_MODE_SUPPORTED_P.
23600 This simply adds HFmode as a supported mode; even though we don't
23601 implement arithmetic on this type directly, it's supported by
23602 optabs conversions, much the way the double-word arithmetic is
23603 special-cased in the default hook. */
23605 static bool
23606 arm_scalar_mode_supported_p (scalar_mode mode)
23608 if (mode == HFmode)
23609 return (arm_fp16_format != ARM_FP16_FORMAT_NONE);
23610 else if (ALL_FIXED_POINT_MODE_P (mode))
23611 return true;
23612 else
23613 return default_scalar_mode_supported_p (mode);
23616 /* Set the value of FLT_EVAL_METHOD.
23617 ISO/IEC TS 18661-3 defines two values that we'd like to make use of:
23619 0: evaluate all operations and constants, whose semantic type has at
23620 most the range and precision of type float, to the range and
23621 precision of float; evaluate all other operations and constants to
23622 the range and precision of the semantic type;
23624    N, where _FloatN is a supported interchange floating type:
23625 evaluate all operations and constants, whose semantic type has at
23626 most the range and precision of _FloatN type, to the range and
23627 precision of the _FloatN type; evaluate all other operations and
23628 constants to the range and precision of the semantic type;
23630 If we have the ARMv8.2-A extensions then we support _Float16 in native
23631 precision, so we should set this to 16. Otherwise, we support the type,
23632 but want to evaluate expressions in float precision, so set this to
23633 0. */
23635 static enum flt_eval_method
23636 arm_excess_precision (enum excess_precision_type type)
23638 switch (type)
23640 case EXCESS_PRECISION_TYPE_FAST:
23641 case EXCESS_PRECISION_TYPE_STANDARD:
23642 /* We can calculate either in 16-bit range and precision or
23643 32-bit range and precision. Make that decision based on whether
23644 we have native support for the ARMv8.2-A 16-bit floating-point
23645 instructions or not. */
23646 return (TARGET_VFP_FP16INST
23647 ? FLT_EVAL_METHOD_PROMOTE_TO_FLOAT16
23648 : FLT_EVAL_METHOD_PROMOTE_TO_FLOAT);
23649 case EXCESS_PRECISION_TYPE_IMPLICIT:
23650 return FLT_EVAL_METHOD_PROMOTE_TO_FLOAT16;
23651 default:
23652 gcc_unreachable ();
23654 return FLT_EVAL_METHOD_UNPREDICTABLE;
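/* Editorial illustration (assumes the IEEE half-precision format, so that
   _Float16 exists; fma16 is a hypothetical example function).  With the
   ARMv8.2-A FP16 instructions (TARGET_VFP_FP16INST) the expression below is
   evaluated directly in _Float16; without them it is evaluated in float and
   the result converted back at the return, matching the two FLT_EVAL_METHOD
   choices made above.  */
_Float16
fma16 (_Float16 a, _Float16 b, _Float16 c)
{
  return a * b + c;
}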
23658 /* Implement TARGET_FLOATN_MODE. Make very sure that we don't provide
23659 _Float16 if we are using anything other than ieee format for 16-bit
23660 floating point. Otherwise, punt to the default implementation. */
23661 static opt_scalar_float_mode
23662 arm_floatn_mode (int n, bool extended)
23664 if (!extended && n == 16)
23666 if (arm_fp16_format == ARM_FP16_FORMAT_IEEE)
23667 return HFmode;
23668 return opt_scalar_float_mode ();
23671 return default_floatn_mode (n, extended);
23675 /* Set up OPERANDS for a register copy from SRC to DEST, taking care
23676 not to early-clobber SRC registers in the process.
23678 We assume that the operands described by SRC and DEST represent a
23679 decomposed copy of OPERANDS[1] into OPERANDS[0]. COUNT is the
23680 number of components into which the copy has been decomposed. */
23681 void
23682 neon_disambiguate_copy (rtx *operands, rtx *dest, rtx *src, unsigned int count)
23684 unsigned int i;
23686 if (!reg_overlap_mentioned_p (operands[0], operands[1])
23687 || REGNO (operands[0]) < REGNO (operands[1]))
23689 for (i = 0; i < count; i++)
23691 operands[2 * i] = dest[i];
23692 operands[2 * i + 1] = src[i];
23695 else
23697 for (i = 0; i < count; i++)
23699 operands[2 * i] = dest[count - i - 1];
23700 operands[2 * i + 1] = src[count - i - 1];
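/* Editorial sketch (standalone): why the copy direction chosen above matters
   when the source and destination register ranges overlap.  Copying
   "registers" {1,2} into {0,1} must go low-to-high, while copying {0,1}
   into {1,2} must go high-to-low; the wrong order reads a value that has
   already been overwritten.  overlap_safe_copy is a hypothetical helper.  */
static void
overlap_safe_copy (int *regs, unsigned int dest, unsigned int src,
		   unsigned int count)
{
  unsigned int i;

  if (dest < src)
    for (i = 0; i < count; i++)
      regs[dest + i] = regs[src + i];	/* forward: dest below src */
  else
    for (i = count; i-- > 0;)
      regs[dest + i] = regs[src + i];	/* backward: dest above src */
}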
23705 /* Split operands into moves from op[1] + op[2] into op[0]. */
23707 void
23708 neon_split_vcombine (rtx operands[3])
23710 unsigned int dest = REGNO (operands[0]);
23711 unsigned int src1 = REGNO (operands[1]);
23712 unsigned int src2 = REGNO (operands[2]);
23713 machine_mode halfmode = GET_MODE (operands[1]);
23714 unsigned int halfregs = HARD_REGNO_NREGS (src1, halfmode);
23715 rtx destlo, desthi;
23717 if (src1 == dest && src2 == dest + halfregs)
23719 /* No-op move. Can't split to nothing; emit something. */
23720 emit_note (NOTE_INSN_DELETED);
23721 return;
23724 /* Preserve register attributes for variable tracking. */
23725 destlo = gen_rtx_REG_offset (operands[0], halfmode, dest, 0);
23726 desthi = gen_rtx_REG_offset (operands[0], halfmode, dest + halfregs,
23727 GET_MODE_SIZE (halfmode));
23729 /* Special case of reversed high/low parts. Use VSWP. */
23730 if (src2 == dest && src1 == dest + halfregs)
23732 rtx x = gen_rtx_SET (destlo, operands[1]);
23733 rtx y = gen_rtx_SET (desthi, operands[2]);
23734 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, x, y)));
23735 return;
23738 if (!reg_overlap_mentioned_p (operands[2], destlo))
23740 /* Try to avoid unnecessary moves if part of the result
23741 is in the right place already. */
23742 if (src1 != dest)
23743 emit_move_insn (destlo, operands[1]);
23744 if (src2 != dest + halfregs)
23745 emit_move_insn (desthi, operands[2]);
23747 else
23749 if (src2 != dest + halfregs)
23750 emit_move_insn (desthi, operands[2]);
23751 if (src1 != dest)
23752 emit_move_insn (destlo, operands[1]);
23756 /* Return the number (counting from 0) of
23757 the least significant set bit in MASK. */
23759 inline static int
23760 number_of_first_bit_set (unsigned mask)
23762 return ctz_hwi (mask);
23765 /* Like emit_multi_reg_push, but allowing for a different set of
23766 registers to be described as saved. MASK is the set of registers
23767 to be saved; REAL_REGS is the set of registers to be described as
23768 saved. If REAL_REGS is 0, only describe the stack adjustment. */
23770 static rtx_insn *
23771 thumb1_emit_multi_reg_push (unsigned long mask, unsigned long real_regs)
23773 unsigned long regno;
23774 rtx par[10], tmp, reg;
23775 rtx_insn *insn;
23776 int i, j;
23778 /* Build the parallel of the registers actually being stored. */
23779 for (i = 0; mask; ++i, mask &= mask - 1)
23781 regno = ctz_hwi (mask);
23782 reg = gen_rtx_REG (SImode, regno);
23784 if (i == 0)
23785 tmp = gen_rtx_UNSPEC (BLKmode, gen_rtvec (1, reg), UNSPEC_PUSH_MULT);
23786 else
23787 tmp = gen_rtx_USE (VOIDmode, reg);
23789 par[i] = tmp;
23792 tmp = plus_constant (Pmode, stack_pointer_rtx, -4 * i);
23793 tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
23794 tmp = gen_frame_mem (BLKmode, tmp);
23795 tmp = gen_rtx_SET (tmp, par[0]);
23796 par[0] = tmp;
23798 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (i, par));
23799 insn = emit_insn (tmp);
23801 /* Always build the stack adjustment note for unwind info. */
23802 tmp = plus_constant (Pmode, stack_pointer_rtx, -4 * i);
23803 tmp = gen_rtx_SET (stack_pointer_rtx, tmp);
23804 par[0] = tmp;
23806 /* Build the parallel of the registers recorded as saved for unwind. */
23807 for (j = 0; real_regs; ++j, real_regs &= real_regs - 1)
23809 regno = ctz_hwi (real_regs);
23810 reg = gen_rtx_REG (SImode, regno);
23812 tmp = plus_constant (Pmode, stack_pointer_rtx, j * 4);
23813 tmp = gen_frame_mem (SImode, tmp);
23814 tmp = gen_rtx_SET (tmp, reg);
23815 RTX_FRAME_RELATED_P (tmp) = 1;
23816 par[j + 1] = tmp;
23819 if (j == 0)
23820 tmp = par[0];
23821 else
23823 RTX_FRAME_RELATED_P (par[0]) = 1;
23824 tmp = gen_rtx_SEQUENCE (VOIDmode, gen_rtvec_v (j + 1, par));
23827 add_reg_note (insn, REG_FRAME_RELATED_EXPR, tmp);
23829 return insn;
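/* Editorial note: the MASK/REAL_REGS distinction matters for sequences such
   as the high-register save quoted in thumb1_unexpanded_epilogue below:
	mov	r7, r9
	mov	r6, r8
	push	{r6, r7}
   The push insn stores r6/r7 (MASK), but the unwind information must record
   that r8/r9 (the REAL_REGS) are the registers whose values were saved.  */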
23832 /* Emit code to push or pop registers to or from the stack. F is the
23833 assembly file. MASK is the registers to pop. */
23834 static void
23835 thumb_pop (FILE *f, unsigned long mask)
23837 int regno;
23838 int lo_mask = mask & 0xFF;
23840 gcc_assert (mask);
23842 if (lo_mask == 0 && (mask & (1 << PC_REGNUM)))
23844 /* Special case. Do not generate a POP PC statement here, do it in
23845 thumb_exit() */
23846 thumb_exit (f, -1);
23847 return;
23850 fprintf (f, "\tpop\t{");
23852 /* Look at the low registers first. */
23853 for (regno = 0; regno <= LAST_LO_REGNUM; regno++, lo_mask >>= 1)
23855 if (lo_mask & 1)
23857 asm_fprintf (f, "%r", regno);
23859 if ((lo_mask & ~1) != 0)
23860 fprintf (f, ", ");
23864 if (mask & (1 << PC_REGNUM))
23866 /* Catch popping the PC. */
23867 if (TARGET_INTERWORK || TARGET_BACKTRACE || crtl->calls_eh_return
23868 || IS_CMSE_ENTRY (arm_current_func_type ()))
23870 	  /* The PC is never popped directly; instead
23871 it is popped into r3 and then BX is used. */
23872 fprintf (f, "}\n");
23874 thumb_exit (f, -1);
23876 return;
23878 else
23880 if (mask & 0xFF)
23881 fprintf (f, ", ");
23883 asm_fprintf (f, "%r", PC_REGNUM);
23887 fprintf (f, "}\n");
23890 /* Generate code to return from a thumb function.
23891 If 'reg_containing_return_addr' is -1, then the return address is
23892 actually on the stack, at the stack pointer. */
23893 static void
23894 thumb_exit (FILE *f, int reg_containing_return_addr)
23896 unsigned regs_available_for_popping;
23897 unsigned regs_to_pop;
23898 int pops_needed;
23899 unsigned available;
23900 unsigned required;
23901 machine_mode mode;
23902 int size;
23903 int restore_a4 = FALSE;
23905 /* Compute the registers we need to pop. */
23906 regs_to_pop = 0;
23907 pops_needed = 0;
23909 if (reg_containing_return_addr == -1)
23911 regs_to_pop |= 1 << LR_REGNUM;
23912 ++pops_needed;
23915 if (TARGET_BACKTRACE)
23917 /* Restore the (ARM) frame pointer and stack pointer. */
23918 regs_to_pop |= (1 << ARM_HARD_FRAME_POINTER_REGNUM) | (1 << SP_REGNUM);
23919 pops_needed += 2;
23922 /* If there is nothing to pop then just emit the BX instruction and
23923 return. */
23924 if (pops_needed == 0)
23926 if (crtl->calls_eh_return)
23927 asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);
23929 if (IS_CMSE_ENTRY (arm_current_func_type ()))
23931 asm_fprintf (f, "\tmsr\tAPSR_nzcvq, %r\n",
23932 reg_containing_return_addr);
23933 asm_fprintf (f, "\tbxns\t%r\n", reg_containing_return_addr);
23935 else
23936 asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
23937 return;
23939 /* Otherwise if we are not supporting interworking and we have not created
23940 a backtrace structure and the function was not entered in ARM mode then
23941 just pop the return address straight into the PC. */
23942 else if (!TARGET_INTERWORK
23943 && !TARGET_BACKTRACE
23944 && !is_called_in_ARM_mode (current_function_decl)
23945 && !crtl->calls_eh_return
23946 && !IS_CMSE_ENTRY (arm_current_func_type ()))
23948 asm_fprintf (f, "\tpop\t{%r}\n", PC_REGNUM);
23949 return;
23952 /* Find out how many of the (return) argument registers we can corrupt. */
23953 regs_available_for_popping = 0;
23955 /* If returning via __builtin_eh_return, the bottom three registers
23956 all contain information needed for the return. */
23957 if (crtl->calls_eh_return)
23958 size = 12;
23959 else
23961       /* Where possible, deduce the registers used from the function's
23962 	 return value.  This is more reliable than examining
23963 df_regs_ever_live_p () because that will be set if the register is
23964 ever used in the function, not just if the register is used
23965 to hold a return value. */
23967 if (crtl->return_rtx != 0)
23968 mode = GET_MODE (crtl->return_rtx);
23969 else
23970 mode = DECL_MODE (DECL_RESULT (current_function_decl));
23972 size = GET_MODE_SIZE (mode);
23974 if (size == 0)
23976 /* In a void function we can use any argument register.
23977 In a function that returns a structure on the stack
23978 we can use the second and third argument registers. */
23979 if (mode == VOIDmode)
23980 regs_available_for_popping =
23981 (1 << ARG_REGISTER (1))
23982 | (1 << ARG_REGISTER (2))
23983 | (1 << ARG_REGISTER (3));
23984 else
23985 regs_available_for_popping =
23986 (1 << ARG_REGISTER (2))
23987 | (1 << ARG_REGISTER (3));
23989 else if (size <= 4)
23990 regs_available_for_popping =
23991 (1 << ARG_REGISTER (2))
23992 | (1 << ARG_REGISTER (3));
23993 else if (size <= 8)
23994 regs_available_for_popping =
23995 (1 << ARG_REGISTER (3));
23998 /* Match registers to be popped with registers into which we pop them. */
23999 for (available = regs_available_for_popping,
24000 required = regs_to_pop;
24001 required != 0 && available != 0;
24002 available &= ~(available & - available),
24003 required &= ~(required & - required))
24004 -- pops_needed;
24006 /* If we have any popping registers left over, remove them. */
24007 if (available > 0)
24008 regs_available_for_popping &= ~available;
24010 /* Otherwise if we need another popping register we can use
24011 the fourth argument register. */
24012 else if (pops_needed)
24014 /* If we have not found any free argument registers and
24015 reg a4 contains the return address, we must move it. */
24016 if (regs_available_for_popping == 0
24017 && reg_containing_return_addr == LAST_ARG_REGNUM)
24019 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
24020 reg_containing_return_addr = LR_REGNUM;
24022 else if (size > 12)
24024 /* Register a4 is being used to hold part of the return value,
24025 but we have dire need of a free, low register. */
24026 restore_a4 = TRUE;
24028 asm_fprintf (f, "\tmov\t%r, %r\n",IP_REGNUM, LAST_ARG_REGNUM);
24031 if (reg_containing_return_addr != LAST_ARG_REGNUM)
24033 /* The fourth argument register is available. */
24034 regs_available_for_popping |= 1 << LAST_ARG_REGNUM;
24036 --pops_needed;
24040 /* Pop as many registers as we can. */
24041 thumb_pop (f, regs_available_for_popping);
24043 /* Process the registers we popped. */
24044 if (reg_containing_return_addr == -1)
24046 /* The return address was popped into the lowest numbered register. */
24047 regs_to_pop &= ~(1 << LR_REGNUM);
24049 reg_containing_return_addr =
24050 number_of_first_bit_set (regs_available_for_popping);
24052       /* Remove this register from the mask of available registers, so that
24053 the return address will not be corrupted by further pops. */
24054 regs_available_for_popping &= ~(1 << reg_containing_return_addr);
24057 /* If we popped other registers then handle them here. */
24058 if (regs_available_for_popping)
24060 int frame_pointer;
24062 /* Work out which register currently contains the frame pointer. */
24063 frame_pointer = number_of_first_bit_set (regs_available_for_popping);
24065 /* Move it into the correct place. */
24066 asm_fprintf (f, "\tmov\t%r, %r\n",
24067 ARM_HARD_FRAME_POINTER_REGNUM, frame_pointer);
24069 /* (Temporarily) remove it from the mask of popped registers. */
24070 regs_available_for_popping &= ~(1 << frame_pointer);
24071 regs_to_pop &= ~(1 << ARM_HARD_FRAME_POINTER_REGNUM);
24073 if (regs_available_for_popping)
24075 int stack_pointer;
24077 /* We popped the stack pointer as well,
24078 find the register that contains it. */
24079 stack_pointer = number_of_first_bit_set (regs_available_for_popping);
24081 /* Move it into the stack register. */
24082 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, stack_pointer);
24084 /* At this point we have popped all necessary registers, so
24085 do not worry about restoring regs_available_for_popping
24086 to its correct value:
24088 assert (pops_needed == 0)
24089 assert (regs_available_for_popping == (1 << frame_pointer))
24090 assert (regs_to_pop == (1 << STACK_POINTER)) */
24092 else
24094 	  /* Since we have just moved the popped value into the frame
24095 pointer, the popping register is available for reuse, and
24096 we know that we still have the stack pointer left to pop. */
24097 regs_available_for_popping |= (1 << frame_pointer);
24101 /* If we still have registers left on the stack, but we no longer have
24102 any registers into which we can pop them, then we must move the return
24103 address into the link register and make available the register that
24104 contained it. */
24105 if (regs_available_for_popping == 0 && pops_needed > 0)
24107 regs_available_for_popping |= 1 << reg_containing_return_addr;
24109 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM,
24110 reg_containing_return_addr);
24112 reg_containing_return_addr = LR_REGNUM;
24115 /* If we have registers left on the stack then pop some more.
24116 We know that at most we will want to pop FP and SP. */
24117 if (pops_needed > 0)
24119 int popped_into;
24120 int move_to;
24122 thumb_pop (f, regs_available_for_popping);
24124 /* We have popped either FP or SP.
24125 Move whichever one it is into the correct register. */
24126 popped_into = number_of_first_bit_set (regs_available_for_popping);
24127 move_to = number_of_first_bit_set (regs_to_pop);
24129 asm_fprintf (f, "\tmov\t%r, %r\n", move_to, popped_into);
24130 --pops_needed;
24133 /* If we still have not popped everything then we must have only
24134 had one register available to us and we are now popping the SP. */
24135 if (pops_needed > 0)
24137 int popped_into;
24139 thumb_pop (f, regs_available_for_popping);
24141 popped_into = number_of_first_bit_set (regs_available_for_popping);
24143 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, popped_into);
24145 assert (regs_to_pop == (1 << STACK_POINTER))
24146 assert (pops_needed == 1)
24150 /* If necessary restore the a4 register. */
24151 if (restore_a4)
24153 if (reg_containing_return_addr != LR_REGNUM)
24155 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
24156 reg_containing_return_addr = LR_REGNUM;
24159 asm_fprintf (f, "\tmov\t%r, %r\n", LAST_ARG_REGNUM, IP_REGNUM);
24162 if (crtl->calls_eh_return)
24163 asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);
24165 /* Return to caller. */
24166 if (IS_CMSE_ENTRY (arm_current_func_type ()))
24168 /* This is for the cases where LR is not being used to contain the return
24169 address. It may therefore contain information that we might not want
24170 to leak, hence it must be cleared. The value in R0 will never be a
24171 secret at this point, so it is safe to use it, see the clearing code
24172 in 'cmse_nonsecure_entry_clear_before_return'. */
24173 if (reg_containing_return_addr != LR_REGNUM)
24174 asm_fprintf (f, "\tmov\tlr, r0\n");
24176 asm_fprintf (f, "\tmsr\tAPSR_nzcvq, %r\n", reg_containing_return_addr);
24177 asm_fprintf (f, "\tbxns\t%r\n", reg_containing_return_addr);
24179 else
24180 asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
24183 /* Scan INSN just before assembler is output for it.
24184 For Thumb-1, we track the status of the condition codes; this
24185 information is used in the cbranchsi4_insn pattern. */
24186 void
24187 thumb1_final_prescan_insn (rtx_insn *insn)
24189 if (flag_print_asm_name)
24190 asm_fprintf (asm_out_file, "%@ 0x%04x\n",
24191 INSN_ADDRESSES (INSN_UID (insn)));
24192 /* Don't overwrite the previous setter when we get to a cbranch. */
24193 if (INSN_CODE (insn) != CODE_FOR_cbranchsi4_insn)
24195 enum attr_conds conds;
24197 if (cfun->machine->thumb1_cc_insn)
24199 if (modified_in_p (cfun->machine->thumb1_cc_op0, insn)
24200 || modified_in_p (cfun->machine->thumb1_cc_op1, insn))
24201 CC_STATUS_INIT;
24203 conds = get_attr_conds (insn);
24204 if (conds == CONDS_SET)
24206 rtx set = single_set (insn);
24207 cfun->machine->thumb1_cc_insn = insn;
24208 cfun->machine->thumb1_cc_op0 = SET_DEST (set);
24209 cfun->machine->thumb1_cc_op1 = const0_rtx;
24210 cfun->machine->thumb1_cc_mode = CC_NOOVmode;
24211 if (INSN_CODE (insn) == CODE_FOR_thumb1_subsi3_insn)
24213 rtx src1 = XEXP (SET_SRC (set), 1);
24214 if (src1 == const0_rtx)
24215 cfun->machine->thumb1_cc_mode = CCmode;
24217 else if (REG_P (SET_DEST (set)) && REG_P (SET_SRC (set)))
24219 /* Record the src register operand instead of dest because
24220 cprop_hardreg pass propagates src. */
24221 cfun->machine->thumb1_cc_op0 = SET_SRC (set);
24224 else if (conds != CONDS_NOCOND)
24225 cfun->machine->thumb1_cc_insn = NULL_RTX;
24228 /* Check if unexpected far jump is used. */
24229 if (cfun->machine->lr_save_eliminated
24230 && get_attr_far_jump (insn) == FAR_JUMP_YES)
24231 internal_error("Unexpected thumb1 far jump");
24235 thumb_shiftable_const (unsigned HOST_WIDE_INT val)
24237 unsigned HOST_WIDE_INT mask = 0xff;
24238 int i;
24240 val = val & (unsigned HOST_WIDE_INT)0xffffffffu;
24241 if (val == 0) /* XXX */
24242 return 0;
24244 for (i = 0; i < 25; i++)
24245 if ((val & (mask << i)) == val)
24246 return 1;
24248 return 0;
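/* Editorial examples for the test above: 0x000ff000 is accepted (it is 0xff
   shifted left by 12 places), while 0x00100001 is rejected because its set
   bits do not fit inside any contiguous 8-bit window.  */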
24251 /* Returns nonzero if the current function contains,
24252    or might contain, a far jump.  */
24253 static int
24254 thumb_far_jump_used_p (void)
24256 rtx_insn *insn;
24257 bool far_jump = false;
24258 unsigned int func_size = 0;
24260 /* If we have already decided that far jumps may be used,
24261 do not bother checking again, and always return true even if
24262 it turns out that they are not being used. Once we have made
24263 the decision that far jumps are present (and that hence the link
24264 register will be pushed onto the stack) we cannot go back on it. */
24265 if (cfun->machine->far_jump_used)
24266 return 1;
24268 /* If this function is not being called from the prologue/epilogue
24269 generation code then it must be being called from the
24270 INITIAL_ELIMINATION_OFFSET macro. */
24271 if (!(ARM_DOUBLEWORD_ALIGN || reload_completed))
24273 /* In this case we know that we are being asked about the elimination
24274 of the arg pointer register. If that register is not being used,
24275 then there are no arguments on the stack, and we do not have to
24276 worry that a far jump might force the prologue to push the link
24277 register, changing the stack offsets. In this case we can just
24278 return false, since the presence of far jumps in the function will
24279 not affect stack offsets.
24281 If the arg pointer is live (or if it was live, but has now been
24282 eliminated and so set to dead) then we do have to test to see if
24283 the function might contain a far jump. This test can lead to some
24284 	 false negatives, since before reload is completed, the length of
24285 branch instructions is not known, so gcc defaults to returning their
24286 longest length, which in turn sets the far jump attribute to true.
24288 A false negative will not result in bad code being generated, but it
24289 will result in a needless push and pop of the link register. We
24290 hope that this does not occur too often.
24292 If we need doubleword stack alignment this could affect the other
24293 elimination offsets so we can't risk getting it wrong. */
24294 if (df_regs_ever_live_p (ARG_POINTER_REGNUM))
24295 cfun->machine->arg_pointer_live = 1;
24296 else if (!cfun->machine->arg_pointer_live)
24297 return 0;
24300 /* We should not change far_jump_used during or after reload, as there is
24301 no chance to change stack frame layout. */
24302 if (reload_in_progress || reload_completed)
24303 return 0;
24305 /* Check to see if the function contains a branch
24306 insn with the far jump attribute set. */
24307 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
24309 if (JUMP_P (insn) && get_attr_far_jump (insn) == FAR_JUMP_YES)
24311 far_jump = true;
24313 func_size += get_attr_length (insn);
24316   /* The far_jump attribute will always be true for thumb1 before the
24317      shorten_branch pass, so checking the far_jump attribute before
24318      shorten_branch isn't very useful.
24320      The following heuristic tries to estimate more accurately whether a far
24321      jump will actually be needed.  The heuristic is very conservative, as
24322      there is no chance to roll back a decision not to use far jumps.
24324 Thumb1 long branch offset is -2048 to 2046. The worst case is each
24325 2-byte insn is associated with a 4 byte constant pool. Using
24326 function size 2048/3 as the threshold is conservative enough. */
24327 if (far_jump)
24329 if ((func_size * 3) >= 2048)
24331 /* Record the fact that we have decided that
24332 the function does use far jumps. */
24333 cfun->machine->far_jump_used = 1;
24334 return 1;
24338 return 0;
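/* Editorial sketch of the size heuristic above (far_jump_plausible_p is a
   hypothetical, standalone helper).  In the worst case every 2-byte Thumb-1
   insn drags in a 4-byte constant-pool entry, so the laid-out function can
   approach three times FUNC_SIZE bytes, while a single Thumb-1 branch only
   reaches roughly -2048..2046 bytes.  */
static int
far_jump_plausible_p (unsigned int func_size)
{
  return func_size * 3 >= 2048;
}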
24341 /* Return nonzero if FUNC must be entered in ARM mode. */
24342 static bool
24343 is_called_in_ARM_mode (tree func)
24345 gcc_assert (TREE_CODE (func) == FUNCTION_DECL);
24347 /* Ignore the problem about functions whose address is taken. */
24348 if (TARGET_CALLEE_INTERWORKING && TREE_PUBLIC (func))
24349 return true;
24351 #ifdef ARM_PE
24352 return lookup_attribute ("interfacearm", DECL_ATTRIBUTES (func)) != NULL_TREE;
24353 #else
24354 return false;
24355 #endif
24358 /* Given the stack offsets and register mask in OFFSETS, decide how
24359 many additional registers to push instead of subtracting a constant
24360 from SP. For epilogues the principle is the same except we use pop.
24361 FOR_PROLOGUE indicates which we're generating. */
24362 static int
24363 thumb1_extra_regs_pushed (arm_stack_offsets *offsets, bool for_prologue)
24365 HOST_WIDE_INT amount;
24366 unsigned long live_regs_mask = offsets->saved_regs_mask;
24367 /* Extract a mask of the ones we can give to the Thumb's push/pop
24368 instruction. */
24369 unsigned long l_mask = live_regs_mask & (for_prologue ? 0x40ff : 0xff);
24370 /* Then count how many other high registers will need to be pushed. */
24371 unsigned long high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
24372 int n_free, reg_base, size;
24374 if (!for_prologue && frame_pointer_needed)
24375 amount = offsets->locals_base - offsets->saved_regs;
24376 else
24377 amount = offsets->outgoing_args - offsets->saved_regs;
24379 /* If the stack frame size is 512 exactly, we can save one load
24380 instruction, which should make this a win even when optimizing
24381 for speed. */
24382 if (!optimize_size && amount != 512)
24383 return 0;
24385 /* Can't do this if there are high registers to push. */
24386 if (high_regs_pushed != 0)
24387 return 0;
24389 /* Shouldn't do it in the prologue if no registers would normally
24390 be pushed at all. In the epilogue, also allow it if we'll have
24391 a pop insn for the PC. */
24392 if (l_mask == 0
24393 && (for_prologue
24394 || TARGET_BACKTRACE
24395 || (live_regs_mask & 1 << LR_REGNUM) == 0
24396 || TARGET_INTERWORK
24397 || crtl->args.pretend_args_size != 0))
24398 return 0;
24400 /* Don't do this if thumb_expand_prologue wants to emit instructions
24401 between the push and the stack frame allocation. */
24402 if (for_prologue
24403 && ((flag_pic && arm_pic_register != INVALID_REGNUM)
24404 || (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)))
24405 return 0;
24407 reg_base = 0;
24408 n_free = 0;
24409 if (!for_prologue)
24411 size = arm_size_return_regs ();
24412 reg_base = ARM_NUM_INTS (size);
24413 live_regs_mask >>= reg_base;
24416 while (reg_base + n_free < 8 && !(live_regs_mask & 1)
24417 && (for_prologue || call_used_regs[reg_base + n_free]))
24419 live_regs_mask >>= 1;
24420 n_free++;
24423 if (n_free == 0)
24424 return 0;
24425 gcc_assert (amount / 4 * 4 == amount);
24427 if (amount >= 512 && (amount - n_free * 4) < 512)
24428 return (amount - 508) / 4;
24429 if (amount <= n_free * 4)
24430 return amount / 4;
24431 return 0;
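/* Editorial worked example for the arithmetic above: with AMOUNT == 516 and
   three free low registers, 516 - 3*4 == 504 < 512, so (516 - 508) / 4 == 2
   extra registers are pushed, leaving a 508-byte adjustment that fits the
   immediate of a single Thumb-1 "sub sp, #imm" (whose limit is 508).  */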
24434 /* The bits which aren't usefully expanded as rtl. */
24435 const char *
24436 thumb1_unexpanded_epilogue (void)
24438 arm_stack_offsets *offsets;
24439 int regno;
24440 unsigned long live_regs_mask = 0;
24441 int high_regs_pushed = 0;
24442 int extra_pop;
24443 int had_to_push_lr;
24444 int size;
24446 if (cfun->machine->return_used_this_function != 0)
24447 return "";
24449 if (IS_NAKED (arm_current_func_type ()))
24450 return "";
24452 offsets = arm_get_frame_offsets ();
24453 live_regs_mask = offsets->saved_regs_mask;
24454 high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
24456   /* Where possible, deduce the registers used from the function's return value.
24457      This is more reliable than examining df_regs_ever_live_p () because that
24458 will be set if the register is ever used in the function, not just if
24459 the register is used to hold a return value. */
24460 size = arm_size_return_regs ();
24462 extra_pop = thumb1_extra_regs_pushed (offsets, false);
24463 if (extra_pop > 0)
24465 unsigned long extra_mask = (1 << extra_pop) - 1;
24466 live_regs_mask |= extra_mask << ARM_NUM_INTS (size);
24469 /* The prolog may have pushed some high registers to use as
24470 work registers. e.g. the testsuite file:
24471 gcc/testsuite/gcc/gcc.c-torture/execute/complex-2.c
24472 compiles to produce:
24473 push {r4, r5, r6, r7, lr}
24474 mov r7, r9
24475 mov r6, r8
24476 push {r6, r7}
24477 as part of the prolog. We have to undo that pushing here. */
24479 if (high_regs_pushed)
24481 unsigned long mask = live_regs_mask & 0xff;
24482 int next_hi_reg;
24484 /* The available low registers depend on the size of the value we are
24485 returning. */
24486 if (size <= 12)
24487 mask |= 1 << 3;
24488 if (size <= 8)
24489 mask |= 1 << 2;
24491 if (mask == 0)
24492 /* Oh dear! We have no low registers into which we can pop
24493 high registers! */
24494 internal_error
24495 ("no low registers available for popping high registers");
24497 for (next_hi_reg = 8; next_hi_reg < 13; next_hi_reg++)
24498 if (live_regs_mask & (1 << next_hi_reg))
24499 break;
24501 while (high_regs_pushed)
24503 /* Find lo register(s) into which the high register(s) can
24504 be popped. */
24505 for (regno = 0; regno <= LAST_LO_REGNUM; regno++)
24507 if (mask & (1 << regno))
24508 high_regs_pushed--;
24509 if (high_regs_pushed == 0)
24510 break;
24513 mask &= (2 << regno) - 1; /* A noop if regno == 8 */
24515 /* Pop the values into the low register(s). */
24516 thumb_pop (asm_out_file, mask);
24518 /* Move the value(s) into the high registers. */
24519 for (regno = 0; regno <= LAST_LO_REGNUM; regno++)
24521 if (mask & (1 << regno))
24523 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", next_hi_reg,
24524 regno);
24526 for (next_hi_reg++; next_hi_reg < 13; next_hi_reg++)
24527 if (live_regs_mask & (1 << next_hi_reg))
24528 break;
24532 live_regs_mask &= ~0x0f00;
24535 had_to_push_lr = (live_regs_mask & (1 << LR_REGNUM)) != 0;
24536 live_regs_mask &= 0xff;
24538 if (crtl->args.pretend_args_size == 0 || TARGET_BACKTRACE)
24540 /* Pop the return address into the PC. */
24541 if (had_to_push_lr)
24542 live_regs_mask |= 1 << PC_REGNUM;
24544 /* Either no argument registers were pushed or a backtrace
24545 structure was created which includes an adjusted stack
24546 pointer, so just pop everything. */
24547 if (live_regs_mask)
24548 thumb_pop (asm_out_file, live_regs_mask);
24550 /* We have either just popped the return address into the
24551 	 PC or it was kept in LR for the entire function.
24552 Note that thumb_pop has already called thumb_exit if the
24553 PC was in the list. */
24554 if (!had_to_push_lr)
24555 thumb_exit (asm_out_file, LR_REGNUM);
24557 else
24559 /* Pop everything but the return address. */
24560 if (live_regs_mask)
24561 thumb_pop (asm_out_file, live_regs_mask);
24563 if (had_to_push_lr)
24565 if (size > 12)
24567 /* We have no free low regs, so save one. */
24568 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", IP_REGNUM,
24569 LAST_ARG_REGNUM);
24572 /* Get the return address into a temporary register. */
24573 thumb_pop (asm_out_file, 1 << LAST_ARG_REGNUM);
24575 if (size > 12)
24577 /* Move the return address to lr. */
24578 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LR_REGNUM,
24579 LAST_ARG_REGNUM);
24580 /* Restore the low register. */
24581 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LAST_ARG_REGNUM,
24582 IP_REGNUM);
24583 regno = LR_REGNUM;
24585 else
24586 regno = LAST_ARG_REGNUM;
24588 else
24589 regno = LR_REGNUM;
24591 /* Remove the argument registers that were pushed onto the stack. */
24592 asm_fprintf (asm_out_file, "\tadd\t%r, %r, #%d\n",
24593 SP_REGNUM, SP_REGNUM,
24594 crtl->args.pretend_args_size);
24596 thumb_exit (asm_out_file, regno);
24599 return "";
24602 /* Functions to save and restore machine-specific function data. */
24603 static struct machine_function *
24604 arm_init_machine_status (void)
24606 struct machine_function *machine;
24607 machine = ggc_cleared_alloc<machine_function> ();
24609 #if ARM_FT_UNKNOWN != 0
24610 machine->func_type = ARM_FT_UNKNOWN;
24611 #endif
24612 return machine;
24615 /* Return an RTX indicating where the return address to the
24616 calling function can be found. */
24618 arm_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
24620 if (count != 0)
24621 return NULL_RTX;
24623 return get_hard_reg_initial_val (Pmode, LR_REGNUM);
24626 /* Do anything needed before RTL is emitted for each function. */
24627 void
24628 arm_init_expanders (void)
24630 /* Arrange to initialize and mark the machine per-function status. */
24631 init_machine_status = arm_init_machine_status;
24633 /* This is to stop the combine pass optimizing away the alignment
24634 adjustment of va_arg. */
24635 /* ??? It is claimed that this should not be necessary. */
24636 if (cfun)
24637 mark_reg_pointer (arg_pointer_rtx, PARM_BOUNDARY);
24640 /* Check whether FUNC is compiled in a different (ARM/Thumb) mode than the
      current one.  */
24642 bool
24643 arm_change_mode_p (tree func)
24645 if (TREE_CODE (func) != FUNCTION_DECL)
24646 return false;
24648 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (func);
24650 if (!callee_tree)
24651 callee_tree = target_option_default_node;
24653 struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);
24654 int flags = callee_opts->x_target_flags;
24656 return (TARGET_THUMB_P (flags) != TARGET_THUMB);
24659 /* Like arm_compute_initial_elimination_offset.  Simpler because there
24660 isn't an ABI specified frame pointer for Thumb. Instead, we set it
24661 to point at the base of the local variables after static stack
24662 space for a function has been allocated. */
24664 HOST_WIDE_INT
24665 thumb_compute_initial_elimination_offset (unsigned int from, unsigned int to)
24667 arm_stack_offsets *offsets;
24669 offsets = arm_get_frame_offsets ();
24671 switch (from)
24673 case ARG_POINTER_REGNUM:
24674 switch (to)
24676 case STACK_POINTER_REGNUM:
24677 return offsets->outgoing_args - offsets->saved_args;
24679 case FRAME_POINTER_REGNUM:
24680 return offsets->soft_frame - offsets->saved_args;
24682 case ARM_HARD_FRAME_POINTER_REGNUM:
24683 return offsets->saved_regs - offsets->saved_args;
24685 case THUMB_HARD_FRAME_POINTER_REGNUM:
24686 return offsets->locals_base - offsets->saved_args;
24688 default:
24689 gcc_unreachable ();
24691 break;
24693 case FRAME_POINTER_REGNUM:
24694 switch (to)
24696 case STACK_POINTER_REGNUM:
24697 return offsets->outgoing_args - offsets->soft_frame;
24699 case ARM_HARD_FRAME_POINTER_REGNUM:
24700 return offsets->saved_regs - offsets->soft_frame;
24702 case THUMB_HARD_FRAME_POINTER_REGNUM:
24703 return offsets->locals_base - offsets->soft_frame;
24705 default:
24706 gcc_unreachable ();
24708 break;
24710 default:
24711 gcc_unreachable ();
24715 /* Generate the function's prologue. */
24717 void
24718 thumb1_expand_prologue (void)
24720 rtx_insn *insn;
24722 HOST_WIDE_INT amount;
24723 HOST_WIDE_INT size;
24724 arm_stack_offsets *offsets;
24725 unsigned long func_type;
24726 int regno;
24727 unsigned long live_regs_mask;
24728 unsigned long l_mask;
24729 unsigned high_regs_pushed = 0;
24730 bool lr_needs_saving;
24732 func_type = arm_current_func_type ();
24734 /* Naked functions don't have prologues. */
24735 if (IS_NAKED (func_type))
24737 if (flag_stack_usage_info)
24738 current_function_static_stack_size = 0;
24739 return;
24742 if (IS_INTERRUPT (func_type))
24744 error ("interrupt Service Routines cannot be coded in Thumb mode");
24745 return;
24748 if (is_called_in_ARM_mode (current_function_decl))
24749 emit_insn (gen_prologue_thumb1_interwork ());
24751 offsets = arm_get_frame_offsets ();
24752 live_regs_mask = offsets->saved_regs_mask;
24753 lr_needs_saving = live_regs_mask & (1 << LR_REGNUM);
24755 /* Extract a mask of the ones we can give to the Thumb's push instruction. */
24756 l_mask = live_regs_mask & 0x40ff;
24757 /* Then count how many other high registers will need to be pushed. */
24758 high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
24760 if (crtl->args.pretend_args_size)
24762 rtx x = GEN_INT (-crtl->args.pretend_args_size);
24764 if (cfun->machine->uses_anonymous_args)
24766 int num_pushes = ARM_NUM_INTS (crtl->args.pretend_args_size);
24767 unsigned long mask;
24769 mask = 1ul << (LAST_ARG_REGNUM + 1);
24770 mask -= 1ul << (LAST_ARG_REGNUM + 1 - num_pushes);
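	  /* For example, with LAST_ARG_REGNUM == 3 and num_pushes == 2 this
	     gives (1 << 4) - (1 << 2) == 0xc, i.e. a push of r2 and r3
	     (editorial note).  */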
24772 insn = thumb1_emit_multi_reg_push (mask, 0);
24774 else
24776 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
24777 stack_pointer_rtx, x));
24779 RTX_FRAME_RELATED_P (insn) = 1;
24782 if (TARGET_BACKTRACE)
24784 HOST_WIDE_INT offset = 0;
24785 unsigned work_register;
24786 rtx work_reg, x, arm_hfp_rtx;
24788 /* We have been asked to create a stack backtrace structure.
24789 The code looks like this:
24791 0 .align 2
24792 0 func:
24793 0 sub SP, #16 Reserve space for 4 registers.
24794 2 push {R7} Push low registers.
24795 4 add R7, SP, #20 Get the stack pointer before the push.
24796 6 str R7, [SP, #8] Store the stack pointer
24797 (before reserving the space).
24798 8 mov R7, PC Get hold of the start of this code + 12.
24799 10 str R7, [SP, #16] Store it.
24800 12 mov R7, FP Get hold of the current frame pointer.
24801 14 str R7, [SP, #4] Store it.
24802 16 mov R7, LR Get hold of the current return address.
24803 18 str R7, [SP, #12] Store it.
24804 20 add R7, SP, #16 Point at the start of the
24805 backtrace structure.
24806 22 mov FP, R7 Put this value into the frame pointer. */
24808 work_register = thumb_find_work_register (live_regs_mask);
24809 work_reg = gen_rtx_REG (SImode, work_register);
24810 arm_hfp_rtx = gen_rtx_REG (SImode, ARM_HARD_FRAME_POINTER_REGNUM);
24812 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
24813 stack_pointer_rtx, GEN_INT (-16)));
24814 RTX_FRAME_RELATED_P (insn) = 1;
24816 if (l_mask)
24818 insn = thumb1_emit_multi_reg_push (l_mask, l_mask);
24819 RTX_FRAME_RELATED_P (insn) = 1;
24820 lr_needs_saving = false;
24822 offset = bit_count (l_mask) * UNITS_PER_WORD;
24825 x = GEN_INT (offset + 16 + crtl->args.pretend_args_size);
24826 emit_insn (gen_addsi3 (work_reg, stack_pointer_rtx, x));
24828 x = plus_constant (Pmode, stack_pointer_rtx, offset + 4);
24829 x = gen_frame_mem (SImode, x);
24830 emit_move_insn (x, work_reg);
24832 /* Make sure that the instruction fetching the PC is in the right place
24833 to calculate "start of backtrace creation code + 12". */
24834 /* ??? The stores using the common WORK_REG ought to be enough to
24835 prevent the scheduler from doing anything weird. Failing that
24836 we could always move all of the following into an UNSPEC_VOLATILE. */
24837 if (l_mask)
24839 x = gen_rtx_REG (SImode, PC_REGNUM);
24840 emit_move_insn (work_reg, x);
24842 x = plus_constant (Pmode, stack_pointer_rtx, offset + 12);
24843 x = gen_frame_mem (SImode, x);
24844 emit_move_insn (x, work_reg);
24846 emit_move_insn (work_reg, arm_hfp_rtx);
24848 x = plus_constant (Pmode, stack_pointer_rtx, offset);
24849 x = gen_frame_mem (SImode, x);
24850 emit_move_insn (x, work_reg);
24852 else
24854 emit_move_insn (work_reg, arm_hfp_rtx);
24856 x = plus_constant (Pmode, stack_pointer_rtx, offset);
24857 x = gen_frame_mem (SImode, x);
24858 emit_move_insn (x, work_reg);
24860 x = gen_rtx_REG (SImode, PC_REGNUM);
24861 emit_move_insn (work_reg, x);
24863 x = plus_constant (Pmode, stack_pointer_rtx, offset + 12);
24864 x = gen_frame_mem (SImode, x);
24865 emit_move_insn (x, work_reg);
24868 x = gen_rtx_REG (SImode, LR_REGNUM);
24869 emit_move_insn (work_reg, x);
24871 x = plus_constant (Pmode, stack_pointer_rtx, offset + 8);
24872 x = gen_frame_mem (SImode, x);
24873 emit_move_insn (x, work_reg);
24875 x = GEN_INT (offset + 12);
24876 emit_insn (gen_addsi3 (work_reg, stack_pointer_rtx, x));
24878 emit_move_insn (arm_hfp_rtx, work_reg);
24880 /* Optimization: If we are not pushing any low registers but we are going
24881 to push some high registers then delay our first push. This will just
24882 be a push of LR and we can combine it with the push of the first high
24883 register. */
24884 else if ((l_mask & 0xff) != 0
24885 || (high_regs_pushed == 0 && lr_needs_saving))
24887 unsigned long mask = l_mask;
24888 mask |= (1 << thumb1_extra_regs_pushed (offsets, true)) - 1;
24889 insn = thumb1_emit_multi_reg_push (mask, mask);
24890 RTX_FRAME_RELATED_P (insn) = 1;
24891 lr_needs_saving = false;
24894 if (high_regs_pushed)
24896 unsigned pushable_regs;
24897 unsigned next_hi_reg;
24898 unsigned arg_regs_num = TARGET_AAPCS_BASED ? crtl->args.info.aapcs_ncrn
24899 : crtl->args.info.nregs;
24900 unsigned arg_regs_mask = (1 << arg_regs_num) - 1;
24902 for (next_hi_reg = 12; next_hi_reg > LAST_LO_REGNUM; next_hi_reg--)
24903 if (live_regs_mask & (1 << next_hi_reg))
24904 break;
24906 /* Here we need to mask out registers used for passing arguments, even
24907 if they can be pushed, to avoid using them to stash the high registers;
24908 such a stash could clobber arguments that the function still needs. */
24909 pushable_regs = l_mask & (~arg_regs_mask);
24910 if (lr_needs_saving)
24911 pushable_regs &= ~(1 << LR_REGNUM);
24913 if (pushable_regs == 0)
24914 pushable_regs = 1 << thumb_find_work_register (live_regs_mask);
24916 while (high_regs_pushed > 0)
24918 unsigned long real_regs_mask = 0;
24919 unsigned long push_mask = 0;
24921 for (regno = LR_REGNUM; regno >= 0; regno --)
24923 if (pushable_regs & (1 << regno))
24925 emit_move_insn (gen_rtx_REG (SImode, regno),
24926 gen_rtx_REG (SImode, next_hi_reg));
24928 high_regs_pushed --;
24929 real_regs_mask |= (1 << next_hi_reg);
24930 push_mask |= (1 << regno);
24932 if (high_regs_pushed)
24934 for (next_hi_reg --; next_hi_reg > LAST_LO_REGNUM;
24935 next_hi_reg --)
24936 if (live_regs_mask & (1 << next_hi_reg))
24937 break;
24939 else
24940 break;
24944 /* If we had to find a work register and we have not yet
24945 saved the LR then add it to the list of regs to push. */
24946 if (lr_needs_saving)
24948 push_mask |= 1 << LR_REGNUM;
24949 real_regs_mask |= 1 << LR_REGNUM;
24950 lr_needs_saving = false;
24953 insn = thumb1_emit_multi_reg_push (push_mask, real_regs_mask);
24954 RTX_FRAME_RELATED_P (insn) = 1;
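/* Illustrative example of one iteration of the loop above: if r8 and r9 are
   the live high registers and only r4 and r5 are pushable, the code emits
     mov r5, r9
     mov r4, r8
     push {r4, r5}
   while real_regs_mask records r8 and r9, so the unwind information
   describes the registers whose values were actually saved rather than the
   low scratch registers named in the PUSH.  */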
24958 /* Load the pic register before setting the frame pointer,
24959 so we can use r7 as a temporary work register. */
24960 if (flag_pic && arm_pic_register != INVALID_REGNUM)
24961 arm_load_pic_register (live_regs_mask);
24963 if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
24964 emit_move_insn (gen_rtx_REG (Pmode, ARM_HARD_FRAME_POINTER_REGNUM),
24965 stack_pointer_rtx);
24967 size = offsets->outgoing_args - offsets->saved_args;
24968 if (flag_stack_usage_info)
24969 current_function_static_stack_size = size;
24971 /* If we have a frame, then do stack checking. FIXME: not implemented. */
24972 if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK && size)
24973 sorry ("-fstack-check=specific for Thumb-1");
24975 amount = offsets->outgoing_args - offsets->saved_regs;
24976 amount -= 4 * thumb1_extra_regs_pushed (offsets, true);
24977 if (amount)
24979 if (amount < 512)
24981 insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
24982 GEN_INT (- amount)));
24983 RTX_FRAME_RELATED_P (insn) = 1;
24985 else
24987 rtx reg, dwarf;
24989 /* The stack decrement is too big for an immediate value in a single
24990 insn. In theory we could issue multiple subtracts, but after
24991 three of them it becomes more space efficient to place the full
24992 value in the constant pool and load into a register. (Also the
24993 ARM debugger really likes to see only one stack decrement per
24994 function). So instead we look for a scratch register into which
24995 we can load the decrement, and then we subtract this from the
24996 stack pointer. Unfortunately on the thumb the only available
24997 scratch registers are the argument registers, and we cannot use
24998 these as they may hold arguments to the function. Instead we
24999 attempt to locate a call preserved register which is used by this
25000 function. If we can find one, then we know that it will have
25001 been pushed at the start of the prologue and so we can corrupt
25002 it now. */
25003 for (regno = LAST_ARG_REGNUM + 1; regno <= LAST_LO_REGNUM; regno++)
25004 if (live_regs_mask & (1 << regno))
25005 break;
25007 gcc_assert (regno <= LAST_LO_REGNUM);
25009 reg = gen_rtx_REG (SImode, regno);
25011 emit_insn (gen_movsi (reg, GEN_INT (- amount)));
25013 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
25014 stack_pointer_rtx, reg));
25016 dwarf = gen_rtx_SET (stack_pointer_rtx,
25017 plus_constant (Pmode, stack_pointer_rtx,
25018 -amount));
25019 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
25020 RTX_FRAME_RELATED_P (insn) = 1;
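/* Illustrative result for amount == 1024, assuming r4 was found to be a
   call-preserved register already saved by the prologue: something like
     ldr r4, .LCn      @ .LCn holds -1024 in the constant pool
     add sp, sp, r4
   with the REG_FRAME_RELATED_EXPR note describing sp = sp - 1024, so the
   unwinder still sees a plain constant stack adjustment.  */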
25024 if (frame_pointer_needed)
25025 thumb_set_frame_pointer (offsets);
25027 /* If we are profiling, make sure no instructions are scheduled before
25028 the call to mcount. Similarly if the user has requested no
25029 scheduling in the prolog. Similarly if we want non-call exceptions
25030 using the EABI unwinder, to prevent faulting instructions from being
25031 swapped with a stack adjustment. */
25032 if (crtl->profile || !TARGET_SCHED_PROLOG
25033 || (arm_except_unwind_info (&global_options) == UI_TARGET
25034 && cfun->can_throw_non_call_exceptions))
25035 emit_insn (gen_blockage ());
25037 cfun->machine->lr_save_eliminated = !thumb_force_lr_save ();
25038 if (live_regs_mask & 0xff)
25039 cfun->machine->lr_save_eliminated = 0;
25042 /* Clear caller saved registers not used to pass return values and leaked
25043 condition flags before exiting a cmse_nonsecure_entry function. */
25045 void
25046 cmse_nonsecure_entry_clear_before_return (void)
25048 uint64_t to_clear_mask[2];
25049 uint32_t padding_bits_to_clear = 0;
25050 uint32_t * padding_bits_to_clear_ptr = &padding_bits_to_clear;
25051 int regno, maxregno = IP_REGNUM;
25052 tree result_type;
25053 rtx result_rtl;
25055 to_clear_mask[0] = (1ULL << (NUM_ARG_REGS)) - 1;
25056 to_clear_mask[0] |= (1ULL << IP_REGNUM);
25058 /* If we are not dealing with -mfloat-abi=soft we will need to clear VFP
25059 registers. We also check that TARGET_HARD_FLOAT and !TARGET_THUMB1 hold
25060 to make sure the instructions used to clear them are present. */
25061 if (TARGET_HARD_FLOAT && !TARGET_THUMB1)
25063 uint64_t float_mask = (1ULL << (D7_VFP_REGNUM + 1)) - 1;
25064 maxregno = LAST_VFP_REGNUM;
25066 float_mask &= ~((1ULL << FIRST_VFP_REGNUM) - 1);
25067 to_clear_mask[0] |= float_mask;
25069 float_mask = (1ULL << (maxregno - 63)) - 1;
25070 to_clear_mask[1] = float_mask;
25072 /* Make sure we don't clear the two scratch registers used to clear the
25073 relevant FPSCR bits in output_return_instruction. */
25074 emit_use (gen_rtx_REG (SImode, IP_REGNUM));
25075 to_clear_mask[0] &= ~(1ULL << IP_REGNUM);
25076 emit_use (gen_rtx_REG (SImode, 4));
25077 to_clear_mask[0] &= ~(1ULL << 4);
25080 /* If the user has defined registers to be caller saved, these are no longer
25081 restored by the function before returning and must thus be cleared for
25082 security purposes. */
25083 for (regno = NUM_ARG_REGS; regno < LAST_VFP_REGNUM; regno++)
25085 /* We do not touch registers that can be used to pass arguments as per
25086 the AAPCS, since these should never be made callee-saved by user
25087 options. */
25088 if (IN_RANGE (regno, FIRST_VFP_REGNUM, D7_VFP_REGNUM))
25089 continue;
25090 if (IN_RANGE (regno, IP_REGNUM, PC_REGNUM))
25091 continue;
25092 if (call_used_regs[regno])
25093 to_clear_mask[regno / 64] |= (1ULL << (regno % 64));
25096 /* Make sure we do not clear the registers used to return the result in. */
25097 result_type = TREE_TYPE (DECL_RESULT (current_function_decl));
25098 if (!VOID_TYPE_P (result_type))
25100 result_rtl = arm_function_value (result_type, current_function_decl, 0);
25102 /* No need to check that we return in registers, because we don't
25103 support returning on stack yet. */
25104 to_clear_mask[0]
25105 &= ~compute_not_to_clear_mask (result_type, result_rtl, 0,
25106 padding_bits_to_clear_ptr);
25109 if (padding_bits_to_clear != 0)
25111 rtx reg_rtx;
25112 /* Padding bits to clear is not 0 so we know we are dealing with
25113 returning a composite type, which only uses r0. Let's make sure that
25114 r1-r3 is cleared too, we will use r1 as a scratch register. */
25115 gcc_assert ((to_clear_mask[0] & 0xe) == 0xe);
25117 reg_rtx = gen_rtx_REG (SImode, R1_REGNUM);
25119 /* Fill the lower half of the negated padding_bits_to_clear. */
25120 emit_move_insn (reg_rtx,
25121 GEN_INT ((((~padding_bits_to_clear) << 16u) >> 16u)));
25123 /* Also fill the top half of the negated padding_bits_to_clear. */
25124 if (((~padding_bits_to_clear) >> 16) > 0)
25125 emit_insn (gen_rtx_SET (gen_rtx_ZERO_EXTRACT (SImode, reg_rtx,
25126 GEN_INT (16),
25127 GEN_INT (16)),
25128 GEN_INT ((~padding_bits_to_clear) >> 16)));
25130 emit_insn (gen_andsi3 (gen_rtx_REG (SImode, R0_REGNUM),
25131 gen_rtx_REG (SImode, R0_REGNUM),
25132 reg_rtx));
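/* Worked example (illustrative): if padding_bits_to_clear is 0x0000ff00,
   then ~padding_bits_to_clear is 0xffff00ff; the move above loads its low
   half (0x00ff) into r1, the ZERO_EXTRACT fills the high half (0xffff), and
   the AND leaves every bit of r0 intact except bits 8-15, which are exactly
   the padding bits being scrubbed.  */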
25135 for (regno = R0_REGNUM; regno <= maxregno; regno++)
25137 if (!(to_clear_mask[regno / 64] & (1ULL << (regno % 64))))
25138 continue;
25140 if (IS_VFP_REGNUM (regno))
25142 /* If regno is an even vfp register and its successor is also to
25143 be cleared, use vmov. */
25144 if (TARGET_VFP_DOUBLE
25145 && VFP_REGNO_OK_FOR_DOUBLE (regno)
25146 && to_clear_mask[regno / 64] & (1ULL << ((regno % 64) + 1)))
25148 emit_move_insn (gen_rtx_REG (DFmode, regno),
25149 CONST1_RTX (DFmode));
25150 emit_use (gen_rtx_REG (DFmode, regno));
25151 regno++;
25153 else
25155 emit_move_insn (gen_rtx_REG (SFmode, regno),
25156 CONST1_RTX (SFmode));
25157 emit_use (gen_rtx_REG (SFmode, regno));
25160 else
25162 if (TARGET_THUMB1)
25164 if (regno == R0_REGNUM)
25165 emit_move_insn (gen_rtx_REG (SImode, regno),
25166 const0_rtx);
25167 else
25168 /* R0 has either been cleared before (see the code above) or it
25169 holds a return value; either way it is not secret
25170 information. */
25171 emit_move_insn (gen_rtx_REG (SImode, regno),
25172 gen_rtx_REG (SImode, R0_REGNUM));
25173 emit_use (gen_rtx_REG (SImode, regno));
25175 else
25177 emit_move_insn (gen_rtx_REG (SImode, regno),
25178 gen_rtx_REG (SImode, LR_REGNUM));
25179 emit_use (gen_rtx_REG (SImode, regno));
25185 /* Generate pattern *pop_multiple_with_stack_update_and_return if single
25186 POP instruction can be generated. LR should be replaced by PC. All
25187 the checks required are already done by USE_RETURN_INSN (). Hence,
25188 all we really need to check here is if single register is to be
25189 returned, or multiple register return. */
25190 void
25191 thumb2_expand_return (bool simple_return)
25193 int i, num_regs;
25194 unsigned long saved_regs_mask;
25195 arm_stack_offsets *offsets;
25197 offsets = arm_get_frame_offsets ();
25198 saved_regs_mask = offsets->saved_regs_mask;
25200 for (i = 0, num_regs = 0; i <= LAST_ARM_REGNUM; i++)
25201 if (saved_regs_mask & (1 << i))
25202 num_regs++;
25204 if (!simple_return && saved_regs_mask)
25206 /* TODO: Verify that this path is never taken for cmse_nonsecure_entry
25207 functions or adapt code to handle according to ACLE. This path should
25208 not be reachable for cmse_nonsecure_entry functions though we prefer
25209 to assert it for now to ensure that future code changes do not silently
25210 change this behavior. */
25211 gcc_assert (!IS_CMSE_ENTRY (arm_current_func_type ()));
25212 if (num_regs == 1)
25214 rtx par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
25215 rtx reg = gen_rtx_REG (SImode, PC_REGNUM);
25216 rtx addr = gen_rtx_MEM (SImode,
25217 gen_rtx_POST_INC (SImode,
25218 stack_pointer_rtx));
25219 set_mem_alias_set (addr, get_frame_alias_set ());
25220 XVECEXP (par, 0, 0) = ret_rtx;
25221 XVECEXP (par, 0, 1) = gen_rtx_SET (reg, addr);
25222 RTX_FRAME_RELATED_P (XVECEXP (par, 0, 1)) = 1;
25223 emit_jump_insn (par);
25225 else
25227 saved_regs_mask &= ~ (1 << LR_REGNUM);
25228 saved_regs_mask |= (1 << PC_REGNUM);
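/* For example (illustrative), if the prologue pushed {r4, r5, lr}, the mask
   manipulation above turns the epilogue pop into pop {r4, r5, pc}, restoring
   the callee-saved registers and returning in one instruction.  */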
25229 arm_emit_multi_reg_pop (saved_regs_mask);
25232 else
25234 if (IS_CMSE_ENTRY (arm_current_func_type ()))
25235 cmse_nonsecure_entry_clear_before_return ();
25236 emit_jump_insn (simple_return_rtx);
25240 void
25241 thumb1_expand_epilogue (void)
25243 HOST_WIDE_INT amount;
25244 arm_stack_offsets *offsets;
25245 int regno;
25247 /* Naked functions don't have epilogues. */
25248 if (IS_NAKED (arm_current_func_type ()))
25249 return;
25251 offsets = arm_get_frame_offsets ();
25252 amount = offsets->outgoing_args - offsets->saved_regs;
25254 if (frame_pointer_needed)
25256 emit_insn (gen_movsi (stack_pointer_rtx, hard_frame_pointer_rtx));
25257 amount = offsets->locals_base - offsets->saved_regs;
25259 amount -= 4 * thumb1_extra_regs_pushed (offsets, false);
25261 gcc_assert (amount >= 0);
25262 if (amount)
25264 emit_insn (gen_blockage ());
25266 if (amount < 512)
25267 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
25268 GEN_INT (amount)));
25269 else
25271 /* r3 is always free in the epilogue. */
25272 rtx reg = gen_rtx_REG (SImode, LAST_ARG_REGNUM);
25274 emit_insn (gen_movsi (reg, GEN_INT (amount)));
25275 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, reg));
25279 /* Emit a USE (stack_pointer_rtx), so that
25280 the stack adjustment will not be deleted. */
25281 emit_insn (gen_force_register_use (stack_pointer_rtx));
25283 if (crtl->profile || !TARGET_SCHED_PROLOG)
25284 emit_insn (gen_blockage ());
25286 /* Emit a clobber for each insn that will be restored in the epilogue,
25287 so that flow2 will get register lifetimes correct. */
25288 for (regno = 0; regno < 13; regno++)
25289 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
25290 emit_clobber (gen_rtx_REG (SImode, regno));
25292 if (! df_regs_ever_live_p (LR_REGNUM))
25293 emit_use (gen_rtx_REG (SImode, LR_REGNUM));
25295 /* Clear all caller-saved regs that are not used to return. */
25296 if (IS_CMSE_ENTRY (arm_current_func_type ()))
25297 cmse_nonsecure_entry_clear_before_return ();
25300 /* Epilogue code for APCS frame. */
25301 static void
25302 arm_expand_epilogue_apcs_frame (bool really_return)
25304 unsigned long func_type;
25305 unsigned long saved_regs_mask;
25306 int num_regs = 0;
25307 int i;
25308 int floats_from_frame = 0;
25309 arm_stack_offsets *offsets;
25311 gcc_assert (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM);
25312 func_type = arm_current_func_type ();
25314 /* Get frame offsets for ARM. */
25315 offsets = arm_get_frame_offsets ();
25316 saved_regs_mask = offsets->saved_regs_mask;
25318 /* Find the offset of the floating-point save area in the frame. */
25319 floats_from_frame
25320 = (offsets->saved_args
25321 + arm_compute_static_chain_stack_bytes ()
25322 - offsets->frame);
25324 /* Compute how many core registers saved and how far away the floats are. */
25325 for (i = 0; i <= LAST_ARM_REGNUM; i++)
25326 if (saved_regs_mask & (1 << i))
25328 num_regs++;
25329 floats_from_frame += 4;
25332 if (TARGET_HARD_FLOAT)
25334 int start_reg;
25335 rtx ip_rtx = gen_rtx_REG (SImode, IP_REGNUM);
25337 /* The offset is from IP_REGNUM. */
25338 int saved_size = arm_get_vfp_saved_size ();
25339 if (saved_size > 0)
25341 rtx_insn *insn;
25342 floats_from_frame += saved_size;
25343 insn = emit_insn (gen_addsi3 (ip_rtx,
25344 hard_frame_pointer_rtx,
25345 GEN_INT (-floats_from_frame)));
25346 arm_add_cfa_adjust_cfa_note (insn, -floats_from_frame,
25347 ip_rtx, hard_frame_pointer_rtx);
25350 /* Generate VFP register multi-pop. */
25351 start_reg = FIRST_VFP_REGNUM;
25353 for (i = FIRST_VFP_REGNUM; i < LAST_VFP_REGNUM; i += 2)
25354 /* Look for a case where a reg does not need restoring. */
25355 if ((!df_regs_ever_live_p (i) || call_used_regs[i])
25356 && (!df_regs_ever_live_p (i + 1)
25357 || call_used_regs[i + 1]))
25359 if (start_reg != i)
25360 arm_emit_vfp_multi_reg_pop (start_reg,
25361 (i - start_reg) / 2,
25362 gen_rtx_REG (SImode,
25363 IP_REGNUM));
25364 start_reg = i + 2;
25367 /* Restore the remaining regs that we have discovered (or possibly
25368 even all of them, if the conditional in the for loop never
25369 fired). */
25370 if (start_reg != i)
25371 arm_emit_vfp_multi_reg_pop (start_reg,
25372 (i - start_reg) / 2,
25373 gen_rtx_REG (SImode, IP_REGNUM));
25376 if (TARGET_IWMMXT)
25378 /* The frame pointer is guaranteed to be non-double-word aligned, as
25379 it is set to double-word-aligned old_stack_pointer - 4. */
25380 rtx_insn *insn;
25381 int lrm_count = (num_regs % 2) ? (num_regs + 2) : (num_regs + 1);
25383 for (i = LAST_IWMMXT_REGNUM; i >= FIRST_IWMMXT_REGNUM; i--)
25384 if (df_regs_ever_live_p (i) && !call_used_regs[i])
25386 rtx addr = gen_frame_mem (V2SImode,
25387 plus_constant (Pmode, hard_frame_pointer_rtx,
25388 - lrm_count * 4));
25389 insn = emit_insn (gen_movsi (gen_rtx_REG (V2SImode, i), addr));
25390 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
25391 gen_rtx_REG (V2SImode, i),
25392 NULL_RTX);
25393 lrm_count += 2;
25397 /* saved_regs_mask should contain IP, which holds the old stack pointer
25398 captured when the frame was created. Since SP and IP are adjacent
25399 registers, we can restore the value directly into SP. */
25400 gcc_assert (saved_regs_mask & (1 << IP_REGNUM));
25401 saved_regs_mask &= ~(1 << IP_REGNUM);
25402 saved_regs_mask |= (1 << SP_REGNUM);
25404 /* There are two registers left in saved_regs_mask - LR and PC. We
25405 only need to restore LR (the return address), but to
25406 save time we can load it directly into PC, unless we need a
25407 special function exit sequence, or we are not really returning. */
25408 if (really_return
25409 && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
25410 && !crtl->calls_eh_return)
25411 /* Delete LR from the register mask, so that LR on
25412 the stack is loaded into the PC in the register mask. */
25413 saved_regs_mask &= ~(1 << LR_REGNUM);
25414 else
25415 saved_regs_mask &= ~(1 << PC_REGNUM);
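/* Illustrative effect for a normal return: with LR removed and PC kept in
   the mask, the multi-register pop below loads the stacked IP slot (the
   caller's stack pointer) into SP and the stacked LR slot into PC, so the
   frame teardown and the return happen in a single load-multiple.  */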
25417 num_regs = bit_count (saved_regs_mask);
25418 if ((offsets->outgoing_args != (1 + num_regs)) || cfun->calls_alloca)
25420 rtx_insn *insn;
25421 emit_insn (gen_blockage ());
25422 /* Unwind the stack to just below the saved registers. */
25423 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
25424 hard_frame_pointer_rtx,
25425 GEN_INT (- 4 * num_regs)));
25427 arm_add_cfa_adjust_cfa_note (insn, - 4 * num_regs,
25428 stack_pointer_rtx, hard_frame_pointer_rtx);
25431 arm_emit_multi_reg_pop (saved_regs_mask);
25433 if (IS_INTERRUPT (func_type))
25435 /* Interrupt handlers will have pushed the
25436 IP onto the stack, so restore it now. */
25437 rtx_insn *insn;
25438 rtx addr = gen_rtx_MEM (SImode,
25439 gen_rtx_POST_INC (SImode,
25440 stack_pointer_rtx));
25441 set_mem_alias_set (addr, get_frame_alias_set ());
25442 insn = emit_insn (gen_movsi (gen_rtx_REG (SImode, IP_REGNUM), addr));
25443 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
25444 gen_rtx_REG (SImode, IP_REGNUM),
25445 NULL_RTX);
25448 if (!really_return || (saved_regs_mask & (1 << PC_REGNUM)))
25449 return;
25451 if (crtl->calls_eh_return)
25452 emit_insn (gen_addsi3 (stack_pointer_rtx,
25453 stack_pointer_rtx,
25454 gen_rtx_REG (SImode, ARM_EH_STACKADJ_REGNUM)));
25456 if (IS_STACKALIGN (func_type))
25457 /* Restore the original stack pointer. Before prologue, the stack was
25458 realigned and the original stack pointer saved in r0. For details,
25459 see comment in arm_expand_prologue. */
25460 emit_insn (gen_movsi (stack_pointer_rtx, gen_rtx_REG (SImode, R0_REGNUM)));
25462 emit_jump_insn (simple_return_rtx);
25465 /* Generate RTL to represent ARM epilogue. Really_return is true if the
25466 function is not a sibcall. */
25467 void
25468 arm_expand_epilogue (bool really_return)
25470 unsigned long func_type;
25471 unsigned long saved_regs_mask;
25472 int num_regs = 0;
25473 int i;
25474 int amount;
25475 arm_stack_offsets *offsets;
25477 func_type = arm_current_func_type ();
25479 /* Naked functions don't have epilogues. Hence, generate the return pattern
25480 and let output_return_instruction take care of any instruction emission. */
25481 if (IS_NAKED (func_type)
25482 || (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN))
25484 if (really_return)
25485 emit_jump_insn (simple_return_rtx);
25486 return;
25489 /* If we are throwing an exception, then we really must be doing a
25490 return, so we can't tail-call. */
25491 gcc_assert (!crtl->calls_eh_return || really_return);
25493 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
25495 arm_expand_epilogue_apcs_frame (really_return);
25496 return;
25499 /* Get frame offsets for ARM. */
25500 offsets = arm_get_frame_offsets ();
25501 saved_regs_mask = offsets->saved_regs_mask;
25502 num_regs = bit_count (saved_regs_mask);
25504 if (frame_pointer_needed)
25506 rtx_insn *insn;
25507 /* Restore stack pointer if necessary. */
25508 if (TARGET_ARM)
25510 /* In ARM mode, frame pointer points to first saved register.
25511 Restore stack pointer to last saved register. */
25512 amount = offsets->frame - offsets->saved_regs;
25514 /* Force out any pending memory operations that reference stacked data
25515 before stack de-allocation occurs. */
25516 emit_insn (gen_blockage ());
25517 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
25518 hard_frame_pointer_rtx,
25519 GEN_INT (amount)));
25520 arm_add_cfa_adjust_cfa_note (insn, amount,
25521 stack_pointer_rtx,
25522 hard_frame_pointer_rtx);
25524 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is not
25525 deleted. */
25526 emit_insn (gen_force_register_use (stack_pointer_rtx));
25528 else
25530 /* In Thumb-2 mode, the frame pointer points to the last saved
25531 register. */
25532 amount = offsets->locals_base - offsets->saved_regs;
25533 if (amount)
25535 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
25536 hard_frame_pointer_rtx,
25537 GEN_INT (amount)));
25538 arm_add_cfa_adjust_cfa_note (insn, amount,
25539 hard_frame_pointer_rtx,
25540 hard_frame_pointer_rtx);
25543 /* Force out any pending memory operations that reference stacked data
25544 before stack de-allocation occurs. */
25545 emit_insn (gen_blockage ());
25546 insn = emit_insn (gen_movsi (stack_pointer_rtx,
25547 hard_frame_pointer_rtx));
25548 arm_add_cfa_adjust_cfa_note (insn, 0,
25549 stack_pointer_rtx,
25550 hard_frame_pointer_rtx);
25551 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is not
25552 deleted. */
25553 emit_insn (gen_force_register_use (stack_pointer_rtx));
25556 else
25558 /* Pop off outgoing args and local frame to adjust stack pointer to
25559 last saved register. */
25560 amount = offsets->outgoing_args - offsets->saved_regs;
25561 if (amount)
25563 rtx_insn *tmp;
25564 /* Force out any pending memory operations that reference stacked data
25565 before stack de-allocation occurs. */
25566 emit_insn (gen_blockage ());
25567 tmp = emit_insn (gen_addsi3 (stack_pointer_rtx,
25568 stack_pointer_rtx,
25569 GEN_INT (amount)));
25570 arm_add_cfa_adjust_cfa_note (tmp, amount,
25571 stack_pointer_rtx, stack_pointer_rtx);
25572 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is
25573 not deleted. */
25574 emit_insn (gen_force_register_use (stack_pointer_rtx));
25578 if (TARGET_HARD_FLOAT)
25580 /* Generate VFP register multi-pop. */
25581 int end_reg = LAST_VFP_REGNUM + 1;
25583 /* Scan the registers in reverse order. We need to match
25584 any groupings made in the prologue and generate matching
25585 vldm operations. The need to match groups is because,
25586 unlike pop, vldm can only do consecutive regs. */
25587 for (i = LAST_VFP_REGNUM - 1; i >= FIRST_VFP_REGNUM; i -= 2)
25588 /* Look for a case where a reg does not need restoring. */
25589 if ((!df_regs_ever_live_p (i) || call_used_regs[i])
25590 && (!df_regs_ever_live_p (i + 1)
25591 || call_used_regs[i + 1]))
25593 /* Restore the regs discovered so far (from reg+2 to
25594 end_reg). */
25595 if (end_reg > i + 2)
25596 arm_emit_vfp_multi_reg_pop (i + 2,
25597 (end_reg - (i + 2)) / 2,
25598 stack_pointer_rtx);
25599 end_reg = i;
25602 /* Restore the remaining regs that we have discovered (or possibly
25603 even all of them, if the conditional in the for loop never
25604 fired). */
25605 if (end_reg > i + 2)
25606 arm_emit_vfp_multi_reg_pop (i + 2,
25607 (end_reg - (i + 2)) / 2,
25608 stack_pointer_rtx);
25611 if (TARGET_IWMMXT)
25612 for (i = FIRST_IWMMXT_REGNUM; i <= LAST_IWMMXT_REGNUM; i++)
25613 if (df_regs_ever_live_p (i) && !call_used_regs[i])
25615 rtx_insn *insn;
25616 rtx addr = gen_rtx_MEM (V2SImode,
25617 gen_rtx_POST_INC (SImode,
25618 stack_pointer_rtx));
25619 set_mem_alias_set (addr, get_frame_alias_set ());
25620 insn = emit_insn (gen_movsi (gen_rtx_REG (V2SImode, i), addr));
25621 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
25622 gen_rtx_REG (V2SImode, i),
25623 NULL_RTX);
25624 arm_add_cfa_adjust_cfa_note (insn, UNITS_PER_WORD,
25625 stack_pointer_rtx, stack_pointer_rtx);
25628 if (saved_regs_mask)
25630 rtx insn;
25631 bool return_in_pc = false;
25633 if (ARM_FUNC_TYPE (func_type) != ARM_FT_INTERWORKED
25634 && (TARGET_ARM || ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL)
25635 && !IS_CMSE_ENTRY (func_type)
25636 && !IS_STACKALIGN (func_type)
25637 && really_return
25638 && crtl->args.pretend_args_size == 0
25639 && saved_regs_mask & (1 << LR_REGNUM)
25640 && !crtl->calls_eh_return)
25642 saved_regs_mask &= ~(1 << LR_REGNUM);
25643 saved_regs_mask |= (1 << PC_REGNUM);
25644 return_in_pc = true;
25647 if (num_regs == 1 && (!IS_INTERRUPT (func_type) || !return_in_pc))
25649 for (i = 0; i <= LAST_ARM_REGNUM; i++)
25650 if (saved_regs_mask & (1 << i))
25652 rtx addr = gen_rtx_MEM (SImode,
25653 gen_rtx_POST_INC (SImode,
25654 stack_pointer_rtx));
25655 set_mem_alias_set (addr, get_frame_alias_set ());
25657 if (i == PC_REGNUM)
25659 insn = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
25660 XVECEXP (insn, 0, 0) = ret_rtx;
25661 XVECEXP (insn, 0, 1) = gen_rtx_SET (gen_rtx_REG (SImode, i),
25662 addr);
25663 RTX_FRAME_RELATED_P (XVECEXP (insn, 0, 1)) = 1;
25664 insn = emit_jump_insn (insn);
25666 else
25668 insn = emit_insn (gen_movsi (gen_rtx_REG (SImode, i),
25669 addr));
25670 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
25671 gen_rtx_REG (SImode, i),
25672 NULL_RTX);
25673 arm_add_cfa_adjust_cfa_note (insn, UNITS_PER_WORD,
25674 stack_pointer_rtx,
25675 stack_pointer_rtx);
25679 else
25681 if (TARGET_LDRD
25682 && current_tune->prefer_ldrd_strd
25683 && !optimize_function_for_size_p (cfun))
25685 if (TARGET_THUMB2)
25686 thumb2_emit_ldrd_pop (saved_regs_mask);
25687 else if (TARGET_ARM && !IS_INTERRUPT (func_type))
25688 arm_emit_ldrd_pop (saved_regs_mask);
25689 else
25690 arm_emit_multi_reg_pop (saved_regs_mask);
25692 else
25693 arm_emit_multi_reg_pop (saved_regs_mask);
25696 if (return_in_pc)
25697 return;
25700 amount
25701 = crtl->args.pretend_args_size + arm_compute_static_chain_stack_bytes();
25702 if (amount)
25704 int i, j;
25705 rtx dwarf = NULL_RTX;
25706 rtx_insn *tmp =
25707 emit_insn (gen_addsi3 (stack_pointer_rtx,
25708 stack_pointer_rtx,
25709 GEN_INT (amount)));
25711 RTX_FRAME_RELATED_P (tmp) = 1;
25713 if (cfun->machine->uses_anonymous_args)
25715 /* Restore pretend args. Refer to arm_expand_prologue for how the
25716 pretend args are saved on the stack. */
25717 int num_regs = crtl->args.pretend_args_size / 4;
25718 saved_regs_mask = (0xf0 >> num_regs) & 0xf;
25719 for (j = 0, i = 0; j < num_regs; i++)
25720 if (saved_regs_mask & (1 << i))
25722 rtx reg = gen_rtx_REG (SImode, i);
25723 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
25724 j++;
25726 REG_NOTES (tmp) = dwarf;
25728 arm_add_cfa_adjust_cfa_note (tmp, amount,
25729 stack_pointer_rtx, stack_pointer_rtx);
25732 /* Clear all caller-saved regs that are not used to return. */
25733 if (IS_CMSE_ENTRY (arm_current_func_type ()))
25735 /* CMSE_ENTRY always returns. */
25736 gcc_assert (really_return);
25737 cmse_nonsecure_entry_clear_before_return ();
25740 if (!really_return)
25741 return;
25743 if (crtl->calls_eh_return)
25744 emit_insn (gen_addsi3 (stack_pointer_rtx,
25745 stack_pointer_rtx,
25746 gen_rtx_REG (SImode, ARM_EH_STACKADJ_REGNUM)));
25748 if (IS_STACKALIGN (func_type))
25749 /* Restore the original stack pointer. Before prologue, the stack was
25750 realigned and the original stack pointer saved in r0. For details,
25751 see comment in arm_expand_prologue. */
25752 emit_insn (gen_movsi (stack_pointer_rtx, gen_rtx_REG (SImode, R0_REGNUM)));
25754 emit_jump_insn (simple_return_rtx);
25757 /* Implementation of insn prologue_thumb1_interwork. This is the first
25758 "instruction" of a function called in ARM mode. Swap to thumb mode. */
25760 const char *
25761 thumb1_output_interwork (void)
25763 const char * name;
25764 FILE *f = asm_out_file;
25766 gcc_assert (MEM_P (DECL_RTL (current_function_decl)));
25767 gcc_assert (GET_CODE (XEXP (DECL_RTL (current_function_decl), 0))
25768 == SYMBOL_REF);
25769 name = XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0);
25771 /* Generate code sequence to switch us into Thumb mode. */
25772 /* The .code 32 directive has already been emitted by
25773 ASM_DECLARE_FUNCTION_NAME. */
25774 asm_fprintf (f, "\torr\t%r, %r, #1\n", IP_REGNUM, PC_REGNUM);
25775 asm_fprintf (f, "\tbx\t%r\n", IP_REGNUM);
25777 /* Generate a label, so that the debugger will notice the
25778 change in instruction sets. This label is also used by
25779 the assembler to bypass the ARM code when this function
25780 is called from a Thumb encoded function elsewhere in the
25781 same file. Hence the definition of STUB_NAME here must
25782 agree with the definition in gas/config/tc-arm.c. */
25784 #define STUB_NAME ".real_start_of"
25786 fprintf (f, "\t.code\t16\n");
25787 #ifdef ARM_PE
25788 if (arm_dllexport_name_p (name))
25789 name = arm_strip_name_encoding (name);
25790 #endif
25791 asm_fprintf (f, "\t.globl %s%U%s\n", STUB_NAME, name);
25792 fprintf (f, "\t.thumb_func\n");
25793 asm_fprintf (f, "%s%U%s:\n", STUB_NAME, name);
25795 return "";
25798 /* Handle the case of a double word load into a low register from
25799 a computed memory address. The computed address may involve a
25800 register which is overwritten by the load. */
25801 const char *
25802 thumb_load_double_from_address (rtx *operands)
25804 rtx addr;
25805 rtx base;
25806 rtx offset;
25807 rtx arg1;
25808 rtx arg2;
25810 gcc_assert (REG_P (operands[0]));
25811 gcc_assert (MEM_P (operands[1]));
25813 /* Get the memory address. */
25814 addr = XEXP (operands[1], 0);
25816 /* Work out how the memory address is computed. */
25817 switch (GET_CODE (addr))
25819 case REG:
25820 operands[2] = adjust_address (operands[1], SImode, 4);
25822 if (REGNO (operands[0]) == REGNO (addr))
25824 output_asm_insn ("ldr\t%H0, %2", operands);
25825 output_asm_insn ("ldr\t%0, %1", operands);
25827 else
25829 output_asm_insn ("ldr\t%0, %1", operands);
25830 output_asm_insn ("ldr\t%H0, %2", operands);
25832 break;
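/* Example (illustrative): for a load of the pair r2/r3 from the address held
   in r2, the destination low register overlaps the base, so the high word is
   fetched first:
     ldr r3, [r2, #4]
     ldr r2, [r2]
   Loading in the other order would clobber the base before the second load.  */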
25834 case CONST:
25835 /* Compute <address> + 4 for the high order load. */
25836 operands[2] = adjust_address (operands[1], SImode, 4);
25838 output_asm_insn ("ldr\t%0, %1", operands);
25839 output_asm_insn ("ldr\t%H0, %2", operands);
25840 break;
25842 case PLUS:
25843 arg1 = XEXP (addr, 0);
25844 arg2 = XEXP (addr, 1);
25846 if (CONSTANT_P (arg1))
25847 base = arg2, offset = arg1;
25848 else
25849 base = arg1, offset = arg2;
25851 gcc_assert (REG_P (base));
25853 /* Catch the case of <address> = <reg> + <reg> */
25854 if (REG_P (offset))
25856 int reg_offset = REGNO (offset);
25857 int reg_base = REGNO (base);
25858 int reg_dest = REGNO (operands[0]);
25860 /* Add the base and offset registers together into the
25861 higher destination register. */
25862 asm_fprintf (asm_out_file, "\tadd\t%r, %r, %r",
25863 reg_dest + 1, reg_base, reg_offset);
25865 /* Load the lower destination register from the address in
25866 the higher destination register. */
25867 asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #0]",
25868 reg_dest, reg_dest + 1);
25870 /* Load the higher destination register from its own address
25871 plus 4. */
25872 asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #4]",
25873 reg_dest + 1, reg_dest + 1);
25875 else
25877 /* Compute <address> + 4 for the high order load. */
25878 operands[2] = adjust_address (operands[1], SImode, 4);
25880 /* If the computed address is held in the low order register
25881 then load the high order register first, otherwise always
25882 load the low order register first. */
25883 if (REGNO (operands[0]) == REGNO (base))
25885 output_asm_insn ("ldr\t%H0, %2", operands);
25886 output_asm_insn ("ldr\t%0, %1", operands);
25888 else
25890 output_asm_insn ("ldr\t%0, %1", operands);
25891 output_asm_insn ("ldr\t%H0, %2", operands);
25894 break;
25896 case LABEL_REF:
25897 /* With no registers to worry about we can just load the value
25898 directly. */
25899 operands[2] = adjust_address (operands[1], SImode, 4);
25901 output_asm_insn ("ldr\t%H0, %2", operands);
25902 output_asm_insn ("ldr\t%0, %1", operands);
25903 break;
25905 default:
25906 gcc_unreachable ();
25909 return "";
25912 const char *
25913 thumb_output_move_mem_multiple (int n, rtx *operands)
25915 switch (n)
25917 case 2:
25918 if (REGNO (operands[4]) > REGNO (operands[5]))
25919 std::swap (operands[4], operands[5]);
25921 output_asm_insn ("ldmia\t%1!, {%4, %5}", operands);
25922 output_asm_insn ("stmia\t%0!, {%4, %5}", operands);
25923 break;
25925 case 3:
25926 if (REGNO (operands[4]) > REGNO (operands[5]))
25927 std::swap (operands[4], operands[5]);
25928 if (REGNO (operands[5]) > REGNO (operands[6]))
25929 std::swap (operands[5], operands[6]);
25930 if (REGNO (operands[4]) > REGNO (operands[5]))
25931 std::swap (operands[4], operands[5]);
25933 output_asm_insn ("ldmia\t%1!, {%4, %5, %6}", operands);
25934 output_asm_insn ("stmia\t%0!, {%4, %5, %6}", operands);
25935 break;
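/* Illustrative example for n == 3: if operands[4..6] arrive as r5, r3, r4,
   the three conditional swaps above sort them so the emitted instructions
   read roughly
     ldmia r1!, {r3, r4, r5}
     stmia r0!, {r3, r4, r5}
   since LDM/STM register lists must be in ascending register order.  */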
25937 default:
25938 gcc_unreachable ();
25941 return "";
25944 /* Output a call-via instruction for thumb state. */
25945 const char *
25946 thumb_call_via_reg (rtx reg)
25948 int regno = REGNO (reg);
25949 rtx *labelp;
25951 gcc_assert (regno < LR_REGNUM);
25953 /* If we are in the normal text section we can use a single instance
25954 per compilation unit. If we are doing function sections, then we need
25955 an entry per section, since we can't rely on reachability. */
25956 if (in_section == text_section)
25958 thumb_call_reg_needed = 1;
25960 if (thumb_call_via_label[regno] == NULL)
25961 thumb_call_via_label[regno] = gen_label_rtx ();
25962 labelp = thumb_call_via_label + regno;
25964 else
25966 if (cfun->machine->call_via[regno] == NULL)
25967 cfun->machine->call_via[regno] = gen_label_rtx ();
25968 labelp = cfun->machine->call_via + regno;
25971 output_asm_insn ("bl\t%a0", labelp);
25972 return "";
25975 /* Routines for generating rtl. */
25976 void
25977 thumb_expand_movmemqi (rtx *operands)
25979 rtx out = copy_to_mode_reg (SImode, XEXP (operands[0], 0));
25980 rtx in = copy_to_mode_reg (SImode, XEXP (operands[1], 0));
25981 HOST_WIDE_INT len = INTVAL (operands[2]);
25982 HOST_WIDE_INT offset = 0;
25984 while (len >= 12)
25986 emit_insn (gen_movmem12b (out, in, out, in));
25987 len -= 12;
25990 if (len >= 8)
25992 emit_insn (gen_movmem8b (out, in, out, in));
25993 len -= 8;
25996 if (len >= 4)
25998 rtx reg = gen_reg_rtx (SImode);
25999 emit_insn (gen_movsi (reg, gen_rtx_MEM (SImode, in)));
26000 emit_insn (gen_movsi (gen_rtx_MEM (SImode, out), reg));
26001 len -= 4;
26002 offset += 4;
26005 if (len >= 2)
26007 rtx reg = gen_reg_rtx (HImode);
26008 emit_insn (gen_movhi (reg, gen_rtx_MEM (HImode,
26009 plus_constant (Pmode, in,
26010 offset))));
26011 emit_insn (gen_movhi (gen_rtx_MEM (HImode, plus_constant (Pmode, out,
26012 offset)),
26013 reg));
26014 len -= 2;
26015 offset += 2;
26018 if (len)
26020 rtx reg = gen_reg_rtx (QImode);
26021 emit_insn (gen_movqi (reg, gen_rtx_MEM (QImode,
26022 plus_constant (Pmode, in,
26023 offset))));
26024 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, out,
26025 offset)),
26026 reg));
26030 void
26031 thumb_reload_out_hi (rtx *operands)
26033 emit_insn (gen_thumb_movhi_clobber (operands[0], operands[1], operands[2]));
26036 /* Return the length of a function name prefix
26037 that starts with the character 'c'. */
26038 static int
26039 arm_get_strip_length (int c)
26041 switch (c)
26043 ARM_NAME_ENCODING_LENGTHS
26044 default: return 0;
26048 /* Return a pointer to a function's name with any
26049 and all prefix encodings stripped from it. */
26050 const char *
26051 arm_strip_name_encoding (const char *name)
26053 int skip;
26055 while ((skip = arm_get_strip_length (* name)))
26056 name += skip;
26058 return name;
26061 /* If there is a '*' anywhere in the name's prefix, then
26062 emit the stripped name verbatim, otherwise prepend an
26063 underscore if leading underscores are being used. */
26064 void
26065 arm_asm_output_labelref (FILE *stream, const char *name)
26067 int skip;
26068 int verbatim = 0;
26070 while ((skip = arm_get_strip_length (* name)))
26072 verbatim |= (*name == '*');
26073 name += skip;
26076 if (verbatim)
26077 fputs (name, stream);
26078 else
26079 asm_fprintf (stream, "%U%s", name);
26082 /* This function is used to emit an EABI tag and its associated value.
26083 We emit the numerical value of the tag in case the assembler does not
26084 support textual tags. (Eg gas prior to 2.20). If requested we include
26085 the tag name in a comment so that anyone reading the assembler output
26086 will know which tag is being set.
26088 This function is not static because arm-c.c needs it too. */
26090 void
26091 arm_emit_eabi_attribute (const char *name, int num, int val)
26093 asm_fprintf (asm_out_file, "\t.eabi_attribute %d, %d", num, val);
26094 if (flag_verbose_asm || flag_debug_asm)
26095 asm_fprintf (asm_out_file, "\t%s %s", ASM_COMMENT_START, name);
26096 asm_fprintf (asm_out_file, "\n");
26099 /* This function is used to print CPU tuning information as comment
26100 in assembler file. Pointers are not printed for now. */
26102 void
26103 arm_print_tune_info (void)
26105 asm_fprintf (asm_out_file, "\t" ASM_COMMENT_START ".tune parameters\n");
26106 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "constant_limit:\t%d\n",
26107 current_tune->constant_limit);
26108 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26109 "max_insns_skipped:\t%d\n", current_tune->max_insns_skipped);
26110 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26111 "prefetch.num_slots:\t%d\n", current_tune->prefetch.num_slots);
26112 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26113 "prefetch.l1_cache_size:\t%d\n",
26114 current_tune->prefetch.l1_cache_size);
26115 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26116 "prefetch.l1_cache_line_size:\t%d\n",
26117 current_tune->prefetch.l1_cache_line_size);
26118 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26119 "prefer_constant_pool:\t%d\n",
26120 (int) current_tune->prefer_constant_pool);
26121 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26122 "branch_cost:\t(s:speed, p:predictable)\n");
26123 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "\t\ts&p\tcost\n");
26124 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "\t\t00\t%d\n",
26125 current_tune->branch_cost (false, false));
26126 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "\t\t01\t%d\n",
26127 current_tune->branch_cost (false, true));
26128 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "\t\t10\t%d\n",
26129 current_tune->branch_cost (true, false));
26130 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "\t\t11\t%d\n",
26131 current_tune->branch_cost (true, true));
26132 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26133 "prefer_ldrd_strd:\t%d\n",
26134 (int) current_tune->prefer_ldrd_strd);
26135 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26136 "logical_op_non_short_circuit:\t[%d,%d]\n",
26137 (int) current_tune->logical_op_non_short_circuit_thumb,
26138 (int) current_tune->logical_op_non_short_circuit_arm);
26139 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26140 "prefer_neon_for_64bits:\t%d\n",
26141 (int) current_tune->prefer_neon_for_64bits);
26142 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26143 "disparage_flag_setting_t16_encodings:\t%d\n",
26144 (int) current_tune->disparage_flag_setting_t16_encodings);
26145 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26146 "string_ops_prefer_neon:\t%d\n",
26147 (int) current_tune->string_ops_prefer_neon);
26148 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26149 "max_insns_inline_memset:\t%d\n",
26150 current_tune->max_insns_inline_memset);
26151 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "fusible_ops:\t%u\n",
26152 current_tune->fusible_ops);
26153 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "sched_autopref:\t%d\n",
26154 (int) current_tune->sched_autopref);
26157 /* Print .arch and .arch_extension directives corresponding to the
26158 current architecture configuration. */
26159 static void
26160 arm_print_asm_arch_directives ()
26162 const arch_option *arch
26163 = arm_parse_arch_option_name (all_architectures, "-march",
26164 arm_active_target.arch_name);
26165 auto_sbitmap opt_bits (isa_num_bits);
26167 gcc_assert (arch);
26169 asm_fprintf (asm_out_file, "\t.arch %s\n", arm_active_target.arch_name);
26170 if (!arch->common.extensions)
26171 return;
26173 for (const struct cpu_arch_extension *opt = arch->common.extensions;
26174 opt->name != NULL;
26175 opt++)
26177 if (!opt->remove)
26179 arm_initialize_isa (opt_bits, opt->isa_bits);
26181 /* If every feature bit of this option is set in the target
26182 ISA specification, print out the option name. However,
26183 don't print anything if all the bits are part of the
26184 FPU specification. */
26185 if (bitmap_subset_p (opt_bits, arm_active_target.isa)
26186 && !bitmap_subset_p (opt_bits, isa_all_fpubits))
26187 asm_fprintf (asm_out_file, "\t.arch_extension %s\n", opt->name);
26192 static void
26193 arm_file_start (void)
26195 int val;
26197 if (TARGET_BPABI)
26199 /* We don't have a specified CPU. Use the architecture to
26200 generate the tags.
26202 Note: it might be better to do this unconditionally, then the
26203 assembler would not need to know about all new CPU names as
26204 they are added. */
26205 if (!arm_active_target.core_name)
26207 /* armv7ve doesn't support any extensions. */
26208 if (strcmp (arm_active_target.arch_name, "armv7ve") == 0)
26210 /* Keep backward compatibility for assemblers
26211 which don't support armv7ve. */
26212 asm_fprintf (asm_out_file, "\t.arch armv7-a\n");
26213 asm_fprintf (asm_out_file, "\t.arch_extension virt\n");
26214 asm_fprintf (asm_out_file, "\t.arch_extension idiv\n");
26215 asm_fprintf (asm_out_file, "\t.arch_extension sec\n");
26216 asm_fprintf (asm_out_file, "\t.arch_extension mp\n");
26218 else
26219 arm_print_asm_arch_directives ();
26221 else if (strncmp (arm_active_target.core_name, "generic", 7) == 0)
26222 asm_fprintf (asm_out_file, "\t.arch %s\n",
26223 arm_active_target.core_name + 8);
26224 else
26226 const char* truncated_name
26227 = arm_rewrite_selected_cpu (arm_active_target.core_name);
26228 asm_fprintf (asm_out_file, "\t.cpu %s\n", truncated_name);
26231 if (print_tune_info)
26232 arm_print_tune_info ();
26234 if (! TARGET_SOFT_FLOAT)
26236 if (TARGET_HARD_FLOAT && TARGET_VFP_SINGLE)
26237 arm_emit_eabi_attribute ("Tag_ABI_HardFP_use", 27, 1);
26239 if (TARGET_HARD_FLOAT_ABI)
26240 arm_emit_eabi_attribute ("Tag_ABI_VFP_args", 28, 1);
26243 /* Some of these attributes only apply when the corresponding features
26244 are used. However we don't have any easy way of figuring this out.
26245 Conservatively record the setting that would have been used. */
26247 if (flag_rounding_math)
26248 arm_emit_eabi_attribute ("Tag_ABI_FP_rounding", 19, 1);
26250 if (!flag_unsafe_math_optimizations)
26252 arm_emit_eabi_attribute ("Tag_ABI_FP_denormal", 20, 1);
26253 arm_emit_eabi_attribute ("Tag_ABI_FP_exceptions", 21, 1);
26255 if (flag_signaling_nans)
26256 arm_emit_eabi_attribute ("Tag_ABI_FP_user_exceptions", 22, 1);
26258 arm_emit_eabi_attribute ("Tag_ABI_FP_number_model", 23,
26259 flag_finite_math_only ? 1 : 3);
26261 arm_emit_eabi_attribute ("Tag_ABI_align8_needed", 24, 1);
26262 arm_emit_eabi_attribute ("Tag_ABI_align8_preserved", 25, 1);
26263 arm_emit_eabi_attribute ("Tag_ABI_enum_size", 26,
26264 flag_short_enums ? 1 : 2);
26266 /* Tag_ABI_optimization_goals. */
26267 if (optimize_size)
26268 val = 4;
26269 else if (optimize >= 2)
26270 val = 2;
26271 else if (optimize)
26272 val = 1;
26273 else
26274 val = 6;
26275 arm_emit_eabi_attribute ("Tag_ABI_optimization_goals", 30, val);
26277 arm_emit_eabi_attribute ("Tag_CPU_unaligned_access", 34,
26278 unaligned_access);
26280 if (arm_fp16_format)
26281 arm_emit_eabi_attribute ("Tag_ABI_FP_16bit_format", 38,
26282 (int) arm_fp16_format);
26284 if (arm_lang_output_object_attributes_hook)
26285 arm_lang_output_object_attributes_hook();
26288 default_file_start ();
26291 static void
26292 arm_file_end (void)
26294 int regno;
26296 if (NEED_INDICATE_EXEC_STACK)
26297 /* Add .note.GNU-stack. */
26298 file_end_indicate_exec_stack ();
26300 if (! thumb_call_reg_needed)
26301 return;
26303 switch_to_section (text_section);
26304 asm_fprintf (asm_out_file, "\t.code 16\n");
26305 ASM_OUTPUT_ALIGN (asm_out_file, 1);
26307 for (regno = 0; regno < LR_REGNUM; regno++)
26309 rtx label = thumb_call_via_label[regno];
26311 if (label != 0)
26313 targetm.asm_out.internal_label (asm_out_file, "L",
26314 CODE_LABEL_NUMBER (label));
26315 asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
26320 #ifndef ARM_PE
26321 /* Symbols in the text segment can be accessed without indirecting via the
26322 constant pool; it may take an extra binary operation, but this is still
26323 faster than indirecting via memory. Don't do this when not optimizing,
26324 since we won't be calculating al of the offsets necessary to do this
26325 simplification. */
26327 static void
26328 arm_encode_section_info (tree decl, rtx rtl, int first)
26330 if (optimize > 0 && TREE_CONSTANT (decl))
26331 SYMBOL_REF_FLAG (XEXP (rtl, 0)) = 1;
26333 default_encode_section_info (decl, rtl, first);
26335 #endif /* !ARM_PE */
26337 static void
26338 arm_internal_label (FILE *stream, const char *prefix, unsigned long labelno)
26340 if (arm_ccfsm_state == 3 && (unsigned) arm_target_label == labelno
26341 && !strcmp (prefix, "L"))
26343 arm_ccfsm_state = 0;
26344 arm_target_insn = NULL;
26346 default_internal_label (stream, prefix, labelno);
26349 /* Output code to add DELTA to the first argument, and then jump
26350 to FUNCTION. Used for C++ multiple inheritance. */
26352 static void
26353 arm_thumb1_mi_thunk (FILE *file, tree, HOST_WIDE_INT delta,
26354 HOST_WIDE_INT, tree function)
26356 static int thunk_label = 0;
26357 char label[256];
26358 char labelpc[256];
26359 int mi_delta = delta;
26360 const char *const mi_op = mi_delta < 0 ? "sub" : "add";
26361 int shift = 0;
26362 int this_regno = (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function)
26363 ? 1 : 0);
26364 if (mi_delta < 0)
26365 mi_delta = - mi_delta;
26367 final_start_function (emit_barrier (), file, 1);
26369 if (TARGET_THUMB1)
26371 int labelno = thunk_label++;
26372 ASM_GENERATE_INTERNAL_LABEL (label, "LTHUMBFUNC", labelno);
26373 /* Thunks are entered in arm mode when available. */
26374 if (TARGET_THUMB1_ONLY)
26376 /* push r3 so we can use it as a temporary. */
26377 /* TODO: Omit this save if r3 is not used. */
26378 fputs ("\tpush {r3}\n", file);
26379 fputs ("\tldr\tr3, ", file);
26381 else
26383 fputs ("\tldr\tr12, ", file);
26385 assemble_name (file, label);
26386 fputc ('\n', file);
26387 if (flag_pic)
26389 /* If we are generating PIC, the ldr instruction below loads
26390 "(target - 7) - .LTHUNKPCn" into r12. The pc reads as
26391 the address of the add + 8, so we have:
26393 r12 = (target - 7) - .LTHUNKPCn + (.LTHUNKPCn + 8)
26394 = target + 1.
26396 Note that we have "+ 1" because some versions of GNU ld
26397 don't set the low bit of the result for R_ARM_REL32
26398 relocations against thumb function symbols.
26399 On ARMv6M this is +4, not +8. */
26400 ASM_GENERATE_INTERNAL_LABEL (labelpc, "LTHUNKPC", labelno);
26401 assemble_name (file, labelpc);
26402 fputs (":\n", file);
26403 if (TARGET_THUMB1_ONLY)
26405 /* This is 2 insns after the start of the thunk, so we know it
26406 is 4-byte aligned. */
26407 fputs ("\tadd\tr3, pc, r3\n", file);
26408 fputs ("\tmov r12, r3\n", file);
26410 else
26411 fputs ("\tadd\tr12, pc, r12\n", file);
26413 else if (TARGET_THUMB1_ONLY)
26414 fputs ("\tmov r12, r3\n", file);
26416 if (TARGET_THUMB1_ONLY)
26418 if (mi_delta > 255)
26420 fputs ("\tldr\tr3, ", file);
26421 assemble_name (file, label);
26422 fputs ("+4\n", file);
26423 asm_fprintf (file, "\t%ss\t%r, %r, r3\n",
26424 mi_op, this_regno, this_regno);
26426 else if (mi_delta != 0)
26428 /* Thumb1 unified syntax requires s suffix in instruction name when
26429 one of the operands is immediate. */
26430 asm_fprintf (file, "\t%ss\t%r, %r, #%d\n",
26431 mi_op, this_regno, this_regno,
26432 mi_delta);
26435 else
26437 /* TODO: Use movw/movt for large constants when available. */
26438 while (mi_delta != 0)
26440 if ((mi_delta & (3 << shift)) == 0)
26441 shift += 2;
26442 else
26444 asm_fprintf (file, "\t%s\t%r, %r, #%d\n",
26445 mi_op, this_regno, this_regno,
26446 mi_delta & (0xff << shift));
26447 mi_delta &= ~(0xff << shift);
26448 shift += 8;
26452 if (TARGET_THUMB1)
26454 if (TARGET_THUMB1_ONLY)
26455 fputs ("\tpop\t{r3}\n", file);
26457 fprintf (file, "\tbx\tr12\n");
26458 ASM_OUTPUT_ALIGN (file, 2);
26459 assemble_name (file, label);
26460 fputs (":\n", file);
26461 if (flag_pic)
26463 /* Output ".word .LTHUNKn-[3,7]-.LTHUNKPCn". */
26464 rtx tem = XEXP (DECL_RTL (function), 0);
26465 /* For TARGET_THUMB1_ONLY the thunk is in Thumb mode, so the PC
26466 pipeline offset is four rather than eight. Adjust the offset
26467 accordingly. */
26468 tem = plus_constant (GET_MODE (tem), tem,
26469 TARGET_THUMB1_ONLY ? -3 : -7);
26470 tem = gen_rtx_MINUS (GET_MODE (tem),
26471 tem,
26472 gen_rtx_SYMBOL_REF (Pmode,
26473 ggc_strdup (labelpc)));
26474 assemble_integer (tem, 4, BITS_PER_WORD, 1);
26476 else
26477 /* Output ".word .LTHUNKn". */
26478 assemble_integer (XEXP (DECL_RTL (function), 0), 4, BITS_PER_WORD, 1);
26480 if (TARGET_THUMB1_ONLY && mi_delta > 255)
26481 assemble_integer (GEN_INT(mi_delta), 4, BITS_PER_WORD, 1);
26483 else
26485 fputs ("\tb\t", file);
26486 assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
26487 if (NEED_PLT_RELOC)
26488 fputs ("(PLT)", file);
26489 fputc ('\n', file);
26492 final_end_function ();
26495 /* MI thunk handling for TARGET_32BIT. */
26497 static void
26498 arm32_output_mi_thunk (FILE *file, tree, HOST_WIDE_INT delta,
26499 HOST_WIDE_INT vcall_offset, tree function)
26501 /* On ARM, this_regno is R0 or R1 depending on
26502 whether the function returns an aggregate or not.
26504 int this_regno = (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)),
26505 function)
26506 ? R1_REGNUM : R0_REGNUM);
26508 rtx temp = gen_rtx_REG (Pmode, IP_REGNUM);
26509 rtx this_rtx = gen_rtx_REG (Pmode, this_regno);
26510 reload_completed = 1;
26511 emit_note (NOTE_INSN_PROLOGUE_END);
26513 /* Add DELTA to THIS_RTX. */
26514 if (delta != 0)
26515 arm_split_constant (PLUS, Pmode, NULL_RTX,
26516 delta, this_rtx, this_rtx, false);
26518 /* Add *(*THIS_RTX + VCALL_OFFSET) to THIS_RTX. */
26519 if (vcall_offset != 0)
26521 /* Load *THIS_RTX. */
26522 emit_move_insn (temp, gen_rtx_MEM (Pmode, this_rtx));
26523 /* Compute *THIS_RTX + VCALL_OFFSET. */
26524 arm_split_constant (PLUS, Pmode, NULL_RTX, vcall_offset, temp, temp,
26525 false);
26526 /* Compute *(*THIS_RTX + VCALL_OFFSET). */
26527 emit_move_insn (temp, gen_rtx_MEM (Pmode, temp));
26528 emit_insn (gen_add3_insn (this_rtx, this_rtx, temp));
26531 /* Generate a tail call to the target function. */
26532 if (!TREE_USED (function))
26534 assemble_external (function);
26535 TREE_USED (function) = 1;
26537 rtx funexp = XEXP (DECL_RTL (function), 0);
26538 funexp = gen_rtx_MEM (FUNCTION_MODE, funexp);
26539 rtx_insn * insn = emit_call_insn (gen_sibcall (funexp, const0_rtx, NULL_RTX));
26540 SIBLING_CALL_P (insn) = 1;
26542 insn = get_insns ();
26543 shorten_branches (insn);
26544 final_start_function (insn, file, 1);
26545 final (insn, file, 1);
26546 final_end_function ();
26548 /* Stop pretending this is a post-reload pass. */
26549 reload_completed = 0;
26552 /* Output code to add DELTA to the first argument, and then jump
26553 to FUNCTION. Used for C++ multiple inheritance. */
26555 static void
26556 arm_output_mi_thunk (FILE *file, tree thunk, HOST_WIDE_INT delta,
26557 HOST_WIDE_INT vcall_offset, tree function)
26559 if (TARGET_32BIT)
26560 arm32_output_mi_thunk (file, thunk, delta, vcall_offset, function);
26561 else
26562 arm_thumb1_mi_thunk (file, thunk, delta, vcall_offset, function);
26566 arm_emit_vector_const (FILE *file, rtx x)
26568 int i;
26569 const char * pattern;
26571 gcc_assert (GET_CODE (x) == CONST_VECTOR);
26573 switch (GET_MODE (x))
26575 case E_V2SImode: pattern = "%08x"; break;
26576 case E_V4HImode: pattern = "%04x"; break;
26577 case E_V8QImode: pattern = "%02x"; break;
26578 default: gcc_unreachable ();
26581 fprintf (file, "0x");
26582 for (i = CONST_VECTOR_NUNITS (x); i--;)
26584 rtx element;
26586 element = CONST_VECTOR_ELT (x, i);
26587 fprintf (file, pattern, INTVAL (element));
26590 return 1;
26593 /* Emit a fp16 constant appropriately padded to occupy a 4-byte word.
26594 HFmode constant pool entries are actually loaded with ldr. */
26595 void
26596 arm_emit_fp16_const (rtx c)
26598 long bits;
26600 bits = real_to_target (NULL, CONST_DOUBLE_REAL_VALUE (c), HFmode);
26601 if (WORDS_BIG_ENDIAN)
26602 assemble_zeros (2);
26603 assemble_integer (GEN_INT (bits), 2, BITS_PER_WORD, 1);
26604 if (!WORDS_BIG_ENDIAN)
26605 assemble_zeros (2);
26608 const char *
26609 arm_output_load_gr (rtx *operands)
26611 rtx reg;
26612 rtx offset;
26613 rtx wcgr;
26614 rtx sum;
26616 if (!MEM_P (operands [1])
26617 || GET_CODE (sum = XEXP (operands [1], 0)) != PLUS
26618 || !REG_P (reg = XEXP (sum, 0))
26619 || !CONST_INT_P (offset = XEXP (sum, 1))
26620 || ((INTVAL (offset) < 1024) && (INTVAL (offset) > -1024)))
26621 return "wldrw%?\t%0, %1";
26623 /* Fix up an out-of-range load of a GR register. */
26624 output_asm_insn ("str%?\t%0, [sp, #-4]!\t@ Start of GR load expansion", & reg);
26625 wcgr = operands[0];
26626 operands[0] = reg;
26627 output_asm_insn ("ldr%?\t%0, %1", operands);
26629 operands[0] = wcgr;
26630 operands[1] = reg;
26631 output_asm_insn ("tmcr%?\t%0, %1", operands);
26632 output_asm_insn ("ldr%?\t%0, [sp], #4\t@ End of GR load expansion", & reg);
26634 return "";
26637 /* Worker function for TARGET_SETUP_INCOMING_VARARGS.
26639 On the ARM, PRETEND_SIZE is set in order to have the prologue push the last
26640 named arg and all anonymous args onto the stack.
26641 XXX I know the prologue shouldn't be pushing registers, but it is faster
26642 that way. */
26644 static void
26645 arm_setup_incoming_varargs (cumulative_args_t pcum_v,
26646 machine_mode mode,
26647 tree type,
26648 int *pretend_size,
26649 int second_time ATTRIBUTE_UNUSED)
26651 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
26652 int nregs;
26654 cfun->machine->uses_anonymous_args = 1;
26655 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
26657 nregs = pcum->aapcs_ncrn;
26658 if (nregs & 1)
26660 int res = arm_needs_doubleword_align (mode, type);
26661 if (res < 0 && warn_psabi)
26662 inform (input_location, "parameter passing for argument of "
26663 "type %qT changed in GCC 7.1", type);
26664 else if (res > 0)
26665 nregs++;
26668 else
26669 nregs = pcum->nregs;
26671 if (nregs < NUM_ARG_REGS)
26672 *pretend_size = (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
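/* As an illustration, for a variadic AAPCS function whose named arguments
   occupy only r0 and r1, the remaining argument registers r2 and r3 must be
   spilled, so *pretend_size becomes 2 * UNITS_PER_WORD (8 bytes).  */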
26675 /* We can't rely on the caller doing the proper promotion when
26676 using APCS or ATPCS. */
26678 static bool
26679 arm_promote_prototypes (const_tree t ATTRIBUTE_UNUSED)
26681 return !TARGET_AAPCS_BASED;
26684 static machine_mode
26685 arm_promote_function_mode (const_tree type ATTRIBUTE_UNUSED,
26686 machine_mode mode,
26687 int *punsignedp ATTRIBUTE_UNUSED,
26688 const_tree fntype ATTRIBUTE_UNUSED,
26689 int for_return ATTRIBUTE_UNUSED)
26691 if (GET_MODE_CLASS (mode) == MODE_INT
26692 && GET_MODE_SIZE (mode) < 4)
26693 return SImode;
26695 return mode;
26699 static bool
26700 arm_default_short_enums (void)
26702 return ARM_DEFAULT_SHORT_ENUMS;
26706 /* AAPCS requires that anonymous bitfields affect structure alignment. */
26708 static bool
26709 arm_align_anon_bitfield (void)
26711 return TARGET_AAPCS_BASED;
26715 /* The generic C++ ABI says 64-bit (long long). The EABI says 32-bit. */
26717 static tree
26718 arm_cxx_guard_type (void)
26720 return TARGET_AAPCS_BASED ? integer_type_node : long_long_integer_type_node;
26724 /* The EABI says test the least significant bit of a guard variable. */
26726 static bool
26727 arm_cxx_guard_mask_bit (void)
26729 return TARGET_AAPCS_BASED;
26733 /* The EABI specifies that all array cookies are 8 bytes long. */
26735 static tree
26736 arm_get_cookie_size (tree type)
26738 tree size;
26740 if (!TARGET_AAPCS_BASED)
26741 return default_cxx_get_cookie_size (type);
26743 size = build_int_cst (sizetype, 8);
26744 return size;
26748 /* The EABI says that array cookies should also contain the element size. */
26750 static bool
26751 arm_cookie_has_size (void)
26753 return TARGET_AAPCS_BASED;
26757 /* The EABI says constructors and destructors should return a pointer to
26758 the object constructed/destroyed. */
26760 static bool
26761 arm_cxx_cdtor_returns_this (void)
26763 return TARGET_AAPCS_BASED;
26766 /* The EABI says that an inline function may never be the key
26767 method. */
26769 static bool
26770 arm_cxx_key_method_may_be_inline (void)
26772 return !TARGET_AAPCS_BASED;
26775 static void
26776 arm_cxx_determine_class_data_visibility (tree decl)
26778 if (!TARGET_AAPCS_BASED
26779 || !TARGET_DLLIMPORT_DECL_ATTRIBUTES)
26780 return;
26782 /* In general, \S 3.2.5.5 of the ARM EABI requires that class data
26783 is exported. However, on systems without dynamic vague linkage,
26784 \S 3.2.5.6 says that COMDAT class data has hidden linkage. */
26785 if (!TARGET_ARM_DYNAMIC_VAGUE_LINKAGE_P && DECL_COMDAT (decl))
26786 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
26787 else
26788 DECL_VISIBILITY (decl) = VISIBILITY_DEFAULT;
26789 DECL_VISIBILITY_SPECIFIED (decl) = 1;
26792 static bool
26793 arm_cxx_class_data_always_comdat (void)
26795 /* \S 3.2.5.4 of the ARM C++ ABI says that class data only have
26796 vague linkage if the class has no key function. */
26797 return !TARGET_AAPCS_BASED;
26801 /* The EABI says __aeabi_atexit should be used to register static
26802 destructors. */
26804 static bool
26805 arm_cxx_use_aeabi_atexit (void)
26807 return TARGET_AAPCS_BASED;
26811 void
26812 arm_set_return_address (rtx source, rtx scratch)
26814 arm_stack_offsets *offsets;
26815 HOST_WIDE_INT delta;
26816 rtx addr;
26817 unsigned long saved_regs;
26819 offsets = arm_get_frame_offsets ();
26820 saved_regs = offsets->saved_regs_mask;
26822 if ((saved_regs & (1 << LR_REGNUM)) == 0)
26823 emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
26824 else
26826 if (frame_pointer_needed)
26827 addr = plus_constant (Pmode, hard_frame_pointer_rtx, -4);
26828 else
26830 /* LR will be the first saved register. */
26831 delta = offsets->outgoing_args - (offsets->frame + 4);
26834 if (delta >= 4096)
26836 emit_insn (gen_addsi3 (scratch, stack_pointer_rtx,
26837 GEN_INT (delta & ~4095)));
26838 addr = scratch;
26839 delta &= 4095;
26841 else
26842 addr = stack_pointer_rtx;
26844 addr = plus_constant (Pmode, addr, delta);
26846 /* The store needs to be marked as frame related in order to prevent
26847 DSE from deleting it as dead if it is based on fp. */
26848 rtx insn = emit_move_insn (gen_frame_mem (Pmode, addr), source);
26849 RTX_FRAME_RELATED_P (insn) = 1;
26850 add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (Pmode, LR_REGNUM));
26855 void
26856 thumb_set_return_address (rtx source, rtx scratch)
26858 arm_stack_offsets *offsets;
26859 HOST_WIDE_INT delta;
26860 HOST_WIDE_INT limit;
26861 int reg;
26862 rtx addr;
26863 unsigned long mask;
26865 emit_use (source);
26867 offsets = arm_get_frame_offsets ();
26868 mask = offsets->saved_regs_mask;
26869 if (mask & (1 << LR_REGNUM))
26871 limit = 1024;
26872 /* Find the saved regs. */
26873 if (frame_pointer_needed)
26875 delta = offsets->soft_frame - offsets->saved_args;
26876 reg = THUMB_HARD_FRAME_POINTER_REGNUM;
26877 if (TARGET_THUMB1)
26878 limit = 128;
26880 else
26882 delta = offsets->outgoing_args - offsets->saved_args;
26883 reg = SP_REGNUM;
26885 /* Allow for the stack frame. */
26886 if (TARGET_THUMB1 && TARGET_BACKTRACE)
26887 delta -= 16;
26888 /* The link register is always the first saved register. */
26889 delta -= 4;
26891 /* Construct the address. */
26892 addr = gen_rtx_REG (SImode, reg);
26893 if (delta > limit)
26895 emit_insn (gen_movsi (scratch, GEN_INT (delta)));
26896 emit_insn (gen_addsi3 (scratch, scratch, stack_pointer_rtx));
26897 addr = scratch;
26899 else
26900 addr = plus_constant (Pmode, addr, delta);
26902 /* The store needs to be marked as frame related in order to prevent
26903 DSE from deleting it as dead if it is based on fp. */
26904 rtx insn = emit_move_insn (gen_frame_mem (Pmode, addr), source);
26905 RTX_FRAME_RELATED_P (insn) = 1;
26906 add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (Pmode, LR_REGNUM));
26908 else
26909 emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
26912 /* Implements target hook vector_mode_supported_p. */
26913 bool
26914 arm_vector_mode_supported_p (machine_mode mode)
26916 /* Neon also supports V2SImode, etc. listed in the clause below. */
26917 if (TARGET_NEON && (mode == V2SFmode || mode == V4SImode || mode == V8HImode
26918 || mode == V4HFmode || mode == V16QImode || mode == V4SFmode
26919 || mode == V2DImode || mode == V8HFmode))
26920 return true;
26922 if ((TARGET_NEON || TARGET_IWMMXT)
26923 && ((mode == V2SImode)
26924 || (mode == V4HImode)
26925 || (mode == V8QImode)))
26926 return true;
26928 if (TARGET_INT_SIMD && (mode == V4UQQmode || mode == V4QQmode
26929 || mode == V2UHQmode || mode == V2HQmode || mode == V2UHAmode
26930 || mode == V2HAmode))
26931 return true;
26933 return false;
26936 /* Implements target hook array_mode_supported_p. */
26938 static bool
26939 arm_array_mode_supported_p (machine_mode mode,
26940 unsigned HOST_WIDE_INT nelems)
26942 if (TARGET_NEON
26943 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
26944 && (nelems >= 2 && nelems <= 4))
26945 return true;
26947 return false;
26950 /* Use the option -mvectorize-with-neon-double to override the use of quadword
26951 registers when autovectorizing for Neon, at least until multiple vector
26952 widths are supported properly by the middle-end. */
26954 static machine_mode
26955 arm_preferred_simd_mode (scalar_mode mode)
26957 if (TARGET_NEON)
26958 switch (mode)
26960 case E_SFmode:
26961 return TARGET_NEON_VECTORIZE_DOUBLE ? V2SFmode : V4SFmode;
26962 case E_SImode:
26963 return TARGET_NEON_VECTORIZE_DOUBLE ? V2SImode : V4SImode;
26964 case E_HImode:
26965 return TARGET_NEON_VECTORIZE_DOUBLE ? V4HImode : V8HImode;
26966 case E_QImode:
26967 return TARGET_NEON_VECTORIZE_DOUBLE ? V8QImode : V16QImode;
26968 case E_DImode:
26969 if (!TARGET_NEON_VECTORIZE_DOUBLE)
26970 return V2DImode;
26971 break;
26973 default:;
26976 if (TARGET_REALLY_IWMMXT)
26977 switch (mode)
26979 case E_SImode:
26980 return V2SImode;
26981 case E_HImode:
26982 return V4HImode;
26983 case E_QImode:
26984 return V8QImode;
26986 default:;
26989 return word_mode;
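/* As an illustration, with Neon enabled and quad-word vectorization (the
   default), SFmode data is vectorized as V4SFmode; under
   -mvectorize-with-neon-double it would be V2SFmode instead.  */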
26992 /* Implement TARGET_CLASS_LIKELY_SPILLED_P.
26994 We need to define this for LO_REGS on Thumb-1. Otherwise we can end up
26995 using r0-r4 for function arguments, r7 for the stack frame and don't have
26996 enough left over to do doubleword arithmetic. For Thumb-2 all the
26997 potentially problematic instructions accept high registers so this is not
26998 necessary. Care needs to be taken to avoid adding new Thumb-2 patterns
26999 that require many low registers. */
27000 static bool
27001 arm_class_likely_spilled_p (reg_class_t rclass)
27003 if ((TARGET_THUMB1 && rclass == LO_REGS)
27004 || rclass == CC_REG)
27005 return true;
27007 return false;
27010 /* Implements target hook small_register_classes_for_mode_p. */
27011 bool
27012 arm_small_register_classes_for_mode_p (machine_mode mode ATTRIBUTE_UNUSED)
27014 return TARGET_THUMB1;
27017 /* Implement TARGET_SHIFT_TRUNCATION_MASK. SImode shifts use normal
27018 ARM insns and therefore guarantee that the shift count is modulo 256.
27019 DImode shifts (those implemented by lib1funcs.S or by optabs.c)
27020 guarantee no particular behavior for out-of-range counts. */
27022 static unsigned HOST_WIDE_INT
27023 arm_shift_truncation_mask (machine_mode mode)
27025 return mode == SImode ? 255 : 0;
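/* As an illustration, an SImode shift by a register value of 257 therefore
   behaves like a shift by 1, whereas nothing is guaranteed for out-of-range
   DImode shift counts.  */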
27029 /* Map internal gcc register numbers to DWARF2 register numbers. */
27031 unsigned int
27032 arm_dbx_register_number (unsigned int regno)
27034 if (regno < 16)
27035 return regno;
27037 if (IS_VFP_REGNUM (regno))
27039 /* See comment in arm_dwarf_register_span. */
27040 if (VFP_REGNO_OK_FOR_SINGLE (regno))
27041 return 64 + regno - FIRST_VFP_REGNUM;
27042 else
27043 return 256 + (regno - FIRST_VFP_REGNUM) / 2;
27046 if (IS_IWMMXT_GR_REGNUM (regno))
27047 return 104 + regno - FIRST_IWMMXT_GR_REGNUM;
27049 if (IS_IWMMXT_REGNUM (regno))
27050 return 112 + regno - FIRST_IWMMXT_REGNUM;
27052 return DWARF_FRAME_REGISTERS;
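/* As an illustration, core register r7 keeps DWARF number 7, the
   single-precision register s5 maps to 64 + 5 = 69, and a double-only
   register such as d16 falls in the 256-based range (256 + 16 = 272).  */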
27055 /* Dwarf models VFPv3 registers as 32 64-bit registers.
27056 GCC models them as 64 32-bit registers, so we need to describe this to
27057 the DWARF generation code. Other registers can use the default. */
27058 static rtx
27059 arm_dwarf_register_span (rtx rtl)
27061 machine_mode mode;
27062 unsigned regno;
27063 rtx parts[16];
27064 int nregs;
27065 int i;
27067 regno = REGNO (rtl);
27068 if (!IS_VFP_REGNUM (regno))
27069 return NULL_RTX;
27071 /* XXX FIXME: The EABI defines two VFP register ranges:
27072 64-95: Legacy VFPv2 numbering for S0-S31 (obsolescent)
27073 256-287: D0-D31
27074 The recommended encoding for S0-S31 is a DW_OP_bit_piece of the
27075 corresponding D register. Until GDB supports this, we shall use the
27076 legacy encodings. We also use these encodings for D0-D15 for
27077 compatibility with older debuggers. */
27078 mode = GET_MODE (rtl);
27079 if (GET_MODE_SIZE (mode) < 8)
27080 return NULL_RTX;
27082 if (VFP_REGNO_OK_FOR_SINGLE (regno))
27084 nregs = GET_MODE_SIZE (mode) / 4;
27085 for (i = 0; i < nregs; i += 2)
27086 if (TARGET_BIG_END)
27088 parts[i] = gen_rtx_REG (SImode, regno + i + 1);
27089 parts[i + 1] = gen_rtx_REG (SImode, regno + i);
27091 else
27093 parts[i] = gen_rtx_REG (SImode, regno + i);
27094 parts[i + 1] = gen_rtx_REG (SImode, regno + i + 1);
27097 else
27099 nregs = GET_MODE_SIZE (mode) / 8;
27100 for (i = 0; i < nregs; i++)
27101 parts[i] = gen_rtx_REG (DImode, regno + i);
27104 return gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (nregs , parts));
27107 #if ARM_UNWIND_INFO
27108 /* Emit unwind directives for a store-multiple instruction or stack pointer
27109 push during alignment.
27110 These should only ever be generated by the function prologue code, so
27111 expect them to have a particular form.
27112 The store-multiple instruction sometimes pushes pc as the last register,
27113 although it should not be tracked in the unwind information; for -Os it
27114 may also push some dummy registers before the first register that needs
27115 to be tracked in the unwind information. Such dummy registers are there
27116 just to avoid a separate stack adjustment, and will not be restored in the
27117 epilogue. */
27119 static void
27120 arm_unwind_emit_sequence (FILE * asm_out_file, rtx p)
27122 int i;
27123 HOST_WIDE_INT offset;
27124 HOST_WIDE_INT nregs;
27125 int reg_size;
27126 unsigned reg;
27127 unsigned lastreg;
27128 unsigned padfirst = 0, padlast = 0;
27129 rtx e;
27131 e = XVECEXP (p, 0, 0);
27132 gcc_assert (GET_CODE (e) == SET);
27134 /* First insn will adjust the stack pointer. */
27135 gcc_assert (GET_CODE (e) == SET
27136 && REG_P (SET_DEST (e))
27137 && REGNO (SET_DEST (e)) == SP_REGNUM
27138 && GET_CODE (SET_SRC (e)) == PLUS);
27140 offset = -INTVAL (XEXP (SET_SRC (e), 1));
27141 nregs = XVECLEN (p, 0) - 1;
27142 gcc_assert (nregs);
27144 reg = REGNO (SET_SRC (XVECEXP (p, 0, 1)));
27145 if (reg < 16)
27147 /* For -Os dummy registers can be pushed at the beginning to
27148 avoid separate stack pointer adjustment. */
27149 e = XVECEXP (p, 0, 1);
27150 e = XEXP (SET_DEST (e), 0);
27151 if (GET_CODE (e) == PLUS)
27152 padfirst = INTVAL (XEXP (e, 1));
27153 gcc_assert (padfirst == 0 || optimize_size);
27154 /* The function prologue may also push pc, but not annotate it as it is
27155 never restored. We turn this into a stack pointer adjustment. */
27156 e = XVECEXP (p, 0, nregs);
27157 e = XEXP (SET_DEST (e), 0);
27158 if (GET_CODE (e) == PLUS)
27159 padlast = offset - INTVAL (XEXP (e, 1)) - 4;
27160 else
27161 padlast = offset - 4;
27162 gcc_assert (padlast == 0 || padlast == 4);
27163 if (padlast == 4)
27164 fprintf (asm_out_file, "\t.pad #4\n");
27165 reg_size = 4;
27166 fprintf (asm_out_file, "\t.save {");
27168 else if (IS_VFP_REGNUM (reg))
27170 reg_size = 8;
27171 fprintf (asm_out_file, "\t.vsave {");
27173 else
27174 /* Unknown register type. */
27175 gcc_unreachable ();
27177 /* If the stack increment doesn't match the size of the saved registers,
27178 something has gone horribly wrong. */
27179 gcc_assert (offset == padfirst + nregs * reg_size + padlast);
27181 offset = padfirst;
27182 lastreg = 0;
27183 /* The remaining insns will describe the stores. */
27184 for (i = 1; i <= nregs; i++)
27186 /* Expect (set (mem <addr>) (reg)).
27187 Where <addr> is (reg:SP) or (plus (reg:SP) (const_int)). */
27188 e = XVECEXP (p, 0, i);
27189 gcc_assert (GET_CODE (e) == SET
27190 && MEM_P (SET_DEST (e))
27191 && REG_P (SET_SRC (e)));
27193 reg = REGNO (SET_SRC (e));
27194 gcc_assert (reg >= lastreg);
27196 if (i != 1)
27197 fprintf (asm_out_file, ", ");
27198 /* We can't use %r for vfp because we need to use the
27199 double precision register names. */
27200 if (IS_VFP_REGNUM (reg))
27201 asm_fprintf (asm_out_file, "d%d", (reg - FIRST_VFP_REGNUM) / 2);
27202 else
27203 asm_fprintf (asm_out_file, "%r", reg);
27205 if (flag_checking)
27207 /* Check that the addresses are consecutive. */
27208 e = XEXP (SET_DEST (e), 0);
27209 if (GET_CODE (e) == PLUS)
27210 gcc_assert (REG_P (XEXP (e, 0))
27211 && REGNO (XEXP (e, 0)) == SP_REGNUM
27212 && CONST_INT_P (XEXP (e, 1))
27213 && offset == INTVAL (XEXP (e, 1)));
27214 else
27215 gcc_assert (i == 1
27216 && REG_P (e)
27217 && REGNO (e) == SP_REGNUM);
27218 offset += reg_size;
27221 fprintf (asm_out_file, "}\n");
27222 if (padfirst)
27223 fprintf (asm_out_file, "\t.pad #%d\n", padfirst);
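/* As an illustration, a prologue store-multiple such as
       push  {r4, r5, lr}
   is annotated here with
       .save {r4, r5, lr}
   while a VFP save of d8/d9 would instead produce ".vsave {d8, d9}".  */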
27226 /* Emit unwind directives for a SET. */
27228 static void
27229 arm_unwind_emit_set (FILE * asm_out_file, rtx p)
27231 rtx e0;
27232 rtx e1;
27233 unsigned reg;
27235 e0 = XEXP (p, 0);
27236 e1 = XEXP (p, 1);
27237 switch (GET_CODE (e0))
27239 case MEM:
27240 /* Pushing a single register. */
27241 if (GET_CODE (XEXP (e0, 0)) != PRE_DEC
27242 || !REG_P (XEXP (XEXP (e0, 0), 0))
27243 || REGNO (XEXP (XEXP (e0, 0), 0)) != SP_REGNUM)
27244 abort ();
27246 asm_fprintf (asm_out_file, "\t.save ");
27247 if (IS_VFP_REGNUM (REGNO (e1)))
27248 asm_fprintf(asm_out_file, "{d%d}\n",
27249 (REGNO (e1) - FIRST_VFP_REGNUM) / 2);
27250 else
27251 asm_fprintf(asm_out_file, "{%r}\n", REGNO (e1));
27252 break;
27254 case REG:
27255 if (REGNO (e0) == SP_REGNUM)
27257 /* A stack increment. */
27258 if (GET_CODE (e1) != PLUS
27259 || !REG_P (XEXP (e1, 0))
27260 || REGNO (XEXP (e1, 0)) != SP_REGNUM
27261 || !CONST_INT_P (XEXP (e1, 1)))
27262 abort ();
27264 asm_fprintf (asm_out_file, "\t.pad #%wd\n",
27265 -INTVAL (XEXP (e1, 1)));
27267 else if (REGNO (e0) == HARD_FRAME_POINTER_REGNUM)
27269 HOST_WIDE_INT offset;
27271 if (GET_CODE (e1) == PLUS)
27273 if (!REG_P (XEXP (e1, 0))
27274 || !CONST_INT_P (XEXP (e1, 1)))
27275 abort ();
27276 reg = REGNO (XEXP (e1, 0));
27277 offset = INTVAL (XEXP (e1, 1));
27278 asm_fprintf (asm_out_file, "\t.setfp %r, %r, #%wd\n",
27279 HARD_FRAME_POINTER_REGNUM, reg,
27280 offset);
27282 else if (REG_P (e1))
27284 reg = REGNO (e1);
27285 asm_fprintf (asm_out_file, "\t.setfp %r, %r\n",
27286 HARD_FRAME_POINTER_REGNUM, reg);
27288 else
27289 abort ();
27291 else if (REG_P (e1) && REGNO (e1) == SP_REGNUM)
27293 /* Move from sp to reg. */
27294 asm_fprintf (asm_out_file, "\t.movsp %r\n", REGNO (e0));
27296 else if (GET_CODE (e1) == PLUS
27297 && REG_P (XEXP (e1, 0))
27298 && REGNO (XEXP (e1, 0)) == SP_REGNUM
27299 && CONST_INT_P (XEXP (e1, 1)))
27301 /* Set reg to offset from sp. */
27302 asm_fprintf (asm_out_file, "\t.movsp %r, #%d\n",
27303 REGNO (e0), (int)INTVAL(XEXP (e1, 1)));
27305 else
27306 abort ();
27307 break;
27309 default:
27310 abort ();
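/* As an illustration, a stack adjustment "sub sp, sp, #16" is annotated as
   ".pad #16", and establishing the frame pointer with "add r7, sp, #8" is
   annotated as ".setfp r7, sp, #8" (r7 here assumes the Thumb frame pointer;
   on ARM it would be r11).  */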
27315 /* Emit unwind directives for the given insn. */
27317 static void
27318 arm_unwind_emit (FILE * asm_out_file, rtx_insn *insn)
27320 rtx note, pat;
27321 bool handled_one = false;
27323 if (arm_except_unwind_info (&global_options) != UI_TARGET)
27324 return;
27326 if (!(flag_unwind_tables || crtl->uses_eh_lsda)
27327 && (TREE_NOTHROW (current_function_decl)
27328 || crtl->all_throwers_are_sibcalls))
27329 return;
27331 if (NOTE_P (insn) || !RTX_FRAME_RELATED_P (insn))
27332 return;
27334 for (note = REG_NOTES (insn); note ; note = XEXP (note, 1))
27336 switch (REG_NOTE_KIND (note))
27338 case REG_FRAME_RELATED_EXPR:
27339 pat = XEXP (note, 0);
27340 goto found;
27342 case REG_CFA_REGISTER:
27343 pat = XEXP (note, 0);
27344 if (pat == NULL)
27346 pat = PATTERN (insn);
27347 if (GET_CODE (pat) == PARALLEL)
27348 pat = XVECEXP (pat, 0, 0);
27351 /* Only emitted for IS_STACKALIGN re-alignment. */
27353 rtx dest, src;
27354 unsigned reg;
27356 src = SET_SRC (pat);
27357 dest = SET_DEST (pat);
27359 gcc_assert (src == stack_pointer_rtx);
27360 reg = REGNO (dest);
27361 asm_fprintf (asm_out_file, "\t.unwind_raw 0, 0x%x @ vsp = r%d\n",
27362 reg + 0x90, reg);
27364 handled_one = true;
27365 break;
27367 /* The INSN is generated in the epilogue. It is set as RTX_FRAME_RELATED_P
27368 to get correct DWARF information for shrink-wrapping. We should not
27369 emit unwind information for it because these notes are used either for
27370 pretend arguments or to adjust sp and restore registers from the
27371 stack. */
27372 case REG_CFA_DEF_CFA:
27373 case REG_CFA_ADJUST_CFA:
27374 case REG_CFA_RESTORE:
27375 return;
27377 case REG_CFA_EXPRESSION:
27378 case REG_CFA_OFFSET:
27379 /* ??? Only handling here what we actually emit. */
27380 gcc_unreachable ();
27382 default:
27383 break;
27386 if (handled_one)
27387 return;
27388 pat = PATTERN (insn);
27389 found:
27391 switch (GET_CODE (pat))
27393 case SET:
27394 arm_unwind_emit_set (asm_out_file, pat);
27395 break;
27397 case SEQUENCE:
27398 /* Store multiple. */
27399 arm_unwind_emit_sequence (asm_out_file, pat);
27400 break;
27402 default:
27403 abort();
27408 /* Output a reference from a function exception table to the type_info
27409 object X. The EABI specifies that the symbol should be relocated by
27410 an R_ARM_TARGET2 relocation. */
27412 static bool
27413 arm_output_ttype (rtx x)
27415 fputs ("\t.word\t", asm_out_file);
27416 output_addr_const (asm_out_file, x);
27417 /* Use special relocations for symbol references. */
27418 if (!CONST_INT_P (x))
27419 fputs ("(TARGET2)", asm_out_file);
27420 fputc ('\n', asm_out_file);
27422 return TRUE;
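/* As an illustration, a reference to the type_info object for "int" would be
   emitted roughly as
       .word   _ZTIi(TARGET2)
   whereas an integer filler value is emitted without the relocation.  */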
27425 /* Implement TARGET_ASM_EMIT_EXCEPT_PERSONALITY. */
27427 static void
27428 arm_asm_emit_except_personality (rtx personality)
27430 fputs ("\t.personality\t", asm_out_file);
27431 output_addr_const (asm_out_file, personality);
27432 fputc ('\n', asm_out_file);
27434 #endif /* ARM_UNWIND_INFO */
27436 /* Implement TARGET_ASM_INITIALIZE_SECTIONS. */
27438 static void
27439 arm_asm_init_sections (void)
27441 #if ARM_UNWIND_INFO
27442 exception_section = get_unnamed_section (0, output_section_asm_op,
27443 "\t.handlerdata");
27444 #endif /* ARM_UNWIND_INFO */
27446 #ifdef OBJECT_FORMAT_ELF
27447 if (target_pure_code)
27448 text_section->unnamed.data = "\t.section .text,\"0x20000006\",%progbits";
27449 #endif
27452 /* Output unwind directives for the start/end of a function. */
27454 void
27455 arm_output_fn_unwind (FILE * f, bool prologue)
27457 if (arm_except_unwind_info (&global_options) != UI_TARGET)
27458 return;
27460 if (prologue)
27461 fputs ("\t.fnstart\n", f);
27462 else
27464 /* If this function will never be unwound, then mark it as such.
27465 The same condition is used in arm_unwind_emit to suppress
27466 the frame annotations. */
27467 if (!(flag_unwind_tables || crtl->uses_eh_lsda)
27468 && (TREE_NOTHROW (current_function_decl)
27469 || crtl->all_throwers_are_sibcalls))
27470 fputs("\t.cantunwind\n", f);
27472 fputs ("\t.fnend\n", f);
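/* As an illustration, a function that may be unwound is bracketed by
   ".fnstart" and ".fnend"; one that provably never unwinds additionally gets
   ".cantunwind" just before ".fnend".  */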
27476 static bool
27477 arm_emit_tls_decoration (FILE *fp, rtx x)
27479 enum tls_reloc reloc;
27480 rtx val;
27482 val = XVECEXP (x, 0, 0);
27483 reloc = (enum tls_reloc) INTVAL (XVECEXP (x, 0, 1));
27485 output_addr_const (fp, val);
27487 switch (reloc)
27489 case TLS_GD32:
27490 fputs ("(tlsgd)", fp);
27491 break;
27492 case TLS_LDM32:
27493 fputs ("(tlsldm)", fp);
27494 break;
27495 case TLS_LDO32:
27496 fputs ("(tlsldo)", fp);
27497 break;
27498 case TLS_IE32:
27499 fputs ("(gottpoff)", fp);
27500 break;
27501 case TLS_LE32:
27502 fputs ("(tpoff)", fp);
27503 break;
27504 case TLS_DESCSEQ:
27505 fputs ("(tlsdesc)", fp);
27506 break;
27507 default:
27508 gcc_unreachable ();
27511 switch (reloc)
27513 case TLS_GD32:
27514 case TLS_LDM32:
27515 case TLS_IE32:
27516 case TLS_DESCSEQ:
27517 fputs (" + (. - ", fp);
27518 output_addr_const (fp, XVECEXP (x, 0, 2));
27519 /* For TLS_DESCSEQ the 3rd operand encodes thumbness, and is added rather than subtracted. */
27520 fputs (reloc == TLS_DESCSEQ ? " + " : " - ", fp);
27521 output_addr_const (fp, XVECEXP (x, 0, 3));
27522 fputc (')', fp);
27523 break;
27524 default:
27525 break;
27528 return TRUE;
27531 /* ARM implementation of TARGET_ASM_OUTPUT_DWARF_DTPREL. */
27533 static void
27534 arm_output_dwarf_dtprel (FILE *file, int size, rtx x)
27536 gcc_assert (size == 4);
27537 fputs ("\t.word\t", file);
27538 output_addr_const (file, x);
27539 fputs ("(tlsldo)", file);
27542 /* Implement TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
27544 static bool
27545 arm_output_addr_const_extra (FILE *fp, rtx x)
27547 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
27548 return arm_emit_tls_decoration (fp, x);
27549 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_PIC_LABEL)
27551 char label[256];
27552 int labelno = INTVAL (XVECEXP (x, 0, 0));
27554 ASM_GENERATE_INTERNAL_LABEL (label, "LPIC", labelno);
27555 assemble_name_raw (fp, label);
27557 return TRUE;
27559 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_GOTSYM_OFF)
27561 assemble_name (fp, "_GLOBAL_OFFSET_TABLE_");
27562 if (GOT_PCREL)
27563 fputs ("+.", fp);
27564 fputs ("-(", fp);
27565 output_addr_const (fp, XVECEXP (x, 0, 0));
27566 fputc (')', fp);
27567 return TRUE;
27569 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_SYMBOL_OFFSET)
27571 output_addr_const (fp, XVECEXP (x, 0, 0));
27572 if (GOT_PCREL)
27573 fputs ("+.", fp);
27574 fputs ("-(", fp);
27575 output_addr_const (fp, XVECEXP (x, 0, 1));
27576 fputc (')', fp);
27577 return TRUE;
27579 else if (GET_CODE (x) == CONST_VECTOR)
27580 return arm_emit_vector_const (fp, x);
27582 return FALSE;
27585 /* Output assembly for a shift instruction.
27586 SET_FLAGS determines how the instruction modifies the condition codes.
27587 0 - Do not set condition codes.
27588 1 - Set condition codes.
27589 2 - Use smallest instruction. */
27590 const char *
27591 arm_output_shift(rtx * operands, int set_flags)
27593 char pattern[100];
27594 static const char flag_chars[3] = {'?', '.', '!'};
27595 const char *shift;
27596 HOST_WIDE_INT val;
27597 char c;
27599 c = flag_chars[set_flags];
27600 shift = shift_op(operands[3], &val);
27601 if (shift)
27603 if (val != -1)
27604 operands[2] = GEN_INT(val);
27605 sprintf (pattern, "%s%%%c\t%%0, %%1, %%2", shift, c);
27607 else
27608 sprintf (pattern, "mov%%%c\t%%0, %%1", c);
27610 output_asm_insn (pattern, operands);
27611 return "";
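/* As an illustration, a left shift by a constant 3 with SET_FLAGS == 1 goes
   through a pattern such as "lsl%.\t%0, %1, %2", which typically prints as
   "lsls r0, r1, #3" (register numbers depend on the operands).  */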
27614 /* Output assembly for a WMMX immediate shift instruction. */
27615 const char *
27616 arm_output_iwmmxt_shift_immediate (const char *insn_name, rtx *operands, bool wror_or_wsra)
27618 int shift = INTVAL (operands[2]);
27619 char templ[50];
27620 machine_mode opmode = GET_MODE (operands[0]);
27622 gcc_assert (shift >= 0);
27624 /* Handle the case where the shift value exceeds the maximum for the mode:
27625 > 63 (for the D qualifier), > 31 (for the W qualifier) or > 15 (for the H qualifier). */
27626 if (((opmode == V4HImode) && (shift > 15))
27627 || ((opmode == V2SImode) && (shift > 31))
27628 || ((opmode == DImode) && (shift > 63)))
27630 if (wror_or_wsra)
27632 sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, 32);
27633 output_asm_insn (templ, operands);
27634 if (opmode == DImode)
27636 sprintf (templ, "%s\t%%0, %%0, #%d", insn_name, 32);
27637 output_asm_insn (templ, operands);
27640 else
27642 /* The destination register will contain all zeros. */
27643 sprintf (templ, "wzero\t%%0");
27644 output_asm_insn (templ, operands);
27646 return "";
27649 if ((opmode == DImode) && (shift > 32))
27651 sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, 32);
27652 output_asm_insn (templ, operands);
27653 sprintf (templ, "%s\t%%0, %%0, #%d", insn_name, shift - 32);
27654 output_asm_insn (templ, operands);
27656 else
27658 sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, shift);
27659 output_asm_insn (templ, operands);
27661 return "";
27664 /* Output assembly for a WMMX tinsr instruction. */
27665 const char *
27666 arm_output_iwmmxt_tinsr (rtx *operands)
27668 int mask = INTVAL (operands[3]);
27669 int i;
27670 char templ[50];
27671 int units = mode_nunits[GET_MODE (operands[0])];
27672 gcc_assert ((mask & (mask - 1)) == 0);
27673 for (i = 0; i < units; ++i)
27675 if ((mask & 0x01) == 1)
27677 break;
27679 mask >>= 1;
27681 gcc_assert (i < units);
27683 switch (GET_MODE (operands[0]))
27685 case E_V8QImode:
27686 sprintf (templ, "tinsrb%%?\t%%0, %%2, #%d", i);
27687 break;
27688 case E_V4HImode:
27689 sprintf (templ, "tinsrh%%?\t%%0, %%2, #%d", i);
27690 break;
27691 case E_V2SImode:
27692 sprintf (templ, "tinsrw%%?\t%%0, %%2, #%d", i);
27693 break;
27694 default:
27695 gcc_unreachable ();
27696 break;
27698 output_asm_insn (templ, operands);
27700 return "";
27703 /* Output a Thumb-1 casesi dispatch sequence. */
27704 const char *
27705 thumb1_output_casesi (rtx *operands)
27707 rtx diff_vec = PATTERN (NEXT_INSN (as_a <rtx_insn *> (operands[0])));
27709 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
27711 switch (GET_MODE(diff_vec))
27713 case E_QImode:
27714 return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ?
27715 "bl\t%___gnu_thumb1_case_uqi" : "bl\t%___gnu_thumb1_case_sqi");
27716 case E_HImode:
27717 return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ?
27718 "bl\t%___gnu_thumb1_case_uhi" : "bl\t%___gnu_thumb1_case_shi");
27719 case E_SImode:
27720 return "bl\t%___gnu_thumb1_case_si";
27721 default:
27722 gcc_unreachable ();
27726 /* Output a Thumb-2 casesi instruction. */
27727 const char *
27728 thumb2_output_casesi (rtx *operands)
27730 rtx diff_vec = PATTERN (NEXT_INSN (as_a <rtx_insn *> (operands[2])));
27732 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
27734 output_asm_insn ("cmp\t%0, %1", operands);
27735 output_asm_insn ("bhi\t%l3", operands);
27736 switch (GET_MODE(diff_vec))
27738 case E_QImode:
27739 return "tbb\t[%|pc, %0]";
27740 case E_HImode:
27741 return "tbh\t[%|pc, %0, lsl #1]";
27742 case E_SImode:
27743 if (flag_pic)
27745 output_asm_insn ("adr\t%4, %l2", operands);
27746 output_asm_insn ("ldr\t%5, [%4, %0, lsl #2]", operands);
27747 output_asm_insn ("add\t%4, %4, %5", operands);
27748 return "bx\t%4";
27750 else
27752 output_asm_insn ("adr\t%4, %l2", operands);
27753 return "ldr\t%|pc, [%4, %0, lsl #2]";
27755 default:
27756 gcc_unreachable ();
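/* As an illustration, for a QImode dispatch table the emitted sequence is
       cmp   r0, #<max>
       bhi   .Ldefault
       tbb   [pc, r0]
   (register and label names depend on the operands).  */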
27760 /* Implement TARGET_SCHED_ISSUE_RATE. Lookup the issue rate in the
27761 per-core tuning structs. */
27762 static int
27763 arm_issue_rate (void)
27765 return current_tune->issue_rate;
27768 /* Return how many instructions the scheduler should look ahead to choose
27769 the best one. */
27770 static int
27771 arm_first_cycle_multipass_dfa_lookahead (void)
27773 int issue_rate = arm_issue_rate ();
27775 return issue_rate > 1 && !sched_fusion ? issue_rate : 0;
27778 /* Enable modeling of L2 auto-prefetcher. */
27779 static int
27780 arm_first_cycle_multipass_dfa_lookahead_guard (rtx_insn *insn, int ready_index)
27782 return autopref_multipass_dfa_lookahead_guard (insn, ready_index);
27785 const char *
27786 arm_mangle_type (const_tree type)
27788 /* The ARM ABI documents (10th October 2008) say that "__va_list"
27789 has to be mangled as if it is in the "std" namespace. */
27790 if (TARGET_AAPCS_BASED
27791 && lang_hooks.types_compatible_p (CONST_CAST_TREE (type), va_list_type))
27792 return "St9__va_list";
27794 /* Half-precision float. */
27795 if (TREE_CODE (type) == REAL_TYPE && TYPE_PRECISION (type) == 16)
27796 return "Dh";
27798 /* Try mangling as a Neon type, TYPE_NAME is non-NULL if this is a
27799 builtin type. */
27800 if (TYPE_NAME (type) != NULL)
27801 return arm_mangle_builtin_type (type);
27803 /* Use the default mangling. */
27804 return NULL;
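/* As an illustration, on AAPCS targets the builtin va_list type mangles as
   "St9__va_list" and __fp16 mangles as "Dh"; other types fall back to the
   default mangling.  */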
27807 /* Order of allocation of core registers for Thumb: this allocation is
27808 written over the corresponding initial entries of the array
27809 initialized with REG_ALLOC_ORDER. We allocate all low registers
27810 first. Saving and restoring a low register is usually cheaper than
27811 using a call-clobbered high register. */
27813 static const int thumb_core_reg_alloc_order[] =
27815 3, 2, 1, 0, 4, 5, 6, 7,
27816 12, 14, 8, 9, 10, 11
27819 /* Adjust register allocation order when compiling for Thumb. */
27821 void
27822 arm_order_regs_for_local_alloc (void)
27824 const int arm_reg_alloc_order[] = REG_ALLOC_ORDER;
27825 memcpy(reg_alloc_order, arm_reg_alloc_order, sizeof (reg_alloc_order));
27826 if (TARGET_THUMB)
27827 memcpy (reg_alloc_order, thumb_core_reg_alloc_order,
27828 sizeof (thumb_core_reg_alloc_order));
27831 /* Implement TARGET_FRAME_POINTER_REQUIRED. */
27833 bool
27834 arm_frame_pointer_required (void)
27836 if (SUBTARGET_FRAME_POINTER_REQUIRED)
27837 return true;
27839 /* If the function receives nonlocal gotos, it needs to save the frame
27840 pointer in the nonlocal_goto_save_area object. */
27841 if (cfun->has_nonlocal_label)
27842 return true;
27844 /* The frame pointer is required for non-leaf APCS frames. */
27845 if (TARGET_ARM && TARGET_APCS_FRAME && !crtl->is_leaf)
27846 return true;
27848 /* If we are probing the stack in the prologue, we will have a faulting
27849 instruction prior to the stack adjustment and this requires a frame
27850 pointer if we want to catch the exception using the EABI unwinder. */
27851 if (!IS_INTERRUPT (arm_current_func_type ())
27852 && flag_stack_check == STATIC_BUILTIN_STACK_CHECK
27853 && arm_except_unwind_info (&global_options) == UI_TARGET
27854 && cfun->can_throw_non_call_exceptions)
27856 HOST_WIDE_INT size = get_frame_size ();
27858 /* That's irrelevant if there is no stack adjustment. */
27859 if (size <= 0)
27860 return false;
27862 /* That's relevant only if there is a stack probe. */
27863 if (crtl->is_leaf && !cfun->calls_alloca)
27865 /* We don't have the final size of the frame so adjust. */
27866 size += 32 * UNITS_PER_WORD;
27867 if (size > PROBE_INTERVAL && size > STACK_CHECK_PROTECT)
27868 return true;
27870 else
27871 return true;
27874 return false;
27877 /* Thumb-1 is the only target that cannot support conditional execution, so
27878 return true unless the target is Thumb-1. */
27879 static bool
27880 arm_have_conditional_execution (void)
27882 return !TARGET_THUMB1;
27885 /* The AAPCS sets the maximum alignment of a vector to 64 bits. */
27886 static HOST_WIDE_INT
27887 arm_vector_alignment (const_tree type)
27889 HOST_WIDE_INT align = tree_to_shwi (TYPE_SIZE (type));
27891 if (TARGET_AAPCS_BASED)
27892 align = MIN (align, 64);
27894 return align;
27897 static unsigned int
27898 arm_autovectorize_vector_sizes (void)
27900 return TARGET_NEON_VECTORIZE_DOUBLE ? 0 : (16 | 8);
27903 static bool
27904 arm_vector_alignment_reachable (const_tree type, bool is_packed)
27906 /* Vectors which aren't in packed structures will not be less aligned than
27907 the natural alignment of their element type, so this is safe. */
27908 if (TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access)
27909 return !is_packed;
27911 return default_builtin_vector_alignment_reachable (type, is_packed);
27914 static bool
27915 arm_builtin_support_vector_misalignment (machine_mode mode,
27916 const_tree type, int misalignment,
27917 bool is_packed)
27919 if (TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access)
27921 HOST_WIDE_INT align = TYPE_ALIGN_UNIT (type);
27923 if (is_packed)
27924 return align == 1;
27926 /* If the misalignment is unknown, we should be able to handle the access
27927 so long as it is not to a member of a packed data structure. */
27928 if (misalignment == -1)
27929 return true;
27931 /* Return true if the misalignment is a multiple of the natural alignment
27932 of the vector's element type. This is probably always going to be
27933 true in practice, since we've already established that this isn't a
27934 packed access. */
27935 return ((misalignment % align) == 0);
27938 return default_builtin_support_vector_misalignment (mode, type, misalignment,
27939 is_packed);
27942 static void
27943 arm_conditional_register_usage (void)
27945 int regno;
27947 if (TARGET_THUMB1 && optimize_size)
27949 /* When optimizing for size on Thumb-1, it's better not
27950 to use the HI regs, because of the overhead of
27951 stacking them. */
27952 for (regno = FIRST_HI_REGNUM; regno <= LAST_HI_REGNUM; ++regno)
27953 fixed_regs[regno] = call_used_regs[regno] = 1;
27956 /* The link register can be clobbered by any branch insn,
27957 but we have no way to track that at present, so mark
27958 it as unavailable. */
27959 if (TARGET_THUMB1)
27960 fixed_regs[LR_REGNUM] = call_used_regs[LR_REGNUM] = 1;
27962 if (TARGET_32BIT && TARGET_HARD_FLOAT)
27964 /* VFPv3 registers are disabled when earlier VFP
27965 versions are selected due to the definition of
27966 LAST_VFP_REGNUM. */
27967 for (regno = FIRST_VFP_REGNUM;
27968 regno <= LAST_VFP_REGNUM; ++ regno)
27970 fixed_regs[regno] = 0;
27971 call_used_regs[regno] = regno < FIRST_VFP_REGNUM + 16
27972 || regno >= FIRST_VFP_REGNUM + 32;
27976 if (TARGET_REALLY_IWMMXT)
27978 regno = FIRST_IWMMXT_GR_REGNUM;
27979 /* The 2002/10/09 revision of the XScale ABI has wCG0
27980 and wCG1 as call-preserved registers. The 2002/11/21
27981 revision changed this so that all wCG registers are
27982 scratch registers. */
27983 for (regno = FIRST_IWMMXT_GR_REGNUM;
27984 regno <= LAST_IWMMXT_GR_REGNUM; ++ regno)
27985 fixed_regs[regno] = 0;
27986 /* The XScale ABI has wR0 - wR9 as scratch registers,
27987 the rest as call-preserved registers. */
27988 for (regno = FIRST_IWMMXT_REGNUM;
27989 regno <= LAST_IWMMXT_REGNUM; ++ regno)
27991 fixed_regs[regno] = 0;
27992 call_used_regs[regno] = regno < FIRST_IWMMXT_REGNUM + 10;
27996 if ((unsigned) PIC_OFFSET_TABLE_REGNUM != INVALID_REGNUM)
27998 fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
27999 call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
28001 else if (TARGET_APCS_STACK)
28003 fixed_regs[10] = 1;
28004 call_used_regs[10] = 1;
28006 /* -mcaller-super-interworking reserves r11 for calls to
28007 _interwork_r11_call_via_rN(). Making the register global
28008 is an easy way of ensuring that it remains valid for all
28009 calls. */
28010 if (TARGET_APCS_FRAME || TARGET_CALLER_INTERWORKING
28011 || TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME)
28013 fixed_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
28014 call_used_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
28015 if (TARGET_CALLER_INTERWORKING)
28016 global_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
28018 SUBTARGET_CONDITIONAL_REGISTER_USAGE
28021 static reg_class_t
28022 arm_preferred_rename_class (reg_class_t rclass)
28024 /* Thumb-2 instructions using LO_REGS may be smaller than instructions
28025 using GENERAL_REGS. During the register renaming pass, we prefer LO_REGS,
28026 since this can reduce code size. */
28027 if (TARGET_THUMB2 && rclass == GENERAL_REGS)
28028 return LO_REGS;
28029 else
28030 return NO_REGS;
28033 /* Compute the attribute "length" of insn "*push_multi".
28034 So this function MUST be kept in sync with that insn pattern. */
28036 arm_attr_length_push_multi(rtx parallel_op, rtx first_op)
28038 int i, regno, hi_reg;
28039 int num_saves = XVECLEN (parallel_op, 0);
28041 /* ARM mode. */
28042 if (TARGET_ARM)
28043 return 4;
28044 /* Thumb1 mode. */
28045 if (TARGET_THUMB1)
28046 return 2;
28048 /* Thumb2 mode. */
28049 regno = REGNO (first_op);
28050 /* For PUSH/STM under Thumb-2 mode, we can use 16-bit encodings if the register
28051 list fits in 8 bits. Normally this means all registers in the list must be
28052 LO_REGS, that is R0-R7. If any HI_REGS are used, then we must use 32-bit
28053 encodings. The one exception is PUSH: LR, although in HI_REGS, can be used
28054 with the 16-bit encoding. */
28055 hi_reg = (REGNO_REG_CLASS (regno) == HI_REGS) && (regno != LR_REGNUM);
28056 for (i = 1; i < num_saves && !hi_reg; i++)
28058 regno = REGNO (XEXP (XVECEXP (parallel_op, 0, i), 0));
28059 hi_reg |= (REGNO_REG_CLASS (regno) == HI_REGS) && (regno != LR_REGNUM);
28062 if (!hi_reg)
28063 return 2;
28064 return 4;
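/* As an illustration, in Thumb-2 "push {r0-r7}" and "push {r4, lr}" use the
   16-bit encoding (length 2), while "push {r4, r8}" needs the 32-bit
   encoding (length 4).  */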
28067 /* Compute the attribute "length" of an insn. Currently, this function is used
28068 for "*load_multiple_with_writeback", "*pop_multiple_with_return" and
28069 "*pop_multiple_with_writeback_and_return". OPERANDS is the toplevel PARALLEL
28070 rtx, RETURN_PC is true if OPERANDS contains a return insn, and WRITE_BACK_P
28071 is true if OPERANDS contains an insn that explicitly updates the base register. */
28074 arm_attr_length_pop_multi (rtx *operands, bool return_pc, bool write_back_p)
28076 /* ARM mode. */
28077 if (TARGET_ARM)
28078 return 4;
28079 /* Thumb1 mode. */
28080 if (TARGET_THUMB1)
28081 return 2;
28083 rtx parallel_op = operands[0];
28084 /* Initialize to elements number of PARALLEL. */
28085 unsigned indx = XVECLEN (parallel_op, 0) - 1;
28086 /* Initialize the value to base register. */
28087 unsigned regno = REGNO (operands[1]);
28088 /* Skip return and write back pattern.
28089 We only need register pop pattern for later analysis. */
28090 unsigned first_indx = 0;
28091 first_indx += return_pc ? 1 : 0;
28092 first_indx += write_back_p ? 1 : 0;
28094 /* A pop operation can be done through LDM or POP. If the base register is SP
28095 and write-back is used, then LDM is an alias of POP. */
28096 bool pop_p = (regno == SP_REGNUM && write_back_p);
28097 bool ldm_p = !pop_p;
28099 /* Check base register for LDM. */
28100 if (ldm_p && REGNO_REG_CLASS (regno) == HI_REGS)
28101 return 4;
28103 /* Check each register in the list. */
28104 for (; indx >= first_indx; indx--)
28106 regno = REGNO (XEXP (XVECEXP (parallel_op, 0, indx), 0));
28107 /* For POP, PC in HI_REGS can be used with 16-bit encoding. See similar
28108 comment in arm_attr_length_push_multi. */
28109 if (REGNO_REG_CLASS (regno) == HI_REGS
28110 && (regno != PC_REGNUM || ldm_p))
28111 return 4;
28114 return 2;
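/* As an illustration, in Thumb-2 "pop {r4, pc}" is length 2 because PC is
   permitted in the 16-bit POP encoding, while an LDM whose base register is
   a high register is always length 4.  */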
28117 /* Compute the number of instructions emitted by output_move_double. */
28119 arm_count_output_move_double_insns (rtx *operands)
28121 int count;
28122 rtx ops[2];
28123 /* output_move_double may modify the operands array, so call it
28124 here on a copy of the array. */
28125 ops[0] = operands[0];
28126 ops[1] = operands[1];
28127 output_move_double (ops, false, &count);
28128 return count;
28132 vfp3_const_double_for_fract_bits (rtx operand)
28134 REAL_VALUE_TYPE r0;
28136 if (!CONST_DOUBLE_P (operand))
28137 return 0;
28139 r0 = *CONST_DOUBLE_REAL_VALUE (operand);
28140 if (exact_real_inverse (DFmode, &r0)
28141 && !REAL_VALUE_NEGATIVE (r0))
28143 if (exact_real_truncate (DFmode, &r0))
28145 HOST_WIDE_INT value = real_to_integer (&r0);
28146 value = value & 0xffffffff;
28147 if ((value != 0) && ( (value & (value - 1)) == 0))
28149 int ret = exact_log2 (value);
28150 gcc_assert (IN_RANGE (ret, 0, 31));
28151 return ret;
28155 return 0;
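/* As an illustration, the DFmode constant 0.25 yields 2 here, since its
   exact reciprocal is 4.0 == 2^2; a constant such as 0.3, whose reciprocal
   is not a power of two, yields 0.  */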
28158 /* If X is a CONST_DOUBLE with a value that is a power of 2 whose
28159 log2 is in [1, 32], return that log2. Otherwise return -1.
28160 This is used in the patterns for vcvt.s32.f32 floating-point to
28161 fixed-point conversions. */
28164 vfp3_const_double_for_bits (rtx x)
28166 const REAL_VALUE_TYPE *r;
28168 if (!CONST_DOUBLE_P (x))
28169 return -1;
28171 r = CONST_DOUBLE_REAL_VALUE (x);
28173 if (REAL_VALUE_NEGATIVE (*r)
28174 || REAL_VALUE_ISNAN (*r)
28175 || REAL_VALUE_ISINF (*r)
28176 || !real_isinteger (r, SFmode))
28177 return -1;
28179 HOST_WIDE_INT hwint = exact_log2 (real_to_integer (r));
28181 /* The exact_log2 above will have returned -1 if this is
28182 not an exact log2. */
28183 if (!IN_RANGE (hwint, 1, 32))
28184 return -1;
28186 return hwint;
28190 /* Emit a memory barrier around an atomic sequence according to MODEL. */
28192 static void
28193 arm_pre_atomic_barrier (enum memmodel model)
28195 if (need_atomic_barrier_p (model, true))
28196 emit_insn (gen_memory_barrier ());
28199 static void
28200 arm_post_atomic_barrier (enum memmodel model)
28202 if (need_atomic_barrier_p (model, false))
28203 emit_insn (gen_memory_barrier ());
28206 /* Emit the load-exclusive and store-exclusive instructions.
28207 Use acquire and release versions if necessary. */
28209 static void
28210 arm_emit_load_exclusive (machine_mode mode, rtx rval, rtx mem, bool acq)
28212 rtx (*gen) (rtx, rtx);
28214 if (acq)
28216 switch (mode)
28218 case E_QImode: gen = gen_arm_load_acquire_exclusiveqi; break;
28219 case E_HImode: gen = gen_arm_load_acquire_exclusivehi; break;
28220 case E_SImode: gen = gen_arm_load_acquire_exclusivesi; break;
28221 case E_DImode: gen = gen_arm_load_acquire_exclusivedi; break;
28222 default:
28223 gcc_unreachable ();
28226 else
28228 switch (mode)
28230 case E_QImode: gen = gen_arm_load_exclusiveqi; break;
28231 case E_HImode: gen = gen_arm_load_exclusivehi; break;
28232 case E_SImode: gen = gen_arm_load_exclusivesi; break;
28233 case E_DImode: gen = gen_arm_load_exclusivedi; break;
28234 default:
28235 gcc_unreachable ();
28239 emit_insn (gen (rval, mem));
28242 static void
28243 arm_emit_store_exclusive (machine_mode mode, rtx bval, rtx rval,
28244 rtx mem, bool rel)
28246 rtx (*gen) (rtx, rtx, rtx);
28248 if (rel)
28250 switch (mode)
28252 case E_QImode: gen = gen_arm_store_release_exclusiveqi; break;
28253 case E_HImode: gen = gen_arm_store_release_exclusivehi; break;
28254 case E_SImode: gen = gen_arm_store_release_exclusivesi; break;
28255 case E_DImode: gen = gen_arm_store_release_exclusivedi; break;
28256 default:
28257 gcc_unreachable ();
28260 else
28262 switch (mode)
28264 case E_QImode: gen = gen_arm_store_exclusiveqi; break;
28265 case E_HImode: gen = gen_arm_store_exclusivehi; break;
28266 case E_SImode: gen = gen_arm_store_exclusivesi; break;
28267 case E_DImode: gen = gen_arm_store_exclusivedi; break;
28268 default:
28269 gcc_unreachable ();
28273 emit_insn (gen (bval, rval, mem));
28276 /* Mark the previous jump instruction as unlikely. */
28278 static void
28279 emit_unlikely_jump (rtx insn)
28281 rtx_insn *jump = emit_jump_insn (insn);
28282 add_reg_br_prob_note (jump, profile_probability::very_unlikely ());
28285 /* Expand a compare and swap pattern. */
28287 void
28288 arm_expand_compare_and_swap (rtx operands[])
28290 rtx bval, bdst, rval, mem, oldval, newval, is_weak, mod_s, mod_f, x;
28291 machine_mode mode;
28292 rtx (*gen) (rtx, rtx, rtx, rtx, rtx, rtx, rtx, rtx);
28294 bval = operands[0];
28295 rval = operands[1];
28296 mem = operands[2];
28297 oldval = operands[3];
28298 newval = operands[4];
28299 is_weak = operands[5];
28300 mod_s = operands[6];
28301 mod_f = operands[7];
28302 mode = GET_MODE (mem);
28304 /* Normally the succ memory model must be stronger than fail, but in the
28305 unlikely event of fail being ACQUIRE and succ being RELEASE we need to
28306 promote succ to ACQ_REL so that we don't lose the acquire semantics. */
28308 if (TARGET_HAVE_LDACQ
28309 && is_mm_acquire (memmodel_from_int (INTVAL (mod_f)))
28310 && is_mm_release (memmodel_from_int (INTVAL (mod_s))))
28311 mod_s = GEN_INT (MEMMODEL_ACQ_REL);
28313 switch (mode)
28315 case E_QImode:
28316 case E_HImode:
28317 /* For narrow modes, we're going to perform the comparison in SImode,
28318 so do the zero-extension now. */
28319 rval = gen_reg_rtx (SImode);
28320 oldval = convert_modes (SImode, mode, oldval, true);
28321 /* FALLTHRU */
28323 case E_SImode:
28324 /* Force the value into a register if needed. We waited until after
28325 the zero-extension above to do this properly. */
28326 if (!arm_add_operand (oldval, SImode))
28327 oldval = force_reg (SImode, oldval);
28328 break;
28330 case E_DImode:
28331 if (!cmpdi_operand (oldval, mode))
28332 oldval = force_reg (mode, oldval);
28333 break;
28335 default:
28336 gcc_unreachable ();
28339 if (TARGET_THUMB1)
28341 switch (mode)
28343 case E_QImode: gen = gen_atomic_compare_and_swapt1qi_1; break;
28344 case E_HImode: gen = gen_atomic_compare_and_swapt1hi_1; break;
28345 case E_SImode: gen = gen_atomic_compare_and_swapt1si_1; break;
28346 case E_DImode: gen = gen_atomic_compare_and_swapt1di_1; break;
28347 default:
28348 gcc_unreachable ();
28351 else
28353 switch (mode)
28355 case E_QImode: gen = gen_atomic_compare_and_swap32qi_1; break;
28356 case E_HImode: gen = gen_atomic_compare_and_swap32hi_1; break;
28357 case E_SImode: gen = gen_atomic_compare_and_swap32si_1; break;
28358 case E_DImode: gen = gen_atomic_compare_and_swap32di_1; break;
28359 default:
28360 gcc_unreachable ();
28364 bdst = TARGET_THUMB1 ? bval : gen_rtx_REG (CC_Zmode, CC_REGNUM);
28365 emit_insn (gen (bdst, rval, mem, oldval, newval, is_weak, mod_s, mod_f));
28367 if (mode == QImode || mode == HImode)
28368 emit_move_insn (operands[1], gen_lowpart (mode, rval));
28370 /* In all cases, we arrange for success to be signaled by the Z flag being set.
28371 This arrangement allows the boolean result to be used directly
28372 in a subsequent branch, post optimization. For Thumb-1 targets, the
28373 boolean negation of the result is also stored in bval because the Thumb-1
28374 backend lacks dependency tracking for the CC flag, as flag-setting is not
28375 represented at the RTL level. */
28376 if (TARGET_THUMB1)
28377 emit_insn (gen_cstoresi_eq0_thumb1 (bval, bdst));
28378 else
28380 x = gen_rtx_EQ (SImode, bdst, const0_rtx);
28381 emit_insn (gen_rtx_SET (bval, x));
28385 /* Split a compare and swap pattern. It is IMPLEMENTATION DEFINED whether
28386 another memory store between the load-exclusive and store-exclusive can
28387 reset the monitor from Exclusive to Open state. This means we must wait
28388 until after reload to split the pattern, lest we get a register spill in
28389 the middle of the atomic sequence. Success of the compare and swap is
28390 indicated by the Z flag set for 32-bit targets and by neg_bval being zero
28391 for Thumb-1 targets (i.e. negation of the boolean value returned by
28392 atomic_compare_and_swapmode standard pattern in operand 0). */
28394 void
28395 arm_split_compare_and_swap (rtx operands[])
28397 rtx rval, mem, oldval, newval, neg_bval;
28398 machine_mode mode;
28399 enum memmodel mod_s, mod_f;
28400 bool is_weak;
28401 rtx_code_label *label1, *label2;
28402 rtx x, cond;
28404 rval = operands[1];
28405 mem = operands[2];
28406 oldval = operands[3];
28407 newval = operands[4];
28408 is_weak = (operands[5] != const0_rtx);
28409 mod_s = memmodel_from_int (INTVAL (operands[6]));
28410 mod_f = memmodel_from_int (INTVAL (operands[7]));
28411 neg_bval = TARGET_THUMB1 ? operands[0] : operands[8];
28412 mode = GET_MODE (mem);
28414 bool is_armv8_sync = arm_arch8 && is_mm_sync (mod_s);
28416 bool use_acquire = TARGET_HAVE_LDACQ
28417 && !(is_mm_relaxed (mod_s) || is_mm_consume (mod_s)
28418 || is_mm_release (mod_s));
28420 bool use_release = TARGET_HAVE_LDACQ
28421 && !(is_mm_relaxed (mod_s) || is_mm_consume (mod_s)
28422 || is_mm_acquire (mod_s));
28424 /* For ARMv8, the load-acquire is too weak for __sync memory orders. Instead,
28425 a full barrier is emitted after the store-release. */
28426 if (is_armv8_sync)
28427 use_acquire = false;
28429 /* Checks whether a barrier is needed and emits one accordingly. */
28430 if (!(use_acquire || use_release))
28431 arm_pre_atomic_barrier (mod_s);
28433 label1 = NULL;
28434 if (!is_weak)
28436 label1 = gen_label_rtx ();
28437 emit_label (label1);
28439 label2 = gen_label_rtx ();
28441 arm_emit_load_exclusive (mode, rval, mem, use_acquire);
28443 /* Z is set to 0 for 32bit targets (resp. rval set to 1) if oldval != rval,
28444 as required to communicate with arm_expand_compare_and_swap. */
28445 if (TARGET_32BIT)
28447 cond = arm_gen_compare_reg (NE, rval, oldval, neg_bval);
28448 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
28449 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
28450 gen_rtx_LABEL_REF (Pmode, label2), pc_rtx);
28451 emit_unlikely_jump (gen_rtx_SET (pc_rtx, x));
28453 else
28455 emit_move_insn (neg_bval, const1_rtx);
28456 cond = gen_rtx_NE (VOIDmode, rval, oldval);
28457 if (thumb1_cmpneg_operand (oldval, SImode))
28458 emit_unlikely_jump (gen_cbranchsi4_scratch (neg_bval, rval, oldval,
28459 label2, cond));
28460 else
28461 emit_unlikely_jump (gen_cbranchsi4_insn (cond, rval, oldval, label2));
28464 arm_emit_store_exclusive (mode, neg_bval, mem, newval, use_release);
28466 /* Weak or strong, we want EQ to be true for success, so that we
28467 match the flags that we got from the compare above. */
28468 if (TARGET_32BIT)
28470 cond = gen_rtx_REG (CCmode, CC_REGNUM);
28471 x = gen_rtx_COMPARE (CCmode, neg_bval, const0_rtx);
28472 emit_insn (gen_rtx_SET (cond, x));
28475 if (!is_weak)
28477 /* Z is set to boolean value of !neg_bval, as required to communicate
28478 with arm_expand_compare_and_swap. */
28479 x = gen_rtx_NE (VOIDmode, neg_bval, const0_rtx);
28480 emit_unlikely_jump (gen_cbranchsi4 (x, neg_bval, const0_rtx, label1));
28483 if (!is_mm_relaxed (mod_f))
28484 emit_label (label2);
28486 /* Checks whether a barrier is needed and emits one accordingly. */
28487 if (is_armv8_sync
28488 || !(use_acquire || use_release))
28489 arm_post_atomic_barrier (mod_s);
28491 if (is_mm_relaxed (mod_f))
28492 emit_label (label2);
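/* As an illustration, a strong SImode compare-and-swap on a 32-bit target is
   split into a loop of roughly this shape (register numbers are arbitrary):

       .L1: ldrex   r3, [r1]
            cmp     r3, r4          @ oldval
            bne     .L2
            strex   r2, r5, [r1]    @ newval
            cmp     r2, #0
            bne     .L1
       .L2:                                                                   */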
28495 /* Split an atomic operation pattern. Operation is given by CODE and is one
28496 of PLUS, MINUS, IOR, XOR, SET (for an exchange operation) or NOT (for a nand
28497 operation). Operation is performed on the content at MEM and on VALUE
28498 following the memory model MODEL_RTX. The content at MEM before and after
28499 the operation is returned in OLD_OUT and NEW_OUT respectively while the
28500 success of the operation is returned in COND. Using a scratch register or
28501 an operand register for these determines what result is returned for that
28502 pattern. */
28504 void
28505 arm_split_atomic_op (enum rtx_code code, rtx old_out, rtx new_out, rtx mem,
28506 rtx value, rtx model_rtx, rtx cond)
28508 enum memmodel model = memmodel_from_int (INTVAL (model_rtx));
28509 machine_mode mode = GET_MODE (mem);
28510 machine_mode wmode = (mode == DImode ? DImode : SImode);
28511 rtx_code_label *label;
28512 bool all_low_regs, bind_old_new;
28513 rtx x;
28515 bool is_armv8_sync = arm_arch8 && is_mm_sync (model);
28517 bool use_acquire = TARGET_HAVE_LDACQ
28518 && !(is_mm_relaxed (model) || is_mm_consume (model)
28519 || is_mm_release (model));
28521 bool use_release = TARGET_HAVE_LDACQ
28522 && !(is_mm_relaxed (model) || is_mm_consume (model)
28523 || is_mm_acquire (model));
28525 /* For ARMv8, a load-acquire is too weak for __sync memory orders. Instead,
28526 a full barrier is emitted after the store-release. */
28527 if (is_armv8_sync)
28528 use_acquire = false;
28530 /* Checks whether a barrier is needed and emits one accordingly. */
28531 if (!(use_acquire || use_release))
28532 arm_pre_atomic_barrier (model);
28534 label = gen_label_rtx ();
28535 emit_label (label);
28537 if (new_out)
28538 new_out = gen_lowpart (wmode, new_out);
28539 if (old_out)
28540 old_out = gen_lowpart (wmode, old_out);
28541 else
28542 old_out = new_out;
28543 value = simplify_gen_subreg (wmode, value, mode, 0);
28545 arm_emit_load_exclusive (mode, old_out, mem, use_acquire);
28547 /* Does the operation require destination and first operand to use the same
28548 register? This is decided by register constraints of relevant insn
28549 patterns in thumb1.md. */
28550 gcc_assert (!new_out || REG_P (new_out));
28551 all_low_regs = REG_P (value) && REGNO_REG_CLASS (REGNO (value)) == LO_REGS
28552 && new_out && REGNO_REG_CLASS (REGNO (new_out)) == LO_REGS
28553 && REGNO_REG_CLASS (REGNO (old_out)) == LO_REGS;
28554 bind_old_new =
28555 (TARGET_THUMB1
28556 && code != SET
28557 && code != MINUS
28558 && (code != PLUS || (!all_low_regs && !satisfies_constraint_L (value))));
28560 /* We want to return the old value while putting the result of the operation
28561 in the same register as the old value so copy the old value over to the
28562 destination register and use that register for the operation. */
28563 if (old_out && bind_old_new)
28565 emit_move_insn (new_out, old_out);
28566 old_out = new_out;
28569 switch (code)
28571 case SET:
28572 new_out = value;
28573 break;
28575 case NOT:
28576 x = gen_rtx_AND (wmode, old_out, value);
28577 emit_insn (gen_rtx_SET (new_out, x));
28578 x = gen_rtx_NOT (wmode, new_out);
28579 emit_insn (gen_rtx_SET (new_out, x));
28580 break;
28582 case MINUS:
28583 if (CONST_INT_P (value))
28585 value = GEN_INT (-INTVAL (value));
28586 code = PLUS;
28588 /* FALLTHRU */
28590 case PLUS:
28591 if (mode == DImode)
28593 /* DImode plus/minus need to clobber flags. */
28594 /* The adddi3 and subdi3 patterns are incorrectly written so that
28595 they require matching operands, even when we could easily support
28596 three operands. Thankfully, this can be fixed up post-splitting,
28597 as the individual add+adc patterns do accept three operands and
28598 post-reload cprop can make these moves go away. */
28599 emit_move_insn (new_out, old_out);
28600 if (code == PLUS)
28601 x = gen_adddi3 (new_out, new_out, value);
28602 else
28603 x = gen_subdi3 (new_out, new_out, value);
28604 emit_insn (x);
28605 break;
28607 /* FALLTHRU */
28609 default:
28610 x = gen_rtx_fmt_ee (code, wmode, old_out, value);
28611 emit_insn (gen_rtx_SET (new_out, x));
28612 break;
28615 arm_emit_store_exclusive (mode, cond, mem, gen_lowpart (mode, new_out),
28616 use_release);
28618 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
28619 emit_unlikely_jump (gen_cbranchsi4 (x, cond, const0_rtx, label));
28621 /* Checks whether a barrier is needed and emits one accordingly. */
28622 if (is_armv8_sync
28623 || !(use_acquire || use_release))
28624 arm_post_atomic_barrier (model);
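/* As an illustration, an SImode atomic fetch-and-add on a 32-bit target is
   split into a loop of roughly this shape (register numbers are arbitrary):

       .L1: ldrex   r0, [r2]
            add     r1, r0, r3
            strex   ip, r1, [r2]
            cmp     ip, #0
            bne     .L1                                                       */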
28627 #define MAX_VECT_LEN 16
28629 struct expand_vec_perm_d
28631 rtx target, op0, op1;
28632 unsigned char perm[MAX_VECT_LEN];
28633 machine_mode vmode;
28634 unsigned char nelt;
28635 bool one_vector_p;
28636 bool testing_p;
28639 /* Generate a variable permutation. */
28641 static void
28642 arm_expand_vec_perm_1 (rtx target, rtx op0, rtx op1, rtx sel)
28644 machine_mode vmode = GET_MODE (target);
28645 bool one_vector_p = rtx_equal_p (op0, op1);
28647 gcc_checking_assert (vmode == V8QImode || vmode == V16QImode);
28648 gcc_checking_assert (GET_MODE (op0) == vmode);
28649 gcc_checking_assert (GET_MODE (op1) == vmode);
28650 gcc_checking_assert (GET_MODE (sel) == vmode);
28651 gcc_checking_assert (TARGET_NEON);
28653 if (one_vector_p)
28655 if (vmode == V8QImode)
28656 emit_insn (gen_neon_vtbl1v8qi (target, op0, sel));
28657 else
28658 emit_insn (gen_neon_vtbl1v16qi (target, op0, sel));
28660 else
28662 rtx pair;
28664 if (vmode == V8QImode)
28666 pair = gen_reg_rtx (V16QImode);
28667 emit_insn (gen_neon_vcombinev8qi (pair, op0, op1));
28668 pair = gen_lowpart (TImode, pair);
28669 emit_insn (gen_neon_vtbl2v8qi (target, pair, sel));
28671 else
28673 pair = gen_reg_rtx (OImode);
28674 emit_insn (gen_neon_vcombinev16qi (pair, op0, op1));
28675 emit_insn (gen_neon_vtbl2v16qi (target, pair, sel));
28680 void
28681 arm_expand_vec_perm (rtx target, rtx op0, rtx op1, rtx sel)
28683 machine_mode vmode = GET_MODE (target);
28684 unsigned int i, nelt = GET_MODE_NUNITS (vmode);
28685 bool one_vector_p = rtx_equal_p (op0, op1);
28686 rtx rmask[MAX_VECT_LEN], mask;
28688 /* TODO: ARM's VTBL indexing is little-endian. In order to handle GCC's
28689 numbering of elements for big-endian, we must reverse the order. */
28690 gcc_checking_assert (!BYTES_BIG_ENDIAN);
28692 /* The VTBL instruction does not use a modulo index, so we must take care
28693 of that ourselves. */
28694 mask = GEN_INT (one_vector_p ? nelt - 1 : 2 * nelt - 1);
28695 for (i = 0; i < nelt; ++i)
28696 rmask[i] = mask;
28697 mask = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rmask));
28698 sel = expand_simple_binop (vmode, AND, sel, mask, NULL, 0, OPTAB_LIB_WIDEN);
28700 arm_expand_vec_perm_1 (target, op0, op1, sel);
28703 /* Map lane ordering between architectural lane order, and GCC lane order,
28704 taking into account ABI. See comment above output_move_neon for details. */
28706 static int
28707 neon_endian_lane_map (machine_mode mode, int lane)
28709 if (BYTES_BIG_ENDIAN)
28711 int nelems = GET_MODE_NUNITS (mode);
28712 /* Reverse lane order. */
28713 lane = (nelems - 1 - lane);
28714 /* Reverse D register order, to match ABI. */
28715 if (GET_MODE_SIZE (mode) == 16)
28716 lane = lane ^ (nelems / 2);
28718 return lane;
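/* For illustration: on a big-endian target with V4SImode (a 16-byte vector of
   four lanes), the mapping above is 0->1, 1->0, 2->3, 3->2: lanes are first
   reversed within the vector and then the two D registers are swapped back to
   match the ABI.  On little-endian targets the function is the identity.  */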
28721 /* Some permutations index into pairs of vectors, this is a helper function
28722 to map indexes into those pairs of vectors. */
28724 static int
28725 neon_pair_endian_lane_map (machine_mode mode, int lane)
28727 int nelem = GET_MODE_NUNITS (mode);
28728 if (BYTES_BIG_ENDIAN)
28729 lane =
28730 neon_endian_lane_map (mode, lane & (nelem - 1)) + (lane & nelem);
28731 return lane;
28734 /* Generate or test for an insn that supports a constant permutation. */
28736 /* Recognize patterns for the VUZP insns. */
28738 static bool
28739 arm_evpc_neon_vuzp (struct expand_vec_perm_d *d)
28741 unsigned int i, odd, mask, nelt = d->nelt;
28742 rtx out0, out1, in0, in1;
28743 rtx (*gen)(rtx, rtx, rtx, rtx);
28744 int first_elem;
28745 int swap_nelt;
28747 if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
28748 return false;
28750 /* arm_expand_vec_perm_const_1 () helpfully swaps the operands for the
28751 big endian pattern on 64 bit vectors, so we correct for that. */
28752 swap_nelt = BYTES_BIG_ENDIAN && !d->one_vector_p
28753 && GET_MODE_SIZE (d->vmode) == 8 ? d->nelt : 0;
28755 first_elem = d->perm[neon_endian_lane_map (d->vmode, 0)] ^ swap_nelt;
28757 if (first_elem == neon_endian_lane_map (d->vmode, 0))
28758 odd = 0;
28759 else if (first_elem == neon_endian_lane_map (d->vmode, 1))
28760 odd = 1;
28761 else
28762 return false;
28763 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
28765 for (i = 0; i < nelt; i++)
28767 unsigned elt =
28768 (neon_pair_endian_lane_map (d->vmode, i) * 2 + odd) & mask;
28769 if ((d->perm[i] ^ swap_nelt) != neon_pair_endian_lane_map (d->vmode, elt))
28770 return false;
28773 /* Success! */
28774 if (d->testing_p)
28775 return true;
28777 switch (d->vmode)
28779 case E_V16QImode: gen = gen_neon_vuzpv16qi_internal; break;
28780 case E_V8QImode: gen = gen_neon_vuzpv8qi_internal; break;
28781 case E_V8HImode: gen = gen_neon_vuzpv8hi_internal; break;
28782 case E_V4HImode: gen = gen_neon_vuzpv4hi_internal; break;
28783 case E_V8HFmode: gen = gen_neon_vuzpv8hf_internal; break;
28784 case E_V4HFmode: gen = gen_neon_vuzpv4hf_internal; break;
28785 case E_V4SImode: gen = gen_neon_vuzpv4si_internal; break;
28786 case E_V2SImode: gen = gen_neon_vuzpv2si_internal; break;
28787 case E_V2SFmode: gen = gen_neon_vuzpv2sf_internal; break;
28788 case E_V4SFmode: gen = gen_neon_vuzpv4sf_internal; break;
28789 default:
28790 gcc_unreachable ();
28793 in0 = d->op0;
28794 in1 = d->op1;
28795 if (swap_nelt != 0)
28796 std::swap (in0, in1);
28798 out0 = d->target;
28799 out1 = gen_reg_rtx (d->vmode);
28800 if (odd)
28801 std::swap (out0, out1);
28803 emit_insn (gen (out0, in0, in1, out1));
28804 return true;
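/* For illustration: with two V4SImode operands on a little-endian target, the
   selector {0, 2, 4, 6} (even elements, odd == 0) or {1, 3, 5, 7} (odd
   elements, odd == 1) is matched above and expanded to a single VUZP, with
   the unwanted half of the result going to a scratch register.  */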
28807 /* Recognize patterns for the VZIP insns. */
28809 static bool
28810 arm_evpc_neon_vzip (struct expand_vec_perm_d *d)
28812 unsigned int i, high, mask, nelt = d->nelt;
28813 rtx out0, out1, in0, in1;
28814 rtx (*gen)(rtx, rtx, rtx, rtx);
28815 int first_elem;
28816 bool is_swapped;
28818 if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
28819 return false;
28821 is_swapped = BYTES_BIG_ENDIAN;
28823 first_elem = d->perm[neon_endian_lane_map (d->vmode, 0) ^ is_swapped];
28825 high = nelt / 2;
28826 if (first_elem == neon_endian_lane_map (d->vmode, high))
28827 ;
28828 else if (first_elem == neon_endian_lane_map (d->vmode, 0))
28829 high = 0;
28830 else
28831 return false;
28832 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
28834 for (i = 0; i < nelt / 2; i++)
28836 unsigned elt =
28837 neon_pair_endian_lane_map (d->vmode, i + high) & mask;
28838 if (d->perm[neon_pair_endian_lane_map (d->vmode, 2 * i + is_swapped)]
28839 != elt)
28840 return false;
28841 elt =
28842 neon_pair_endian_lane_map (d->vmode, i + nelt + high) & mask;
28843 if (d->perm[neon_pair_endian_lane_map (d->vmode, 2 * i + !is_swapped)]
28844 != elt)
28845 return false;
28848 /* Success! */
28849 if (d->testing_p)
28850 return true;
28852 switch (d->vmode)
28854 case E_V16QImode: gen = gen_neon_vzipv16qi_internal; break;
28855 case E_V8QImode: gen = gen_neon_vzipv8qi_internal; break;
28856 case E_V8HImode: gen = gen_neon_vzipv8hi_internal; break;
28857 case E_V4HImode: gen = gen_neon_vzipv4hi_internal; break;
28858 case E_V8HFmode: gen = gen_neon_vzipv8hf_internal; break;
28859 case E_V4HFmode: gen = gen_neon_vzipv4hf_internal; break;
28860 case E_V4SImode: gen = gen_neon_vzipv4si_internal; break;
28861 case E_V2SImode: gen = gen_neon_vzipv2si_internal; break;
28862 case E_V2SFmode: gen = gen_neon_vzipv2sf_internal; break;
28863 case E_V4SFmode: gen = gen_neon_vzipv4sf_internal; break;
28864 default:
28865 gcc_unreachable ();
28868 in0 = d->op0;
28869 in1 = d->op1;
28870 if (is_swapped)
28871 std::swap (in0, in1);
28873 out0 = d->target;
28874 out1 = gen_reg_rtx (d->vmode);
28875 if (high)
28876 std::swap (out0, out1);
28878 emit_insn (gen (out0, in0, in1, out1));
28879 return true;
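/* For illustration: with two V4SImode operands on a little-endian target, the
   selector {0, 4, 1, 5} (high == 0) or {2, 6, 3, 7} (high == nelt / 2) is
   matched above and expanded to a single VZIP.  */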
28882 /* Recognize patterns for the VREV insns. */
28884 static bool
28885 arm_evpc_neon_vrev (struct expand_vec_perm_d *d)
28887 unsigned int i, j, diff, nelt = d->nelt;
28888 rtx (*gen)(rtx, rtx);
28890 if (!d->one_vector_p)
28891 return false;
28893 diff = d->perm[0];
28894 switch (diff)
28896 case 7:
28897 switch (d->vmode)
28899 case E_V16QImode: gen = gen_neon_vrev64v16qi; break;
28900 case E_V8QImode: gen = gen_neon_vrev64v8qi; break;
28901 default:
28902 return false;
28904 break;
28905 case 3:
28906 switch (d->vmode)
28908 case E_V16QImode: gen = gen_neon_vrev32v16qi; break;
28909 case E_V8QImode: gen = gen_neon_vrev32v8qi; break;
28910 case E_V8HImode: gen = gen_neon_vrev64v8hi; break;
28911 case E_V4HImode: gen = gen_neon_vrev64v4hi; break;
28912 case E_V8HFmode: gen = gen_neon_vrev64v8hf; break;
28913 case E_V4HFmode: gen = gen_neon_vrev64v4hf; break;
28914 default:
28915 return false;
28917 break;
28918 case 1:
28919 switch (d->vmode)
28921 case E_V16QImode: gen = gen_neon_vrev16v16qi; break;
28922 case E_V8QImode: gen = gen_neon_vrev16v8qi; break;
28923 case E_V8HImode: gen = gen_neon_vrev32v8hi; break;
28924 case E_V4HImode: gen = gen_neon_vrev32v4hi; break;
28925 case E_V4SImode: gen = gen_neon_vrev64v4si; break;
28926 case E_V2SImode: gen = gen_neon_vrev64v2si; break;
28927 case E_V4SFmode: gen = gen_neon_vrev64v4sf; break;
28928 case E_V2SFmode: gen = gen_neon_vrev64v2sf; break;
28929 default:
28930 return false;
28932 break;
28933 default:
28934 return false;
28937 for (i = 0; i < nelt ; i += diff + 1)
28938 for (j = 0; j <= diff; j += 1)
28940 /* This is guaranteed to be true as the value of diff
28941 is 7, 3, 1 and we should have enough elements in the
28942 queue to generate this. Getting a vector mask with a
28943 value of diff other than these values implies that
28944 something is wrong by the time we get here. */
28945 gcc_assert (i + j < nelt);
28946 if (d->perm[i + j] != i + diff - j)
28947 return false;
28950 /* Success! */
28951 if (d->testing_p)
28952 return true;
28954 emit_insn (gen (d->target, d->op0));
28955 return true;
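/* For illustration: for a single V8HImode operand the selector
   {1, 0, 3, 2, 5, 4, 7, 6} has diff == 1 and is expanded to VREV32.16,
   which swaps adjacent half-word pairs within each word.  */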
28958 /* Recognize patterns for the VTRN insns. */
28960 static bool
28961 arm_evpc_neon_vtrn (struct expand_vec_perm_d *d)
28963 unsigned int i, odd, mask, nelt = d->nelt;
28964 rtx out0, out1, in0, in1;
28965 rtx (*gen)(rtx, rtx, rtx, rtx);
28967 if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
28968 return false;
28970 /* Note that these are little-endian tests. Adjust for big-endian later. */
28971 if (d->perm[0] == 0)
28972 odd = 0;
28973 else if (d->perm[0] == 1)
28974 odd = 1;
28975 else
28976 return false;
28977 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
28979 for (i = 0; i < nelt; i += 2)
28981 if (d->perm[i] != i + odd)
28982 return false;
28983 if (d->perm[i + 1] != ((i + nelt + odd) & mask))
28984 return false;
28987 /* Success! */
28988 if (d->testing_p)
28989 return true;
28991 switch (d->vmode)
28993 case E_V16QImode: gen = gen_neon_vtrnv16qi_internal; break;
28994 case E_V8QImode: gen = gen_neon_vtrnv8qi_internal; break;
28995 case E_V8HImode: gen = gen_neon_vtrnv8hi_internal; break;
28996 case E_V4HImode: gen = gen_neon_vtrnv4hi_internal; break;
28997 case E_V8HFmode: gen = gen_neon_vtrnv8hf_internal; break;
28998 case E_V4HFmode: gen = gen_neon_vtrnv4hf_internal; break;
28999 case E_V4SImode: gen = gen_neon_vtrnv4si_internal; break;
29000 case E_V2SImode: gen = gen_neon_vtrnv2si_internal; break;
29001 case E_V2SFmode: gen = gen_neon_vtrnv2sf_internal; break;
29002 case E_V4SFmode: gen = gen_neon_vtrnv4sf_internal; break;
29003 default:
29004 gcc_unreachable ();
29007 in0 = d->op0;
29008 in1 = d->op1;
29009 if (BYTES_BIG_ENDIAN)
29011 std::swap (in0, in1);
29012 odd = !odd;
29015 out0 = d->target;
29016 out1 = gen_reg_rtx (d->vmode);
29017 if (odd)
29018 std::swap (out0, out1);
29020 emit_insn (gen (out0, in0, in1, out1));
29021 return true;
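/* For illustration: with two V4SImode operands on a little-endian target, the
   selector {0, 4, 2, 6} (odd == 0) or {1, 5, 3, 7} (odd == 1) is matched
   above and expanded to a single VTRN.  */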
29024 /* Recognize patterns for the VEXT insns. */
29026 static bool
29027 arm_evpc_neon_vext (struct expand_vec_perm_d *d)
29029 unsigned int i, nelt = d->nelt;
29030 rtx (*gen) (rtx, rtx, rtx, rtx);
29031 rtx offset;
29033 unsigned int location;
29035 unsigned int next = d->perm[0] + 1;
29037 /* TODO: Handle GCC's numbering of elements for big-endian. */
29038 if (BYTES_BIG_ENDIAN)
29039 return false;
29041 /* Check if the extracted indexes are increasing by one. */
29042 for (i = 1; i < nelt; next++, i++)
29044 /* If we hit the most significant element of the 2nd vector in
29045 the previous iteration, no need to test further. */
29046 if (next == 2 * nelt)
29047 return false;
29049 /* If we are operating on only one vector: it could be a
29050 rotation. If there are only two elements of size < 64, let
29051 arm_evpc_neon_vrev catch it. */
29052 if (d->one_vector_p && (next == nelt))
29054 if ((nelt == 2) && (d->vmode != V2DImode))
29055 return false;
29056 else
29057 next = 0;
29060 if (d->perm[i] != next)
29061 return false;
29064 location = d->perm[0];
29066 switch (d->vmode)
29068 case E_V16QImode: gen = gen_neon_vextv16qi; break;
29069 case E_V8QImode: gen = gen_neon_vextv8qi; break;
29070 case E_V4HImode: gen = gen_neon_vextv4hi; break;
29071 case E_V8HImode: gen = gen_neon_vextv8hi; break;
29072 case E_V2SImode: gen = gen_neon_vextv2si; break;
29073 case E_V4SImode: gen = gen_neon_vextv4si; break;
29074 case E_V4HFmode: gen = gen_neon_vextv4hf; break;
29075 case E_V8HFmode: gen = gen_neon_vextv8hf; break;
29076 case E_V2SFmode: gen = gen_neon_vextv2sf; break;
29077 case E_V4SFmode: gen = gen_neon_vextv4sf; break;
29078 case E_V2DImode: gen = gen_neon_vextv2di; break;
29079 default:
29080 return false;
29083 /* Success! */
29084 if (d->testing_p)
29085 return true;
29087 offset = GEN_INT (location);
29088 emit_insn (gen (d->target, d->op0, d->op1, offset));
29089 return true;
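/* For illustration: with two V8QImode operands on a little-endian target, the
   selector {3, 4, 5, 6, 7, 8, 9, 10} is a sliding window starting at element
   3 of the concatenated operands, so it is matched above and expanded to
   VEXT.8 with an offset of 3.  */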
29092 /* The NEON VTBL instruction is a fully variable permutation that's even
29093 stronger than what we expose via VEC_PERM_EXPR. What it doesn't do
29094 is mask the index operand as VEC_PERM_EXPR requires. Therefore we
29095 can do slightly better by expanding this as a constant where we don't
29096 have to apply a mask. */
29098 static bool
29099 arm_evpc_neon_vtbl (struct expand_vec_perm_d *d)
29101 rtx rperm[MAX_VECT_LEN], sel;
29102 machine_mode vmode = d->vmode;
29103 unsigned int i, nelt = d->nelt;
29105 /* TODO: ARM's VTBL indexing is little-endian. In order to handle GCC's
29106 numbering of elements for big-endian, we must reverse the order. */
29107 if (BYTES_BIG_ENDIAN)
29108 return false;
29110 if (d->testing_p)
29111 return true;
29113 /* Generic code will try constant permutation twice. Once with the
29114 original mode and again with the elements lowered to QImode.
29115 So wait and don't do the selector expansion ourselves. */
29116 if (vmode != V8QImode && vmode != V16QImode)
29117 return false;
29119 for (i = 0; i < nelt; ++i)
29120 rperm[i] = GEN_INT (d->perm[i]);
29121 sel = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rperm));
29122 sel = force_reg (vmode, sel);
29124 arm_expand_vec_perm_1 (d->target, d->op0, d->op1, sel);
29125 return true;
29128 static bool
29129 arm_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
29131 /* Check if the input mask matches vext before reordering the
29132 operands. */
29133 if (TARGET_NEON)
29134 if (arm_evpc_neon_vext (d))
29135 return true;
29137 /* The pattern matching functions above are written to look for a small
29138 number to begin the sequence (0, 1, N/2). If we begin with an index
29139 from the second operand, we can swap the operands. */
29140 if (d->perm[0] >= d->nelt)
29142 unsigned i, nelt = d->nelt;
29144 for (i = 0; i < nelt; ++i)
29145 d->perm[i] = (d->perm[i] + nelt) & (2 * nelt - 1);
29147 std::swap (d->op0, d->op1);
29150 if (TARGET_NEON)
29152 if (arm_evpc_neon_vuzp (d))
29153 return true;
29154 if (arm_evpc_neon_vzip (d))
29155 return true;
29156 if (arm_evpc_neon_vrev (d))
29157 return true;
29158 if (arm_evpc_neon_vtrn (d))
29159 return true;
29160 return arm_evpc_neon_vtbl (d);
29162 return false;
29165 /* Expand a vec_perm_const pattern. */
29167 bool
29168 arm_expand_vec_perm_const (rtx target, rtx op0, rtx op1, rtx sel)
29170 struct expand_vec_perm_d d;
29171 int i, nelt, which;
29173 d.target = target;
29174 d.op0 = op0;
29175 d.op1 = op1;
29177 d.vmode = GET_MODE (target);
29178 gcc_assert (VECTOR_MODE_P (d.vmode));
29179 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
29180 d.testing_p = false;
29182 for (i = which = 0; i < nelt; ++i)
29184 rtx e = XVECEXP (sel, 0, i);
29185 int ei = INTVAL (e) & (2 * nelt - 1);
29186 which |= (ei < nelt ? 1 : 2);
29187 d.perm[i] = ei;
29190 switch (which)
29192 default:
29193 gcc_unreachable();
29195 case 3:
29196 d.one_vector_p = false;
29197 if (!rtx_equal_p (op0, op1))
29198 break;
29200 /* The elements of PERM do not suggest that only the first operand
29201 is used, but both operands are identical. Allow easier matching
29202 of the permutation by folding the permutation into the single
29203 input vector. */
29204 /* FALLTHRU */
29205 case 2:
29206 for (i = 0; i < nelt; ++i)
29207 d.perm[i] &= nelt - 1;
29208 d.op0 = op1;
29209 d.one_vector_p = true;
29210 break;
29212 case 1:
29213 d.op1 = op0;
29214 d.one_vector_p = true;
29215 break;
29218 return arm_expand_vec_perm_const_1 (&d);
29221 /* Implement TARGET_VECTORIZE_VEC_PERM_CONST_OK. */
29223 static bool
29224 arm_vectorize_vec_perm_const_ok (machine_mode vmode,
29225 const unsigned char *sel)
29227 struct expand_vec_perm_d d;
29228 unsigned int i, nelt, which;
29229 bool ret;
29231 d.vmode = vmode;
29232 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
29233 d.testing_p = true;
29234 memcpy (d.perm, sel, nelt);
29236 /* Categorize the set of elements in the selector. */
29237 for (i = which = 0; i < nelt; ++i)
29239 unsigned char e = d.perm[i];
29240 gcc_assert (e < 2 * nelt);
29241 which |= (e < nelt ? 1 : 2);
29244 /* If all elements are from the second vector, fold them onto the first. */
29245 if (which == 2)
29246 for (i = 0; i < nelt; ++i)
29247 d.perm[i] -= nelt;
29249 /* Check whether the mask can be applied to the vector type. */
29250 d.one_vector_p = (which != 3);
29252 d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
29253 d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
29254 if (!d.one_vector_p)
29255 d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);
29257 start_sequence ();
29258 ret = arm_expand_vec_perm_const_1 (&d);
29259 end_sequence ();
29261 return ret;
29264 bool
29265 arm_autoinc_modes_ok_p (machine_mode mode, enum arm_auto_incmodes code)
29267 /* If we are soft float and we do not have ldrd
29268 then all auto increment forms are ok. */
29269 if (TARGET_SOFT_FLOAT && (TARGET_LDRD || GET_MODE_SIZE (mode) <= 4))
29270 return true;
29272 switch (code)
29274 /* Post increment and Pre Decrement are supported for all
29275 instruction forms except for vector forms. */
29276 case ARM_POST_INC:
29277 case ARM_PRE_DEC:
29278 if (VECTOR_MODE_P (mode))
29280 if (code != ARM_PRE_DEC)
29281 return true;
29282 else
29283 return false;
29286 return true;
29288 case ARM_POST_DEC:
29289 case ARM_PRE_INC:
29290 /* Without LDRD and mode size greater than
29291 word size, there is no point in auto-incrementing
29292 because ldm and stm will not have these forms. */
29293 if (!TARGET_LDRD && GET_MODE_SIZE (mode) > 4)
29294 return false;
29296 /* Vector and floating point modes do not support
29297 these auto increment forms. */
29298 if (FLOAT_MODE_P (mode) || VECTOR_MODE_P (mode))
29299 return false;
29301 return true;
29303 default:
29304 return false;
29308 return false;
29311 /* The default expansion of general 64-bit shifts in core-regs is suboptimal,
29312 on ARM, since we know that shifts by negative amounts are no-ops.
29313 Additionally, the default expansion code is not available or suitable
29314 for post-reload insn splits (this can occur when the register allocator
29315 chooses not to do a shift in NEON).
29317 This function is used in both initial expand and post-reload splits, and
29318 handles all kinds of 64-bit shifts.
29320 Input requirements:
29321 - It is safe for the input and output to be the same register, but
29322 early-clobber rules apply for the shift amount and scratch registers.
29323 - Shift by register requires both scratch registers. In all other cases
29324 the scratch registers may be NULL.
29325 - Ashiftrt by a register also clobbers the CC register. */
29326 void
29327 arm_emit_coreregs_64bit_shift (enum rtx_code code, rtx out, rtx in,
29328 rtx amount, rtx scratch1, rtx scratch2)
29330 rtx out_high = gen_highpart (SImode, out);
29331 rtx out_low = gen_lowpart (SImode, out);
29332 rtx in_high = gen_highpart (SImode, in);
29333 rtx in_low = gen_lowpart (SImode, in);
29335 /* Terminology:
29336 in = the register pair containing the input value.
29337 out = the destination register pair.
29338 up = the high- or low-part of each pair.
29339 down = the opposite part to "up".
29340 In a shift, we can consider bits to shift from "up"-stream to
29341 "down"-stream, so in a left-shift "up" is the low-part and "down"
29342 is the high-part of each register pair. */
29344 rtx out_up = code == ASHIFT ? out_low : out_high;
29345 rtx out_down = code == ASHIFT ? out_high : out_low;
29346 rtx in_up = code == ASHIFT ? in_low : in_high;
29347 rtx in_down = code == ASHIFT ? in_high : in_low;
29349 gcc_assert (code == ASHIFT || code == ASHIFTRT || code == LSHIFTRT);
29350 gcc_assert (out
29351 && (REG_P (out) || GET_CODE (out) == SUBREG)
29352 && GET_MODE (out) == DImode);
29353 gcc_assert (in
29354 && (REG_P (in) || GET_CODE (in) == SUBREG)
29355 && GET_MODE (in) == DImode);
29356 gcc_assert (amount
29357 && (((REG_P (amount) || GET_CODE (amount) == SUBREG)
29358 && GET_MODE (amount) == SImode)
29359 || CONST_INT_P (amount)));
29360 gcc_assert (scratch1 == NULL
29361 || (GET_CODE (scratch1) == SCRATCH)
29362 || (GET_MODE (scratch1) == SImode
29363 && REG_P (scratch1)));
29364 gcc_assert (scratch2 == NULL
29365 || (GET_CODE (scratch2) == SCRATCH)
29366 || (GET_MODE (scratch2) == SImode
29367 && REG_P (scratch2)));
29368 gcc_assert (!REG_P (out) || !REG_P (amount)
29369 || !HARD_REGISTER_P (out)
29370 || (REGNO (out) != REGNO (amount)
29371 && REGNO (out) + 1 != REGNO (amount)));
29373 /* Macros to make following code more readable. */
29374 #define SUB_32(DEST,SRC) \
29375 gen_addsi3 ((DEST), (SRC), GEN_INT (-32))
29376 #define RSB_32(DEST,SRC) \
29377 gen_subsi3 ((DEST), GEN_INT (32), (SRC))
29378 #define SUB_S_32(DEST,SRC) \
29379 gen_addsi3_compare0 ((DEST), (SRC), \
29380 GEN_INT (-32))
29381 #define SET(DEST,SRC) \
29382 gen_rtx_SET ((DEST), (SRC))
29383 #define SHIFT(CODE,SRC,AMOUNT) \
29384 gen_rtx_fmt_ee ((CODE), SImode, (SRC), (AMOUNT))
29385 #define LSHIFT(CODE,SRC,AMOUNT) \
29386 gen_rtx_fmt_ee ((CODE) == ASHIFT ? ASHIFT : LSHIFTRT, \
29387 SImode, (SRC), (AMOUNT))
29388 #define REV_LSHIFT(CODE,SRC,AMOUNT) \
29389 gen_rtx_fmt_ee ((CODE) == ASHIFT ? LSHIFTRT : ASHIFT, \
29390 SImode, (SRC), (AMOUNT))
29391 #define ORR(A,B) \
29392 gen_rtx_IOR (SImode, (A), (B))
29393 #define BRANCH(COND,LABEL) \
29394 gen_arm_cond_branch ((LABEL), \
29395 gen_rtx_ ## COND (CCmode, cc_reg, \
29396 const0_rtx), \
29397 cc_reg)
29399 /* Shifts by register and shifts by constant are handled separately. */
29400 if (CONST_INT_P (amount))
29402 /* We have a shift-by-constant. */
29404 /* First, handle out-of-range shift amounts.
29405 In both cases we try to match the result an ARM instruction in a
29406 shift-by-register would give. This helps reduce execution
29407 differences between optimization levels, but it won't stop other
29408 parts of the compiler doing different things. This is "undefined
29409 behavior", in any case. */
29410 if (INTVAL (amount) <= 0)
29411 emit_insn (gen_movdi (out, in));
29412 else if (INTVAL (amount) >= 64)
29414 if (code == ASHIFTRT)
29416 rtx const31_rtx = GEN_INT (31);
29417 emit_insn (SET (out_down, SHIFT (code, in_up, const31_rtx)));
29418 emit_insn (SET (out_up, SHIFT (code, in_up, const31_rtx)));
29420 else
29421 emit_insn (gen_movdi (out, const0_rtx));
29424 /* Now handle valid shifts. */
29425 else if (INTVAL (amount) < 32)
29427 /* Shifts by a constant less than 32. */
29428 rtx reverse_amount = GEN_INT (32 - INTVAL (amount));
29430 /* Clearing the out register in DImode first avoids lots
29431 of spilling and results in less stack usage.
29432 Later this redundant insn is completely removed.
29433 Do that only if "in" and "out" are different registers. */
29434 if (REG_P (out) && REG_P (in) && REGNO (out) != REGNO (in))
29435 emit_insn (SET (out, const0_rtx));
29436 emit_insn (SET (out_down, LSHIFT (code, in_down, amount)));
29437 emit_insn (SET (out_down,
29438 ORR (REV_LSHIFT (code, in_up, reverse_amount),
29439 out_down)));
29440 emit_insn (SET (out_up, SHIFT (code, in_up, amount)));
29442 else
29444 /* Shifts by a constant greater than 31. */
29445 rtx adj_amount = GEN_INT (INTVAL (amount) - 32);
29447 if (REG_P (out) && REG_P (in) && REGNO (out) != REGNO (in))
29448 emit_insn (SET (out, const0_rtx));
29449 emit_insn (SET (out_down, SHIFT (code, in_up, adj_amount)));
29450 if (code == ASHIFTRT)
29451 emit_insn (gen_ashrsi3 (out_up, in_up,
29452 GEN_INT (31)));
29453 else
29454 emit_insn (SET (out_up, const0_rtx));
29457 else
29459 /* We have a shift-by-register. */
29460 rtx cc_reg = gen_rtx_REG (CC_NOOVmode, CC_REGNUM);
29462 /* This alternative requires the scratch registers. */
29463 gcc_assert (scratch1 && REG_P (scratch1));
29464 gcc_assert (scratch2 && REG_P (scratch2));
29466 /* We will need the values "amount-32" and "32-amount" later.
29467 Swapping them around now allows the later code to be more general. */
29468 switch (code)
29470 case ASHIFT:
29471 emit_insn (SUB_32 (scratch1, amount));
29472 emit_insn (RSB_32 (scratch2, amount));
29473 break;
29474 case ASHIFTRT:
29475 emit_insn (RSB_32 (scratch1, amount));
29476 /* Also set CC = amount > 32. */
29477 emit_insn (SUB_S_32 (scratch2, amount));
29478 break;
29479 case LSHIFTRT:
29480 emit_insn (RSB_32 (scratch1, amount));
29481 emit_insn (SUB_32 (scratch2, amount));
29482 break;
29483 default:
29484 gcc_unreachable ();
29487 /* Emit code like this:
29489 arithmetic-left:
29490 out_down = in_down << amount;
29491 out_down = (in_up << (amount - 32)) | out_down;
29492 out_down = ((unsigned)in_up >> (32 - amount)) | out_down;
29493 out_up = in_up << amount;
29495 arithmetic-right:
29496 out_down = in_down >> amount;
29497 out_down = (in_up << (32 - amount)) | out_down;
29498 if (amount < 32)
29499 out_down = ((signed)in_up >> (amount - 32)) | out_down;
29500 out_up = in_up << amount;
29502 logical-right:
29503 out_down = in_down >> amount;
29504 out_down = (in_up << (32 - amount)) | out_down;
29505 if (amount < 32)
29506 out_down = ((unsigned)in_up >> (amount - 32)) | out_down;
29507 out_up = in_up << amount;
29509 The ARM and Thumb2 variants are the same but implemented slightly
29510 differently. If this were only called during expand we could just
29511 use the Thumb2 case and let combine do the right thing, but this
29512 can also be called from post-reload splitters. */
29514 emit_insn (SET (out_down, LSHIFT (code, in_down, amount)));
29516 if (!TARGET_THUMB2)
29518 /* Emit code for ARM mode. */
29519 emit_insn (SET (out_down,
29520 ORR (SHIFT (ASHIFT, in_up, scratch1), out_down)));
29521 if (code == ASHIFTRT)
29523 rtx_code_label *done_label = gen_label_rtx ();
29524 emit_jump_insn (BRANCH (LT, done_label));
29525 emit_insn (SET (out_down, ORR (SHIFT (ASHIFTRT, in_up, scratch2),
29526 out_down)));
29527 emit_label (done_label);
29529 else
29530 emit_insn (SET (out_down, ORR (SHIFT (LSHIFTRT, in_up, scratch2),
29531 out_down)));
29533 else
29535 /* Emit code for Thumb2 mode.
29536 Thumb2 can't do shift and or in one insn. */
29537 emit_insn (SET (scratch1, SHIFT (ASHIFT, in_up, scratch1)));
29538 emit_insn (gen_iorsi3 (out_down, out_down, scratch1));
29540 if (code == ASHIFTRT)
29542 rtx_code_label *done_label = gen_label_rtx ();
29543 emit_jump_insn (BRANCH (LT, done_label));
29544 emit_insn (SET (scratch2, SHIFT (ASHIFTRT, in_up, scratch2)));
29545 emit_insn (SET (out_down, ORR (out_down, scratch2)));
29546 emit_label (done_label);
29548 else
29550 emit_insn (SET (scratch2, SHIFT (LSHIFTRT, in_up, scratch2)));
29551 emit_insn (gen_iorsi3 (out_down, out_down, scratch2));
29555 emit_insn (SET (out_up, SHIFT (code, in_up, amount)));
29558 #undef SUB_32
29559 #undef RSB_32
29560 #undef SUB_S_32
29561 #undef SET
29562 #undef SHIFT
29563 #undef LSHIFT
29564 #undef REV_LSHIFT
29565 #undef ORR
29566 #undef BRANCH
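/* For illustration: in ARM mode a 64-bit logical right shift by a register
   AMOUNT produces roughly

       rsb   scratch1, amount, #32
       sub   scratch2, amount, #32
       lsr   out_lo, in_lo, amount
       orr   out_lo, out_lo, in_hi, lsl scratch1
       orr   out_lo, out_lo, in_hi, lsr scratch2
       lsr   out_hi, in_hi, amount

   This relies on register-specified shifts using the bottom byte of the
   shift register, so a scratch value that is negative or 32 or more shifts
   its ORR term out entirely and contributes nothing.  */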
29569 /* Returns true if the pattern is a valid symbolic address, which is either a
29570 symbol_ref or (symbol_ref + addend).
29572 According to the ARM ELF ABI, the initial addend of REL-type relocations
29573 processing MOVW and MOVT instructions is formed by interpreting the 16-bit
29574 literal field of the instruction as a 16-bit signed value in the range
29575 -32768 <= A < 32768. */
29577 bool
29578 arm_valid_symbolic_address_p (rtx addr)
29580 rtx xop0, xop1 = NULL_RTX;
29581 rtx tmp = addr;
29583 if (GET_CODE (tmp) == SYMBOL_REF || GET_CODE (tmp) == LABEL_REF)
29584 return true;
29586 /* (const (plus: symbol_ref const_int)) */
29587 if (GET_CODE (addr) == CONST)
29588 tmp = XEXP (addr, 0);
29590 if (GET_CODE (tmp) == PLUS)
29592 xop0 = XEXP (tmp, 0);
29593 xop1 = XEXP (tmp, 1);
29595 if (GET_CODE (xop0) == SYMBOL_REF && CONST_INT_P (xop1))
29596 return IN_RANGE (INTVAL (xop1), -0x8000, 0x7fff);
29599 return false;
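/* For illustration: (symbol_ref "foo") and
   (const (plus (symbol_ref "foo") (const_int 64))) are accepted above, while
   an addend of 0x8000 or beyond -0x8000 is rejected because it cannot be
   encoded in the 16-bit signed addend of a MOVW/MOVT REL relocation.  */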
29602 /* Returns true if *COMPARISON is a valid comparison operation and puts
29603 the operands into a form that is valid. */
29604 bool
29605 arm_validize_comparison (rtx *comparison, rtx * op1, rtx * op2)
29607 enum rtx_code code = GET_CODE (*comparison);
29608 int code_int;
29609 machine_mode mode = (GET_MODE (*op1) == VOIDmode)
29610 ? GET_MODE (*op2) : GET_MODE (*op1);
29612 gcc_assert (GET_MODE (*op1) != VOIDmode || GET_MODE (*op2) != VOIDmode);
29614 if (code == UNEQ || code == LTGT)
29615 return false;
29617 code_int = (int)code;
29618 arm_canonicalize_comparison (&code_int, op1, op2, 0);
29619 PUT_CODE (*comparison, (enum rtx_code)code_int);
29621 switch (mode)
29623 case E_SImode:
29624 if (!arm_add_operand (*op1, mode))
29625 *op1 = force_reg (mode, *op1);
29626 if (!arm_add_operand (*op2, mode))
29627 *op2 = force_reg (mode, *op2);
29628 return true;
29630 case E_DImode:
29631 if (!cmpdi_operand (*op1, mode))
29632 *op1 = force_reg (mode, *op1);
29633 if (!cmpdi_operand (*op2, mode))
29634 *op2 = force_reg (mode, *op2);
29635 return true;
29637 case E_HFmode:
29638 if (!TARGET_VFP_FP16INST)
29639 break;
29640 /* FP16 comparisons are done in SF mode. */
29641 mode = SFmode;
29642 *op1 = convert_to_mode (mode, *op1, 1);
29643 *op2 = convert_to_mode (mode, *op2, 1);
29644 /* Fall through. */
29645 case E_SFmode:
29646 case E_DFmode:
29647 if (!vfp_compare_operand (*op1, mode))
29648 *op1 = force_reg (mode, *op1);
29649 if (!vfp_compare_operand (*op2, mode))
29650 *op2 = force_reg (mode, *op2);
29651 return true;
29652 default:
29653 break;
29656 return false;
29660 /* Maximum number of instructions to set block of memory. */
29661 static int
29662 arm_block_set_max_insns (void)
29664 if (optimize_function_for_size_p (cfun))
29665 return 4;
29666 else
29667 return current_tune->max_insns_inline_memset;
29670 /* Return TRUE if it's profitable to set block of memory for
29671 non-vectorized case. VAL is the value to set the memory
29672 with. LENGTH is the number of bytes to set. ALIGN is the
29673 alignment of the destination memory in bytes. UNALIGNED_P
29674 is TRUE if we can only set the memory with instructions
29675 meeting alignment requirements. USE_STRD_P is TRUE if we
29676 can use strd to set the memory. */
29677 static bool
29678 arm_block_set_non_vect_profit_p (rtx val,
29679 unsigned HOST_WIDE_INT length,
29680 unsigned HOST_WIDE_INT align,
29681 bool unaligned_p, bool use_strd_p)
29683 int num = 0;
29684 /* For leftovers of 0-7 bytes, we can set the memory block using the
29685 minimum number of strb/strh/str instructions. */
29686 const int leftover[8] = {0, 1, 1, 2, 1, 2, 2, 3};
29688 if (unaligned_p)
29690 num = arm_const_inline_cost (SET, val);
29691 num += length / align + length % align;
29693 else if (use_strd_p)
29695 num = arm_const_double_inline_cost (val);
29696 num += (length >> 3) + leftover[length & 7];
29698 else
29700 num = arm_const_inline_cost (SET, val);
29701 num += (length >> 2) + leftover[length & 3];
29704 /* We may be able to combine last pair STRH/STRB into a single STR
29705 by shifting one byte back. */
29706 if (unaligned_access && length > 3 && (length & 3) == 3)
29707 num--;
29709 return (num <= arm_block_set_max_insns ());
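/* For illustration: assuming the constant itself costs one instruction,
   setting 15 bytes with word stores (use_strd_p and unaligned_p both false)
   needs 1 + (15 >> 2) + leftover[15 & 3] = 1 + 3 + 2 = 6 instructions,
   reduced to 5 when unaligned access lets the final STRH/STRB pair be merged
   into a single STR.  */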
29712 /* Return TRUE if it's profitable to set block of memory for
29713 vectorized case. LENGTH is the number of bytes to set.
29714 ALIGN is the alignment of destination memory in bytes.
29715 MODE is the vector mode used to set the memory. */
29716 static bool
29717 arm_block_set_vect_profit_p (unsigned HOST_WIDE_INT length,
29718 unsigned HOST_WIDE_INT align,
29719 machine_mode mode)
29721 int num;
29722 bool unaligned_p = ((align & 3) != 0);
29723 unsigned int nelt = GET_MODE_NUNITS (mode);
29725 /* Instruction loading constant value. */
29726 num = 1;
29727 /* Instructions storing the memory. */
29728 num += (length + nelt - 1) / nelt;
29729 /* Instructions adjusting the address expression. We only need to
29730 adjust the address expression if it's 4-byte aligned and the leftover
29731 bytes can only be stored by a misaligned store instruction. */
29732 if (!unaligned_p && (length & 3) != 0)
29733 num++;
29735 /* Store the first 16 bytes using vst1:v16qi for the aligned case. */
29736 if (!unaligned_p && mode == V16QImode)
29737 num--;
29739 return (num <= arm_block_set_max_insns ());
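/* For illustration: setting 17 bytes with V16QImode on a 4-byte-aligned
   destination costs 1 (load the constant) + 2 (stores) + 1 (address
   adjustment for the leftover byte) - 1 (the first vst1 needs no
   adjustment) = 3 instructions, so it is considered profitable.  */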
29742 /* Set a block of memory using vectorization instructions for the
29743 unaligned case. We fill the first LENGTH bytes of the memory
29744 area starting from DSTBASE with byte constant VALUE. ALIGN is
29745 the alignment requirement of memory. Return TRUE if succeeded. */
29746 static bool
29747 arm_block_set_unaligned_vect (rtx dstbase,
29748 unsigned HOST_WIDE_INT length,
29749 unsigned HOST_WIDE_INT value,
29750 unsigned HOST_WIDE_INT align)
29752 unsigned int i, j, nelt_v16, nelt_v8, nelt_mode;
29753 rtx dst, mem;
29754 rtx val_elt, val_vec, reg;
29755 rtx rval[MAX_VECT_LEN];
29756 rtx (*gen_func) (rtx, rtx);
29757 machine_mode mode;
29758 unsigned HOST_WIDE_INT v = value;
29759 unsigned int offset = 0;
29760 gcc_assert ((align & 0x3) != 0);
29761 nelt_v8 = GET_MODE_NUNITS (V8QImode);
29762 nelt_v16 = GET_MODE_NUNITS (V16QImode);
29763 if (length >= nelt_v16)
29765 mode = V16QImode;
29766 gen_func = gen_movmisalignv16qi;
29768 else
29770 mode = V8QImode;
29771 gen_func = gen_movmisalignv8qi;
29773 nelt_mode = GET_MODE_NUNITS (mode);
29774 gcc_assert (length >= nelt_mode);
29775 /* Skip if it isn't profitable. */
29776 if (!arm_block_set_vect_profit_p (length, align, mode))
29777 return false;
29779 dst = copy_addr_to_reg (XEXP (dstbase, 0));
29780 mem = adjust_automodify_address (dstbase, mode, dst, offset);
29782 v = sext_hwi (v, BITS_PER_WORD);
29783 val_elt = GEN_INT (v);
29784 for (j = 0; j < nelt_mode; j++)
29785 rval[j] = val_elt;
29787 reg = gen_reg_rtx (mode);
29788 val_vec = gen_rtx_CONST_VECTOR (mode, gen_rtvec_v (nelt_mode, rval));
29789 /* Emit instruction loading the constant value. */
29790 emit_move_insn (reg, val_vec);
29792 /* Handle nelt_mode bytes in a vector. */
29793 for (i = 0; (i + nelt_mode <= length); i += nelt_mode)
29795 emit_insn ((*gen_func) (mem, reg));
29796 if (i + 2 * nelt_mode <= length)
29798 emit_insn (gen_add2_insn (dst, GEN_INT (nelt_mode)));
29799 offset += nelt_mode;
29800 mem = adjust_automodify_address (dstbase, mode, dst, offset);
29804 /* If at least nelt_v8 bytes are left over, we must be in
29805 V16QImode. */
29806 gcc_assert ((i + nelt_v8) > length || mode == V16QImode);
29808 /* Handle (8, 16) bytes leftover. */
29809 if (i + nelt_v8 < length)
29811 emit_insn (gen_add2_insn (dst, GEN_INT (length - i)));
29812 offset += length - i;
29813 mem = adjust_automodify_address (dstbase, mode, dst, offset);
29815 /* We are shifting bytes back, set the alignment accordingly. */
29816 if ((length & 1) != 0 && align >= 2)
29817 set_mem_align (mem, BITS_PER_UNIT);
29819 emit_insn (gen_movmisalignv16qi (mem, reg));
29821 /* Handle (0, 8] bytes leftover. */
29822 else if (i < length && i + nelt_v8 >= length)
29824 if (mode == V16QImode)
29825 reg = gen_lowpart (V8QImode, reg);
29827 emit_insn (gen_add2_insn (dst, GEN_INT ((length - i)
29828 + (nelt_mode - nelt_v8))));
29829 offset += (length - i) + (nelt_mode - nelt_v8);
29830 mem = adjust_automodify_address (dstbase, V8QImode, dst, offset);
29832 /* We are shifting bytes back, set the alignment accordingly. */
29833 if ((length & 1) != 0 && align >= 2)
29834 set_mem_align (mem, BITS_PER_UNIT);
29836 emit_insn (gen_movmisalignv8qi (mem, reg));
29839 return true;
29842 /* Set a block of memory using vectorization instructions for the
29843 aligned case. We fill the first LENGTH bytes of the memory area
29844 starting from DSTBASE with byte constant VALUE. ALIGN is the
29845 alignment requirement of memory. Return TRUE if succeeded. */
29846 static bool
29847 arm_block_set_aligned_vect (rtx dstbase,
29848 unsigned HOST_WIDE_INT length,
29849 unsigned HOST_WIDE_INT value,
29850 unsigned HOST_WIDE_INT align)
29852 unsigned int i, j, nelt_v8, nelt_v16, nelt_mode;
29853 rtx dst, addr, mem;
29854 rtx val_elt, val_vec, reg;
29855 rtx rval[MAX_VECT_LEN];
29856 machine_mode mode;
29857 unsigned HOST_WIDE_INT v = value;
29858 unsigned int offset = 0;
29860 gcc_assert ((align & 0x3) == 0);
29861 nelt_v8 = GET_MODE_NUNITS (V8QImode);
29862 nelt_v16 = GET_MODE_NUNITS (V16QImode);
29863 if (length >= nelt_v16 && unaligned_access && !BYTES_BIG_ENDIAN)
29864 mode = V16QImode;
29865 else
29866 mode = V8QImode;
29868 nelt_mode = GET_MODE_NUNITS (mode);
29869 gcc_assert (length >= nelt_mode);
29870 /* Skip if it isn't profitable. */
29871 if (!arm_block_set_vect_profit_p (length, align, mode))
29872 return false;
29874 dst = copy_addr_to_reg (XEXP (dstbase, 0));
29876 v = sext_hwi (v, BITS_PER_WORD);
29877 val_elt = GEN_INT (v);
29878 for (j = 0; j < nelt_mode; j++)
29879 rval[j] = val_elt;
29881 reg = gen_reg_rtx (mode);
29882 val_vec = gen_rtx_CONST_VECTOR (mode, gen_rtvec_v (nelt_mode, rval));
29883 /* Emit instruction loading the constant value. */
29884 emit_move_insn (reg, val_vec);
29886 i = 0;
29887 /* Handle first 16 bytes specially using vst1:v16qi instruction. */
29888 if (mode == V16QImode)
29890 mem = adjust_automodify_address (dstbase, mode, dst, offset);
29891 emit_insn (gen_movmisalignv16qi (mem, reg));
29892 i += nelt_mode;
29893 /* Handle (8, 16) bytes leftover using vst1:v16qi again. */
29894 if (i + nelt_v8 < length && i + nelt_v16 > length)
29896 emit_insn (gen_add2_insn (dst, GEN_INT (length - nelt_mode)));
29897 offset += length - nelt_mode;
29898 mem = adjust_automodify_address (dstbase, mode, dst, offset);
29899 /* We are shifting bytes back, set the alignment accordingly. */
29900 if ((length & 0x3) == 0)
29901 set_mem_align (mem, BITS_PER_UNIT * 4);
29902 else if ((length & 0x1) == 0)
29903 set_mem_align (mem, BITS_PER_UNIT * 2);
29904 else
29905 set_mem_align (mem, BITS_PER_UNIT);
29907 emit_insn (gen_movmisalignv16qi (mem, reg));
29908 return true;
29910 /* Fall through for bytes leftover. */
29911 mode = V8QImode;
29912 nelt_mode = GET_MODE_NUNITS (mode);
29913 reg = gen_lowpart (V8QImode, reg);
29916 /* Handle 8 bytes in a vector. */
29917 for (; (i + nelt_mode <= length); i += nelt_mode)
29919 addr = plus_constant (Pmode, dst, i);
29920 mem = adjust_automodify_address (dstbase, mode, addr, offset + i);
29921 emit_move_insn (mem, reg);
29924 /* Handle single word leftover by shifting 4 bytes back. We can
29925 use aligned access for this case. */
29926 if (i + UNITS_PER_WORD == length)
29928 addr = plus_constant (Pmode, dst, i - UNITS_PER_WORD);
29929 offset += i - UNITS_PER_WORD;
29930 mem = adjust_automodify_address (dstbase, mode, addr, offset);
29931 /* We are shifting 4 bytes back, set the alignment accordingly. */
29932 if (align > UNITS_PER_WORD)
29933 set_mem_align (mem, BITS_PER_UNIT * UNITS_PER_WORD);
29935 emit_move_insn (mem, reg);
29937 /* Handle (0, 4), (4, 8) bytes leftover by shifting bytes back.
29938 We have to use unaligned access for this case. */
29939 else if (i < length)
29941 emit_insn (gen_add2_insn (dst, GEN_INT (length - nelt_mode)));
29942 offset += length - nelt_mode;
29943 mem = adjust_automodify_address (dstbase, mode, dst, offset);
29944 /* We are shifting bytes back, set the alignment accordingly. */
29945 if ((length & 1) == 0)
29946 set_mem_align (mem, BITS_PER_UNIT * 2);
29947 else
29948 set_mem_align (mem, BITS_PER_UNIT);
29950 emit_insn (gen_movmisalignv8qi (mem, reg));
29953 return true;
29956 /* Set a block of memory using plain strh/strb instructions, only
29957 using instructions allowed by ALIGN on the processor. We fill the
29958 first LENGTH bytes of the memory area starting from DSTBASE
29959 with byte constant VALUE. ALIGN is the alignment requirement
29960 of memory. */
29961 static bool
29962 arm_block_set_unaligned_non_vect (rtx dstbase,
29963 unsigned HOST_WIDE_INT length,
29964 unsigned HOST_WIDE_INT value,
29965 unsigned HOST_WIDE_INT align)
29967 unsigned int i;
29968 rtx dst, addr, mem;
29969 rtx val_exp, val_reg, reg;
29970 machine_mode mode;
29971 HOST_WIDE_INT v = value;
29973 gcc_assert (align == 1 || align == 2);
29975 if (align == 2)
29976 v |= (value << BITS_PER_UNIT);
29978 v = sext_hwi (v, BITS_PER_WORD);
29979 val_exp = GEN_INT (v);
29980 /* Skip if it isn't profitable. */
29981 if (!arm_block_set_non_vect_profit_p (val_exp, length,
29982 align, true, false))
29983 return false;
29985 dst = copy_addr_to_reg (XEXP (dstbase, 0));
29986 mode = (align == 2 ? HImode : QImode);
29987 val_reg = force_reg (SImode, val_exp);
29988 reg = gen_lowpart (mode, val_reg);
29990 for (i = 0; (i + GET_MODE_SIZE (mode) <= length); i += GET_MODE_SIZE (mode))
29992 addr = plus_constant (Pmode, dst, i);
29993 mem = adjust_automodify_address (dstbase, mode, addr, i);
29994 emit_move_insn (mem, reg);
29997 /* Handle single byte leftover. */
29998 if (i + 1 == length)
30000 reg = gen_lowpart (QImode, val_reg);
30001 addr = plus_constant (Pmode, dst, i);
30002 mem = adjust_automodify_address (dstbase, QImode, addr, i);
30003 emit_move_insn (mem, reg);
30004 i++;
30007 gcc_assert (i == length);
30008 return true;
30011 /* Set a block of memory using plain strd/str/strh/strb instructions,
30012 to permit unaligned copies on processors which support unaligned
30013 semantics for those instructions. We fill the first LENGTH bytes
30014 of the memory area starting from DSTBASE with byte constant VALUE.
30015 ALIGN is the alignment requirement of memory. */
30016 static bool
30017 arm_block_set_aligned_non_vect (rtx dstbase,
30018 unsigned HOST_WIDE_INT length,
30019 unsigned HOST_WIDE_INT value,
30020 unsigned HOST_WIDE_INT align)
30022 unsigned int i;
30023 rtx dst, addr, mem;
30024 rtx val_exp, val_reg, reg;
30025 unsigned HOST_WIDE_INT v;
30026 bool use_strd_p;
30028 use_strd_p = (length >= 2 * UNITS_PER_WORD && (align & 3) == 0
30029 && TARGET_LDRD && current_tune->prefer_ldrd_strd);
30031 v = (value | (value << 8) | (value << 16) | (value << 24));
30032 if (length < UNITS_PER_WORD)
30033 v &= (0xFFFFFFFF >> (UNITS_PER_WORD - length) * BITS_PER_UNIT);
30035 if (use_strd_p)
30036 v |= (v << BITS_PER_WORD);
30037 else
30038 v = sext_hwi (v, BITS_PER_WORD);
30040 val_exp = GEN_INT (v);
30041 /* Skip if it isn't profitable. */
30042 if (!arm_block_set_non_vect_profit_p (val_exp, length,
30043 align, false, use_strd_p))
30045 if (!use_strd_p)
30046 return false;
30048 /* Try without strd. */
30049 v = (v >> BITS_PER_WORD);
30050 v = sext_hwi (v, BITS_PER_WORD);
30051 val_exp = GEN_INT (v);
30052 use_strd_p = false;
30053 if (!arm_block_set_non_vect_profit_p (val_exp, length,
30054 align, false, use_strd_p))
30055 return false;
30058 i = 0;
30059 dst = copy_addr_to_reg (XEXP (dstbase, 0));
30060 /* Handle double words using strd if possible. */
30061 if (use_strd_p)
30063 val_reg = force_reg (DImode, val_exp);
30064 reg = val_reg;
30065 for (; (i + 8 <= length); i += 8)
30067 addr = plus_constant (Pmode, dst, i);
30068 mem = adjust_automodify_address (dstbase, DImode, addr, i);
30069 emit_move_insn (mem, reg);
30072 else
30073 val_reg = force_reg (SImode, val_exp);
30075 /* Handle words. */
30076 reg = (use_strd_p ? gen_lowpart (SImode, val_reg) : val_reg);
30077 for (; (i + 4 <= length); i += 4)
30079 addr = plus_constant (Pmode, dst, i);
30080 mem = adjust_automodify_address (dstbase, SImode, addr, i);
30081 if ((align & 3) == 0)
30082 emit_move_insn (mem, reg);
30083 else
30084 emit_insn (gen_unaligned_storesi (mem, reg));
30087 /* Merge last pair of STRH and STRB into a STR if possible. */
30088 if (unaligned_access && i > 0 && (i + 3) == length)
30090 addr = plus_constant (Pmode, dst, i - 1);
30091 mem = adjust_automodify_address (dstbase, SImode, addr, i - 1);
30092 /* We are shifting one byte back, set the alignment accordingly. */
30093 if ((align & 1) == 0)
30094 set_mem_align (mem, BITS_PER_UNIT);
30096 /* Most likely this is an unaligned access, and we can't tell at
30097 compilation time. */
30098 emit_insn (gen_unaligned_storesi (mem, reg));
30099 return true;
30102 /* Handle half word leftover. */
30103 if (i + 2 <= length)
30105 reg = gen_lowpart (HImode, val_reg);
30106 addr = plus_constant (Pmode, dst, i);
30107 mem = adjust_automodify_address (dstbase, HImode, addr, i);
30108 if ((align & 1) == 0)
30109 emit_move_insn (mem, reg);
30110 else
30111 emit_insn (gen_unaligned_storehi (mem, reg));
30113 i += 2;
30116 /* Handle single byte leftover. */
30117 if (i + 1 == length)
30119 reg = gen_lowpart (QImode, val_reg);
30120 addr = plus_constant (Pmode, dst, i);
30121 mem = adjust_automodify_address (dstbase, QImode, addr, i);
30122 emit_move_insn (mem, reg);
30125 return true;
30128 /* Set a block of memory using vectorization instructions for both
30129 aligned and unaligned cases. We fill the first LENGTH bytes of
30130 the memory area starting from DSTBASE with byte constant VALUE.
30131 ALIGN is the alignment requirement of memory. */
30132 static bool
30133 arm_block_set_vect (rtx dstbase,
30134 unsigned HOST_WIDE_INT length,
30135 unsigned HOST_WIDE_INT value,
30136 unsigned HOST_WIDE_INT align)
30138 /* Check whether we need to use unaligned store instruction. */
30139 if (((align & 3) != 0 || (length & 3) != 0)
30140 /* Check whether unaligned store instruction is available. */
30141 && (!unaligned_access || BYTES_BIG_ENDIAN))
30142 return false;
30144 if ((align & 3) == 0)
30145 return arm_block_set_aligned_vect (dstbase, length, value, align);
30146 else
30147 return arm_block_set_unaligned_vect (dstbase, length, value, align);
30150 /* Expand a string store operation. First we try to do it using
30151 vectorization instructions, then with ARM unaligned access and
30152 double-word stores if profitable. OPERANDS[0] is the destination,
30153 OPERANDS[1] is the number of bytes, OPERANDS[2] is the value to
30154 initialize the memory, OPERANDS[3] is the known alignment of the
30155 destination. */
30156 bool
30157 arm_gen_setmem (rtx *operands)
30159 rtx dstbase = operands[0];
30160 unsigned HOST_WIDE_INT length;
30161 unsigned HOST_WIDE_INT value;
30162 unsigned HOST_WIDE_INT align;
30164 if (!CONST_INT_P (operands[2]) || !CONST_INT_P (operands[1]))
30165 return false;
30167 length = UINTVAL (operands[1]);
30168 if (length > 64)
30169 return false;
30171 value = (UINTVAL (operands[2]) & 0xFF);
30172 align = UINTVAL (operands[3]);
30173 if (TARGET_NEON && length >= 8
30174 && current_tune->string_ops_prefer_neon
30175 && arm_block_set_vect (dstbase, length, value, align))
30176 return true;
30178 if (!unaligned_access && (align & 3) != 0)
30179 return arm_block_set_unaligned_non_vect (dstbase, length, value, align);
30181 return arm_block_set_aligned_non_vect (dstbase, length, value, align);
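/* For illustration: a call such as memset (p, 0xab, 15) with a word-aligned
   destination reaches this function with OPERANDS[1] == 15,
   OPERANDS[2] == 0xab and OPERANDS[3] == 4; with NEON enabled and a tuning
   that prefers it, the vectorized path is tried first, otherwise the aligned
   non-vector path above is used.  */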
30185 static bool
30186 arm_macro_fusion_p (void)
30188 return current_tune->fusible_ops != tune_params::FUSE_NOTHING;
30191 /* Return true if the two back-to-back sets PREV_SET, CURR_SET are suitable
30192 for MOVW / MOVT macro fusion. */
30194 static bool
30195 arm_sets_movw_movt_fusible_p (rtx prev_set, rtx curr_set)
30197 /* We are trying to fuse
30198 movw imm / movt imm
30199 instructions as a group that gets scheduled together. */
30201 rtx set_dest = SET_DEST (curr_set);
30203 if (GET_MODE (set_dest) != SImode)
30204 return false;
30206 /* We are trying to match:
30207 prev (movw) == (set (reg r0) (const_int imm16))
30208 curr (movt) == (set (zero_extract (reg r0)
30209 (const_int 16)
30210 (const_int 16))
30211 (const_int imm16_1))
30213 prev (movw) == (set (reg r1)
30214 (high (symbol_ref ("SYM"))))
30215 curr (movt) == (set (reg r0)
30216 (lo_sum (reg r1)
30217 (symbol_ref ("SYM")))) */
30219 if (GET_CODE (set_dest) == ZERO_EXTRACT)
30221 if (CONST_INT_P (SET_SRC (curr_set))
30222 && CONST_INT_P (SET_SRC (prev_set))
30223 && REG_P (XEXP (set_dest, 0))
30224 && REG_P (SET_DEST (prev_set))
30225 && REGNO (XEXP (set_dest, 0)) == REGNO (SET_DEST (prev_set)))
30226 return true;
30229 else if (GET_CODE (SET_SRC (curr_set)) == LO_SUM
30230 && REG_P (SET_DEST (curr_set))
30231 && REG_P (SET_DEST (prev_set))
30232 && GET_CODE (SET_SRC (prev_set)) == HIGH
30233 && REGNO (SET_DEST (curr_set)) == REGNO (SET_DEST (prev_set)))
30234 return true;
30236 return false;
30239 static bool
30240 aarch_macro_fusion_pair_p (rtx_insn* prev, rtx_insn* curr)
30242 rtx prev_set = single_set (prev);
30243 rtx curr_set = single_set (curr);
30245 if (!prev_set
30246 || !curr_set)
30247 return false;
30249 if (any_condjump_p (curr))
30250 return false;
30252 if (!arm_macro_fusion_p ())
30253 return false;
30255 if (current_tune->fusible_ops & tune_params::FUSE_AES_AESMC
30256 && aarch_crypto_can_dual_issue (prev, curr))
30257 return true;
30259 if (current_tune->fusible_ops & tune_params::FUSE_MOVW_MOVT
30260 && arm_sets_movw_movt_fusible_p (prev_set, curr_set))
30261 return true;
30263 return false;
30266 /* Return true iff the instruction fusion described by OP is enabled. */
30267 bool
30268 arm_fusion_enabled_p (tune_params::fuse_ops op)
30270 return current_tune->fusible_ops & op;
30273 /* Implement TARGET_SCHED_CAN_SPECULATE_INSN. Return true if INSN can be
30274 scheduled for speculative execution. Reject the long-running division
30275 and square-root instructions. */
30277 static bool
30278 arm_sched_can_speculate_insn (rtx_insn *insn)
30280 switch (get_attr_type (insn))
30282 case TYPE_SDIV:
30283 case TYPE_UDIV:
30284 case TYPE_FDIVS:
30285 case TYPE_FDIVD:
30286 case TYPE_FSQRTS:
30287 case TYPE_FSQRTD:
30288 case TYPE_NEON_FP_SQRT_S:
30289 case TYPE_NEON_FP_SQRT_D:
30290 case TYPE_NEON_FP_SQRT_S_Q:
30291 case TYPE_NEON_FP_SQRT_D_Q:
30292 case TYPE_NEON_FP_DIV_S:
30293 case TYPE_NEON_FP_DIV_D:
30294 case TYPE_NEON_FP_DIV_S_Q:
30295 case TYPE_NEON_FP_DIV_D_Q:
30296 return false;
30297 default:
30298 return true;
30302 /* Implement the TARGET_ASAN_SHADOW_OFFSET hook. */
30304 static unsigned HOST_WIDE_INT
30305 arm_asan_shadow_offset (void)
30307 return HOST_WIDE_INT_1U << 29;
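/* For illustration: 1 << 29 is 0x20000000, so with the usual ASan mapping a
   byte at address A is shadowed at (A >> 3) + 0x20000000.  */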
30311 /* This is a temporary fix for PR60655. Ideally we need
30312 to handle most of these cases in the generic part but
30313 currently we reject minus (..) (sym_ref). We try to
30314 ameliorate the case with minus (sym_ref1) (sym_ref2)
30315 where they are in the same section. */
30317 static bool
30318 arm_const_not_ok_for_debug_p (rtx p)
30320 tree decl_op0 = NULL;
30321 tree decl_op1 = NULL;
30323 if (GET_CODE (p) == MINUS)
30325 if (GET_CODE (XEXP (p, 1)) == SYMBOL_REF)
30327 decl_op1 = SYMBOL_REF_DECL (XEXP (p, 1));
30328 if (decl_op1
30329 && GET_CODE (XEXP (p, 0)) == SYMBOL_REF
30330 && (decl_op0 = SYMBOL_REF_DECL (XEXP (p, 0))))
30332 if ((VAR_P (decl_op1)
30333 || TREE_CODE (decl_op1) == CONST_DECL)
30334 && (VAR_P (decl_op0)
30335 || TREE_CODE (decl_op0) == CONST_DECL))
30336 return (get_variable_section (decl_op1, false)
30337 != get_variable_section (decl_op0, false));
30339 if (TREE_CODE (decl_op1) == LABEL_DECL
30340 && TREE_CODE (decl_op0) == LABEL_DECL)
30341 return (DECL_CONTEXT (decl_op1)
30342 != DECL_CONTEXT (decl_op0));
30345 return true;
30349 return false;
30352 /* Return TRUE if X is a reference to a value in a constant pool. */
30353 extern bool
30354 arm_is_constant_pool_ref (rtx x)
30356 return (MEM_P (x)
30357 && GET_CODE (XEXP (x, 0)) == SYMBOL_REF
30358 && CONSTANT_POOL_ADDRESS_P (XEXP (x, 0)));
30361 /* Remember the last target of arm_set_current_function. */
30362 static GTY(()) tree arm_previous_fndecl;
30364 /* Restore or save the TREE_TARGET_GLOBALS from or to NEW_TREE. */
30366 void
30367 save_restore_target_globals (tree new_tree)
30369 /* If we have a previous state, use it. */
30370 if (TREE_TARGET_GLOBALS (new_tree))
30371 restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
30372 else if (new_tree == target_option_default_node)
30373 restore_target_globals (&default_target_globals);
30374 else
30376 /* Call target_reinit and save the state for TARGET_GLOBALS. */
30377 TREE_TARGET_GLOBALS (new_tree) = save_target_globals_default_opts ();
30380 arm_option_params_internal ();
30383 /* Invalidate arm_previous_fndecl. */
30385 void
30386 arm_reset_previous_fndecl (void)
30388 arm_previous_fndecl = NULL_TREE;
30391 /* Establish appropriate back-end context for processing the function
30392 FNDECL. The argument might be NULL to indicate processing at top
30393 level, outside of any function scope. */
30395 static void
30396 arm_set_current_function (tree fndecl)
30398 if (!fndecl || fndecl == arm_previous_fndecl)
30399 return;
30401 tree old_tree = (arm_previous_fndecl
30402 ? DECL_FUNCTION_SPECIFIC_TARGET (arm_previous_fndecl)
30403 : NULL_TREE);
30405 tree new_tree = DECL_FUNCTION_SPECIFIC_TARGET (fndecl);
30407 /* If current function has no attributes but previous one did,
30408 use the default node. */
30409 if (! new_tree && old_tree)
30410 new_tree = target_option_default_node;
30412 /* If there is nothing to do, return. #pragma GCC reset or #pragma GCC pop to
30413 the default have been handled by save_restore_target_globals from
30414 arm_pragma_target_parse. */
30415 if (old_tree == new_tree)
30416 return;
30418 arm_previous_fndecl = fndecl;
30420 /* First set the target options. */
30421 cl_target_option_restore (&global_options, TREE_TARGET_OPTION (new_tree));
30423 save_restore_target_globals (new_tree);
30426 /* Implement TARGET_OPTION_PRINT. */
30428 static void
30429 arm_option_print (FILE *file, int indent, struct cl_target_option *ptr)
30431 int flags = ptr->x_target_flags;
30432 const char *fpu_name;
30434 fpu_name = (ptr->x_arm_fpu_index == TARGET_FPU_auto
30435 ? "auto" : all_fpus[ptr->x_arm_fpu_index].name);
30437 fprintf (file, "%*sselected isa %s\n", indent, "",
30438 TARGET_THUMB2_P (flags) ? "thumb2" :
30439 TARGET_THUMB_P (flags) ? "thumb1" :
30440 "arm");
30442 if (ptr->x_arm_arch_string)
30443 fprintf (file, "%*sselected architecture %s\n", indent, "",
30444 ptr->x_arm_arch_string);
30446 if (ptr->x_arm_cpu_string)
30447 fprintf (file, "%*sselected CPU %s\n", indent, "",
30448 ptr->x_arm_cpu_string);
30450 if (ptr->x_arm_tune_string)
30451 fprintf (file, "%*sselected tune %s\n", indent, "",
30452 ptr->x_arm_tune_string);
30454 fprintf (file, "%*sselected fpu %s\n", indent, "", fpu_name);
30457 /* Hook to determine if one function can safely inline another. */
30459 static bool
30460 arm_can_inline_p (tree caller, tree callee)
30462 tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
30463 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);
30464 bool can_inline = true;
30466 struct cl_target_option *caller_opts
30467 = TREE_TARGET_OPTION (caller_tree ? caller_tree
30468 : target_option_default_node);
30470 struct cl_target_option *callee_opts
30471 = TREE_TARGET_OPTION (callee_tree ? callee_tree
30472 : target_option_default_node);
30474 if (callee_opts == caller_opts)
30475 return true;
30477 /* Callee's ISA features should be a subset of the caller's. */
30478 struct arm_build_target caller_target;
30479 struct arm_build_target callee_target;
30480 caller_target.isa = sbitmap_alloc (isa_num_bits);
30481 callee_target.isa = sbitmap_alloc (isa_num_bits);
30483 arm_configure_build_target (&caller_target, caller_opts, &global_options_set,
30484 false);
30485 arm_configure_build_target (&callee_target, callee_opts, &global_options_set,
30486 false);
30487 if (!bitmap_subset_p (callee_target.isa, caller_target.isa))
30488 can_inline = false;
30490 sbitmap_free (caller_target.isa);
30491 sbitmap_free (callee_target.isa);
30493 /* OK to inline between different modes.
30494 Functions with mode-specific instructions, e.g. using asm,
30495 must be explicitly protected with noinline. */
30496 return can_inline;
30499 /* Hook to fix function's alignment affected by target attribute. */
30501 static void
30502 arm_relayout_function (tree fndecl)
30504 if (DECL_USER_ALIGN (fndecl))
30505 return;
30507 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (fndecl);
30509 if (!callee_tree)
30510 callee_tree = target_option_default_node;
30512 struct cl_target_option *opts = TREE_TARGET_OPTION (callee_tree);
30513 SET_DECL_ALIGN
30514 (fndecl,
30515 FUNCTION_ALIGNMENT (FUNCTION_BOUNDARY_P (opts->x_target_flags)));
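
/* This matters when a target attribute switches the instruction set:
   Thumb functions only need 2-byte alignment while ARM functions need
   4 bytes, so a function carrying e.g. __attribute__((target("thumb")))
   in an otherwise ARM translation unit gets its DECL_ALIGN recomputed
   here from its own target flags rather than from the global ones.  */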
30518 /* Inner function to process the attribute((target(...))); take an argument and
30519 set the current options from that argument. If we have a list, recursively
30520 go over the list. */
30522 static bool
30523 arm_valid_target_attribute_rec (tree args, struct gcc_options *opts)
30525 if (TREE_CODE (args) == TREE_LIST)
30527 bool ret = true;
30529 for (; args; args = TREE_CHAIN (args))
30530 if (TREE_VALUE (args)
30531 && !arm_valid_target_attribute_rec (TREE_VALUE (args), opts))
30532 ret = false;
30533 return ret;
30536 else if (TREE_CODE (args) != STRING_CST)
30538 error ("attribute %<target%> argument not a string");
30539 return false;
30542 char *argstr = ASTRDUP (TREE_STRING_POINTER (args));
30543 char *q;
30545 while ((q = strtok (argstr, ",")) != NULL)
30547 while (ISSPACE (*q)) ++q;
30549 argstr = NULL;
30550 if (!strncmp (q, "thumb", 5))
30551 opts->x_target_flags |= MASK_THUMB;
30553 else if (!strncmp (q, "arm", 3))
30554 opts->x_target_flags &= ~MASK_THUMB;
30556 else if (!strncmp (q, "fpu=", 4))
30558 int fpu_index;
30559 if (! opt_enum_arg_to_value (OPT_mfpu_, q+4,
30560 &fpu_index, CL_TARGET))
30562 error ("invalid fpu for attribute(target(\"%s\"))", q);
30563 return false;
30565 if (fpu_index == TARGET_FPU_auto)
30567 /* This doesn't really make sense until we support
30568 general dynamic selection of the architecture and all
30569 sub-features. */
30570 sorry ("auto fpu selection not currently permitted here");
30571 return false;
30573 opts->x_arm_fpu_index = (enum fpu_type) fpu_index;
30575 else
30577 error ("attribute(target(\"%s\")) is unknown", q);
30578 return false;
30582 return true;
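
/* As an example of what the loop above accepts, a declaration such as

     void f (void) __attribute__((target("thumb,fpu=crypto-neon-fp-armv8")));

   arrives here as the single string "thumb,fpu=crypto-neon-fp-armv8"; it is
   split on commas, "thumb" sets MASK_THUMB, and the "fpu=" token is looked
   up through the -mfpu= option table.  (The FPU name is only an
   illustration; any name accepted by -mfpu= works.)  */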
30585 /* Return a TARGET_OPTION_NODE tree of the target options listed or NULL. */
30587 tree
30588 arm_valid_target_attribute_tree (tree args, struct gcc_options *opts,
30589 struct gcc_options *opts_set)
30591 struct cl_target_option cl_opts;
30593 if (!arm_valid_target_attribute_rec (args, opts))
30594 return NULL_TREE;
30596 cl_target_option_save (&cl_opts, opts);
30597 arm_configure_build_target (&arm_active_target, &cl_opts, opts_set, false);
30598 arm_option_check_internal (opts);
30599 /* Do any overrides, such as an arch=xxx setting from the global options. */
30600 arm_option_override_internal (opts, opts_set);
30602 return build_target_option_node (opts);
30605 static void
30606 add_attribute (const char * mode, tree *attributes)
30608 size_t len = strlen (mode);
30609 tree value = build_string (len, mode);
30611 TREE_TYPE (value) = build_array_type (char_type_node,
30612 build_index_type (size_int (len)));
30614 *attributes = tree_cons (get_identifier ("target"),
30615 build_tree_list (NULL_TREE, value),
30616 *attributes);
30619 /* For testing only. Alternately insert thumb and arm mode attributes on functions. */
30621 static void
30622 arm_insert_attributes (tree fndecl, tree * attributes)
30624 const char *mode;
30626 if (! TARGET_FLIP_THUMB)
30627 return;
30629 if (TREE_CODE (fndecl) != FUNCTION_DECL || DECL_EXTERNAL(fndecl)
30630 || DECL_BUILT_IN (fndecl) || DECL_ARTIFICIAL (fndecl))
30631 return;
30633 /* Nested definitions must inherit mode. */
30634 if (current_function_decl)
30636 mode = TARGET_THUMB ? "thumb" : "arm";
30637 add_attribute (mode, attributes);
30638 return;
30641 /* If there is already a setting don't change it. */
30642 if (lookup_attribute ("target", *attributes) != NULL)
30643 return;
30645 mode = thumb_flipper ? "thumb" : "arm";
30646 add_attribute (mode, attributes);
30648 thumb_flipper = !thumb_flipper;
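
/* A minimal sketch of the effect (assuming TARGET_FLIP_THUMB corresponds
   to the -mflip-thumb testing option): successive top-level function
   definitions in a file alternately receive an implicit target("thumb") /
   target("arm") attribute, which exercises the mode-switching paths
   without any source changes.  Functions that already carry a target
   attribute are left alone, and nested definitions inherit the prevailing
   mode.  */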
30651 /* Hook to validate attribute((target("string"))). */
30653 static bool
30654 arm_valid_target_attribute_p (tree fndecl, tree ARG_UNUSED (name),
30655 tree args, int ARG_UNUSED (flags))
30657 bool ret = true;
30658 struct gcc_options func_options;
30659 tree cur_tree, new_optimize;
30660 gcc_assert ((fndecl != NULL_TREE) && (args != NULL_TREE));
30662 /* Get the optimization options of the current function. */
30663 tree func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);
30665 /* If the function changed the optimization levels as well as setting target
30666 options, start with the optimizations specified. */
30667 if (!func_optimize)
30668 func_optimize = optimization_default_node;
30670 /* Init func_options. */
30671 memset (&func_options, 0, sizeof (func_options));
30672 init_options_struct (&func_options, NULL);
30673 lang_hooks.init_options_struct (&func_options);
30675 /* Initialize func_options to the defaults. */
30676 cl_optimization_restore (&func_options,
30677 TREE_OPTIMIZATION (func_optimize));
30679 cl_target_option_restore (&func_options,
30680 TREE_TARGET_OPTION (target_option_default_node));
30682 /* Set func_options flags with new target mode. */
30683 cur_tree = arm_valid_target_attribute_tree (args, &func_options,
30684 &global_options_set);
30686 if (cur_tree == NULL_TREE)
30687 ret = false;
30689 new_optimize = build_optimization_node (&func_options);
30691 DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = cur_tree;
30693 DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize;
30695 finalize_options_struct (&func_options);
30697 return ret;
30700 /* Match an ISA feature bitmap to a named FPU. We always use the
30701 first entry that exactly matches the feature set, so that we
30702 effectively canonicalize the FPU name for the assembler. */
30703 static const char*
30704 arm_identify_fpu_from_isa (sbitmap isa)
30706 auto_sbitmap fpubits (isa_num_bits);
30707 auto_sbitmap cand_fpubits (isa_num_bits);
30709 bitmap_and (fpubits, isa, isa_all_fpubits);
30711 /* If there are no ISA feature bits relating to the FPU, we must be
30712 doing soft-float. */
30713 if (bitmap_empty_p (fpubits))
30714 return "softvfp";
30716 for (unsigned int i = 0; i < TARGET_FPU_auto; i++)
30718 arm_initialize_isa (cand_fpubits, all_fpus[i].isa_bits);
30719 if (bitmap_equal_p (fpubits, cand_fpubits))
30720 return all_fpus[i].name;
30722 /* We must find an entry, or things have gone wrong. */
30723 gcc_unreachable ();
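
/* For example (assuming the current contents of arm-fpus.def), the feature
   bits selected by -mfpu=neon-vfpv3 are identical to those of the earlier
   "neon" entry, so the name is canonicalized to "neon" when it is emitted
   in the .fpu directive.  */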
30726 void
30727 arm_declare_function_name (FILE *stream, const char *name, tree decl)
30730 fprintf (stream, "\t.syntax unified\n");
30732 if (TARGET_THUMB)
30734 if (is_called_in_ARM_mode (decl)
30735 || (TARGET_THUMB1 && !TARGET_THUMB1_ONLY
30736 && cfun->is_thunk))
30737 fprintf (stream, "\t.code 32\n");
30738 else if (TARGET_THUMB1)
30739 fprintf (stream, "\t.code\t16\n\t.thumb_func\n");
30740 else
30741 fprintf (stream, "\t.thumb\n\t.thumb_func\n");
30743 else
30744 fprintf (stream, "\t.arm\n");
30746 asm_fprintf (asm_out_file, "\t.fpu %s\n",
30747 (TARGET_SOFT_FLOAT
30748 ? "softvfp"
30749 : arm_identify_fpu_from_isa (arm_active_target.isa)));
30751 if (TARGET_POKE_FUNCTION_NAME)
30752 arm_poke_function_name (stream, (const char *) name);
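
/* For a Thumb-2 target with a VFPv4 FPU, the directives emitted above for
   a function would look roughly like:

     .syntax unified
     .thumb
     .thumb_func
     .fpu vfpv4

   (with ".code 32" instead when the function is called in ARM mode, and
   ".fpu softvfp" for soft-float targets).  */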
30755 /* If MEM is in the form [base+offset], extract the two parts
30756 of the address and store them in BASE and OFFSET; otherwise return false
30757 after clearing BASE and OFFSET. */
30759 static bool
30760 extract_base_offset_in_addr (rtx mem, rtx *base, rtx *offset)
30762 rtx addr;
30764 gcc_assert (MEM_P (mem));
30766 addr = XEXP (mem, 0);
30768 /* Strip off const from addresses like (const (addr)). */
30769 if (GET_CODE (addr) == CONST)
30770 addr = XEXP (addr, 0);
30772 if (GET_CODE (addr) == REG)
30774 *base = addr;
30775 *offset = const0_rtx;
30776 return true;
30779 if (GET_CODE (addr) == PLUS
30780 && GET_CODE (XEXP (addr, 0)) == REG
30781 && CONST_INT_P (XEXP (addr, 1)))
30783 *base = XEXP (addr, 0);
30784 *offset = XEXP (addr, 1);
30785 return true;
30788 *base = NULL_RTX;
30789 *offset = NULL_RTX;
30791 return false;
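
/* Sketch of the two accepted shapes, in RTL terms:

     (mem (reg r1))                       -> BASE = r1, OFFSET = 0
     (mem (plus (reg r1) (const_int 8)))  -> BASE = r1, OFFSET = 8

   Anything else (auto-increment, register offsets, ...) makes the function
   clear BASE/OFFSET and return false.  */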
30794 /* If INSN is a load or store whose address has the form [base+offset],
30795 extract the two parts and store them in BASE and OFFSET. IS_LOAD is set
30796 to TRUE if it is a load. Return TRUE if INSN is such an instruction,
30797 otherwise return FALSE. */
30799 static bool
30800 fusion_load_store (rtx_insn *insn, rtx *base, rtx *offset, bool *is_load)
30802 rtx x, dest, src;
30804 gcc_assert (INSN_P (insn));
30805 x = PATTERN (insn);
30806 if (GET_CODE (x) != SET)
30807 return false;
30809 src = SET_SRC (x);
30810 dest = SET_DEST (x);
30811 if (GET_CODE (src) == REG && GET_CODE (dest) == MEM)
30813 *is_load = false;
30814 extract_base_offset_in_addr (dest, base, offset);
30816 else if (GET_CODE (src) == MEM && GET_CODE (dest) == REG)
30818 *is_load = true;
30819 extract_base_offset_in_addr (src, base, offset);
30821 else
30822 return false;
30824 return (*base != NULL_RTX && *offset != NULL_RTX);
30827 /* Implement the TARGET_SCHED_FUSION_PRIORITY hook.
30829 Currently we only support fusing ldr and str instructions, so FUSION_PRI
30830 and PRI are only calculated for these instructions. For other instructions,
30831 FUSION_PRI and PRI are simply set to MAX_PRI. In the future, other kinds of
30832 instruction fusion can be supported by returning different priorities.
30834 It's important that irrelevant instructions get the largest FUSION_PRI. */
30836 static void
30837 arm_sched_fusion_priority (rtx_insn *insn, int max_pri,
30838 int *fusion_pri, int *pri)
30840 int tmp, off_val;
30841 bool is_load;
30842 rtx base, offset;
30844 gcc_assert (INSN_P (insn));
30846 tmp = max_pri - 1;
30847 if (!fusion_load_store (insn, &base, &offset, &is_load))
30849 *pri = tmp;
30850 *fusion_pri = tmp;
30851 return;
30854 /* Load goes first. */
30855 if (is_load)
30856 *fusion_pri = tmp - 1;
30857 else
30858 *fusion_pri = tmp - 2;
30860 tmp /= 2;
30862 /* INSN with smaller base register goes first. */
30863 tmp -= ((REGNO (base) & 0xff) << 20);
30865 /* INSN with smaller offset goes first. */
30866 off_val = (int)(INTVAL (offset));
30867 if (off_val >= 0)
30868 tmp -= (off_val & 0xfffff);
30869 else
30870 tmp += ((- off_val) & 0xfffff);
30872 *pri = tmp;
30873 return;
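
/* The net effect of the arithmetic above is a keyed sort: loads order
   ahead of stores in FUSION_PRI, and within PRI the base register number
   (packed into the high bits) dominates, with smaller offsets coming first
   for the same base.  So, for example, ldr r0, [r4] and ldr r1, [r4, #4]
   end up adjacent in the schedule, which makes it easier for later passes
   to combine them (e.g. into ldrd or a load-multiple).  */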
30877 /* Construct and return a PARALLEL RTX vector with elements numbering the
30878 lanes of either the high (HIGH == TRUE) or low (HIGH == FALSE) half of
30879 the vector - from the perspective of the architecture. This does not
30880 line up with GCC's perspective on lane numbers, so we end up with
30881 different masks depending on our target endian-ness. The diagram
30882 below may help. We must draw the distinction when building masks
30883 which select one half of the vector. An instruction selecting
30884 architectural low-lanes for a big-endian target must be described using
30885 a mask selecting GCC high-lanes.
30887 Big-Endian Little-Endian
30889 GCC 0 1 2 3 3 2 1 0
30890 | x | x | x | x | | x | x | x | x |
30891 Architecture 3 2 1 0 3 2 1 0
30893 Low Mask: { 2, 3 } { 0, 1 }
30894 High Mask: { 0, 1 } { 2, 3 }
30898 arm_simd_vect_par_cnst_half (machine_mode mode, bool high)
30900 int nunits = GET_MODE_NUNITS (mode);
30901 rtvec v = rtvec_alloc (nunits / 2);
30902 int high_base = nunits / 2;
30903 int low_base = 0;
30904 int base;
30905 rtx t1;
30906 int i;
30908 if (BYTES_BIG_ENDIAN)
30909 base = high ? low_base : high_base;
30910 else
30911 base = high ? high_base : low_base;
30913 for (i = 0; i < nunits / 2; i++)
30914 RTVEC_ELT (v, i) = GEN_INT (base + i);
30916 t1 = gen_rtx_PARALLEL (mode, v);
30917 return t1;
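
/* Concrete example: for V4SImode on a little-endian target,
   arm_simd_vect_par_cnst_half (V4SImode, true) yields
   (parallel [(const_int 2) (const_int 3)]), while the same call on a
   big-endian target yields (parallel [(const_int 0) (const_int 1)]),
   matching the High Mask row of the diagram above.  */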
30920 /* Check OP for validity as a PARALLEL RTX vector with elements
30921 numbering the lanes of either the high (HIGH == TRUE) or low lanes,
30922 from the perspective of the architecture. See the diagram above
30923 arm_simd_vect_par_cnst_half for more details. */
30925 bool
30926 arm_simd_check_vect_par_cnst_half_p (rtx op, machine_mode mode,
30927 bool high)
30929 rtx ideal = arm_simd_vect_par_cnst_half (mode, high);
30930 HOST_WIDE_INT count_op = XVECLEN (op, 0);
30931 HOST_WIDE_INT count_ideal = XVECLEN (ideal, 0);
30932 int i = 0;
30934 if (!VECTOR_MODE_P (mode))
30935 return false;
30937 if (count_op != count_ideal)
30938 return false;
30940 for (i = 0; i < count_ideal; i++)
30942 rtx elt_op = XVECEXP (op, 0, i);
30943 rtx elt_ideal = XVECEXP (ideal, 0, i);
30945 if (!CONST_INT_P (elt_op)
30946 || INTVAL (elt_ideal) != INTVAL (elt_op))
30947 return false;
30949 return true;
30952 /* Can output mi_thunk for all cases except for non-zero vcall_offset
30953 in Thumb1. */
30954 static bool
30955 arm_can_output_mi_thunk (const_tree, HOST_WIDE_INT, HOST_WIDE_INT vcall_offset,
30956 const_tree)
30958 /* For now, we punt and do not handle this for TARGET_THUMB1. */
30959 if (vcall_offset && TARGET_THUMB1)
30960 return false;
30962 /* Otherwise ok. */
30963 return true;
30966 /* Generate RTL for a conditional branch with rtx comparison CODE in
30967 mode CC_MODE. The destination of the unlikely conditional branch
30968 is LABEL_REF. */
30970 void
30971 arm_gen_unlikely_cbranch (enum rtx_code code, machine_mode cc_mode,
30972 rtx label_ref)
30974 rtx x;
30975 x = gen_rtx_fmt_ee (code, VOIDmode,
30976 gen_rtx_REG (cc_mode, CC_REGNUM),
30977 const0_rtx);
30979 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
30980 gen_rtx_LABEL_REF (VOIDmode, label_ref),
30981 pc_rtx);
30982 emit_unlikely_jump (gen_rtx_SET (pc_rtx, x));
30985 /* Implement the TARGET_ASM_ELF_FLAGS_NUMERIC hook.
30987 For pure-code sections there is no letter code for this attribute, so
30988 output all the section flags numerically when this is needed. */
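
/* The hexadecimal constants used below are the ELF section flag values as
   they appear in a section header's sh_flags field: SHF_WRITE (0x1),
   SHF_ALLOC (0x2), SHF_EXECINSTR (0x4), SHF_MERGE (0x10),
   SHF_STRINGS (0x20), SHF_GROUP (0x200), SHF_TLS (0x400),
   SHF_EXCLUDE (0x80000000) and the ARM-specific SHF_ARM_PURECODE
   (0x20000000).  */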
30990 static bool
30991 arm_asm_elf_flags_numeric (unsigned int flags, unsigned int *num)
30994 if (flags & SECTION_ARM_PURECODE)
30996 *num = 0x20000000;
30998 if (!(flags & SECTION_DEBUG))
30999 *num |= 0x2;
31000 if (flags & SECTION_EXCLUDE)
31001 *num |= 0x80000000;
31002 if (flags & SECTION_WRITE)
31003 *num |= 0x1;
31004 if (flags & SECTION_CODE)
31005 *num |= 0x4;
31006 if (flags & SECTION_MERGE)
31007 *num |= 0x10;
31008 if (flags & SECTION_STRINGS)
31009 *num |= 0x20;
31010 if (flags & SECTION_TLS)
31011 *num |= 0x400;
31012 if (HAVE_COMDAT_GROUP && (flags & SECTION_LINKONCE))
31013 *num |= 0x200;
31015 return true;
31018 return false;
31021 /* Implement the TARGET_ASM_FUNCTION_SECTION hook.
31023 If pure-code is passed as an option, make sure all functions are in
31024 sections that have the SHF_ARM_PURECODE attribute. */
31026 static section *
31027 arm_function_section (tree decl, enum node_frequency freq,
31028 bool startup, bool exit)
31030 const char * section_name;
31031 section * sec;
31033 if (!decl || TREE_CODE (decl) != FUNCTION_DECL)
31034 return default_function_section (decl, freq, startup, exit);
31036 if (!target_pure_code)
31037 return default_function_section (decl, freq, startup, exit);
31040 section_name = DECL_SECTION_NAME (decl);
31042 /* If a function is not in a named section then it falls under the 'default'
31043 text section, also known as '.text'. We can preserve previous behavior as
31044 the default text section already has the SHF_ARM_PURECODE section
31045 attribute. */
31046 if (!section_name)
31048 section *default_sec = default_function_section (decl, freq, startup,
31049 exit);
31051 /* If default_sec is not null, then it must be a special section like for
31052 example .text.startup. We set the pure-code attribute and return the
31053 same section to preserve existing behavior. */
31054 if (default_sec)
31055 default_sec->common.flags |= SECTION_ARM_PURECODE;
31056 return default_sec;
31059 /* Otherwise look whether a section has already been created with
31060 'section_name'. */
31061 sec = get_named_section (decl, section_name, 0);
31062 if (!sec)
31063 /* If that is not the case, passing NULL as the section's name to
31064 'get_named_section' will create a section with the declaration's
31065 section name. */
31066 sec = get_named_section (decl, NULL, 0);
31068 /* Set the SHF_ARM_PURECODE attribute. */
31069 sec->common.flags |= SECTION_ARM_PURECODE;
31071 return sec;
31074 /* Implement the TARGET_SECTION_TYPE_FLAGS hook.
31076 If DECL is a function declaration and pure-code is passed as an option
31077 then add the SHF_ARM_PURECODE attribute to the section flags. NAME is the
31078 section's name and RELOC indicates whether the declaration's initializer may
31079 contain runtime relocations. */
31081 static unsigned int
31082 arm_elf_section_type_flags (tree decl, const char *name, int reloc)
31084 unsigned int flags = default_section_type_flags (decl, name, reloc);
31086 if (decl && TREE_CODE (decl) == FUNCTION_DECL && target_pure_code)
31087 flags |= SECTION_ARM_PURECODE;
31089 return flags;
31092 /* Generate call to __aeabi_[mode]divmod (op0, op1). */
31094 static void
31095 arm_expand_divmod_libfunc (rtx libfunc, machine_mode mode,
31096 rtx op0, rtx op1,
31097 rtx *quot_p, rtx *rem_p)
31099 if (mode == SImode)
31100 gcc_assert (!TARGET_IDIV);
31102 scalar_int_mode libval_mode
31103 = smallest_int_mode_for_size (2 * GET_MODE_BITSIZE (mode));
31105 rtx libval = emit_library_call_value (libfunc, NULL_RTX, LCT_CONST,
31106 libval_mode,
31107 op0, GET_MODE (op0),
31108 op1, GET_MODE (op1));
31110 rtx quotient = simplify_gen_subreg (mode, libval, libval_mode, 0);
31111 rtx remainder = simplify_gen_subreg (mode, libval, libval_mode,
31112 GET_MODE_SIZE (mode));
31114 gcc_assert (quotient);
31115 gcc_assert (remainder);
31117 *quot_p = quotient;
31118 *rem_p = remainder;
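
/* Sketch of the data flow above: the __aeabi_*divmod helpers return the
   quotient and remainder together in a value twice the width of the
   operands (for SImode, quotient in r0 and remainder in r1, treated here
   as a single DImode libcall value); the two simplify_gen_subreg calls
   then extract the quotient at byte offset 0 and the remainder at byte
   offset GET_MODE_SIZE (mode).  */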
31121 /* This function checks for the availability of the coprocessor builtin passed
31122 in BUILTIN for the current target. Returns true if it is available and
31123 false otherwise. If a BUILTIN is passed for which this function has not
31124 been implemented, it will cause an internal compiler error. */
31126 bool
31127 arm_coproc_builtin_available (enum unspecv builtin)
31129 /* None of these builtins are available in Thumb mode if the target only
31130 supports Thumb-1. */
31131 if (TARGET_THUMB1)
31132 return false;
31134 switch (builtin)
31136 case VUNSPEC_CDP:
31137 case VUNSPEC_LDC:
31138 case VUNSPEC_LDCL:
31139 case VUNSPEC_STC:
31140 case VUNSPEC_STCL:
31141 case VUNSPEC_MCR:
31142 case VUNSPEC_MRC:
31143 if (arm_arch4)
31144 return true;
31145 break;
31146 case VUNSPEC_CDP2:
31147 case VUNSPEC_LDC2:
31148 case VUNSPEC_LDC2L:
31149 case VUNSPEC_STC2:
31150 case VUNSPEC_STC2L:
31151 case VUNSPEC_MCR2:
31152 case VUNSPEC_MRC2:
31153 /* Only present in ARMv5*, ARMv6 (but not ARMv6-M), ARMv7* and
31154 ARMv8-{A,M}. */
31155 if (arm_arch5)
31156 return true;
31157 break;
31158 case VUNSPEC_MCRR:
31159 case VUNSPEC_MRRC:
31160 /* Only present in ARMv5TE, ARMv6 (but not ARMv6-M), ARMv7* and
31161 ARMv8-{A,M}. */
31162 if (arm_arch6 || arm_arch5te)
31163 return true;
31164 break;
31165 case VUNSPEC_MCRR2:
31166 case VUNSPEC_MRRC2:
31167 if (arm_arch6)
31168 return true;
31169 break;
31170 default:
31171 gcc_unreachable ();
31173 return false;
31176 /* This function returns true if OP is a valid memory operand for the ldc and
31177 stc coprocessor instructions and false otherwise. */
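
/* The forms accepted below mirror what the LDC/STC encodings can express:
   a plain base register, a base register plus a constant offset that is a
   multiple of 4 in the range [-1020, 1020], or a pre-/post-
   increment/decrement of a base register.  */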
31179 bool
31180 arm_coproc_ldc_stc_legitimate_address (rtx op)
31182 HOST_WIDE_INT range;
31183 /* Has to be a memory operand. */
31184 if (!MEM_P (op))
31185 return false;
31187 op = XEXP (op, 0);
31189 /* We accept registers. */
31190 if (REG_P (op))
31191 return true;
31193 switch (GET_CODE (op))
31195 case PLUS:
31197 /* Or registers with an offset. */
31198 if (!REG_P (XEXP (op, 0)))
31199 return false;
31201 op = XEXP (op, 1);
31203 /* The offset must be an immediate though. */
31204 if (!CONST_INT_P (op))
31205 return false;
31207 range = INTVAL (op);
31209 /* Within the range of [-1020,1020]. */
31210 if (!IN_RANGE (range, -1020, 1020))
31211 return false;
31213 /* And a multiple of 4. */
31214 return (range % 4) == 0;
31216 case PRE_INC:
31217 case POST_INC:
31218 case PRE_DEC:
31219 case POST_DEC:
31220 return REG_P (XEXP (op, 0));
31221 default:
31222 gcc_unreachable ();
31224 return false;
31227 #if CHECKING_P
31228 namespace selftest {
31230 /* Scan the static data tables generated by parsecpu.awk looking for
31231 potential issues with the data. We primarily check for
31232 inconsistencies in the option extensions at present (extensions
31233 that duplicate others but aren't marked as aliases). Furthermore,
31234 for correct canonicalization later options must never be a subset
31235 of an earlier option. Any extension should also only specify other
31236 feature bits and never an architecture bit. The architecture is inferred
31237 from the declaration of the extension. */
31238 static void
31239 arm_test_cpu_arch_data (void)
31241 const arch_option *arch;
31242 const cpu_option *cpu;
31243 auto_sbitmap target_isa (isa_num_bits);
31244 auto_sbitmap isa1 (isa_num_bits);
31245 auto_sbitmap isa2 (isa_num_bits);
31247 for (arch = all_architectures; arch->common.name != NULL; ++arch)
31249 const cpu_arch_extension *ext1, *ext2;
31251 if (arch->common.extensions == NULL)
31252 continue;
31254 arm_initialize_isa (target_isa, arch->common.isa_bits);
31256 for (ext1 = arch->common.extensions; ext1->name != NULL; ++ext1)
31258 if (ext1->alias)
31259 continue;
31261 arm_initialize_isa (isa1, ext1->isa_bits);
31262 for (ext2 = ext1 + 1; ext2->name != NULL; ++ext2)
31264 if (ext2->alias || ext1->remove != ext2->remove)
31265 continue;
31267 arm_initialize_isa (isa2, ext2->isa_bits);
31268 /* If the option is a subset of the parent option, it doesn't
31269 add anything and so isn't useful. */
31270 ASSERT_TRUE (!bitmap_subset_p (isa2, isa1));
31272 /* If the extension specifies any architectural bits then
31273 disallow it. Extensions should only specify feature bits. */
31274 ASSERT_TRUE (!bitmap_intersect_p (isa2, target_isa));
31279 for (cpu = all_cores; cpu->common.name != NULL; ++cpu)
31281 const cpu_arch_extension *ext1, *ext2;
31283 if (cpu->common.extensions == NULL)
31284 continue;
31286 arm_initialize_isa (target_isa, cpu->common.isa_bits);
31288 for (ext1 = cpu->common.extensions; ext1->name != NULL; ++ext1)
31290 if (ext1->alias)
31291 continue;
31293 arm_initialize_isa (isa1, ext1->isa_bits);
31294 for (ext2 = ext1 + 1; ext2->name != NULL; ++ext2)
31296 if (ext2->alias || ext1->remove != ext2->remove)
31297 continue;
31299 arm_initialize_isa (isa2, ext2->isa_bits);
31300 /* If the option is a subset of the parent option, it doesn't
31301 add anything and so isn't useful. */
31302 ASSERT_TRUE (!bitmap_subset_p (isa2, isa1));
31304 /* If the extension specifies any architectural bits then
31305 disallow it. Extensions should only specify feature bits. */
31306 ASSERT_TRUE (!bitmap_intersect_p (isa2, target_isa));
31312 static void
31313 arm_run_selftests (void)
31315 arm_test_cpu_arch_data ();
31317 } /* Namespace selftest. */
31319 #undef TARGET_RUN_TARGET_SELFTESTS
31320 #define TARGET_RUN_TARGET_SELFTESTS selftest::arm_run_selftests
31321 #endif /* CHECKING_P */
31323 struct gcc_target targetm = TARGET_INITIALIZER;
31325 #include "gt-arm.h"