[ARM] PR71607: Fix ICE when loading constant
[official-gcc.git] / gcc / config / arm / arm.c
1 /* Output routines for GCC for ARM.
2 Copyright (C) 1991-2017 Free Software Foundation, Inc.
3 Contributed by Pieter `Tiggr' Schoenmakers (rcpieter@win.tue.nl)
4 and Martin Simmons (@harleqn.co.uk).
5 More major hacks by Richard Earnshaw (rearnsha@arm.com).
7 This file is part of GCC.
9 GCC is free software; you can redistribute it and/or modify it
10 under the terms of the GNU General Public License as published
11 by the Free Software Foundation; either version 3, or (at your
12 option) any later version.
14 GCC is distributed in the hope that it will be useful, but WITHOUT
15 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
16 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
17 License for more details.
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3. If not see
21 <http://www.gnu.org/licenses/>. */
23 #include "config.h"
24 #include "system.h"
25 #include "coretypes.h"
26 #include "backend.h"
27 #include "target.h"
28 #include "rtl.h"
29 #include "tree.h"
30 #include "memmodel.h"
31 #include "cfghooks.h"
32 #include "df.h"
33 #include "tm_p.h"
34 #include "stringpool.h"
35 #include "optabs.h"
36 #include "regs.h"
37 #include "emit-rtl.h"
38 #include "recog.h"
39 #include "cgraph.h"
40 #include "diagnostic-core.h"
41 #include "alias.h"
42 #include "fold-const.h"
43 #include "stor-layout.h"
44 #include "calls.h"
45 #include "varasm.h"
46 #include "output.h"
47 #include "insn-attr.h"
48 #include "flags.h"
49 #include "reload.h"
50 #include "explow.h"
51 #include "expr.h"
52 #include "cfgrtl.h"
53 #include "sched-int.h"
54 #include "common/common-target.h"
55 #include "langhooks.h"
56 #include "intl.h"
57 #include "libfuncs.h"
58 #include "params.h"
59 #include "opts.h"
60 #include "dumpfile.h"
61 #include "target-globals.h"
62 #include "builtins.h"
63 #include "tm-constrs.h"
64 #include "rtl-iter.h"
65 #include "optabs-libfuncs.h"
66 #include "gimplify.h"
67 #include "gimple.h"
69 /* This file should be included last. */
70 #include "target-def.h"
72 /* Forward definitions of types. */
73 typedef struct minipool_node Mnode;
74 typedef struct minipool_fixup Mfix;
76 void (*arm_lang_output_object_attributes_hook)(void);
78 struct four_ints
79 {
80   int i[4];
81 };
83 /* Forward function declarations. */
84 static bool arm_const_not_ok_for_debug_p (rtx);
85 static int arm_needs_doubleword_align (machine_mode, const_tree);
86 static int arm_compute_static_chain_stack_bytes (void);
87 static arm_stack_offsets *arm_get_frame_offsets (void);
88 static void arm_add_gc_roots (void);
89 static int arm_gen_constant (enum rtx_code, machine_mode, rtx,
90 unsigned HOST_WIDE_INT, rtx, rtx, int, int);
91 static unsigned bit_count (unsigned long);
92 static unsigned bitmap_popcount (const sbitmap);
93 static int arm_address_register_rtx_p (rtx, int);
94 static int arm_legitimate_index_p (machine_mode, rtx, RTX_CODE, int);
95 static bool is_called_in_ARM_mode (tree);
96 static int thumb2_legitimate_index_p (machine_mode, rtx, int);
97 static int thumb1_base_register_rtx_p (rtx, machine_mode, int);
98 static rtx arm_legitimize_address (rtx, rtx, machine_mode);
99 static reg_class_t arm_preferred_reload_class (rtx, reg_class_t);
100 static rtx thumb_legitimize_address (rtx, rtx, machine_mode);
101 inline static int thumb1_index_register_rtx_p (rtx, int);
102 static int thumb_far_jump_used_p (void);
103 static bool thumb_force_lr_save (void);
104 static unsigned arm_size_return_regs (void);
105 static bool arm_assemble_integer (rtx, unsigned int, int);
106 static void arm_print_operand (FILE *, rtx, int);
107 static void arm_print_operand_address (FILE *, machine_mode, rtx);
108 static bool arm_print_operand_punct_valid_p (unsigned char code);
109 static const char *fp_const_from_val (REAL_VALUE_TYPE *);
110 static arm_cc get_arm_condition_code (rtx);
111 static const char *output_multi_immediate (rtx *, const char *, const char *,
112 int, HOST_WIDE_INT);
113 static const char *shift_op (rtx, HOST_WIDE_INT *);
114 static struct machine_function *arm_init_machine_status (void);
115 static void thumb_exit (FILE *, int);
116 static HOST_WIDE_INT get_jump_table_size (rtx_jump_table_data *);
117 static Mnode *move_minipool_fix_forward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
118 static Mnode *add_minipool_forward_ref (Mfix *);
119 static Mnode *move_minipool_fix_backward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
120 static Mnode *add_minipool_backward_ref (Mfix *);
121 static void assign_minipool_offsets (Mfix *);
122 static void arm_print_value (FILE *, rtx);
123 static void dump_minipool (rtx_insn *);
124 static int arm_barrier_cost (rtx_insn *);
125 static Mfix *create_fix_barrier (Mfix *, HOST_WIDE_INT);
126 static void push_minipool_barrier (rtx_insn *, HOST_WIDE_INT);
127 static void push_minipool_fix (rtx_insn *, HOST_WIDE_INT, rtx *,
128 machine_mode, rtx);
129 static void arm_reorg (void);
130 static void note_invalid_constants (rtx_insn *, HOST_WIDE_INT, int);
131 static unsigned long arm_compute_save_reg0_reg12_mask (void);
132 static unsigned long arm_compute_save_reg_mask (void);
133 static unsigned long arm_isr_value (tree);
134 static unsigned long arm_compute_func_type (void);
135 static tree arm_handle_fndecl_attribute (tree *, tree, tree, int, bool *);
136 static tree arm_handle_pcs_attribute (tree *, tree, tree, int, bool *);
137 static tree arm_handle_isr_attribute (tree *, tree, tree, int, bool *);
138 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
139 static tree arm_handle_notshared_attribute (tree *, tree, tree, int, bool *);
140 #endif
141 static tree arm_handle_cmse_nonsecure_entry (tree *, tree, tree, int, bool *);
142 static tree arm_handle_cmse_nonsecure_call (tree *, tree, tree, int, bool *);
143 static void arm_output_function_epilogue (FILE *, HOST_WIDE_INT);
144 static void arm_output_function_prologue (FILE *, HOST_WIDE_INT);
145 static int arm_comp_type_attributes (const_tree, const_tree);
146 static void arm_set_default_type_attributes (tree);
147 static int arm_adjust_cost (rtx_insn *, int, rtx_insn *, int, unsigned int);
148 static int arm_sched_reorder (FILE *, int, rtx_insn **, int *, int);
149 static int optimal_immediate_sequence (enum rtx_code code,
150 unsigned HOST_WIDE_INT val,
151 struct four_ints *return_sequence);
152 static int optimal_immediate_sequence_1 (enum rtx_code code,
153 unsigned HOST_WIDE_INT val,
154 struct four_ints *return_sequence,
155 int i);
156 static int arm_get_strip_length (int);
157 static bool arm_function_ok_for_sibcall (tree, tree);
158 static machine_mode arm_promote_function_mode (const_tree,
159 machine_mode, int *,
160 const_tree, int);
161 static bool arm_return_in_memory (const_tree, const_tree);
162 static rtx arm_function_value (const_tree, const_tree, bool);
163 static rtx arm_libcall_value_1 (machine_mode);
164 static rtx arm_libcall_value (machine_mode, const_rtx);
165 static bool arm_function_value_regno_p (const unsigned int);
166 static void arm_internal_label (FILE *, const char *, unsigned long);
167 static void arm_output_mi_thunk (FILE *, tree, HOST_WIDE_INT, HOST_WIDE_INT,
168 tree);
169 static bool arm_have_conditional_execution (void);
170 static bool arm_cannot_force_const_mem (machine_mode, rtx);
171 static bool arm_legitimate_constant_p (machine_mode, rtx);
172 static bool arm_rtx_costs (rtx, machine_mode, int, int, int *, bool);
173 static int arm_address_cost (rtx, machine_mode, addr_space_t, bool);
174 static int arm_register_move_cost (machine_mode, reg_class_t, reg_class_t);
175 static int arm_memory_move_cost (machine_mode, reg_class_t, bool);
176 static void emit_constant_insn (rtx cond, rtx pattern);
177 static rtx_insn *emit_set_insn (rtx, rtx);
178 static rtx emit_multi_reg_push (unsigned long, unsigned long);
179 static int arm_arg_partial_bytes (cumulative_args_t, machine_mode,
180 tree, bool);
181 static rtx arm_function_arg (cumulative_args_t, machine_mode,
182 const_tree, bool);
183 static void arm_function_arg_advance (cumulative_args_t, machine_mode,
184 const_tree, bool);
185 static unsigned int arm_function_arg_boundary (machine_mode, const_tree);
186 static rtx aapcs_allocate_return_reg (machine_mode, const_tree,
187 const_tree);
188 static rtx aapcs_libcall_value (machine_mode);
189 static int aapcs_select_return_coproc (const_tree, const_tree);
191 #ifdef OBJECT_FORMAT_ELF
192 static void arm_elf_asm_constructor (rtx, int) ATTRIBUTE_UNUSED;
193 static void arm_elf_asm_destructor (rtx, int) ATTRIBUTE_UNUSED;
194 #endif
195 #ifndef ARM_PE
196 static void arm_encode_section_info (tree, rtx, int);
197 #endif
199 static void arm_file_end (void);
200 static void arm_file_start (void);
201 static void arm_insert_attributes (tree, tree *);
203 static void arm_setup_incoming_varargs (cumulative_args_t, machine_mode,
204 tree, int *, int);
205 static bool arm_pass_by_reference (cumulative_args_t,
206 machine_mode, const_tree, bool);
207 static bool arm_promote_prototypes (const_tree);
208 static bool arm_default_short_enums (void);
209 static bool arm_align_anon_bitfield (void);
210 static bool arm_return_in_msb (const_tree);
211 static bool arm_must_pass_in_stack (machine_mode, const_tree);
212 static bool arm_return_in_memory (const_tree, const_tree);
213 #if ARM_UNWIND_INFO
214 static void arm_unwind_emit (FILE *, rtx_insn *);
215 static bool arm_output_ttype (rtx);
216 static void arm_asm_emit_except_personality (rtx);
217 #endif
218 static void arm_asm_init_sections (void);
219 static rtx arm_dwarf_register_span (rtx);
221 static tree arm_cxx_guard_type (void);
222 static bool arm_cxx_guard_mask_bit (void);
223 static tree arm_get_cookie_size (tree);
224 static bool arm_cookie_has_size (void);
225 static bool arm_cxx_cdtor_returns_this (void);
226 static bool arm_cxx_key_method_may_be_inline (void);
227 static void arm_cxx_determine_class_data_visibility (tree);
228 static bool arm_cxx_class_data_always_comdat (void);
229 static bool arm_cxx_use_aeabi_atexit (void);
230 static void arm_init_libfuncs (void);
231 static tree arm_build_builtin_va_list (void);
232 static void arm_expand_builtin_va_start (tree, rtx);
233 static tree arm_gimplify_va_arg_expr (tree, tree, gimple_seq *, gimple_seq *);
234 static void arm_option_override (void);
235 static void arm_option_restore (struct gcc_options *,
236 struct cl_target_option *);
237 static void arm_override_options_after_change (void);
238 static void arm_option_print (FILE *, int, struct cl_target_option *);
239 static void arm_set_current_function (tree);
240 static bool arm_can_inline_p (tree, tree);
241 static void arm_relayout_function (tree);
242 static bool arm_valid_target_attribute_p (tree, tree, tree, int);
243 static unsigned HOST_WIDE_INT arm_shift_truncation_mask (machine_mode);
244 static bool arm_sched_can_speculate_insn (rtx_insn *);
245 static bool arm_macro_fusion_p (void);
246 static bool arm_cannot_copy_insn_p (rtx_insn *);
247 static int arm_issue_rate (void);
248 static int arm_first_cycle_multipass_dfa_lookahead (void);
249 static int arm_first_cycle_multipass_dfa_lookahead_guard (rtx_insn *, int);
250 static void arm_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
251 static bool arm_output_addr_const_extra (FILE *, rtx);
252 static bool arm_allocate_stack_slots_for_args (void);
253 static bool arm_warn_func_return (tree);
254 static tree arm_promoted_type (const_tree t);
255 static bool arm_scalar_mode_supported_p (machine_mode);
256 static bool arm_frame_pointer_required (void);
257 static bool arm_can_eliminate (const int, const int);
258 static void arm_asm_trampoline_template (FILE *);
259 static void arm_trampoline_init (rtx, tree, rtx);
260 static rtx arm_trampoline_adjust_address (rtx);
261 static rtx_insn *arm_pic_static_addr (rtx orig, rtx reg);
262 static bool cortex_a9_sched_adjust_cost (rtx_insn *, int, rtx_insn *, int *);
263 static bool xscale_sched_adjust_cost (rtx_insn *, int, rtx_insn *, int *);
264 static bool fa726te_sched_adjust_cost (rtx_insn *, int, rtx_insn *, int *);
265 static bool arm_array_mode_supported_p (machine_mode,
266 unsigned HOST_WIDE_INT);
267 static machine_mode arm_preferred_simd_mode (machine_mode);
268 static bool arm_class_likely_spilled_p (reg_class_t);
269 static HOST_WIDE_INT arm_vector_alignment (const_tree type);
270 static bool arm_vector_alignment_reachable (const_tree type, bool is_packed);
271 static bool arm_builtin_support_vector_misalignment (machine_mode mode,
272 const_tree type,
273 int misalignment,
274 bool is_packed);
275 static void arm_conditional_register_usage (void);
276 static enum flt_eval_method arm_excess_precision (enum excess_precision_type);
277 static reg_class_t arm_preferred_rename_class (reg_class_t rclass);
278 static unsigned int arm_autovectorize_vector_sizes (void);
279 static int arm_default_branch_cost (bool, bool);
280 static int arm_cortex_a5_branch_cost (bool, bool);
281 static int arm_cortex_m_branch_cost (bool, bool);
282 static int arm_cortex_m7_branch_cost (bool, bool);
284 static bool arm_vectorize_vec_perm_const_ok (machine_mode vmode,
285 const unsigned char *sel);
287 static bool aarch_macro_fusion_pair_p (rtx_insn*, rtx_insn*);
289 static int arm_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
290 tree vectype,
291 int misalign ATTRIBUTE_UNUSED);
292 static unsigned arm_add_stmt_cost (void *data, int count,
293 enum vect_cost_for_stmt kind,
294 struct _stmt_vec_info *stmt_info,
295 int misalign,
296 enum vect_cost_model_location where);
298 static void arm_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
299 bool op0_preserve_value);
300 static unsigned HOST_WIDE_INT arm_asan_shadow_offset (void);
302 static void arm_sched_fusion_priority (rtx_insn *, int, int *, int*);
303 static bool arm_can_output_mi_thunk (const_tree, HOST_WIDE_INT, HOST_WIDE_INT,
304 const_tree);
305 static section *arm_function_section (tree, enum node_frequency, bool, bool);
306 static bool arm_asm_elf_flags_numeric (unsigned int flags, unsigned int *num);
307 static unsigned int arm_elf_section_type_flags (tree decl, const char *name,
308 int reloc);
309 static void arm_expand_divmod_libfunc (rtx, machine_mode, rtx, rtx, rtx *, rtx *);
310 static machine_mode arm_floatn_mode (int, bool);
312 /* Table of machine attributes. */
313 static const struct attribute_spec arm_attribute_table[] =
314 {
315 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
316 affects_type_identity } */
317 /* Function calls made to this symbol must be done indirectly, because
318 it may lie outside of the 26 bit addressing range of a normal function
319 call. */
320 { "long_call", 0, 0, false, true, true, NULL, false },
321 /* Whereas these functions are always known to reside within the 26 bit
322 addressing range. */
323 { "short_call", 0, 0, false, true, true, NULL, false },
324 /* Specify the procedure call conventions for a function. */
325 { "pcs", 1, 1, false, true, true, arm_handle_pcs_attribute,
326 false },
327 /* Interrupt Service Routines have special prologue and epilogue requirements. */
328 { "isr", 0, 1, false, false, false, arm_handle_isr_attribute,
329 false },
330 { "interrupt", 0, 1, false, false, false, arm_handle_isr_attribute,
331 false },
332 { "naked", 0, 0, true, false, false, arm_handle_fndecl_attribute,
333 false },
334 #ifdef ARM_PE
335 /* ARM/PE has three new attributes:
336 interfacearm - ?
337 dllexport - for exporting a function/variable that will live in a dll
338 dllimport - for importing a function/variable from a dll
340 Microsoft allows multiple declspecs in one __declspec, separating
341 them with spaces. We do NOT support this. Instead, use __declspec
342 multiple times.
343 */
344 { "dllimport", 0, 0, true, false, false, NULL, false },
345 { "dllexport", 0, 0, true, false, false, NULL, false },
346 { "interfacearm", 0, 0, true, false, false, arm_handle_fndecl_attribute,
347 false },
348 #elif TARGET_DLLIMPORT_DECL_ATTRIBUTES
349 { "dllimport", 0, 0, false, false, false, handle_dll_attribute, false },
350 { "dllexport", 0, 0, false, false, false, handle_dll_attribute, false },
351 { "notshared", 0, 0, false, true, false, arm_handle_notshared_attribute,
352 false },
353 #endif
354 /* ARMv8-M Security Extensions support. */
355 { "cmse_nonsecure_entry", 0, 0, true, false, false,
356 arm_handle_cmse_nonsecure_entry, false },
357 { "cmse_nonsecure_call", 0, 0, true, false, false,
358 arm_handle_cmse_nonsecure_call, true },
359 { NULL, 0, 0, false, false, false, NULL, false }
360 };
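/* Illustrative note (not part of the original file): in user code the
   attributes above are applied roughly as follows; the declarations are
   hypothetical examples, shown only to make the table entries concrete.

     extern int far_away (int) __attribute__ ((long_call));
     void handler (void) __attribute__ ((interrupt ("IRQ")));
     void stub (void) __attribute__ ((naked));
     double vfp_abi (double) __attribute__ ((pcs ("aapcs-vfp")));
     void gateway (void) __attribute__ ((cmse_nonsecure_entry));

   For ARM/PE, the comment above means each attribute needs its own
   __declspec, e.g. __declspec (dllexport) __declspec (naked), rather
   than a single __declspec listing several names.  */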
362 /* Initialize the GCC target structure. */
363 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
364 #undef TARGET_MERGE_DECL_ATTRIBUTES
365 #define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
366 #endif
368 #undef TARGET_LEGITIMIZE_ADDRESS
369 #define TARGET_LEGITIMIZE_ADDRESS arm_legitimize_address
371 #undef TARGET_ATTRIBUTE_TABLE
372 #define TARGET_ATTRIBUTE_TABLE arm_attribute_table
374 #undef TARGET_INSERT_ATTRIBUTES
375 #define TARGET_INSERT_ATTRIBUTES arm_insert_attributes
377 #undef TARGET_ASM_FILE_START
378 #define TARGET_ASM_FILE_START arm_file_start
379 #undef TARGET_ASM_FILE_END
380 #define TARGET_ASM_FILE_END arm_file_end
382 #undef TARGET_ASM_ALIGNED_SI_OP
383 #define TARGET_ASM_ALIGNED_SI_OP NULL
384 #undef TARGET_ASM_INTEGER
385 #define TARGET_ASM_INTEGER arm_assemble_integer
387 #undef TARGET_PRINT_OPERAND
388 #define TARGET_PRINT_OPERAND arm_print_operand
389 #undef TARGET_PRINT_OPERAND_ADDRESS
390 #define TARGET_PRINT_OPERAND_ADDRESS arm_print_operand_address
391 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
392 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P arm_print_operand_punct_valid_p
394 #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
395 #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA arm_output_addr_const_extra
397 #undef TARGET_ASM_FUNCTION_PROLOGUE
398 #define TARGET_ASM_FUNCTION_PROLOGUE arm_output_function_prologue
400 #undef TARGET_ASM_FUNCTION_EPILOGUE
401 #define TARGET_ASM_FUNCTION_EPILOGUE arm_output_function_epilogue
403 #undef TARGET_CAN_INLINE_P
404 #define TARGET_CAN_INLINE_P arm_can_inline_p
406 #undef TARGET_RELAYOUT_FUNCTION
407 #define TARGET_RELAYOUT_FUNCTION arm_relayout_function
409 #undef TARGET_OPTION_OVERRIDE
410 #define TARGET_OPTION_OVERRIDE arm_option_override
412 #undef TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE
413 #define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE arm_override_options_after_change
415 #undef TARGET_OPTION_RESTORE
416 #define TARGET_OPTION_RESTORE arm_option_restore
418 #undef TARGET_OPTION_PRINT
419 #define TARGET_OPTION_PRINT arm_option_print
421 #undef TARGET_COMP_TYPE_ATTRIBUTES
422 #define TARGET_COMP_TYPE_ATTRIBUTES arm_comp_type_attributes
424 #undef TARGET_SCHED_CAN_SPECULATE_INSN
425 #define TARGET_SCHED_CAN_SPECULATE_INSN arm_sched_can_speculate_insn
427 #undef TARGET_SCHED_MACRO_FUSION_P
428 #define TARGET_SCHED_MACRO_FUSION_P arm_macro_fusion_p
430 #undef TARGET_SCHED_MACRO_FUSION_PAIR_P
431 #define TARGET_SCHED_MACRO_FUSION_PAIR_P aarch_macro_fusion_pair_p
433 #undef TARGET_SET_DEFAULT_TYPE_ATTRIBUTES
434 #define TARGET_SET_DEFAULT_TYPE_ATTRIBUTES arm_set_default_type_attributes
436 #undef TARGET_SCHED_ADJUST_COST
437 #define TARGET_SCHED_ADJUST_COST arm_adjust_cost
439 #undef TARGET_SET_CURRENT_FUNCTION
440 #define TARGET_SET_CURRENT_FUNCTION arm_set_current_function
442 #undef TARGET_OPTION_VALID_ATTRIBUTE_P
443 #define TARGET_OPTION_VALID_ATTRIBUTE_P arm_valid_target_attribute_p
445 #undef TARGET_SCHED_REORDER
446 #define TARGET_SCHED_REORDER arm_sched_reorder
448 #undef TARGET_REGISTER_MOVE_COST
449 #define TARGET_REGISTER_MOVE_COST arm_register_move_cost
451 #undef TARGET_MEMORY_MOVE_COST
452 #define TARGET_MEMORY_MOVE_COST arm_memory_move_cost
454 #undef TARGET_ENCODE_SECTION_INFO
455 #ifdef ARM_PE
456 #define TARGET_ENCODE_SECTION_INFO arm_pe_encode_section_info
457 #else
458 #define TARGET_ENCODE_SECTION_INFO arm_encode_section_info
459 #endif
461 #undef TARGET_STRIP_NAME_ENCODING
462 #define TARGET_STRIP_NAME_ENCODING arm_strip_name_encoding
464 #undef TARGET_ASM_INTERNAL_LABEL
465 #define TARGET_ASM_INTERNAL_LABEL arm_internal_label
467 #undef TARGET_FLOATN_MODE
468 #define TARGET_FLOATN_MODE arm_floatn_mode
470 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
471 #define TARGET_FUNCTION_OK_FOR_SIBCALL arm_function_ok_for_sibcall
473 #undef TARGET_FUNCTION_VALUE
474 #define TARGET_FUNCTION_VALUE arm_function_value
476 #undef TARGET_LIBCALL_VALUE
477 #define TARGET_LIBCALL_VALUE arm_libcall_value
479 #undef TARGET_FUNCTION_VALUE_REGNO_P
480 #define TARGET_FUNCTION_VALUE_REGNO_P arm_function_value_regno_p
482 #undef TARGET_ASM_OUTPUT_MI_THUNK
483 #define TARGET_ASM_OUTPUT_MI_THUNK arm_output_mi_thunk
484 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
485 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK arm_can_output_mi_thunk
487 #undef TARGET_RTX_COSTS
488 #define TARGET_RTX_COSTS arm_rtx_costs
489 #undef TARGET_ADDRESS_COST
490 #define TARGET_ADDRESS_COST arm_address_cost
492 #undef TARGET_SHIFT_TRUNCATION_MASK
493 #define TARGET_SHIFT_TRUNCATION_MASK arm_shift_truncation_mask
494 #undef TARGET_VECTOR_MODE_SUPPORTED_P
495 #define TARGET_VECTOR_MODE_SUPPORTED_P arm_vector_mode_supported_p
496 #undef TARGET_ARRAY_MODE_SUPPORTED_P
497 #define TARGET_ARRAY_MODE_SUPPORTED_P arm_array_mode_supported_p
498 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
499 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE arm_preferred_simd_mode
500 #undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
501 #define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
502 arm_autovectorize_vector_sizes
504 #undef TARGET_MACHINE_DEPENDENT_REORG
505 #define TARGET_MACHINE_DEPENDENT_REORG arm_reorg
507 #undef TARGET_INIT_BUILTINS
508 #define TARGET_INIT_BUILTINS arm_init_builtins
509 #undef TARGET_EXPAND_BUILTIN
510 #define TARGET_EXPAND_BUILTIN arm_expand_builtin
511 #undef TARGET_BUILTIN_DECL
512 #define TARGET_BUILTIN_DECL arm_builtin_decl
514 #undef TARGET_INIT_LIBFUNCS
515 #define TARGET_INIT_LIBFUNCS arm_init_libfuncs
517 #undef TARGET_PROMOTE_FUNCTION_MODE
518 #define TARGET_PROMOTE_FUNCTION_MODE arm_promote_function_mode
519 #undef TARGET_PROMOTE_PROTOTYPES
520 #define TARGET_PROMOTE_PROTOTYPES arm_promote_prototypes
521 #undef TARGET_PASS_BY_REFERENCE
522 #define TARGET_PASS_BY_REFERENCE arm_pass_by_reference
523 #undef TARGET_ARG_PARTIAL_BYTES
524 #define TARGET_ARG_PARTIAL_BYTES arm_arg_partial_bytes
525 #undef TARGET_FUNCTION_ARG
526 #define TARGET_FUNCTION_ARG arm_function_arg
527 #undef TARGET_FUNCTION_ARG_ADVANCE
528 #define TARGET_FUNCTION_ARG_ADVANCE arm_function_arg_advance
529 #undef TARGET_FUNCTION_ARG_BOUNDARY
530 #define TARGET_FUNCTION_ARG_BOUNDARY arm_function_arg_boundary
532 #undef TARGET_SETUP_INCOMING_VARARGS
533 #define TARGET_SETUP_INCOMING_VARARGS arm_setup_incoming_varargs
535 #undef TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS
536 #define TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS arm_allocate_stack_slots_for_args
538 #undef TARGET_ASM_TRAMPOLINE_TEMPLATE
539 #define TARGET_ASM_TRAMPOLINE_TEMPLATE arm_asm_trampoline_template
540 #undef TARGET_TRAMPOLINE_INIT
541 #define TARGET_TRAMPOLINE_INIT arm_trampoline_init
542 #undef TARGET_TRAMPOLINE_ADJUST_ADDRESS
543 #define TARGET_TRAMPOLINE_ADJUST_ADDRESS arm_trampoline_adjust_address
545 #undef TARGET_WARN_FUNC_RETURN
546 #define TARGET_WARN_FUNC_RETURN arm_warn_func_return
548 #undef TARGET_DEFAULT_SHORT_ENUMS
549 #define TARGET_DEFAULT_SHORT_ENUMS arm_default_short_enums
551 #undef TARGET_ALIGN_ANON_BITFIELD
552 #define TARGET_ALIGN_ANON_BITFIELD arm_align_anon_bitfield
554 #undef TARGET_NARROW_VOLATILE_BITFIELD
555 #define TARGET_NARROW_VOLATILE_BITFIELD hook_bool_void_false
557 #undef TARGET_CXX_GUARD_TYPE
558 #define TARGET_CXX_GUARD_TYPE arm_cxx_guard_type
560 #undef TARGET_CXX_GUARD_MASK_BIT
561 #define TARGET_CXX_GUARD_MASK_BIT arm_cxx_guard_mask_bit
563 #undef TARGET_CXX_GET_COOKIE_SIZE
564 #define TARGET_CXX_GET_COOKIE_SIZE arm_get_cookie_size
566 #undef TARGET_CXX_COOKIE_HAS_SIZE
567 #define TARGET_CXX_COOKIE_HAS_SIZE arm_cookie_has_size
569 #undef TARGET_CXX_CDTOR_RETURNS_THIS
570 #define TARGET_CXX_CDTOR_RETURNS_THIS arm_cxx_cdtor_returns_this
572 #undef TARGET_CXX_KEY_METHOD_MAY_BE_INLINE
573 #define TARGET_CXX_KEY_METHOD_MAY_BE_INLINE arm_cxx_key_method_may_be_inline
575 #undef TARGET_CXX_USE_AEABI_ATEXIT
576 #define TARGET_CXX_USE_AEABI_ATEXIT arm_cxx_use_aeabi_atexit
578 #undef TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY
579 #define TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY \
580 arm_cxx_determine_class_data_visibility
582 #undef TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT
583 #define TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT arm_cxx_class_data_always_comdat
585 #undef TARGET_RETURN_IN_MSB
586 #define TARGET_RETURN_IN_MSB arm_return_in_msb
588 #undef TARGET_RETURN_IN_MEMORY
589 #define TARGET_RETURN_IN_MEMORY arm_return_in_memory
591 #undef TARGET_MUST_PASS_IN_STACK
592 #define TARGET_MUST_PASS_IN_STACK arm_must_pass_in_stack
594 #if ARM_UNWIND_INFO
595 #undef TARGET_ASM_UNWIND_EMIT
596 #define TARGET_ASM_UNWIND_EMIT arm_unwind_emit
598 /* EABI unwinding tables use a different format for the typeinfo tables. */
599 #undef TARGET_ASM_TTYPE
600 #define TARGET_ASM_TTYPE arm_output_ttype
602 #undef TARGET_ARM_EABI_UNWINDER
603 #define TARGET_ARM_EABI_UNWINDER true
605 #undef TARGET_ASM_EMIT_EXCEPT_PERSONALITY
606 #define TARGET_ASM_EMIT_EXCEPT_PERSONALITY arm_asm_emit_except_personality
608 #endif /* ARM_UNWIND_INFO */
610 #undef TARGET_ASM_INIT_SECTIONS
611 #define TARGET_ASM_INIT_SECTIONS arm_asm_init_sections
613 #undef TARGET_DWARF_REGISTER_SPAN
614 #define TARGET_DWARF_REGISTER_SPAN arm_dwarf_register_span
616 #undef TARGET_CANNOT_COPY_INSN_P
617 #define TARGET_CANNOT_COPY_INSN_P arm_cannot_copy_insn_p
619 #ifdef HAVE_AS_TLS
620 #undef TARGET_HAVE_TLS
621 #define TARGET_HAVE_TLS true
622 #endif
624 #undef TARGET_HAVE_CONDITIONAL_EXECUTION
625 #define TARGET_HAVE_CONDITIONAL_EXECUTION arm_have_conditional_execution
627 #undef TARGET_LEGITIMATE_CONSTANT_P
628 #define TARGET_LEGITIMATE_CONSTANT_P arm_legitimate_constant_p
630 #undef TARGET_CANNOT_FORCE_CONST_MEM
631 #define TARGET_CANNOT_FORCE_CONST_MEM arm_cannot_force_const_mem
633 #undef TARGET_MAX_ANCHOR_OFFSET
634 #define TARGET_MAX_ANCHOR_OFFSET 4095
636 /* The minimum is set such that the total size of the block
637 for a particular anchor is -4088 + 1 + 4095 bytes, which is
638 divisible by eight, ensuring natural spacing of anchors. */
639 #undef TARGET_MIN_ANCHOR_OFFSET
640 #define TARGET_MIN_ANCHOR_OFFSET -4088
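/* Editorial note spelling out the arithmetic in the comment above: the
   anchor reaches offsets from -4088 through +4095, i.e. 4088 + 1 + 4095
   = 8184 bytes in total, and 8184 = 8 * 1023, so the block size is a
   multiple of eight as claimed.  */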
642 #undef TARGET_SCHED_ISSUE_RATE
643 #define TARGET_SCHED_ISSUE_RATE arm_issue_rate
645 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
646 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
647 arm_first_cycle_multipass_dfa_lookahead
649 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD
650 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD \
651 arm_first_cycle_multipass_dfa_lookahead_guard
653 #undef TARGET_MANGLE_TYPE
654 #define TARGET_MANGLE_TYPE arm_mangle_type
656 #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
657 #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV arm_atomic_assign_expand_fenv
659 #undef TARGET_BUILD_BUILTIN_VA_LIST
660 #define TARGET_BUILD_BUILTIN_VA_LIST arm_build_builtin_va_list
661 #undef TARGET_EXPAND_BUILTIN_VA_START
662 #define TARGET_EXPAND_BUILTIN_VA_START arm_expand_builtin_va_start
663 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
664 #define TARGET_GIMPLIFY_VA_ARG_EXPR arm_gimplify_va_arg_expr
666 #ifdef HAVE_AS_TLS
667 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
668 #define TARGET_ASM_OUTPUT_DWARF_DTPREL arm_output_dwarf_dtprel
669 #endif
671 #undef TARGET_LEGITIMATE_ADDRESS_P
672 #define TARGET_LEGITIMATE_ADDRESS_P arm_legitimate_address_p
674 #undef TARGET_PREFERRED_RELOAD_CLASS
675 #define TARGET_PREFERRED_RELOAD_CLASS arm_preferred_reload_class
677 #undef TARGET_PROMOTED_TYPE
678 #define TARGET_PROMOTED_TYPE arm_promoted_type
680 #undef TARGET_SCALAR_MODE_SUPPORTED_P
681 #define TARGET_SCALAR_MODE_SUPPORTED_P arm_scalar_mode_supported_p
683 #undef TARGET_FRAME_POINTER_REQUIRED
684 #define TARGET_FRAME_POINTER_REQUIRED arm_frame_pointer_required
686 #undef TARGET_CAN_ELIMINATE
687 #define TARGET_CAN_ELIMINATE arm_can_eliminate
689 #undef TARGET_CONDITIONAL_REGISTER_USAGE
690 #define TARGET_CONDITIONAL_REGISTER_USAGE arm_conditional_register_usage
692 #undef TARGET_CLASS_LIKELY_SPILLED_P
693 #define TARGET_CLASS_LIKELY_SPILLED_P arm_class_likely_spilled_p
695 #undef TARGET_VECTORIZE_BUILTINS
696 #define TARGET_VECTORIZE_BUILTINS
698 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
699 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
700 arm_builtin_vectorized_function
702 #undef TARGET_VECTOR_ALIGNMENT
703 #define TARGET_VECTOR_ALIGNMENT arm_vector_alignment
705 #undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
706 #define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE \
707 arm_vector_alignment_reachable
709 #undef TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT
710 #define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT \
711 arm_builtin_support_vector_misalignment
713 #undef TARGET_PREFERRED_RENAME_CLASS
714 #define TARGET_PREFERRED_RENAME_CLASS \
715 arm_preferred_rename_class
717 #undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
718 #define TARGET_VECTORIZE_VEC_PERM_CONST_OK \
719 arm_vectorize_vec_perm_const_ok
721 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
722 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
723 arm_builtin_vectorization_cost
724 #undef TARGET_VECTORIZE_ADD_STMT_COST
725 #define TARGET_VECTORIZE_ADD_STMT_COST arm_add_stmt_cost
727 #undef TARGET_CANONICALIZE_COMPARISON
728 #define TARGET_CANONICALIZE_COMPARISON \
729 arm_canonicalize_comparison
731 #undef TARGET_ASAN_SHADOW_OFFSET
732 #define TARGET_ASAN_SHADOW_OFFSET arm_asan_shadow_offset
734 #undef MAX_INSN_PER_IT_BLOCK
735 #define MAX_INSN_PER_IT_BLOCK (arm_restrict_it ? 1 : 4)
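/* Illustrative sketch (not part of the original file): a Thumb-2 IT
   block can cover up to four conditionally executed instructions, e.g.

       cmp   r0, #0
       itte  ne
       addne r1, r1, #1
       movne r2, #5
       subeq r3, r3, #1

   whereas under -mrestrict-it (arm_restrict_it) code generation is
   limited to a single instruction per IT block, which is what the
   definition above encodes.  */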
737 #undef TARGET_CAN_USE_DOLOOP_P
738 #define TARGET_CAN_USE_DOLOOP_P can_use_doloop_if_innermost
740 #undef TARGET_CONST_NOT_OK_FOR_DEBUG_P
741 #define TARGET_CONST_NOT_OK_FOR_DEBUG_P arm_const_not_ok_for_debug_p
743 #undef TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS
744 #define TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS true
746 #undef TARGET_SCHED_FUSION_PRIORITY
747 #define TARGET_SCHED_FUSION_PRIORITY arm_sched_fusion_priority
749 #undef TARGET_ASM_FUNCTION_SECTION
750 #define TARGET_ASM_FUNCTION_SECTION arm_function_section
752 #undef TARGET_ASM_ELF_FLAGS_NUMERIC
753 #define TARGET_ASM_ELF_FLAGS_NUMERIC arm_asm_elf_flags_numeric
755 #undef TARGET_SECTION_TYPE_FLAGS
756 #define TARGET_SECTION_TYPE_FLAGS arm_elf_section_type_flags
758 #undef TARGET_EXPAND_DIVMOD_LIBFUNC
759 #define TARGET_EXPAND_DIVMOD_LIBFUNC arm_expand_divmod_libfunc
761 #undef TARGET_C_EXCESS_PRECISION
762 #define TARGET_C_EXCESS_PRECISION arm_excess_precision
764 /* Although the architecture reserves bits 0 and 1, only the former is
765 used for ARM/Thumb ISA selection in v7 and earlier versions. */
766 #undef TARGET_CUSTOM_FUNCTION_DESCRIPTORS
767 #define TARGET_CUSTOM_FUNCTION_DESCRIPTORS 2
769 struct gcc_target targetm = TARGET_INITIALIZER;
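/* The #undef/#define pairs above override individual target hooks;
   target-def.h, included last among the headers, supplies the defaults,
   and TARGET_INITIALIZER expands to an aggregate initializer built from
   whichever definition of each TARGET_* macro is visible at this point.  */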
771 /* Obstack for minipool constant handling. */
772 static struct obstack minipool_obstack;
773 static char * minipool_startobj;
775 /* The maximum number of insns skipped which
776 will be conditionalised if possible. */
777 static int max_insns_skipped = 5;
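/* Illustrative sketch (not part of the original file) of what
   "conditionalised" means here: a short forward branch such as

       cmp   r0, #0
       beq   .L1
       add   r1, r1, #1
       mov   r2, #5
     .L1:

   can instead be emitted as conditionally executed instructions,

       cmp   r0, #0
       addne r1, r1, #1
       movne r2, #5

   provided the skipped sequence is no longer than max_insns_skipped.  */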
779 extern FILE * asm_out_file;
781 /* True if we are currently building a constant table. */
782 int making_const_table;
784 /* The processor for which instructions should be scheduled. */
785 enum processor_type arm_tune = TARGET_CPU_arm_none;
787 /* The current tuning set. */
788 const struct tune_params *current_tune;
790 /* Which floating point hardware to schedule for. */
791 int arm_fpu_attr;
793 /* Used for Thumb call_via trampolines. */
794 rtx thumb_call_via_label[14];
795 static int thumb_call_reg_needed;
797 /* The bits in this mask specify which instruction scheduling options should
798 be used. */
799 unsigned int tune_flags = 0;
801 /* The highest ARM architecture version supported by the
802 target. */
803 enum base_architecture arm_base_arch = BASE_ARCH_0;
805 /* Active target architecture and tuning. */
807 struct arm_build_target arm_active_target;
809 /* The following are used in the arm.md file as equivalents to bits
810 in the above two flag variables. */
812 /* Nonzero if this chip supports the ARM Architecture 3M extensions. */
813 int arm_arch3m = 0;
815 /* Nonzero if this chip supports the ARM Architecture 4 extensions. */
816 int arm_arch4 = 0;
818 /* Nonzero if this chip supports the ARM Architecture 4t extensions. */
819 int arm_arch4t = 0;
821 /* Nonzero if this chip supports the ARM Architecture 5 extensions. */
822 int arm_arch5 = 0;
824 /* Nonzero if this chip supports the ARM Architecture 5E extensions. */
825 int arm_arch5e = 0;
827 /* Nonzero if this chip supports the ARM Architecture 5TE extensions. */
828 int arm_arch5te = 0;
830 /* Nonzero if this chip supports the ARM Architecture 6 extensions. */
831 int arm_arch6 = 0;
833 /* Nonzero if this chip supports the ARM 6K extensions. */
834 int arm_arch6k = 0;
836 /* Nonzero if this chip supports the ARM 6KZ extensions. */
837 int arm_arch6kz = 0;
839 /* Nonzero if instructions present in ARMv6-M can be used. */
840 int arm_arch6m = 0;
842 /* Nonzero if this chip supports the ARM 7 extensions. */
843 int arm_arch7 = 0;
845 /* Nonzero if this chip supports the Large Physical Address Extension. */
846 int arm_arch_lpae = 0;
848 /* Nonzero if instructions not present in the 'M' profile can be used. */
849 int arm_arch_notm = 0;
851 /* Nonzero if instructions present in ARMv7E-M can be used. */
852 int arm_arch7em = 0;
854 /* Nonzero if instructions present in ARMv8 can be used. */
855 int arm_arch8 = 0;
857 /* Nonzero if this chip supports the ARMv8.1 extensions. */
858 int arm_arch8_1 = 0;
860 /* Nonzero if this chip supports the ARM Architecture 8.2 extensions. */
861 int arm_arch8_2 = 0;
863 /* Nonzero if this chip supports the FP16 instructions extension of ARM
864 Architecture 8.2. */
865 int arm_fp16_inst = 0;
867 /* Nonzero if this chip can benefit from load scheduling. */
868 int arm_ld_sched = 0;
870 /* Nonzero if this chip is a StrongARM. */
871 int arm_tune_strongarm = 0;
873 /* Nonzero if this chip supports Intel Wireless MMX technology. */
874 int arm_arch_iwmmxt = 0;
876 /* Nonzero if this chip supports Intel Wireless MMX2 technology. */
877 int arm_arch_iwmmxt2 = 0;
879 /* Nonzero if this chip is an XScale. */
880 int arm_arch_xscale = 0;
882 /* Nonzero if tuning for XScale */
883 int arm_tune_xscale = 0;
885 /* Nonzero if we want to tune for stores that access the write-buffer.
886 This typically means an ARM6 or ARM7 with MMU or MPU. */
887 int arm_tune_wbuf = 0;
889 /* Nonzero if tuning for Cortex-A9. */
890 int arm_tune_cortex_a9 = 0;
892 /* Nonzero if we should define __THUMB_INTERWORK__ in the
893 preprocessor.
894 XXX This is a bit of a hack, it's intended to help work around
895 problems in GLD which doesn't understand that armv5t code is
896 interworking clean. */
897 int arm_cpp_interwork = 0;
899 /* Nonzero if chip supports Thumb 1. */
900 int arm_arch_thumb1;
902 /* Nonzero if chip supports Thumb 2. */
903 int arm_arch_thumb2;
905 /* Nonzero if chip supports integer division instruction. */
906 int arm_arch_arm_hwdiv;
907 int arm_arch_thumb_hwdiv;
909 /* Nonzero if chip disallows volatile memory access in IT block. */
910 int arm_arch_no_volatile_ce;
912 /* Nonzero if we should use Neon to handle 64-bits operations rather
913 than core registers. */
914 int prefer_neon_for_64bits = 0;
916 /* Nonzero if we shouldn't use literal pools. */
917 bool arm_disable_literal_pool = false;
919 /* The register number to be used for the PIC offset register. */
920 unsigned arm_pic_register = INVALID_REGNUM;
922 enum arm_pcs arm_pcs_default;
924 /* For an explanation of these variables, see final_prescan_insn below. */
925 int arm_ccfsm_state;
926 /* arm_current_cc is also used for Thumb-2 cond_exec blocks. */
927 enum arm_cond_code arm_current_cc;
929 rtx arm_target_insn;
930 int arm_target_label;
931 /* The number of conditionally executed insns, including the current insn. */
932 int arm_condexec_count = 0;
933 /* A bitmask specifying the patterns for the IT block.
934 Zero means do not output an IT block before this insn. */
935 int arm_condexec_mask = 0;
936 /* The number of bits used in arm_condexec_mask. */
937 int arm_condexec_masklen = 0;
939 /* Nonzero if chip supports the ARMv8 CRC instructions. */
940 int arm_arch_crc = 0;
942 /* Nonzero if chip supports the ARMv8-M security extensions. */
943 int arm_arch_cmse = 0;
945 /* Nonzero if the core has a very small, high-latency, multiply unit. */
946 int arm_m_profile_small_mul = 0;
948 /* The condition codes of the ARM, and the inverse function. */
949 static const char * const arm_condition_codes[] =
950 {
951 "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
952 "hi", "ls", "ge", "lt", "gt", "le", "al", "nv"
953 };
955 /* The register numbers in sequence, for passing to arm_gen_load_multiple. */
956 int arm_regs_in_sequence[] =
957 {
958 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
959 };
961 #define ARM_LSL_NAME "lsl"
962 #define streq(string1, string2) (strcmp (string1, string2) == 0)
964 #define THUMB2_WORK_REGS (0xff & ~( (1 << THUMB_HARD_FRAME_POINTER_REGNUM) \
965 | (1 << SP_REGNUM) | (1 << PC_REGNUM) \
966 | (1 << PIC_OFFSET_TABLE_REGNUM)))
968 /* Initialization code. */
970 struct processors
971 {
972 const char *const name;
973 enum processor_type core;
974 unsigned int tune_flags;
975 const char *arch;
976 enum base_architecture base_arch;
977 enum isa_feature isa_bits[isa_num_bits];
978 const struct tune_params *const tune;
979 };
982 #define ARM_PREFETCH_NOT_BENEFICIAL { 0, -1, -1 }
983 #define ARM_PREFETCH_BENEFICIAL(num_slots,l1_size,l1_line_size) \
984 { \
985 num_slots, \
986 l1_size, \
987 l1_line_size \
988 }
990 /* arm generic vectorizer costs. */
991 static const
992 struct cpu_vec_costs arm_default_vec_cost = {
993 1, /* scalar_stmt_cost. */
994 1, /* scalar load_cost. */
995 1, /* scalar_store_cost. */
996 1, /* vec_stmt_cost. */
997 1, /* vec_to_scalar_cost. */
998 1, /* scalar_to_vec_cost. */
999 1, /* vec_align_load_cost. */
1000 1, /* vec_unalign_load_cost. */
1001 1, /* vec_unalign_store_cost. */
1002 1, /* vec_store_cost. */
1003 3, /* cond_taken_branch_cost. */
1004 1, /* cond_not_taken_branch_cost. */
1005 };
1007 /* Cost tables for AArch32 + AArch64 cores should go in aarch-cost-tables.h */
1008 #include "aarch-cost-tables.h"
1012 const struct cpu_cost_table cortexa9_extra_costs =
1014 /* ALU */
1016 0, /* arith. */
1017 0, /* logical. */
1018 0, /* shift. */
1019 COSTS_N_INSNS (1), /* shift_reg. */
1020 COSTS_N_INSNS (1), /* arith_shift. */
1021 COSTS_N_INSNS (2), /* arith_shift_reg. */
1022 0, /* log_shift. */
1023 COSTS_N_INSNS (1), /* log_shift_reg. */
1024 COSTS_N_INSNS (1), /* extend. */
1025 COSTS_N_INSNS (2), /* extend_arith. */
1026 COSTS_N_INSNS (1), /* bfi. */
1027 COSTS_N_INSNS (1), /* bfx. */
1028 0, /* clz. */
1029 0, /* rev. */
1030 0, /* non_exec. */
1031 true /* non_exec_costs_exec. */
1034 /* MULT SImode */
1036 COSTS_N_INSNS (3), /* simple. */
1037 COSTS_N_INSNS (3), /* flag_setting. */
1038 COSTS_N_INSNS (2), /* extend. */
1039 COSTS_N_INSNS (3), /* add. */
1040 COSTS_N_INSNS (2), /* extend_add. */
1041 COSTS_N_INSNS (30) /* idiv. No HW div on Cortex A9. */
1043 /* MULT DImode */
1045 0, /* simple (N/A). */
1046 0, /* flag_setting (N/A). */
1047 COSTS_N_INSNS (4), /* extend. */
1048 0, /* add (N/A). */
1049 COSTS_N_INSNS (4), /* extend_add. */
1050 0 /* idiv (N/A). */
1053 /* LD/ST */
1055 COSTS_N_INSNS (2), /* load. */
1056 COSTS_N_INSNS (2), /* load_sign_extend. */
1057 COSTS_N_INSNS (2), /* ldrd. */
1058 COSTS_N_INSNS (2), /* ldm_1st. */
1059 1, /* ldm_regs_per_insn_1st. */
1060 2, /* ldm_regs_per_insn_subsequent. */
1061 COSTS_N_INSNS (5), /* loadf. */
1062 COSTS_N_INSNS (5), /* loadd. */
1063 COSTS_N_INSNS (1), /* load_unaligned. */
1064 COSTS_N_INSNS (2), /* store. */
1065 COSTS_N_INSNS (2), /* strd. */
1066 COSTS_N_INSNS (2), /* stm_1st. */
1067 1, /* stm_regs_per_insn_1st. */
1068 2, /* stm_regs_per_insn_subsequent. */
1069 COSTS_N_INSNS (1), /* storef. */
1070 COSTS_N_INSNS (1), /* stored. */
1071 COSTS_N_INSNS (1), /* store_unaligned. */
1072 COSTS_N_INSNS (1), /* loadv. */
1073 COSTS_N_INSNS (1) /* storev. */
1076 /* FP SFmode */
1078 COSTS_N_INSNS (14), /* div. */
1079 COSTS_N_INSNS (4), /* mult. */
1080 COSTS_N_INSNS (7), /* mult_addsub. */
1081 COSTS_N_INSNS (30), /* fma. */
1082 COSTS_N_INSNS (3), /* addsub. */
1083 COSTS_N_INSNS (1), /* fpconst. */
1084 COSTS_N_INSNS (1), /* neg. */
1085 COSTS_N_INSNS (3), /* compare. */
1086 COSTS_N_INSNS (3), /* widen. */
1087 COSTS_N_INSNS (3), /* narrow. */
1088 COSTS_N_INSNS (3), /* toint. */
1089 COSTS_N_INSNS (3), /* fromint. */
1090 COSTS_N_INSNS (3) /* roundint. */
1092 /* FP DFmode */
1094 COSTS_N_INSNS (24), /* div. */
1095 COSTS_N_INSNS (5), /* mult. */
1096 COSTS_N_INSNS (8), /* mult_addsub. */
1097 COSTS_N_INSNS (30), /* fma. */
1098 COSTS_N_INSNS (3), /* addsub. */
1099 COSTS_N_INSNS (1), /* fpconst. */
1100 COSTS_N_INSNS (1), /* neg. */
1101 COSTS_N_INSNS (3), /* compare. */
1102 COSTS_N_INSNS (3), /* widen. */
1103 COSTS_N_INSNS (3), /* narrow. */
1104 COSTS_N_INSNS (3), /* toint. */
1105 COSTS_N_INSNS (3), /* fromint. */
1106 COSTS_N_INSNS (3) /* roundint. */
1109 /* Vector */
1111 COSTS_N_INSNS (1) /* alu. */
1115 const struct cpu_cost_table cortexa8_extra_costs =
1117 /* ALU */
1119 0, /* arith. */
1120 0, /* logical. */
1121 COSTS_N_INSNS (1), /* shift. */
1122 0, /* shift_reg. */
1123 COSTS_N_INSNS (1), /* arith_shift. */
1124 0, /* arith_shift_reg. */
1125 COSTS_N_INSNS (1), /* log_shift. */
1126 0, /* log_shift_reg. */
1127 0, /* extend. */
1128 0, /* extend_arith. */
1129 0, /* bfi. */
1130 0, /* bfx. */
1131 0, /* clz. */
1132 0, /* rev. */
1133 0, /* non_exec. */
1134 true /* non_exec_costs_exec. */
1137 /* MULT SImode */
1139 COSTS_N_INSNS (1), /* simple. */
1140 COSTS_N_INSNS (1), /* flag_setting. */
1141 COSTS_N_INSNS (1), /* extend. */
1142 COSTS_N_INSNS (1), /* add. */
1143 COSTS_N_INSNS (1), /* extend_add. */
1144 COSTS_N_INSNS (30) /* idiv. No HW div on Cortex A8. */
1146 /* MULT DImode */
1148 0, /* simple (N/A). */
1149 0, /* flag_setting (N/A). */
1150 COSTS_N_INSNS (2), /* extend. */
1151 0, /* add (N/A). */
1152 COSTS_N_INSNS (2), /* extend_add. */
1153 0 /* idiv (N/A). */
1156 /* LD/ST */
1158 COSTS_N_INSNS (1), /* load. */
1159 COSTS_N_INSNS (1), /* load_sign_extend. */
1160 COSTS_N_INSNS (1), /* ldrd. */
1161 COSTS_N_INSNS (1), /* ldm_1st. */
1162 1, /* ldm_regs_per_insn_1st. */
1163 2, /* ldm_regs_per_insn_subsequent. */
1164 COSTS_N_INSNS (1), /* loadf. */
1165 COSTS_N_INSNS (1), /* loadd. */
1166 COSTS_N_INSNS (1), /* load_unaligned. */
1167 COSTS_N_INSNS (1), /* store. */
1168 COSTS_N_INSNS (1), /* strd. */
1169 COSTS_N_INSNS (1), /* stm_1st. */
1170 1, /* stm_regs_per_insn_1st. */
1171 2, /* stm_regs_per_insn_subsequent. */
1172 COSTS_N_INSNS (1), /* storef. */
1173 COSTS_N_INSNS (1), /* stored. */
1174 COSTS_N_INSNS (1), /* store_unaligned. */
1175 COSTS_N_INSNS (1), /* loadv. */
1176 COSTS_N_INSNS (1) /* storev. */
1179 /* FP SFmode */
1181 COSTS_N_INSNS (36), /* div. */
1182 COSTS_N_INSNS (11), /* mult. */
1183 COSTS_N_INSNS (20), /* mult_addsub. */
1184 COSTS_N_INSNS (30), /* fma. */
1185 COSTS_N_INSNS (9), /* addsub. */
1186 COSTS_N_INSNS (3), /* fpconst. */
1187 COSTS_N_INSNS (3), /* neg. */
1188 COSTS_N_INSNS (6), /* compare. */
1189 COSTS_N_INSNS (4), /* widen. */
1190 COSTS_N_INSNS (4), /* narrow. */
1191 COSTS_N_INSNS (8), /* toint. */
1192 COSTS_N_INSNS (8), /* fromint. */
1193 COSTS_N_INSNS (8) /* roundint. */
1195 /* FP DFmode */
1197 COSTS_N_INSNS (64), /* div. */
1198 COSTS_N_INSNS (16), /* mult. */
1199 COSTS_N_INSNS (25), /* mult_addsub. */
1200 COSTS_N_INSNS (30), /* fma. */
1201 COSTS_N_INSNS (9), /* addsub. */
1202 COSTS_N_INSNS (3), /* fpconst. */
1203 COSTS_N_INSNS (3), /* neg. */
1204 COSTS_N_INSNS (6), /* compare. */
1205 COSTS_N_INSNS (6), /* widen. */
1206 COSTS_N_INSNS (6), /* narrow. */
1207 COSTS_N_INSNS (8), /* toint. */
1208 COSTS_N_INSNS (8), /* fromint. */
1209 COSTS_N_INSNS (8) /* roundint. */
1212 /* Vector */
1214 COSTS_N_INSNS (1) /* alu. */
1218 const struct cpu_cost_table cortexa5_extra_costs =
1220 /* ALU */
1222 0, /* arith. */
1223 0, /* logical. */
1224 COSTS_N_INSNS (1), /* shift. */
1225 COSTS_N_INSNS (1), /* shift_reg. */
1226 COSTS_N_INSNS (1), /* arith_shift. */
1227 COSTS_N_INSNS (1), /* arith_shift_reg. */
1228 COSTS_N_INSNS (1), /* log_shift. */
1229 COSTS_N_INSNS (1), /* log_shift_reg. */
1230 COSTS_N_INSNS (1), /* extend. */
1231 COSTS_N_INSNS (1), /* extend_arith. */
1232 COSTS_N_INSNS (1), /* bfi. */
1233 COSTS_N_INSNS (1), /* bfx. */
1234 COSTS_N_INSNS (1), /* clz. */
1235 COSTS_N_INSNS (1), /* rev. */
1236 0, /* non_exec. */
1237 true /* non_exec_costs_exec. */
1241 /* MULT SImode */
1243 0, /* simple. */
1244 COSTS_N_INSNS (1), /* flag_setting. */
1245 COSTS_N_INSNS (1), /* extend. */
1246 COSTS_N_INSNS (1), /* add. */
1247 COSTS_N_INSNS (1), /* extend_add. */
1248 COSTS_N_INSNS (7) /* idiv. */
1250 /* MULT DImode */
1252 0, /* simple (N/A). */
1253 0, /* flag_setting (N/A). */
1254 COSTS_N_INSNS (1), /* extend. */
1255 0, /* add. */
1256 COSTS_N_INSNS (2), /* extend_add. */
1257 0 /* idiv (N/A). */
1260 /* LD/ST */
1262 COSTS_N_INSNS (1), /* load. */
1263 COSTS_N_INSNS (1), /* load_sign_extend. */
1264 COSTS_N_INSNS (6), /* ldrd. */
1265 COSTS_N_INSNS (1), /* ldm_1st. */
1266 1, /* ldm_regs_per_insn_1st. */
1267 2, /* ldm_regs_per_insn_subsequent. */
1268 COSTS_N_INSNS (2), /* loadf. */
1269 COSTS_N_INSNS (4), /* loadd. */
1270 COSTS_N_INSNS (1), /* load_unaligned. */
1271 COSTS_N_INSNS (1), /* store. */
1272 COSTS_N_INSNS (3), /* strd. */
1273 COSTS_N_INSNS (1), /* stm_1st. */
1274 1, /* stm_regs_per_insn_1st. */
1275 2, /* stm_regs_per_insn_subsequent. */
1276 COSTS_N_INSNS (2), /* storef. */
1277 COSTS_N_INSNS (2), /* stored. */
1278 COSTS_N_INSNS (1), /* store_unaligned. */
1279 COSTS_N_INSNS (1), /* loadv. */
1280 COSTS_N_INSNS (1) /* storev. */
1283 /* FP SFmode */
1285 COSTS_N_INSNS (15), /* div. */
1286 COSTS_N_INSNS (3), /* mult. */
1287 COSTS_N_INSNS (7), /* mult_addsub. */
1288 COSTS_N_INSNS (7), /* fma. */
1289 COSTS_N_INSNS (3), /* addsub. */
1290 COSTS_N_INSNS (3), /* fpconst. */
1291 COSTS_N_INSNS (3), /* neg. */
1292 COSTS_N_INSNS (3), /* compare. */
1293 COSTS_N_INSNS (3), /* widen. */
1294 COSTS_N_INSNS (3), /* narrow. */
1295 COSTS_N_INSNS (3), /* toint. */
1296 COSTS_N_INSNS (3), /* fromint. */
1297 COSTS_N_INSNS (3) /* roundint. */
1299 /* FP DFmode */
1301 COSTS_N_INSNS (30), /* div. */
1302 COSTS_N_INSNS (6), /* mult. */
1303 COSTS_N_INSNS (10), /* mult_addsub. */
1304 COSTS_N_INSNS (7), /* fma. */
1305 COSTS_N_INSNS (3), /* addsub. */
1306 COSTS_N_INSNS (3), /* fpconst. */
1307 COSTS_N_INSNS (3), /* neg. */
1308 COSTS_N_INSNS (3), /* compare. */
1309 COSTS_N_INSNS (3), /* widen. */
1310 COSTS_N_INSNS (3), /* narrow. */
1311 COSTS_N_INSNS (3), /* toint. */
1312 COSTS_N_INSNS (3), /* fromint. */
1313 COSTS_N_INSNS (3) /* roundint. */
1316 /* Vector */
1318 COSTS_N_INSNS (1) /* alu. */
1323 const struct cpu_cost_table cortexa7_extra_costs =
1325 /* ALU */
1327 0, /* arith. */
1328 0, /* logical. */
1329 COSTS_N_INSNS (1), /* shift. */
1330 COSTS_N_INSNS (1), /* shift_reg. */
1331 COSTS_N_INSNS (1), /* arith_shift. */
1332 COSTS_N_INSNS (1), /* arith_shift_reg. */
1333 COSTS_N_INSNS (1), /* log_shift. */
1334 COSTS_N_INSNS (1), /* log_shift_reg. */
1335 COSTS_N_INSNS (1), /* extend. */
1336 COSTS_N_INSNS (1), /* extend_arith. */
1337 COSTS_N_INSNS (1), /* bfi. */
1338 COSTS_N_INSNS (1), /* bfx. */
1339 COSTS_N_INSNS (1), /* clz. */
1340 COSTS_N_INSNS (1), /* rev. */
1341 0, /* non_exec. */
1342 true /* non_exec_costs_exec. */
1346 /* MULT SImode */
1348 0, /* simple. */
1349 COSTS_N_INSNS (1), /* flag_setting. */
1350 COSTS_N_INSNS (1), /* extend. */
1351 COSTS_N_INSNS (1), /* add. */
1352 COSTS_N_INSNS (1), /* extend_add. */
1353 COSTS_N_INSNS (7) /* idiv. */
1355 /* MULT DImode */
1357 0, /* simple (N/A). */
1358 0, /* flag_setting (N/A). */
1359 COSTS_N_INSNS (1), /* extend. */
1360 0, /* add. */
1361 COSTS_N_INSNS (2), /* extend_add. */
1362 0 /* idiv (N/A). */
1365 /* LD/ST */
1367 COSTS_N_INSNS (1), /* load. */
1368 COSTS_N_INSNS (1), /* load_sign_extend. */
1369 COSTS_N_INSNS (3), /* ldrd. */
1370 COSTS_N_INSNS (1), /* ldm_1st. */
1371 1, /* ldm_regs_per_insn_1st. */
1372 2, /* ldm_regs_per_insn_subsequent. */
1373 COSTS_N_INSNS (2), /* loadf. */
1374 COSTS_N_INSNS (2), /* loadd. */
1375 COSTS_N_INSNS (1), /* load_unaligned. */
1376 COSTS_N_INSNS (1), /* store. */
1377 COSTS_N_INSNS (3), /* strd. */
1378 COSTS_N_INSNS (1), /* stm_1st. */
1379 1, /* stm_regs_per_insn_1st. */
1380 2, /* stm_regs_per_insn_subsequent. */
1381 COSTS_N_INSNS (2), /* storef. */
1382 COSTS_N_INSNS (2), /* stored. */
1383 COSTS_N_INSNS (1), /* store_unaligned. */
1384 COSTS_N_INSNS (1), /* loadv. */
1385 COSTS_N_INSNS (1) /* storev. */
1388 /* FP SFmode */
1390 COSTS_N_INSNS (15), /* div. */
1391 COSTS_N_INSNS (3), /* mult. */
1392 COSTS_N_INSNS (7), /* mult_addsub. */
1393 COSTS_N_INSNS (7), /* fma. */
1394 COSTS_N_INSNS (3), /* addsub. */
1395 COSTS_N_INSNS (3), /* fpconst. */
1396 COSTS_N_INSNS (3), /* neg. */
1397 COSTS_N_INSNS (3), /* compare. */
1398 COSTS_N_INSNS (3), /* widen. */
1399 COSTS_N_INSNS (3), /* narrow. */
1400 COSTS_N_INSNS (3), /* toint. */
1401 COSTS_N_INSNS (3), /* fromint. */
1402 COSTS_N_INSNS (3) /* roundint. */
1404 /* FP DFmode */
1406 COSTS_N_INSNS (30), /* div. */
1407 COSTS_N_INSNS (6), /* mult. */
1408 COSTS_N_INSNS (10), /* mult_addsub. */
1409 COSTS_N_INSNS (7), /* fma. */
1410 COSTS_N_INSNS (3), /* addsub. */
1411 COSTS_N_INSNS (3), /* fpconst. */
1412 COSTS_N_INSNS (3), /* neg. */
1413 COSTS_N_INSNS (3), /* compare. */
1414 COSTS_N_INSNS (3), /* widen. */
1415 COSTS_N_INSNS (3), /* narrow. */
1416 COSTS_N_INSNS (3), /* toint. */
1417 COSTS_N_INSNS (3), /* fromint. */
1418 COSTS_N_INSNS (3) /* roundint. */
1421 /* Vector */
1423 COSTS_N_INSNS (1) /* alu. */
1427 const struct cpu_cost_table cortexa12_extra_costs =
1429 /* ALU */
1431 0, /* arith. */
1432 0, /* logical. */
1433 0, /* shift. */
1434 COSTS_N_INSNS (1), /* shift_reg. */
1435 COSTS_N_INSNS (1), /* arith_shift. */
1436 COSTS_N_INSNS (1), /* arith_shift_reg. */
1437 COSTS_N_INSNS (1), /* log_shift. */
1438 COSTS_N_INSNS (1), /* log_shift_reg. */
1439 0, /* extend. */
1440 COSTS_N_INSNS (1), /* extend_arith. */
1441 0, /* bfi. */
1442 COSTS_N_INSNS (1), /* bfx. */
1443 COSTS_N_INSNS (1), /* clz. */
1444 COSTS_N_INSNS (1), /* rev. */
1445 0, /* non_exec. */
1446 true /* non_exec_costs_exec. */
1448 /* MULT SImode */
1451 COSTS_N_INSNS (2), /* simple. */
1452 COSTS_N_INSNS (3), /* flag_setting. */
1453 COSTS_N_INSNS (2), /* extend. */
1454 COSTS_N_INSNS (3), /* add. */
1455 COSTS_N_INSNS (2), /* extend_add. */
1456 COSTS_N_INSNS (18) /* idiv. */
1458 /* MULT DImode */
1460 0, /* simple (N/A). */
1461 0, /* flag_setting (N/A). */
1462 COSTS_N_INSNS (3), /* extend. */
1463 0, /* add (N/A). */
1464 COSTS_N_INSNS (3), /* extend_add. */
1465 0 /* idiv (N/A). */
1468 /* LD/ST */
1470 COSTS_N_INSNS (3), /* load. */
1471 COSTS_N_INSNS (3), /* load_sign_extend. */
1472 COSTS_N_INSNS (3), /* ldrd. */
1473 COSTS_N_INSNS (3), /* ldm_1st. */
1474 1, /* ldm_regs_per_insn_1st. */
1475 2, /* ldm_regs_per_insn_subsequent. */
1476 COSTS_N_INSNS (3), /* loadf. */
1477 COSTS_N_INSNS (3), /* loadd. */
1478 0, /* load_unaligned. */
1479 0, /* store. */
1480 0, /* strd. */
1481 0, /* stm_1st. */
1482 1, /* stm_regs_per_insn_1st. */
1483 2, /* stm_regs_per_insn_subsequent. */
1484 COSTS_N_INSNS (2), /* storef. */
1485 COSTS_N_INSNS (2), /* stored. */
1486 0, /* store_unaligned. */
1487 COSTS_N_INSNS (1), /* loadv. */
1488 COSTS_N_INSNS (1) /* storev. */
1491 /* FP SFmode */
1493 COSTS_N_INSNS (17), /* div. */
1494 COSTS_N_INSNS (4), /* mult. */
1495 COSTS_N_INSNS (8), /* mult_addsub. */
1496 COSTS_N_INSNS (8), /* fma. */
1497 COSTS_N_INSNS (4), /* addsub. */
1498 COSTS_N_INSNS (2), /* fpconst. */
1499 COSTS_N_INSNS (2), /* neg. */
1500 COSTS_N_INSNS (2), /* compare. */
1501 COSTS_N_INSNS (4), /* widen. */
1502 COSTS_N_INSNS (4), /* narrow. */
1503 COSTS_N_INSNS (4), /* toint. */
1504 COSTS_N_INSNS (4), /* fromint. */
1505 COSTS_N_INSNS (4) /* roundint. */
1507 /* FP DFmode */
1509 COSTS_N_INSNS (31), /* div. */
1510 COSTS_N_INSNS (4), /* mult. */
1511 COSTS_N_INSNS (8), /* mult_addsub. */
1512 COSTS_N_INSNS (8), /* fma. */
1513 COSTS_N_INSNS (4), /* addsub. */
1514 COSTS_N_INSNS (2), /* fpconst. */
1515 COSTS_N_INSNS (2), /* neg. */
1516 COSTS_N_INSNS (2), /* compare. */
1517 COSTS_N_INSNS (4), /* widen. */
1518 COSTS_N_INSNS (4), /* narrow. */
1519 COSTS_N_INSNS (4), /* toint. */
1520 COSTS_N_INSNS (4), /* fromint. */
1521 COSTS_N_INSNS (4) /* roundint. */
1524 /* Vector */
1526 COSTS_N_INSNS (1) /* alu. */
1530 const struct cpu_cost_table cortexa15_extra_costs =
1532 /* ALU */
1534 0, /* arith. */
1535 0, /* logical. */
1536 0, /* shift. */
1537 0, /* shift_reg. */
1538 COSTS_N_INSNS (1), /* arith_shift. */
1539 COSTS_N_INSNS (1), /* arith_shift_reg. */
1540 COSTS_N_INSNS (1), /* log_shift. */
1541 COSTS_N_INSNS (1), /* log_shift_reg. */
1542 0, /* extend. */
1543 COSTS_N_INSNS (1), /* extend_arith. */
1544 COSTS_N_INSNS (1), /* bfi. */
1545 0, /* bfx. */
1546 0, /* clz. */
1547 0, /* rev. */
1548 0, /* non_exec. */
1549 true /* non_exec_costs_exec. */
1551 /* MULT SImode */
1554 COSTS_N_INSNS (2), /* simple. */
1555 COSTS_N_INSNS (3), /* flag_setting. */
1556 COSTS_N_INSNS (2), /* extend. */
1557 COSTS_N_INSNS (2), /* add. */
1558 COSTS_N_INSNS (2), /* extend_add. */
1559 COSTS_N_INSNS (18) /* idiv. */
1561 /* MULT DImode */
1563 0, /* simple (N/A). */
1564 0, /* flag_setting (N/A). */
1565 COSTS_N_INSNS (3), /* extend. */
1566 0, /* add (N/A). */
1567 COSTS_N_INSNS (3), /* extend_add. */
1568 0 /* idiv (N/A). */
1571 /* LD/ST */
1573 COSTS_N_INSNS (3), /* load. */
1574 COSTS_N_INSNS (3), /* load_sign_extend. */
1575 COSTS_N_INSNS (3), /* ldrd. */
1576 COSTS_N_INSNS (4), /* ldm_1st. */
1577 1, /* ldm_regs_per_insn_1st. */
1578 2, /* ldm_regs_per_insn_subsequent. */
1579 COSTS_N_INSNS (4), /* loadf. */
1580 COSTS_N_INSNS (4), /* loadd. */
1581 0, /* load_unaligned. */
1582 0, /* store. */
1583 0, /* strd. */
1584 COSTS_N_INSNS (1), /* stm_1st. */
1585 1, /* stm_regs_per_insn_1st. */
1586 2, /* stm_regs_per_insn_subsequent. */
1587 0, /* storef. */
1588 0, /* stored. */
1589 0, /* store_unaligned. */
1590 COSTS_N_INSNS (1), /* loadv. */
1591 COSTS_N_INSNS (1) /* storev. */
1594 /* FP SFmode */
1596 COSTS_N_INSNS (17), /* div. */
1597 COSTS_N_INSNS (4), /* mult. */
1598 COSTS_N_INSNS (8), /* mult_addsub. */
1599 COSTS_N_INSNS (8), /* fma. */
1600 COSTS_N_INSNS (4), /* addsub. */
1601 COSTS_N_INSNS (2), /* fpconst. */
1602 COSTS_N_INSNS (2), /* neg. */
1603 COSTS_N_INSNS (5), /* compare. */
1604 COSTS_N_INSNS (4), /* widen. */
1605 COSTS_N_INSNS (4), /* narrow. */
1606 COSTS_N_INSNS (4), /* toint. */
1607 COSTS_N_INSNS (4), /* fromint. */
1608 COSTS_N_INSNS (4) /* roundint. */
1610 /* FP DFmode */
1612 COSTS_N_INSNS (31), /* div. */
1613 COSTS_N_INSNS (4), /* mult. */
1614 COSTS_N_INSNS (8), /* mult_addsub. */
1615 COSTS_N_INSNS (8), /* fma. */
1616 COSTS_N_INSNS (4), /* addsub. */
1617 COSTS_N_INSNS (2), /* fpconst. */
1618 COSTS_N_INSNS (2), /* neg. */
1619 COSTS_N_INSNS (2), /* compare. */
1620 COSTS_N_INSNS (4), /* widen. */
1621 COSTS_N_INSNS (4), /* narrow. */
1622 COSTS_N_INSNS (4), /* toint. */
1623 COSTS_N_INSNS (4), /* fromint. */
1624 COSTS_N_INSNS (4) /* roundint. */
1627 /* Vector */
1629 COSTS_N_INSNS (1) /* alu. */
1633 const struct cpu_cost_table v7m_extra_costs =
1635 /* ALU */
1637 0, /* arith. */
1638 0, /* logical. */
1639 0, /* shift. */
1640 0, /* shift_reg. */
1641 0, /* arith_shift. */
1642 COSTS_N_INSNS (1), /* arith_shift_reg. */
1643 0, /* log_shift. */
1644 COSTS_N_INSNS (1), /* log_shift_reg. */
1645 0, /* extend. */
1646 COSTS_N_INSNS (1), /* extend_arith. */
1647 0, /* bfi. */
1648 0, /* bfx. */
1649 0, /* clz. */
1650 0, /* rev. */
1651 COSTS_N_INSNS (1), /* non_exec. */
1652 false /* non_exec_costs_exec. */
1655 /* MULT SImode */
1657 COSTS_N_INSNS (1), /* simple. */
1658 COSTS_N_INSNS (1), /* flag_setting. */
1659 COSTS_N_INSNS (2), /* extend. */
1660 COSTS_N_INSNS (1), /* add. */
1661 COSTS_N_INSNS (3), /* extend_add. */
1662 COSTS_N_INSNS (8) /* idiv. */
1664 /* MULT DImode */
1666 0, /* simple (N/A). */
1667 0, /* flag_setting (N/A). */
1668 COSTS_N_INSNS (2), /* extend. */
1669 0, /* add (N/A). */
1670 COSTS_N_INSNS (3), /* extend_add. */
1671 0 /* idiv (N/A). */
1674 /* LD/ST */
1676 COSTS_N_INSNS (2), /* load. */
1677 0, /* load_sign_extend. */
1678 COSTS_N_INSNS (3), /* ldrd. */
1679 COSTS_N_INSNS (2), /* ldm_1st. */
1680 1, /* ldm_regs_per_insn_1st. */
1681 1, /* ldm_regs_per_insn_subsequent. */
1682 COSTS_N_INSNS (2), /* loadf. */
1683 COSTS_N_INSNS (3), /* loadd. */
1684 COSTS_N_INSNS (1), /* load_unaligned. */
1685 COSTS_N_INSNS (2), /* store. */
1686 COSTS_N_INSNS (3), /* strd. */
1687 COSTS_N_INSNS (2), /* stm_1st. */
1688 1, /* stm_regs_per_insn_1st. */
1689 1, /* stm_regs_per_insn_subsequent. */
1690 COSTS_N_INSNS (2), /* storef. */
1691 COSTS_N_INSNS (3), /* stored. */
1692 COSTS_N_INSNS (1), /* store_unaligned. */
1693 COSTS_N_INSNS (1), /* loadv. */
1694 COSTS_N_INSNS (1) /* storev. */
1697 /* FP SFmode */
1699 COSTS_N_INSNS (7), /* div. */
1700 COSTS_N_INSNS (2), /* mult. */
1701 COSTS_N_INSNS (5), /* mult_addsub. */
1702 COSTS_N_INSNS (3), /* fma. */
1703 COSTS_N_INSNS (1), /* addsub. */
1704 0, /* fpconst. */
1705 0, /* neg. */
1706 0, /* compare. */
1707 0, /* widen. */
1708 0, /* narrow. */
1709 0, /* toint. */
1710 0, /* fromint. */
1711 0 /* roundint. */
1713 /* FP DFmode */
1715 COSTS_N_INSNS (15), /* div. */
1716 COSTS_N_INSNS (5), /* mult. */
1717 COSTS_N_INSNS (7), /* mult_addsub. */
1718 COSTS_N_INSNS (7), /* fma. */
1719 COSTS_N_INSNS (3), /* addsub. */
1720 0, /* fpconst. */
1721 0, /* neg. */
1722 0, /* compare. */
1723 0, /* widen. */
1724 0, /* narrow. */
1725 0, /* toint. */
1726 0, /* fromint. */
1727 0 /* roundint. */
1730 /* Vector */
1732 COSTS_N_INSNS (1) /* alu. */
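/* Editorial note (not in the original source): the tables above give *extra*
   costs on top of a baseline instruction cost.  COSTS_N_INSNS (N) is the
   rtl.h macro ((N) * 4), so for example COSTS_N_INSNS (2) == 8, while an
   entry of 0 means the operation is assumed to cost no more than a simple
   single instruction.  */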
1736 const struct tune_params arm_slowmul_tune =
1738 &generic_extra_costs, /* Insn extra costs. */
1739 NULL, /* Sched adj cost. */
1740 arm_default_branch_cost,
1741 &arm_default_vec_cost,
1742 3, /* Constant limit. */
1743 5, /* Max cond insns. */
1744 8, /* Memset max inline. */
1745 1, /* Issue rate. */
1746 ARM_PREFETCH_NOT_BENEFICIAL,
1747 tune_params::PREF_CONST_POOL_TRUE,
1748 tune_params::PREF_LDRD_FALSE,
1749 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1750 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1751 tune_params::DISPARAGE_FLAGS_NEITHER,
1752 tune_params::PREF_NEON_64_FALSE,
1753 tune_params::PREF_NEON_STRINGOPS_FALSE,
1754 tune_params::FUSE_NOTHING,
1755 tune_params::SCHED_AUTOPREF_OFF
1758 const struct tune_params arm_fastmul_tune =
1760 &generic_extra_costs, /* Insn extra costs. */
1761 NULL, /* Sched adj cost. */
1762 arm_default_branch_cost,
1763 &arm_default_vec_cost,
1764 1, /* Constant limit. */
1765 5, /* Max cond insns. */
1766 8, /* Memset max inline. */
1767 1, /* Issue rate. */
1768 ARM_PREFETCH_NOT_BENEFICIAL,
1769 tune_params::PREF_CONST_POOL_TRUE,
1770 tune_params::PREF_LDRD_FALSE,
1771 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1772 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1773 tune_params::DISPARAGE_FLAGS_NEITHER,
1774 tune_params::PREF_NEON_64_FALSE,
1775 tune_params::PREF_NEON_STRINGOPS_FALSE,
1776 tune_params::FUSE_NOTHING,
1777 tune_params::SCHED_AUTOPREF_OFF
1780 /* StrongARM has early execution of branches, so a sequence that is worth
1781 skipping is shorter. Set max_insns_skipped to a lower value. */
1783 const struct tune_params arm_strongarm_tune =
1785 &generic_extra_costs, /* Insn extra costs. */
1786 NULL, /* Sched adj cost. */
1787 arm_default_branch_cost,
1788 &arm_default_vec_cost,
1789 1, /* Constant limit. */
1790 3, /* Max cond insns. */
1791 8, /* Memset max inline. */
1792 1, /* Issue rate. */
1793 ARM_PREFETCH_NOT_BENEFICIAL,
1794 tune_params::PREF_CONST_POOL_TRUE,
1795 tune_params::PREF_LDRD_FALSE,
1796 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1797 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1798 tune_params::DISPARAGE_FLAGS_NEITHER,
1799 tune_params::PREF_NEON_64_FALSE,
1800 tune_params::PREF_NEON_STRINGOPS_FALSE,
1801 tune_params::FUSE_NOTHING,
1802 tune_params::SCHED_AUTOPREF_OFF
1805 const struct tune_params arm_xscale_tune =
1807 &generic_extra_costs, /* Insn extra costs. */
1808 xscale_sched_adjust_cost,
1809 arm_default_branch_cost,
1810 &arm_default_vec_cost,
1811 2, /* Constant limit. */
1812 3, /* Max cond insns. */
1813 8, /* Memset max inline. */
1814 1, /* Issue rate. */
1815 ARM_PREFETCH_NOT_BENEFICIAL,
1816 tune_params::PREF_CONST_POOL_TRUE,
1817 tune_params::PREF_LDRD_FALSE,
1818 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1819 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1820 tune_params::DISPARAGE_FLAGS_NEITHER,
1821 tune_params::PREF_NEON_64_FALSE,
1822 tune_params::PREF_NEON_STRINGOPS_FALSE,
1823 tune_params::FUSE_NOTHING,
1824 tune_params::SCHED_AUTOPREF_OFF
1827 const struct tune_params arm_9e_tune =
1829 &generic_extra_costs, /* Insn extra costs. */
1830 NULL, /* Sched adj cost. */
1831 arm_default_branch_cost,
1832 &arm_default_vec_cost,
1833 1, /* Constant limit. */
1834 5, /* Max cond insns. */
1835 8, /* Memset max inline. */
1836 1, /* Issue rate. */
1837 ARM_PREFETCH_NOT_BENEFICIAL,
1838 tune_params::PREF_CONST_POOL_TRUE,
1839 tune_params::PREF_LDRD_FALSE,
1840 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1841 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1842 tune_params::DISPARAGE_FLAGS_NEITHER,
1843 tune_params::PREF_NEON_64_FALSE,
1844 tune_params::PREF_NEON_STRINGOPS_FALSE,
1845 tune_params::FUSE_NOTHING,
1846 tune_params::SCHED_AUTOPREF_OFF
1849 const struct tune_params arm_marvell_pj4_tune =
1851 &generic_extra_costs, /* Insn extra costs. */
1852 NULL, /* Sched adj cost. */
1853 arm_default_branch_cost,
1854 &arm_default_vec_cost,
1855 1, /* Constant limit. */
1856 5, /* Max cond insns. */
1857 8, /* Memset max inline. */
1858 2, /* Issue rate. */
1859 ARM_PREFETCH_NOT_BENEFICIAL,
1860 tune_params::PREF_CONST_POOL_TRUE,
1861 tune_params::PREF_LDRD_FALSE,
1862 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1863 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1864 tune_params::DISPARAGE_FLAGS_NEITHER,
1865 tune_params::PREF_NEON_64_FALSE,
1866 tune_params::PREF_NEON_STRINGOPS_FALSE,
1867 tune_params::FUSE_NOTHING,
1868 tune_params::SCHED_AUTOPREF_OFF
1871 const struct tune_params arm_v6t2_tune =
1873 &generic_extra_costs, /* Insn extra costs. */
1874 NULL, /* Sched adj cost. */
1875 arm_default_branch_cost,
1876 &arm_default_vec_cost,
1877 1, /* Constant limit. */
1878 5, /* Max cond insns. */
1879 8, /* Memset max inline. */
1880 1, /* Issue rate. */
1881 ARM_PREFETCH_NOT_BENEFICIAL,
1882 tune_params::PREF_CONST_POOL_FALSE,
1883 tune_params::PREF_LDRD_FALSE,
1884 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1885 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1886 tune_params::DISPARAGE_FLAGS_NEITHER,
1887 tune_params::PREF_NEON_64_FALSE,
1888 tune_params::PREF_NEON_STRINGOPS_FALSE,
1889 tune_params::FUSE_NOTHING,
1890 tune_params::SCHED_AUTOPREF_OFF
1894 /* Generic Cortex tuning. Use more specific tunings if appropriate. */
1895 const struct tune_params arm_cortex_tune =
1897 &generic_extra_costs,
1898 NULL, /* Sched adj cost. */
1899 arm_default_branch_cost,
1900 &arm_default_vec_cost,
1901 1, /* Constant limit. */
1902 5, /* Max cond insns. */
1903 8, /* Memset max inline. */
1904 2, /* Issue rate. */
1905 ARM_PREFETCH_NOT_BENEFICIAL,
1906 tune_params::PREF_CONST_POOL_FALSE,
1907 tune_params::PREF_LDRD_FALSE,
1908 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1909 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1910 tune_params::DISPARAGE_FLAGS_NEITHER,
1911 tune_params::PREF_NEON_64_FALSE,
1912 tune_params::PREF_NEON_STRINGOPS_FALSE,
1913 tune_params::FUSE_NOTHING,
1914 tune_params::SCHED_AUTOPREF_OFF
1917 const struct tune_params arm_cortex_a8_tune =
1919 &cortexa8_extra_costs,
1920 NULL, /* Sched adj cost. */
1921 arm_default_branch_cost,
1922 &arm_default_vec_cost,
1923 1, /* Constant limit. */
1924 5, /* Max cond insns. */
1925 8, /* Memset max inline. */
1926 2, /* Issue rate. */
1927 ARM_PREFETCH_NOT_BENEFICIAL,
1928 tune_params::PREF_CONST_POOL_FALSE,
1929 tune_params::PREF_LDRD_FALSE,
1930 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1931 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1932 tune_params::DISPARAGE_FLAGS_NEITHER,
1933 tune_params::PREF_NEON_64_FALSE,
1934 tune_params::PREF_NEON_STRINGOPS_TRUE,
1935 tune_params::FUSE_NOTHING,
1936 tune_params::SCHED_AUTOPREF_OFF
1939 const struct tune_params arm_cortex_a7_tune =
1941 &cortexa7_extra_costs,
1942 NULL, /* Sched adj cost. */
1943 arm_default_branch_cost,
1944 &arm_default_vec_cost,
1945 1, /* Constant limit. */
1946 5, /* Max cond insns. */
1947 8, /* Memset max inline. */
1948 2, /* Issue rate. */
1949 ARM_PREFETCH_NOT_BENEFICIAL,
1950 tune_params::PREF_CONST_POOL_FALSE,
1951 tune_params::PREF_LDRD_FALSE,
1952 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1953 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1954 tune_params::DISPARAGE_FLAGS_NEITHER,
1955 tune_params::PREF_NEON_64_FALSE,
1956 tune_params::PREF_NEON_STRINGOPS_TRUE,
1957 tune_params::FUSE_NOTHING,
1958 tune_params::SCHED_AUTOPREF_OFF
1961 const struct tune_params arm_cortex_a15_tune =
1963 &cortexa15_extra_costs,
1964 NULL, /* Sched adj cost. */
1965 arm_default_branch_cost,
1966 &arm_default_vec_cost,
1967 1, /* Constant limit. */
1968 2, /* Max cond insns. */
1969 8, /* Memset max inline. */
1970 3, /* Issue rate. */
1971 ARM_PREFETCH_NOT_BENEFICIAL,
1972 tune_params::PREF_CONST_POOL_FALSE,
1973 tune_params::PREF_LDRD_TRUE,
1974 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1975 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1976 tune_params::DISPARAGE_FLAGS_ALL,
1977 tune_params::PREF_NEON_64_FALSE,
1978 tune_params::PREF_NEON_STRINGOPS_TRUE,
1979 tune_params::FUSE_NOTHING,
1980 tune_params::SCHED_AUTOPREF_FULL
1983 const struct tune_params arm_cortex_a35_tune =
1985 &cortexa53_extra_costs,
1986 NULL, /* Sched adj cost. */
1987 arm_default_branch_cost,
1988 &arm_default_vec_cost,
1989 1, /* Constant limit. */
1990 5, /* Max cond insns. */
1991 8, /* Memset max inline. */
1992 1, /* Issue rate. */
1993 ARM_PREFETCH_NOT_BENEFICIAL,
1994 tune_params::PREF_CONST_POOL_FALSE,
1995 tune_params::PREF_LDRD_FALSE,
1996 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1997 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1998 tune_params::DISPARAGE_FLAGS_NEITHER,
1999 tune_params::PREF_NEON_64_FALSE,
2000 tune_params::PREF_NEON_STRINGOPS_TRUE,
2001 FUSE_OPS (tune_params::FUSE_MOVW_MOVT),
2002 tune_params::SCHED_AUTOPREF_OFF
2005 const struct tune_params arm_cortex_a53_tune =
2007 &cortexa53_extra_costs,
2008 NULL, /* Sched adj cost. */
2009 arm_default_branch_cost,
2010 &arm_default_vec_cost,
2011 1, /* Constant limit. */
2012 5, /* Max cond insns. */
2013 8, /* Memset max inline. */
2014 2, /* Issue rate. */
2015 ARM_PREFETCH_NOT_BENEFICIAL,
2016 tune_params::PREF_CONST_POOL_FALSE,
2017 tune_params::PREF_LDRD_FALSE,
2018 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2019 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2020 tune_params::DISPARAGE_FLAGS_NEITHER,
2021 tune_params::PREF_NEON_64_FALSE,
2022 tune_params::PREF_NEON_STRINGOPS_TRUE,
2023 FUSE_OPS (tune_params::FUSE_MOVW_MOVT | tune_params::FUSE_AES_AESMC),
2024 tune_params::SCHED_AUTOPREF_OFF
2027 const struct tune_params arm_cortex_a57_tune =
2029 &cortexa57_extra_costs,
2030 NULL, /* Sched adj cost. */
2031 arm_default_branch_cost,
2032 &arm_default_vec_cost,
2033 1, /* Constant limit. */
2034 2, /* Max cond insns. */
2035 8, /* Memset max inline. */
2036 3, /* Issue rate. */
2037 ARM_PREFETCH_NOT_BENEFICIAL,
2038 tune_params::PREF_CONST_POOL_FALSE,
2039 tune_params::PREF_LDRD_TRUE,
2040 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2041 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2042 tune_params::DISPARAGE_FLAGS_ALL,
2043 tune_params::PREF_NEON_64_FALSE,
2044 tune_params::PREF_NEON_STRINGOPS_TRUE,
2045 FUSE_OPS (tune_params::FUSE_MOVW_MOVT | tune_params::FUSE_AES_AESMC),
2046 tune_params::SCHED_AUTOPREF_FULL
2049 const struct tune_params arm_exynosm1_tune =
2051 &exynosm1_extra_costs,
2052 NULL, /* Sched adj cost. */
2053 arm_default_branch_cost,
2054 &arm_default_vec_cost,
2055 1, /* Constant limit. */
2056 2, /* Max cond insns. */
2057 8, /* Memset max inline. */
2058 3, /* Issue rate. */
2059 ARM_PREFETCH_NOT_BENEFICIAL,
2060 tune_params::PREF_CONST_POOL_FALSE,
2061 tune_params::PREF_LDRD_TRUE,
2062 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* Thumb. */
2063 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* ARM. */
2064 tune_params::DISPARAGE_FLAGS_ALL,
2065 tune_params::PREF_NEON_64_FALSE,
2066 tune_params::PREF_NEON_STRINGOPS_TRUE,
2067 tune_params::FUSE_NOTHING,
2068 tune_params::SCHED_AUTOPREF_OFF
2071 const struct tune_params arm_xgene1_tune =
2073 &xgene1_extra_costs,
2074 NULL, /* Sched adj cost. */
2075 arm_default_branch_cost,
2076 &arm_default_vec_cost,
2077 1, /* Constant limit. */
2078 2, /* Max cond insns. */
2079 32, /* Memset max inline. */
2080 4, /* Issue rate. */
2081 ARM_PREFETCH_NOT_BENEFICIAL,
2082 tune_params::PREF_CONST_POOL_FALSE,
2083 tune_params::PREF_LDRD_TRUE,
2084 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2085 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2086 tune_params::DISPARAGE_FLAGS_ALL,
2087 tune_params::PREF_NEON_64_FALSE,
2088 tune_params::PREF_NEON_STRINGOPS_FALSE,
2089 tune_params::FUSE_NOTHING,
2090 tune_params::SCHED_AUTOPREF_OFF
2093 const struct tune_params arm_qdf24xx_tune =
2095 &qdf24xx_extra_costs,
2096 NULL, /* Scheduler cost adjustment. */
2097 arm_default_branch_cost,
2098 &arm_default_vec_cost, /* Vectorizer costs. */
2099 1, /* Constant limit. */
2100 2, /* Max cond insns. */
2101 8, /* Memset max inline. */
2102 4, /* Issue rate. */
2103 ARM_PREFETCH_BENEFICIAL (0, -1, 64),
2104 tune_params::PREF_CONST_POOL_FALSE,
2105 tune_params::PREF_LDRD_TRUE,
2106 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2107 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2108 tune_params::DISPARAGE_FLAGS_ALL,
2109 tune_params::PREF_NEON_64_FALSE,
2110 tune_params::PREF_NEON_STRINGOPS_TRUE,
2111 FUSE_OPS (tune_params::FUSE_MOVW_MOVT),
2112 tune_params::SCHED_AUTOPREF_FULL
2115 /* Branches can be dual-issued on Cortex-A5, so conditional execution is
2116 less appealing. Set max_insns_skipped to a low value. */
2118 const struct tune_params arm_cortex_a5_tune =
2120 &cortexa5_extra_costs,
2121 NULL, /* Sched adj cost. */
2122 arm_cortex_a5_branch_cost,
2123 &arm_default_vec_cost,
2124 1, /* Constant limit. */
2125 1, /* Max cond insns. */
2126 8, /* Memset max inline. */
2127 2, /* Issue rate. */
2128 ARM_PREFETCH_NOT_BENEFICIAL,
2129 tune_params::PREF_CONST_POOL_FALSE,
2130 tune_params::PREF_LDRD_FALSE,
2131 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* Thumb. */
2132 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* ARM. */
2133 tune_params::DISPARAGE_FLAGS_NEITHER,
2134 tune_params::PREF_NEON_64_FALSE,
2135 tune_params::PREF_NEON_STRINGOPS_TRUE,
2136 tune_params::FUSE_NOTHING,
2137 tune_params::SCHED_AUTOPREF_OFF
2140 const struct tune_params arm_cortex_a9_tune =
2142 &cortexa9_extra_costs,
2143 cortex_a9_sched_adjust_cost,
2144 arm_default_branch_cost,
2145 &arm_default_vec_cost,
2146 1, /* Constant limit. */
2147 5, /* Max cond insns. */
2148 8, /* Memset max inline. */
2149 2, /* Issue rate. */
2150 ARM_PREFETCH_BENEFICIAL(4,32,32),
2151 tune_params::PREF_CONST_POOL_FALSE,
2152 tune_params::PREF_LDRD_FALSE,
2153 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2154 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2155 tune_params::DISPARAGE_FLAGS_NEITHER,
2156 tune_params::PREF_NEON_64_FALSE,
2157 tune_params::PREF_NEON_STRINGOPS_FALSE,
2158 tune_params::FUSE_NOTHING,
2159 tune_params::SCHED_AUTOPREF_OFF
2162 const struct tune_params arm_cortex_a12_tune =
2164 &cortexa12_extra_costs,
2165 NULL, /* Sched adj cost. */
2166 arm_default_branch_cost,
2167 &arm_default_vec_cost, /* Vectorizer costs. */
2168 1, /* Constant limit. */
2169 2, /* Max cond insns. */
2170 8, /* Memset max inline. */
2171 2, /* Issue rate. */
2172 ARM_PREFETCH_NOT_BENEFICIAL,
2173 tune_params::PREF_CONST_POOL_FALSE,
2174 tune_params::PREF_LDRD_TRUE,
2175 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2176 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2177 tune_params::DISPARAGE_FLAGS_ALL,
2178 tune_params::PREF_NEON_64_FALSE,
2179 tune_params::PREF_NEON_STRINGOPS_TRUE,
2180 FUSE_OPS (tune_params::FUSE_MOVW_MOVT),
2181 tune_params::SCHED_AUTOPREF_OFF
2184 const struct tune_params arm_cortex_a73_tune =
2186 &cortexa57_extra_costs,
2187 NULL, /* Sched adj cost. */
2188 arm_default_branch_cost,
2189 &arm_default_vec_cost, /* Vectorizer costs. */
2190 1, /* Constant limit. */
2191 2, /* Max cond insns. */
2192 8, /* Memset max inline. */
2193 2, /* Issue rate. */
2194 ARM_PREFETCH_NOT_BENEFICIAL,
2195 tune_params::PREF_CONST_POOL_FALSE,
2196 tune_params::PREF_LDRD_TRUE,
2197 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2198 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2199 tune_params::DISPARAGE_FLAGS_ALL,
2200 tune_params::PREF_NEON_64_FALSE,
2201 tune_params::PREF_NEON_STRINGOPS_TRUE,
2202 FUSE_OPS (tune_params::FUSE_AES_AESMC | tune_params::FUSE_MOVW_MOVT),
2203 tune_params::SCHED_AUTOPREF_FULL
2206 /* armv7m tuning. On Cortex-M4 cores for example, MOVW/MOVT take a single
2207 cycle to execute each. An LDR from the constant pool also takes two cycles
2208 to execute, but mildly increases pipelining opportunity (consecutive
2209 loads/stores can be pipelined together, saving one cycle), and may also
2210 improve icache utilisation. Hence we prefer the constant pool for such
2211 processors. */
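/* Illustrative sketch (not part of the original source): the two ways of
   materialising the address of a symbol SYM discussed above are roughly

       movw    r0, #:lower16:SYM    @ 1 cycle
       movt    r0, #:upper16:SYM    @ 1 cycle

   versus a literal-pool load

       ldr     r0, .LC0             @ 2 cycles, but may pipeline with
                                    @ neighbouring loads/stores

   which is why the tuning below selects PREF_CONST_POOL_TRUE.  */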
2213 const struct tune_params arm_v7m_tune =
2215 &v7m_extra_costs,
2216 NULL, /* Sched adj cost. */
2217 arm_cortex_m_branch_cost,
2218 &arm_default_vec_cost,
2219 1, /* Constant limit. */
2220 2, /* Max cond insns. */
2221 8, /* Memset max inline. */
2222 1, /* Issue rate. */
2223 ARM_PREFETCH_NOT_BENEFICIAL,
2224 tune_params::PREF_CONST_POOL_TRUE,
2225 tune_params::PREF_LDRD_FALSE,
2226 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* Thumb. */
2227 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* ARM. */
2228 tune_params::DISPARAGE_FLAGS_NEITHER,
2229 tune_params::PREF_NEON_64_FALSE,
2230 tune_params::PREF_NEON_STRINGOPS_FALSE,
2231 tune_params::FUSE_NOTHING,
2232 tune_params::SCHED_AUTOPREF_OFF
2235 /* Cortex-M7 tuning. */
2237 const struct tune_params arm_cortex_m7_tune =
2239 &v7m_extra_costs,
2240 NULL, /* Sched adj cost. */
2241 arm_cortex_m7_branch_cost,
2242 &arm_default_vec_cost,
2243 0, /* Constant limit. */
2244 1, /* Max cond insns. */
2245 8, /* Memset max inline. */
2246 2, /* Issue rate. */
2247 ARM_PREFETCH_NOT_BENEFICIAL,
2248 tune_params::PREF_CONST_POOL_TRUE,
2249 tune_params::PREF_LDRD_FALSE,
2250 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2251 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2252 tune_params::DISPARAGE_FLAGS_NEITHER,
2253 tune_params::PREF_NEON_64_FALSE,
2254 tune_params::PREF_NEON_STRINGOPS_FALSE,
2255 tune_params::FUSE_NOTHING,
2256 tune_params::SCHED_AUTOPREF_OFF
2259 /* The arm_v6m_tune is duplicated from arm_cortex_tune, rather than
2260 arm_v6t2_tune. It is used for cortex-m0, cortex-m1, cortex-m0plus and
2261 cortex-m23. */
2262 const struct tune_params arm_v6m_tune =
2264 &generic_extra_costs, /* Insn extra costs. */
2265 NULL, /* Sched adj cost. */
2266 arm_default_branch_cost,
2267 &arm_default_vec_cost, /* Vectorizer costs. */
2268 1, /* Constant limit. */
2269 5, /* Max cond insns. */
2270 8, /* Memset max inline. */
2271 1, /* Issue rate. */
2272 ARM_PREFETCH_NOT_BENEFICIAL,
2273 tune_params::PREF_CONST_POOL_FALSE,
2274 tune_params::PREF_LDRD_FALSE,
2275 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* Thumb. */
2276 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* ARM. */
2277 tune_params::DISPARAGE_FLAGS_NEITHER,
2278 tune_params::PREF_NEON_64_FALSE,
2279 tune_params::PREF_NEON_STRINGOPS_FALSE,
2280 tune_params::FUSE_NOTHING,
2281 tune_params::SCHED_AUTOPREF_OFF
2284 const struct tune_params arm_fa726te_tune =
2286 &generic_extra_costs, /* Insn extra costs. */
2287 fa726te_sched_adjust_cost,
2288 arm_default_branch_cost,
2289 &arm_default_vec_cost,
2290 1, /* Constant limit. */
2291 5, /* Max cond insns. */
2292 8, /* Memset max inline. */
2293 2, /* Issue rate. */
2294 ARM_PREFETCH_NOT_BENEFICIAL,
2295 tune_params::PREF_CONST_POOL_TRUE,
2296 tune_params::PREF_LDRD_FALSE,
2297 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2298 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2299 tune_params::DISPARAGE_FLAGS_NEITHER,
2300 tune_params::PREF_NEON_64_FALSE,
2301 tune_params::PREF_NEON_STRINGOPS_FALSE,
2302 tune_params::FUSE_NOTHING,
2303 tune_params::SCHED_AUTOPREF_OFF
2306 /* Auto-generated CPU, FPU and architecture tables. */
2307 #include "arm-cpu-data.h"
2309 /* The name of the preprocessor macro to define for this architecture. PROFILE
2310 is replaced by the architecture name (e.g. 8A) in arm_option_override () and
2311 is thus chosen to be big enough to hold the longest architecture name. */
2313 char arm_arch_name[] = "__ARM_ARCH_PROFILE__";
2315 /* Supported TLS relocations. */
2317 enum tls_reloc {
2318 TLS_GD32,
2319 TLS_LDM32,
2320 TLS_LDO32,
2321 TLS_IE32,
2322 TLS_LE32,
2323 TLS_DESCSEQ /* GNU scheme */
2326 /* The maximum number of insns to be used when loading a constant. */
2327 inline static int
2328 arm_constant_limit (bool size_p)
2330 return size_p ? 1 : current_tune->constant_limit;
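/* Illustrative note (not in the original source): when optimizing for size
   the limit above is 1, so a constant that would need two data-processing
   instructions to synthesise is instead placed in the literal pool; at -O2
   the per-core value from the tune tables above applies (e.g. 3 for
   arm_slowmul_tune).  */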
2333 /* Emit an insn that's a simple single-set. Both the operands must be known
2334 to be valid. */
2335 inline static rtx_insn *
2336 emit_set_insn (rtx x, rtx y)
2338 return emit_insn (gen_rtx_SET (x, y));
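/* Usage sketch (hypothetical operands, not from the original source):

     rtx dst = gen_reg_rtx (SImode);
     emit_set_insn (dst, plus_constant (SImode, src, 4));

   emits the single insn (set (reg:SI dst) (plus:SI (reg:SI src)
   (const_int 4))), exactly as emit_insn (gen_rtx_SET (...)) would.  */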
2341 /* Return the number of bits set in VALUE. */
2342 static unsigned
2343 bit_count (unsigned long value)
2345 unsigned long count = 0;
2347 while (value)
2349 count++;
2350 value &= value - 1; /* Clear the least-significant set bit. */
2353 return count;
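/* Worked example (not in the original source): for value == 0xb4
   (binary 10110100) the loop above clears one set bit per iteration,
   0xb4 -> 0xb0 -> 0xa0 -> 0x80 -> 0, and returns 4.  */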
2356 /* Return the number of bits set in BMAP. */
2357 static unsigned
2358 bitmap_popcount (const sbitmap bmap)
2360 unsigned int count = 0;
2361 unsigned int n = 0;
2362 sbitmap_iterator sbi;
2364 EXECUTE_IF_SET_IN_BITMAP (bmap, 0, n, sbi)
2365 count++;
2366 return count;
2369 typedef struct
2371 machine_mode mode;
2372 const char *name;
2373 } arm_fixed_mode_set;
2375 /* A small helper for setting fixed-point library libfuncs. */
2377 static void
2378 arm_set_fixed_optab_libfunc (optab optable, machine_mode mode,
2379 const char *funcname, const char *modename,
2380 int num_suffix)
2382 char buffer[50];
2384 if (num_suffix == 0)
2385 sprintf (buffer, "__gnu_%s%s", funcname, modename);
2386 else
2387 sprintf (buffer, "__gnu_%s%s%d", funcname, modename, num_suffix);
2389 set_optab_libfunc (optable, mode, buffer);
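/* Example of the names generated (not in the original source):
   arm_set_fixed_optab_libfunc (add_optab, SQmode, "add", "sq", 3) maps
   signed-fract addition to "__gnu_addsq3"; a NUM_SUFFIX of 0 would give
   plain "__gnu_addsq", and negation with suffix 2 gives "__gnu_negsq2".  */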
2392 static void
2393 arm_set_fixed_conv_libfunc (convert_optab optable, machine_mode to,
2394 machine_mode from, const char *funcname,
2395 const char *toname, const char *fromname)
2397 char buffer[50];
2398 const char *maybe_suffix_2 = "";
2400 /* Follow the logic for selecting a "2" suffix in fixed-bit.h. */
2401 if (ALL_FIXED_POINT_MODE_P (from) && ALL_FIXED_POINT_MODE_P (to)
2402 && UNSIGNED_FIXED_POINT_MODE_P (from) == UNSIGNED_FIXED_POINT_MODE_P (to)
2403 && ALL_FRACT_MODE_P (from) == ALL_FRACT_MODE_P (to))
2404 maybe_suffix_2 = "2";
2406 sprintf (buffer, "__gnu_%s%s%s%s", funcname, fromname, toname,
2407 maybe_suffix_2);
2409 set_conv_libfunc (optable, to, from, buffer);
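/* Example of the names generated (not in the original source): a fract
   conversion from SQmode to DQmode (both signed fract modes) takes the "2"
   suffix and becomes "__gnu_fractsqdq2", whereas SQmode to SFmode is a
   fixed-to-float conversion and becomes plain "__gnu_fractsqsf".  */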
2412 /* Set up library functions unique to ARM. */
2414 static void
2415 arm_init_libfuncs (void)
2417 /* For Linux, we have access to kernel support for atomic operations. */
2418 if (arm_abi == ARM_ABI_AAPCS_LINUX)
2419 init_sync_libfuncs (MAX_SYNC_LIBFUNC_SIZE);
2421 /* There are no special library functions unless we are using the
2422 ARM BPABI. */
2423 if (!TARGET_BPABI)
2424 return;
2426 /* The functions below are described in Section 4 of the "Run-Time
2427 ABI for the ARM architecture", Version 1.0. */
2429 /* Double-precision floating-point arithmetic. Table 2. */
2430 set_optab_libfunc (add_optab, DFmode, "__aeabi_dadd");
2431 set_optab_libfunc (sdiv_optab, DFmode, "__aeabi_ddiv");
2432 set_optab_libfunc (smul_optab, DFmode, "__aeabi_dmul");
2433 set_optab_libfunc (neg_optab, DFmode, "__aeabi_dneg");
2434 set_optab_libfunc (sub_optab, DFmode, "__aeabi_dsub");
2436 /* Double-precision comparisons. Table 3. */
2437 set_optab_libfunc (eq_optab, DFmode, "__aeabi_dcmpeq");
2438 set_optab_libfunc (ne_optab, DFmode, NULL);
2439 set_optab_libfunc (lt_optab, DFmode, "__aeabi_dcmplt");
2440 set_optab_libfunc (le_optab, DFmode, "__aeabi_dcmple");
2441 set_optab_libfunc (ge_optab, DFmode, "__aeabi_dcmpge");
2442 set_optab_libfunc (gt_optab, DFmode, "__aeabi_dcmpgt");
2443 set_optab_libfunc (unord_optab, DFmode, "__aeabi_dcmpun");
2445 /* Single-precision floating-point arithmetic. Table 4. */
2446 set_optab_libfunc (add_optab, SFmode, "__aeabi_fadd");
2447 set_optab_libfunc (sdiv_optab, SFmode, "__aeabi_fdiv");
2448 set_optab_libfunc (smul_optab, SFmode, "__aeabi_fmul");
2449 set_optab_libfunc (neg_optab, SFmode, "__aeabi_fneg");
2450 set_optab_libfunc (sub_optab, SFmode, "__aeabi_fsub");
2452 /* Single-precision comparisons. Table 5. */
2453 set_optab_libfunc (eq_optab, SFmode, "__aeabi_fcmpeq");
2454 set_optab_libfunc (ne_optab, SFmode, NULL);
2455 set_optab_libfunc (lt_optab, SFmode, "__aeabi_fcmplt");
2456 set_optab_libfunc (le_optab, SFmode, "__aeabi_fcmple");
2457 set_optab_libfunc (ge_optab, SFmode, "__aeabi_fcmpge");
2458 set_optab_libfunc (gt_optab, SFmode, "__aeabi_fcmpgt");
2459 set_optab_libfunc (unord_optab, SFmode, "__aeabi_fcmpun");
2461 /* Floating-point to integer conversions. Table 6. */
2462 set_conv_libfunc (sfix_optab, SImode, DFmode, "__aeabi_d2iz");
2463 set_conv_libfunc (ufix_optab, SImode, DFmode, "__aeabi_d2uiz");
2464 set_conv_libfunc (sfix_optab, DImode, DFmode, "__aeabi_d2lz");
2465 set_conv_libfunc (ufix_optab, DImode, DFmode, "__aeabi_d2ulz");
2466 set_conv_libfunc (sfix_optab, SImode, SFmode, "__aeabi_f2iz");
2467 set_conv_libfunc (ufix_optab, SImode, SFmode, "__aeabi_f2uiz");
2468 set_conv_libfunc (sfix_optab, DImode, SFmode, "__aeabi_f2lz");
2469 set_conv_libfunc (ufix_optab, DImode, SFmode, "__aeabi_f2ulz");
2471 /* Conversions between floating types. Table 7. */
2472 set_conv_libfunc (trunc_optab, SFmode, DFmode, "__aeabi_d2f");
2473 set_conv_libfunc (sext_optab, DFmode, SFmode, "__aeabi_f2d");
2475 /* Integer to floating-point conversions. Table 8. */
2476 set_conv_libfunc (sfloat_optab, DFmode, SImode, "__aeabi_i2d");
2477 set_conv_libfunc (ufloat_optab, DFmode, SImode, "__aeabi_ui2d");
2478 set_conv_libfunc (sfloat_optab, DFmode, DImode, "__aeabi_l2d");
2479 set_conv_libfunc (ufloat_optab, DFmode, DImode, "__aeabi_ul2d");
2480 set_conv_libfunc (sfloat_optab, SFmode, SImode, "__aeabi_i2f");
2481 set_conv_libfunc (ufloat_optab, SFmode, SImode, "__aeabi_ui2f");
2482 set_conv_libfunc (sfloat_optab, SFmode, DImode, "__aeabi_l2f");
2483 set_conv_libfunc (ufloat_optab, SFmode, DImode, "__aeabi_ul2f");
2485 /* Long long. Table 9. */
2486 set_optab_libfunc (smul_optab, DImode, "__aeabi_lmul");
2487 set_optab_libfunc (sdivmod_optab, DImode, "__aeabi_ldivmod");
2488 set_optab_libfunc (udivmod_optab, DImode, "__aeabi_uldivmod");
2489 set_optab_libfunc (ashl_optab, DImode, "__aeabi_llsl");
2490 set_optab_libfunc (lshr_optab, DImode, "__aeabi_llsr");
2491 set_optab_libfunc (ashr_optab, DImode, "__aeabi_lasr");
2492 set_optab_libfunc (cmp_optab, DImode, "__aeabi_lcmp");
2493 set_optab_libfunc (ucmp_optab, DImode, "__aeabi_ulcmp");
2495 /* Integer (32/32->32) division. \S 4.3.1. */
2496 set_optab_libfunc (sdivmod_optab, SImode, "__aeabi_idivmod");
2497 set_optab_libfunc (udivmod_optab, SImode, "__aeabi_uidivmod");
2499 /* The divmod functions are designed so that they can be used for
2500 plain division, even though they return both the quotient and the
2501 remainder. The quotient is returned in the usual location (i.e.,
2502 r0 for SImode, {r0, r1} for DImode), just as would be expected
2503 for an ordinary division routine. Because the AAPCS calling
2504 conventions specify that all of { r0, r1, r2, r3 } are
2505 call-clobbered registers, there is no need to tell the compiler
2506 explicitly that those registers are clobbered by these
2507 routines. */
2508 set_optab_libfunc (sdiv_optab, DImode, "__aeabi_ldivmod");
2509 set_optab_libfunc (udiv_optab, DImode, "__aeabi_uldivmod");
2511 /* For SImode division the ABI provides div-without-mod routines,
2512 which are faster. */
2513 set_optab_libfunc (sdiv_optab, SImode, "__aeabi_idiv");
2514 set_optab_libfunc (udiv_optab, SImode, "__aeabi_uidiv");
2516 /* We don't have mod libcalls. Fortunately gcc knows how to use the
2517 divmod libcalls instead. */
2518 set_optab_libfunc (smod_optab, DImode, NULL);
2519 set_optab_libfunc (umod_optab, DImode, NULL);
2520 set_optab_libfunc (smod_optab, SImode, NULL);
2521 set_optab_libfunc (umod_optab, SImode, NULL);
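/* Illustrative effect (not in the original source): for "long long q = a / b"
   the compiler calls __aeabi_ldivmod and uses the quotient from {r0, r1};
   for "int r = a % b" there is no separate modulo helper, so it calls
   __aeabi_idivmod and takes the remainder, which the AEABI returns in r1.  */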
2523 /* Half-precision float operations. The compiler handles all operations
2524 with NULL libfuncs by converting to SFmode. */
2525 switch (arm_fp16_format)
2527 case ARM_FP16_FORMAT_IEEE:
2528 case ARM_FP16_FORMAT_ALTERNATIVE:
2530 /* Conversions. */
2531 set_conv_libfunc (trunc_optab, HFmode, SFmode,
2532 (arm_fp16_format == ARM_FP16_FORMAT_IEEE
2533 ? "__gnu_f2h_ieee"
2534 : "__gnu_f2h_alternative"));
2535 set_conv_libfunc (sext_optab, SFmode, HFmode,
2536 (arm_fp16_format == ARM_FP16_FORMAT_IEEE
2537 ? "__gnu_h2f_ieee"
2538 : "__gnu_h2f_alternative"));
2540 set_conv_libfunc (trunc_optab, HFmode, DFmode,
2541 (arm_fp16_format == ARM_FP16_FORMAT_IEEE
2542 ? "__gnu_d2h_ieee"
2543 : "__gnu_d2h_alternative"));
2545 /* Arithmetic. */
2546 set_optab_libfunc (add_optab, HFmode, NULL);
2547 set_optab_libfunc (sdiv_optab, HFmode, NULL);
2548 set_optab_libfunc (smul_optab, HFmode, NULL);
2549 set_optab_libfunc (neg_optab, HFmode, NULL);
2550 set_optab_libfunc (sub_optab, HFmode, NULL);
2552 /* Comparisons. */
2553 set_optab_libfunc (eq_optab, HFmode, NULL);
2554 set_optab_libfunc (ne_optab, HFmode, NULL);
2555 set_optab_libfunc (lt_optab, HFmode, NULL);
2556 set_optab_libfunc (le_optab, HFmode, NULL);
2557 set_optab_libfunc (ge_optab, HFmode, NULL);
2558 set_optab_libfunc (gt_optab, HFmode, NULL);
2559 set_optab_libfunc (unord_optab, HFmode, NULL);
2560 break;
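/* Illustrative effect (not in the original source): with software floating
   point, "__fp16 c = a + b" therefore expands to roughly
   __gnu_f2h_ieee (__gnu_h2f_ieee (a) + __gnu_h2f_ieee (b)), i.e. widen to
   SFmode, perform the operation there, and narrow back; the _alternative
   variants are used with the alternative fp16 format.  */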
2562 default:
2563 break;
2566 /* Use names prefixed with __gnu_ for fixed-point helper functions. */
2568 const arm_fixed_mode_set fixed_arith_modes[] =
2570 { QQmode, "qq" },
2571 { UQQmode, "uqq" },
2572 { HQmode, "hq" },
2573 { UHQmode, "uhq" },
2574 { SQmode, "sq" },
2575 { USQmode, "usq" },
2576 { DQmode, "dq" },
2577 { UDQmode, "udq" },
2578 { TQmode, "tq" },
2579 { UTQmode, "utq" },
2580 { HAmode, "ha" },
2581 { UHAmode, "uha" },
2582 { SAmode, "sa" },
2583 { USAmode, "usa" },
2584 { DAmode, "da" },
2585 { UDAmode, "uda" },
2586 { TAmode, "ta" },
2587 { UTAmode, "uta" }
2589 const arm_fixed_mode_set fixed_conv_modes[] =
2591 { QQmode, "qq" },
2592 { UQQmode, "uqq" },
2593 { HQmode, "hq" },
2594 { UHQmode, "uhq" },
2595 { SQmode, "sq" },
2596 { USQmode, "usq" },
2597 { DQmode, "dq" },
2598 { UDQmode, "udq" },
2599 { TQmode, "tq" },
2600 { UTQmode, "utq" },
2601 { HAmode, "ha" },
2602 { UHAmode, "uha" },
2603 { SAmode, "sa" },
2604 { USAmode, "usa" },
2605 { DAmode, "da" },
2606 { UDAmode, "uda" },
2607 { TAmode, "ta" },
2608 { UTAmode, "uta" },
2609 { QImode, "qi" },
2610 { HImode, "hi" },
2611 { SImode, "si" },
2612 { DImode, "di" },
2613 { TImode, "ti" },
2614 { SFmode, "sf" },
2615 { DFmode, "df" }
2617 unsigned int i, j;
2619 for (i = 0; i < ARRAY_SIZE (fixed_arith_modes); i++)
2621 arm_set_fixed_optab_libfunc (add_optab, fixed_arith_modes[i].mode,
2622 "add", fixed_arith_modes[i].name, 3);
2623 arm_set_fixed_optab_libfunc (ssadd_optab, fixed_arith_modes[i].mode,
2624 "ssadd", fixed_arith_modes[i].name, 3);
2625 arm_set_fixed_optab_libfunc (usadd_optab, fixed_arith_modes[i].mode,
2626 "usadd", fixed_arith_modes[i].name, 3);
2627 arm_set_fixed_optab_libfunc (sub_optab, fixed_arith_modes[i].mode,
2628 "sub", fixed_arith_modes[i].name, 3);
2629 arm_set_fixed_optab_libfunc (sssub_optab, fixed_arith_modes[i].mode,
2630 "sssub", fixed_arith_modes[i].name, 3);
2631 arm_set_fixed_optab_libfunc (ussub_optab, fixed_arith_modes[i].mode,
2632 "ussub", fixed_arith_modes[i].name, 3);
2633 arm_set_fixed_optab_libfunc (smul_optab, fixed_arith_modes[i].mode,
2634 "mul", fixed_arith_modes[i].name, 3);
2635 arm_set_fixed_optab_libfunc (ssmul_optab, fixed_arith_modes[i].mode,
2636 "ssmul", fixed_arith_modes[i].name, 3);
2637 arm_set_fixed_optab_libfunc (usmul_optab, fixed_arith_modes[i].mode,
2638 "usmul", fixed_arith_modes[i].name, 3);
2639 arm_set_fixed_optab_libfunc (sdiv_optab, fixed_arith_modes[i].mode,
2640 "div", fixed_arith_modes[i].name, 3);
2641 arm_set_fixed_optab_libfunc (udiv_optab, fixed_arith_modes[i].mode,
2642 "udiv", fixed_arith_modes[i].name, 3);
2643 arm_set_fixed_optab_libfunc (ssdiv_optab, fixed_arith_modes[i].mode,
2644 "ssdiv", fixed_arith_modes[i].name, 3);
2645 arm_set_fixed_optab_libfunc (usdiv_optab, fixed_arith_modes[i].mode,
2646 "usdiv", fixed_arith_modes[i].name, 3);
2647 arm_set_fixed_optab_libfunc (neg_optab, fixed_arith_modes[i].mode,
2648 "neg", fixed_arith_modes[i].name, 2);
2649 arm_set_fixed_optab_libfunc (ssneg_optab, fixed_arith_modes[i].mode,
2650 "ssneg", fixed_arith_modes[i].name, 2);
2651 arm_set_fixed_optab_libfunc (usneg_optab, fixed_arith_modes[i].mode,
2652 "usneg", fixed_arith_modes[i].name, 2);
2653 arm_set_fixed_optab_libfunc (ashl_optab, fixed_arith_modes[i].mode,
2654 "ashl", fixed_arith_modes[i].name, 3);
2655 arm_set_fixed_optab_libfunc (ashr_optab, fixed_arith_modes[i].mode,
2656 "ashr", fixed_arith_modes[i].name, 3);
2657 arm_set_fixed_optab_libfunc (lshr_optab, fixed_arith_modes[i].mode,
2658 "lshr", fixed_arith_modes[i].name, 3);
2659 arm_set_fixed_optab_libfunc (ssashl_optab, fixed_arith_modes[i].mode,
2660 "ssashl", fixed_arith_modes[i].name, 3);
2661 arm_set_fixed_optab_libfunc (usashl_optab, fixed_arith_modes[i].mode,
2662 "usashl", fixed_arith_modes[i].name, 3);
2663 arm_set_fixed_optab_libfunc (cmp_optab, fixed_arith_modes[i].mode,
2664 "cmp", fixed_arith_modes[i].name, 2);
2667 for (i = 0; i < ARRAY_SIZE (fixed_conv_modes); i++)
2668 for (j = 0; j < ARRAY_SIZE (fixed_conv_modes); j++)
2670 if (i == j
2671 || (!ALL_FIXED_POINT_MODE_P (fixed_conv_modes[i].mode)
2672 && !ALL_FIXED_POINT_MODE_P (fixed_conv_modes[j].mode)))
2673 continue;
2675 arm_set_fixed_conv_libfunc (fract_optab, fixed_conv_modes[i].mode,
2676 fixed_conv_modes[j].mode, "fract",
2677 fixed_conv_modes[i].name,
2678 fixed_conv_modes[j].name);
2679 arm_set_fixed_conv_libfunc (satfract_optab,
2680 fixed_conv_modes[i].mode,
2681 fixed_conv_modes[j].mode, "satfract",
2682 fixed_conv_modes[i].name,
2683 fixed_conv_modes[j].name);
2684 arm_set_fixed_conv_libfunc (fractuns_optab,
2685 fixed_conv_modes[i].mode,
2686 fixed_conv_modes[j].mode, "fractuns",
2687 fixed_conv_modes[i].name,
2688 fixed_conv_modes[j].name);
2689 arm_set_fixed_conv_libfunc (satfractuns_optab,
2690 fixed_conv_modes[i].mode,
2691 fixed_conv_modes[j].mode, "satfractuns",
2692 fixed_conv_modes[i].name,
2693 fixed_conv_modes[j].name);
2697 if (TARGET_AAPCS_BASED)
2698 synchronize_libfunc = init_one_libfunc ("__sync_synchronize");
2701 /* On AAPCS systems, this is the "struct __va_list". */
2702 static GTY(()) tree va_list_type;
2704 /* Return the type to use as __builtin_va_list. */
2705 static tree
2706 arm_build_builtin_va_list (void)
2708 tree va_list_name;
2709 tree ap_field;
2711 if (!TARGET_AAPCS_BASED)
2712 return std_build_builtin_va_list ();
2714 /* AAPCS \S 7.1.4 requires that va_list be a typedef for a type
2715 defined as:
2717 struct __va_list
2719 void *__ap;
2722 The C Library ABI further reinforces this definition in \S
2723 4.1.
2725 We must follow this definition exactly. The structure tag
2726 name is visible in C++ mangled names, and thus forms a part
2727 of the ABI. The field name may be used by people who
2728 #include <stdarg.h>. */
2729 /* Create the type. */
2730 va_list_type = lang_hooks.types.make_type (RECORD_TYPE);
2731 /* Give it the required name. */
2732 va_list_name = build_decl (BUILTINS_LOCATION,
2733 TYPE_DECL,
2734 get_identifier ("__va_list"),
2735 va_list_type);
2736 DECL_ARTIFICIAL (va_list_name) = 1;
2737 TYPE_NAME (va_list_type) = va_list_name;
2738 TYPE_STUB_DECL (va_list_type) = va_list_name;
2739 /* Create the __ap field. */
2740 ap_field = build_decl (BUILTINS_LOCATION,
2741 FIELD_DECL,
2742 get_identifier ("__ap"),
2743 ptr_type_node);
2744 DECL_ARTIFICIAL (ap_field) = 1;
2745 DECL_FIELD_CONTEXT (ap_field) = va_list_type;
2746 TYPE_FIELDS (va_list_type) = ap_field;
2747 /* Compute its layout. */
2748 layout_type (va_list_type);
2750 return va_list_type;
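/* Editorial sketch (not in the original source): the type built above is
   equivalent to the C declaration

     struct __va_list { void *__ap; };

   and, because the tag is ABI-visible, a C++ va_list parameter mangles as
   std::__va_list (e.g. "St9__va_list").  */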
2753 /* Return an expression of type "void *" pointing to the next
2754 available argument in a variable-argument list. VALIST is the
2755 user-level va_list object, of type __builtin_va_list. */
2756 static tree
2757 arm_extract_valist_ptr (tree valist)
2759 if (TREE_TYPE (valist) == error_mark_node)
2760 return error_mark_node;
2762 /* On an AAPCS target, the pointer is stored within "struct
2763 va_list". */
2764 if (TARGET_AAPCS_BASED)
2766 tree ap_field = TYPE_FIELDS (TREE_TYPE (valist));
2767 valist = build3 (COMPONENT_REF, TREE_TYPE (ap_field),
2768 valist, ap_field, NULL_TREE);
2771 return valist;
2774 /* Implement TARGET_EXPAND_BUILTIN_VA_START. */
2775 static void
2776 arm_expand_builtin_va_start (tree valist, rtx nextarg)
2778 valist = arm_extract_valist_ptr (valist);
2779 std_expand_builtin_va_start (valist, nextarg);
2782 /* Implement TARGET_GIMPLIFY_VA_ARG_EXPR. */
2783 static tree
2784 arm_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
2785 gimple_seq *post_p)
2787 valist = arm_extract_valist_ptr (valist);
2788 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
2791 /* Check any incompatible options that the user has specified. */
2792 static void
2793 arm_option_check_internal (struct gcc_options *opts)
2795 int flags = opts->x_target_flags;
2797 /* iWMMXt and NEON are incompatible. */
2798 if (TARGET_IWMMXT
2799 && bitmap_bit_p (arm_active_target.isa, isa_bit_neon))
2800 error ("iWMMXt and NEON are incompatible");
2802 /* Make sure that the processor choice does not conflict with any of the
2803 other command line choices. */
2804 if (TARGET_ARM_P (flags)
2805 && !bitmap_bit_p (arm_active_target.isa, isa_bit_notm))
2806 error ("target CPU does not support ARM mode");
2808 /* TARGET_BACKTRACE cannot be used here as crtl->is_leaf is not set yet. */
2809 if ((TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME) && TARGET_ARM_P (flags))
2810 warning (0, "enabling backtrace support is only meaningful when compiling for the Thumb");
2812 if (TARGET_ARM_P (flags) && TARGET_CALLEE_INTERWORKING)
2813 warning (0, "enabling callee interworking support is only meaningful when compiling for the Thumb");
2815 /* If this target is normally configured to use APCS frames, warn if they
2816 are turned off and debugging is turned on. */
2817 if (TARGET_ARM_P (flags)
2818 && write_symbols != NO_DEBUG
2819 && !TARGET_APCS_FRAME
2820 && (TARGET_DEFAULT & MASK_APCS_FRAME))
2821 warning (0, "-g with -mno-apcs-frame may not give sensible debugging");
2823 /* iWMMXt unsupported under Thumb mode. */
2824 if (TARGET_THUMB_P (flags) && TARGET_IWMMXT)
2825 error ("iWMMXt unsupported under Thumb mode");
2827 if (TARGET_HARD_TP && TARGET_THUMB1_P (flags))
2828 error ("can not use -mtp=cp15 with 16-bit Thumb");
2830 if (TARGET_THUMB_P (flags) && TARGET_VXWORKS_RTP && flag_pic)
2832 error ("RTP PIC is incompatible with Thumb");
2833 flag_pic = 0;
2836 /* We only support -mpure-code and -mslow-flash-data on M-profile targets
2837 with MOVT. */
2838 if ((target_pure_code || target_slow_flash_data)
2839 && (!TARGET_HAVE_MOVT || arm_arch_notm || flag_pic || TARGET_NEON))
2841 const char *flag = (target_pure_code ? "-mpure-code" :
2842 "-mslow-flash-data");
2843 error ("%s only supports non-pic code on M-profile targets with the "
2844 "MOVT instruction", flag);
2849 /* Recompute the global settings depending on target attribute options. */
2851 static void
2852 arm_option_params_internal (void)
2854 /* If we are not using the default (ARM mode) section anchor offset
2855 ranges, then set the correct ranges now. */
2856 if (TARGET_THUMB1)
2858 /* Thumb-1 LDR instructions cannot have negative offsets.
2859 Permissible positive offset ranges are 5-bit (for byte loads),
2860 6-bit (for halfword loads), or 7-bit (for word loads).
2861 Empirical results suggest a 7-bit anchor range gives the best
2862 overall code size. */
2863 targetm.min_anchor_offset = 0;
2864 targetm.max_anchor_offset = 127;
2866 else if (TARGET_THUMB2)
2868 /* The minimum is set such that the total size of the block
2869 for a particular anchor is 248 + 1 + 4095 bytes, which is
2870 divisible by eight, ensuring natural spacing of anchors. */
2871 targetm.min_anchor_offset = -248;
2872 targetm.max_anchor_offset = 4095;
2874 else
2876 targetm.min_anchor_offset = TARGET_MIN_ANCHOR_OFFSET;
2877 targetm.max_anchor_offset = TARGET_MAX_ANCHOR_OFFSET;
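/* Arithmetic check (not in the original source) for the Thumb-2 comment
   above: offsets span -248 .. 4095, so one anchor covers
   248 + 1 + 4095 = 4344 bytes, and 4344 == 8 * 543 is indeed divisible
   by eight.  */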
2880 if (optimize_size)
2882 /* If optimizing for size, bump the number of instructions that we
2883 are prepared to conditionally execute (even on a StrongARM). */
2884 max_insns_skipped = 6;
2886 /* For THUMB2, we limit the conditional sequence to one IT block. */
2887 if (TARGET_THUMB2)
2888 max_insns_skipped = arm_restrict_it ? 1 : 4;
2890 else
2891 /* When -mrestrict-it is in use, tone down the if-conversion. */
2892 max_insns_skipped = (TARGET_THUMB2 && arm_restrict_it)
2893 ? 1 : current_tune->max_insns_skipped;
2896 /* True if -mflip-thumb should next add an attribute for the default
2897 mode, false if it should next add an attribute for the opposite mode. */
2898 static GTY(()) bool thumb_flipper;
2900 /* Options after initial target override. */
2901 static GTY(()) tree init_optimize;
2903 static void
2904 arm_override_options_after_change_1 (struct gcc_options *opts)
2906 if (opts->x_align_functions <= 0)
2907 opts->x_align_functions = TARGET_THUMB_P (opts->x_target_flags)
2908 && opts->x_optimize_size ? 2 : 4;
2911 /* Implement targetm.override_options_after_change. */
2913 static void
2914 arm_override_options_after_change (void)
2916 arm_configure_build_target (&arm_active_target,
2917 TREE_TARGET_OPTION (target_option_default_node),
2918 &global_options_set, false);
2920 arm_override_options_after_change_1 (&global_options);
2923 static void
2924 arm_option_restore (struct gcc_options *, struct cl_target_option *ptr)
2926 arm_configure_build_target (&arm_active_target, ptr, &global_options_set,
2927 false);
2930 /* Reset options between modes that the user has specified. */
2931 static void
2932 arm_option_override_internal (struct gcc_options *opts,
2933 struct gcc_options *opts_set)
2935 arm_override_options_after_change_1 (opts);
2937 if (TARGET_INTERWORK && !bitmap_bit_p (arm_active_target.isa, isa_bit_thumb))
2939 /* The default is to enable interworking, so this warning message would
2940 be confusing to users who have just compiled with, e.g., -march=armv3. */
2941 /* warning (0, "ignoring -minterwork because target CPU does not support THUMB"); */
2942 opts->x_target_flags &= ~MASK_INTERWORK;
2945 if (TARGET_THUMB_P (opts->x_target_flags)
2946 && !bitmap_bit_p (arm_active_target.isa, isa_bit_thumb))
2948 warning (0, "target CPU does not support THUMB instructions");
2949 opts->x_target_flags &= ~MASK_THUMB;
2952 if (TARGET_APCS_FRAME && TARGET_THUMB_P (opts->x_target_flags))
2954 /* warning (0, "ignoring -mapcs-frame because -mthumb was used"); */
2955 opts->x_target_flags &= ~MASK_APCS_FRAME;
2958 /* Callee super interworking implies thumb interworking. Adding
2959 this to the flags here simplifies the logic elsewhere. */
2960 if (TARGET_THUMB_P (opts->x_target_flags) && TARGET_CALLEE_INTERWORKING)
2961 opts->x_target_flags |= MASK_INTERWORK;
2963 /* Need to remember initial values so combinations of options like
2964 -mflip-thumb -mthumb -fno-schedule-insns work for any attribute. */
2965 cl_optimization *to = TREE_OPTIMIZATION (init_optimize);
2967 if (! opts_set->x_arm_restrict_it)
2968 opts->x_arm_restrict_it = arm_arch8;
2970 /* ARM execution state and M profile don't have [restrict] IT. */
2971 if (!TARGET_THUMB2_P (opts->x_target_flags) || !arm_arch_notm)
2972 opts->x_arm_restrict_it = 0;
2974 /* Enable -munaligned-access by default for
2975 - all ARMv6 architecture-based processors when compiling for a 32-bit ISA
2976 i.e. Thumb2 and ARM state only.
2977 - ARMv7-A, ARMv7-R, and ARMv7-M architecture-based processors.
2978 - ARMv8 architecture-based processors.
2980 Disable -munaligned-access by default for
2981 - all pre-ARMv6 architecture-based processors
2982 - ARMv6-M architecture-based processors
2983 - ARMv8-M Baseline processors. */
2985 if (! opts_set->x_unaligned_access)
2987 opts->x_unaligned_access = (TARGET_32BIT_P (opts->x_target_flags)
2988 && arm_arch6 && (arm_arch_notm || arm_arch7));
2990 else if (opts->x_unaligned_access == 1
2991 && !(arm_arch6 && (arm_arch_notm || arm_arch7)))
2993 warning (0, "target CPU does not support unaligned accesses");
2994 opts->x_unaligned_access = 0;
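/* Illustrative effect (hypothetical example, not in the original source):

     struct __attribute__ ((packed)) s { char c; int x; };
     int get (struct s *p) { return p->x; }

   With -munaligned-access the misaligned field can be read with a single
   (unaligned) LDR; with -mno-unaligned-access it is assembled from byte
   loads instead.  */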
2997 /* Don't warn since it's on by default in -O2. */
2998 if (TARGET_THUMB1_P (opts->x_target_flags))
2999 opts->x_flag_schedule_insns = 0;
3000 else
3001 opts->x_flag_schedule_insns = to->x_flag_schedule_insns;
3003 /* Disable shrink-wrap when optimizing function for size, since it tends to
3004 generate additional returns. */
3005 if (optimize_function_for_size_p (cfun)
3006 && TARGET_THUMB2_P (opts->x_target_flags))
3007 opts->x_flag_shrink_wrap = false;
3008 else
3009 opts->x_flag_shrink_wrap = to->x_flag_shrink_wrap;
3011 /* In Thumb1 mode, we emit the epilogue in RTL, but the last insn
3012 - epilogue_insns - does not accurately model the corresponding insns
3013 emitted in the asm file. In particular, see the comment in thumb_exit
3014 'Find out how many of the (return) argument registers we can corrupt'.
3015 As a consequence, the epilogue may clobber registers without fipa-ra
3016 finding out about it. Therefore, disable fipa-ra in Thumb1 mode.
3017 TODO: Accurately model clobbers for epilogue_insns and reenable
3018 fipa-ra. */
3019 if (TARGET_THUMB1_P (opts->x_target_flags))
3020 opts->x_flag_ipa_ra = 0;
3021 else
3022 opts->x_flag_ipa_ra = to->x_flag_ipa_ra;
3024 /* Thumb2 inline assembly code should always use unified syntax.
3025 This will apply to ARM and Thumb1 eventually. */
3026 opts->x_inline_asm_unified = TARGET_THUMB2_P (opts->x_target_flags);
3028 #ifdef SUBTARGET_OVERRIDE_INTERNAL_OPTIONS
3029 SUBTARGET_OVERRIDE_INTERNAL_OPTIONS;
3030 #endif
3033 /* Convert a static initializer array of feature bits to sbitmap
3034 representation. */
3035 static void
3036 arm_initialize_isa (sbitmap isa, const enum isa_feature *isa_bits)
3038 bitmap_clear (isa);
3039 while (*isa_bits != isa_nobit)
3040 bitmap_set_bit (isa, *(isa_bits++));
3043 static sbitmap isa_all_fpubits;
3044 static sbitmap isa_quirkbits;
3046 /* Configure a build target TARGET from the user-specified options OPTS and
3047 OPTS_SET. If WARN_COMPATIBLE, emit a diagnostic if both the CPU and
3048 architecture have been specified, but the two are not identical. */
3049 void
3050 arm_configure_build_target (struct arm_build_target *target,
3051 struct cl_target_option *opts,
3052 struct gcc_options *opts_set,
3053 bool warn_compatible)
3055 const struct processors *arm_selected_tune = NULL;
3056 const struct processors *arm_selected_arch = NULL;
3057 const struct processors *arm_selected_cpu = NULL;
3058 const struct arm_fpu_desc *arm_selected_fpu = NULL;
3060 bitmap_clear (target->isa);
3061 target->core_name = NULL;
3062 target->arch_name = NULL;
3064 if (opts_set->x_arm_arch_option)
3065 arm_selected_arch = &all_architectures[opts->x_arm_arch_option];
3067 if (opts_set->x_arm_cpu_option)
3069 arm_selected_cpu = &all_cores[(int) opts->x_arm_cpu_option];
3070 arm_selected_tune = &all_cores[(int) opts->x_arm_cpu_option];
3073 if (opts_set->x_arm_tune_option)
3074 arm_selected_tune = &all_cores[(int) opts->x_arm_tune_option];
3076 if (arm_selected_arch)
3078 arm_initialize_isa (target->isa, arm_selected_arch->isa_bits);
3080 if (arm_selected_cpu)
3082 auto_sbitmap cpu_isa (isa_num_bits);
3084 arm_initialize_isa (cpu_isa, arm_selected_cpu->isa_bits);
3085 bitmap_xor (cpu_isa, cpu_isa, target->isa);
3086 /* Ignore any bits that are quirk bits. */
3087 bitmap_and_compl (cpu_isa, cpu_isa, isa_quirkbits);
3088 /* Ignore (for now) any bits that might be set by -mfpu. */
3089 bitmap_and_compl (cpu_isa, cpu_isa, isa_all_fpubits);
3091 if (!bitmap_empty_p (cpu_isa))
3093 if (warn_compatible)
3094 warning (0, "switch -mcpu=%s conflicts with -march=%s switch",
3095 arm_selected_cpu->name, arm_selected_arch->name);
3096 /* -march wins for code generation.
3097 -mcpu wins for default tuning. */
3098 if (!arm_selected_tune)
3099 arm_selected_tune = arm_selected_cpu;
3101 arm_selected_cpu = arm_selected_arch;
3102 target->arch_name = arm_selected_arch->name;
3104 else
3106 /* Architecture and CPU are essentially the same.
3107 Prefer the CPU setting. */
3108 arm_selected_arch = NULL;
3109 target->core_name = arm_selected_cpu->name;
3112 else
3114 /* Pick a CPU based on the architecture. */
3115 arm_selected_cpu = arm_selected_arch;
3116 target->arch_name = arm_selected_arch->name;
3117 /* Note: target->core_name is left unset in this path. */
3120 else if (arm_selected_cpu)
3122 target->core_name = arm_selected_cpu->name;
3123 arm_initialize_isa (target->isa, arm_selected_cpu->isa_bits);
3125 /* If the user did not specify a processor, choose one for them. */
3126 else
3128 const struct processors * sel;
3129 auto_sbitmap sought_isa (isa_num_bits);
3130 bitmap_clear (sought_isa);
3131 auto_sbitmap default_isa (isa_num_bits);
3133 arm_selected_cpu = &all_cores[TARGET_CPU_DEFAULT];
3134 gcc_assert (arm_selected_cpu->name);
3136 /* RWE: All of the selection logic below (to the end of this
3137 'if' clause) looks somewhat suspect. It appears to be mostly
3138 there to support forcing thumb support when the default CPU
3139 does not have thumb (somewhat dubious in terms of what the
3140 user might be expecting). I think it should be removed once
3141 support for the pre-thumb era cores is removed. */
3142 sel = arm_selected_cpu;
3143 arm_initialize_isa (default_isa, sel->isa_bits);
3145 /* Now check to see if the user has specified any command line
3146 switches that require certain abilities from the cpu. */
3148 if (TARGET_INTERWORK || TARGET_THUMB)
3150 bitmap_set_bit (sought_isa, isa_bit_thumb);
3151 bitmap_set_bit (sought_isa, isa_bit_mode32);
3153 /* There are no ARM processors that support both APCS-26 and
3154 interworking. Therefore we forcibly remove MODE26
3155 from the isa features here (if it was set), so that the
3156 search below will always be able to find a compatible
3157 processor. */
3158 bitmap_clear_bit (default_isa, isa_bit_mode26);
3161 /* If there are such requirements and the default CPU does not
3162 satisfy them, we need to run over the complete list of
3163 cores looking for one that is satisfactory. */
3164 if (!bitmap_empty_p (sought_isa)
3165 && !bitmap_subset_p (sought_isa, default_isa))
3167 auto_sbitmap candidate_isa (isa_num_bits);
3168 /* We're only interested in a CPU with at least the
3169 capabilities of the default CPU and the required
3170 additional features. */
3171 bitmap_ior (default_isa, default_isa, sought_isa);
3173 /* Try to locate a CPU type that supports all of the abilities
3174 of the default CPU, plus the extra abilities requested by
3175 the user. */
3176 for (sel = all_cores; sel->name != NULL; sel++)
3178 arm_initialize_isa (candidate_isa, sel->isa_bits);
3179 /* An exact match? */
3180 if (bitmap_equal_p (default_isa, candidate_isa))
3181 break;
3184 if (sel->name == NULL)
3186 unsigned current_bit_count = isa_num_bits;
3187 const struct processors * best_fit = NULL;
3189 /* Ideally we would like to issue an error message here
3190 saying that it was not possible to find a CPU compatible
3191 with the default CPU, but which also supports the command
3192 line options specified by the programmer, and so they
3193 ought to use the -mcpu=<name> command line option to
3194 override the default CPU type.
3196 If we cannot find a CPU that has exactly the
3197 characteristics of the default CPU and the given
3198 command line options we scan the array again looking
3199 for a best match. The best match must have at least
3200 the capabilities of the perfect match. */
3201 for (sel = all_cores; sel->name != NULL; sel++)
3203 arm_initialize_isa (candidate_isa, sel->isa_bits);
3205 if (bitmap_subset_p (default_isa, candidate_isa))
3207 unsigned count;
3209 bitmap_and_compl (candidate_isa, candidate_isa,
3210 default_isa);
3211 count = bitmap_popcount (candidate_isa);
3213 if (count < current_bit_count)
3215 best_fit = sel;
3216 current_bit_count = count;
3220 gcc_assert (best_fit);
3221 sel = best_fit;
3224 arm_selected_cpu = sel;
3227 /* Now we know the CPU, we can finally initialize the target
3228 structure. */
3229 target->core_name = arm_selected_cpu->name;
3230 arm_initialize_isa (target->isa, arm_selected_cpu->isa_bits);
3233 gcc_assert (arm_selected_cpu);
3235 if (opts->x_arm_fpu_index != TARGET_FPU_auto)
3237 arm_selected_fpu = &all_fpus[opts->x_arm_fpu_index];
3238 auto_sbitmap fpu_bits (isa_num_bits);
3240 arm_initialize_isa (fpu_bits, arm_selected_fpu->isa_bits);
3241 bitmap_and_compl (target->isa, target->isa, isa_all_fpubits);
3242 bitmap_ior (target->isa, target->isa, fpu_bits);
3244 else if (target->core_name == NULL)
3245 /* To support this we need to be able to parse FPU feature options
3246 from the architecture string. */
3247 sorry ("-mfpu=auto not currently supported without an explicit CPU.");
3249 /* The selected cpu may be an architecture, so look up tuning by core ID. */
3250 if (!arm_selected_tune)
3251 arm_selected_tune = &all_cores[arm_selected_cpu->core];
3253 /* Finish initializing the target structure. */
3254 target->arch_pp_name = arm_selected_cpu->arch;
3255 target->base_arch = arm_selected_cpu->base_arch;
3256 target->arch_core = arm_selected_cpu->core;
3258 target->tune_flags = arm_selected_tune->tune_flags;
3259 target->tune = arm_selected_tune->tune;
3260 target->tune_core = arm_selected_tune->core;
3263 /* Fix up any incompatible options that the user has specified. */
3264 static void
3265 arm_option_override (void)
3267 static const enum isa_feature fpu_bitlist[] = { ISA_ALL_FPU, isa_nobit };
3268 static const enum isa_feature quirk_bitlist[] = { ISA_ALL_QUIRKS, isa_nobit};
3269 cl_target_option opts;
3271 isa_quirkbits = sbitmap_alloc (isa_num_bits);
3272 arm_initialize_isa (isa_quirkbits, quirk_bitlist);
3274 isa_all_fpubits = sbitmap_alloc (isa_num_bits);
3275 arm_initialize_isa (isa_all_fpubits, fpu_bitlist);
3277 arm_active_target.isa = sbitmap_alloc (isa_num_bits);
3279 if (!global_options_set.x_arm_fpu_index)
3281 const char *target_fpu_name;
3282 bool ok;
3283 int fpu_index;
3285 #ifdef FPUTYPE_DEFAULT
3286 target_fpu_name = FPUTYPE_DEFAULT;
3287 #else
3288 target_fpu_name = "vfp";
3289 #endif
3291 ok = opt_enum_arg_to_value (OPT_mfpu_, target_fpu_name, &fpu_index,
3292 CL_TARGET);
3293 gcc_assert (ok);
3294 arm_fpu_index = (enum fpu_type) fpu_index;
3297 cl_target_option_save (&opts, &global_options);
3298 arm_configure_build_target (&arm_active_target, &opts, &global_options_set,
3299 true);
3301 #ifdef SUBTARGET_OVERRIDE_OPTIONS
3302 SUBTARGET_OVERRIDE_OPTIONS;
3303 #endif
3305 sprintf (arm_arch_name, "__ARM_ARCH_%s__", arm_active_target.arch_pp_name);
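/* Example (not in the original source): for -march=armv7-a the
   arch_pp_name is "7A", so the line above produces the predefined macro
   name __ARM_ARCH_7A__.  */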
3306 arm_base_arch = arm_active_target.base_arch;
3308 arm_tune = arm_active_target.tune_core;
3309 tune_flags = arm_active_target.tune_flags;
3310 current_tune = arm_active_target.tune;
3312 /* TBD: Dwarf info for apcs frame is not handled yet. */
3313 if (TARGET_APCS_FRAME)
3314 flag_shrink_wrap = false;
3316 /* BPABI targets use linker tricks to allow interworking on cores
3317 without thumb support. */
3318 if (TARGET_INTERWORK
3319 && !TARGET_BPABI
3320 && !bitmap_bit_p (arm_active_target.isa, isa_bit_thumb))
3322 warning (0, "target CPU does not support interworking" );
3323 target_flags &= ~MASK_INTERWORK;
3326 if (TARGET_APCS_STACK && !TARGET_APCS_FRAME)
3328 warning (0, "-mapcs-stack-check incompatible with -mno-apcs-frame");
3329 target_flags |= MASK_APCS_FRAME;
3332 if (TARGET_POKE_FUNCTION_NAME)
3333 target_flags |= MASK_APCS_FRAME;
3335 if (TARGET_APCS_REENT && flag_pic)
3336 error ("-fpic and -mapcs-reent are incompatible");
3338 if (TARGET_APCS_REENT)
3339 warning (0, "APCS reentrant code not supported. Ignored");
3341 /* Initialize boolean versions of the architectural flags, for use
3342 in the arm.md file. */
3343 arm_arch3m = bitmap_bit_p (arm_active_target.isa, isa_bit_ARMv3m);
3344 arm_arch4 = bitmap_bit_p (arm_active_target.isa, isa_bit_ARMv4);
3345 arm_arch4t = arm_arch4 && bitmap_bit_p (arm_active_target.isa, isa_bit_thumb);
3346 arm_arch5 = bitmap_bit_p (arm_active_target.isa, isa_bit_ARMv5);
3347 arm_arch5e = bitmap_bit_p (arm_active_target.isa, isa_bit_ARMv5e);
3348 arm_arch5te = arm_arch5e
3349 && bitmap_bit_p (arm_active_target.isa, isa_bit_thumb);
3350 arm_arch6 = bitmap_bit_p (arm_active_target.isa, isa_bit_ARMv6);
3351 arm_arch6k = bitmap_bit_p (arm_active_target.isa, isa_bit_ARMv6k);
3352 arm_arch_notm = bitmap_bit_p (arm_active_target.isa, isa_bit_notm);
3353 arm_arch6m = arm_arch6 && !arm_arch_notm;
3354 arm_arch7 = bitmap_bit_p (arm_active_target.isa, isa_bit_ARMv7);
3355 arm_arch7em = bitmap_bit_p (arm_active_target.isa, isa_bit_ARMv7em);
3356 arm_arch8 = bitmap_bit_p (arm_active_target.isa, isa_bit_ARMv8);
3357 arm_arch8_1 = bitmap_bit_p (arm_active_target.isa, isa_bit_ARMv8_1);
3358 arm_arch8_2 = bitmap_bit_p (arm_active_target.isa, isa_bit_ARMv8_2);
3359 arm_arch_thumb1 = bitmap_bit_p (arm_active_target.isa, isa_bit_thumb);
3360 arm_arch_thumb2 = bitmap_bit_p (arm_active_target.isa, isa_bit_thumb2);
3361 arm_arch_xscale = bitmap_bit_p (arm_active_target.isa, isa_bit_xscale);
3362 arm_arch_iwmmxt = bitmap_bit_p (arm_active_target.isa, isa_bit_iwmmxt);
3363 arm_arch_iwmmxt2 = bitmap_bit_p (arm_active_target.isa, isa_bit_iwmmxt2);
3364 arm_arch_thumb_hwdiv = bitmap_bit_p (arm_active_target.isa, isa_bit_tdiv);
3365 arm_arch_arm_hwdiv = bitmap_bit_p (arm_active_target.isa, isa_bit_adiv);
3366 arm_arch_crc = bitmap_bit_p (arm_active_target.isa, isa_bit_crc32);
3367 arm_arch_cmse = bitmap_bit_p (arm_active_target.isa, isa_bit_cmse);
3368 arm_fp16_inst = bitmap_bit_p (arm_active_target.isa, isa_bit_fp16);
3369 arm_arch_lpae = bitmap_bit_p (arm_active_target.isa, isa_bit_lpae);
3370 if (arm_fp16_inst)
3372 if (arm_fp16_format == ARM_FP16_FORMAT_ALTERNATIVE)
3373 error ("selected fp16 options are incompatible");
3374 arm_fp16_format = ARM_FP16_FORMAT_IEEE;
3378 /* Set up some tuning parameters. */
3379 arm_ld_sched = (tune_flags & TF_LDSCHED) != 0;
3380 arm_tune_strongarm = (tune_flags & TF_STRONG) != 0;
3381 arm_tune_wbuf = (tune_flags & TF_WBUF) != 0;
3382 arm_tune_xscale = (tune_flags & TF_XSCALE) != 0;
3383 arm_tune_cortex_a9 = (arm_tune == TARGET_CPU_cortexa9) != 0;
3384 arm_m_profile_small_mul = (tune_flags & TF_SMALLMUL) != 0;
3386 /* And finally, set up some quirks. */
3387 arm_arch_no_volatile_ce
3388 = bitmap_bit_p (arm_active_target.isa, isa_quirk_no_volatile_ce);
3389 arm_arch6kz
3390 = arm_arch6k && bitmap_bit_p (arm_active_target.isa, isa_quirk_ARMv6kz);
3392 /* V5 code we generate is completely interworking capable, so we turn off
3393 TARGET_INTERWORK here to avoid many tests later on. */
3395 /* XXX However, we must pass the right pre-processor defines to CPP
3396 or GLD can get confused. This is a hack. */
3397 if (TARGET_INTERWORK)
3398 arm_cpp_interwork = 1;
3400 if (arm_arch5)
3401 target_flags &= ~MASK_INTERWORK;
3403 if (TARGET_IWMMXT && !ARM_DOUBLEWORD_ALIGN)
3404 error ("iwmmxt requires an AAPCS compatible ABI for proper operation");
3406 if (TARGET_IWMMXT_ABI && !TARGET_IWMMXT)
3407 error ("iwmmxt abi requires an iwmmxt capable cpu");
3409 /* If soft-float is specified then don't use FPU. */
3410 if (TARGET_SOFT_FLOAT)
3411 arm_fpu_attr = FPU_NONE;
3412 else
3413 arm_fpu_attr = FPU_VFP;
3415 if (TARGET_AAPCS_BASED)
3417 if (TARGET_CALLER_INTERWORKING)
3418 error ("AAPCS does not support -mcaller-super-interworking");
3419 else
3420 if (TARGET_CALLEE_INTERWORKING)
3421 error ("AAPCS does not support -mcallee-super-interworking");
3424 /* __fp16 support currently assumes the core has ldrh. */
3425 if (!arm_arch4 && arm_fp16_format != ARM_FP16_FORMAT_NONE)
3426 sorry ("__fp16 and no ldrh");
3428 if (TARGET_AAPCS_BASED)
3430 if (arm_abi == ARM_ABI_IWMMXT)
3431 arm_pcs_default = ARM_PCS_AAPCS_IWMMXT;
3432 else if (arm_float_abi == ARM_FLOAT_ABI_HARD
3433 && TARGET_HARD_FLOAT)
3435 arm_pcs_default = ARM_PCS_AAPCS_VFP;
3436 if (!bitmap_bit_p (arm_active_target.isa, isa_bit_VFPv2))
3437 error ("-mfloat-abi=hard: selected processor lacks an FPU");
3439 else
3440 arm_pcs_default = ARM_PCS_AAPCS;
3442 else
3444 if (arm_float_abi == ARM_FLOAT_ABI_HARD)
3445 sorry ("-mfloat-abi=hard and VFP");
3447 if (arm_abi == ARM_ABI_APCS)
3448 arm_pcs_default = ARM_PCS_APCS;
3449 else
3450 arm_pcs_default = ARM_PCS_ATPCS;
3453 /* For arm2/3 there is no need to do any scheduling if we are doing
3454 software floating-point. */
3455 if (TARGET_SOFT_FLOAT && (tune_flags & TF_NO_MODE32))
3456 flag_schedule_insns = flag_schedule_insns_after_reload = 0;
3458 /* Use the cp15 method if it is available. */
3459 if (target_thread_pointer == TP_AUTO)
3461 if (arm_arch6k && !TARGET_THUMB1)
3462 target_thread_pointer = TP_CP15;
3463 else
3464 target_thread_pointer = TP_SOFT;
3467 /* Override the default structure alignment for AAPCS ABI. */
3468 if (!global_options_set.x_arm_structure_size_boundary)
3470 if (TARGET_AAPCS_BASED)
3471 arm_structure_size_boundary = 8;
3473 else
3475 if (arm_structure_size_boundary != 8
3476 && arm_structure_size_boundary != 32
3477 && !(ARM_DOUBLEWORD_ALIGN && arm_structure_size_boundary == 64))
3479 if (ARM_DOUBLEWORD_ALIGN)
3480 warning (0,
3481 "structure size boundary can only be set to 8, 32 or 64");
3482 else
3483 warning (0, "structure size boundary can only be set to 8 or 32");
3484 arm_structure_size_boundary
3485 = (TARGET_AAPCS_BASED ? 8 : DEFAULT_STRUCTURE_SIZE_BOUNDARY);
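/* Illustrative effect (not from the original sources): structure sizes are
   rounded up to a multiple of this boundary, so with a boundary of 32 bits
   a structure such as  struct { char c; }  occupies 4 bytes, whereas the
   AAPCS default of 8 leaves it at 1 byte.  */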
3489 if (TARGET_VXWORKS_RTP)
3491 if (!global_options_set.x_arm_pic_data_is_text_relative)
3492 arm_pic_data_is_text_relative = 0;
3494 else if (flag_pic
3495 && !arm_pic_data_is_text_relative
3496 && !(global_options_set.x_target_flags & MASK_SINGLE_PIC_BASE))
3497 /* When text & data segments don't have a fixed displacement, the
3498 intended use is with a single, read-only, PIC base register.
3499 Unless the user explicitly requested not to do that, set
3500 it. */
3501 target_flags |= MASK_SINGLE_PIC_BASE;
3503 /* If stack checking is disabled, we can use r10 as the PIC register,
3504 which keeps r9 available. The EABI specifies r9 as the PIC register. */
3505 if (flag_pic && TARGET_SINGLE_PIC_BASE)
3507 if (TARGET_VXWORKS_RTP)
3508 warning (0, "RTP PIC is incompatible with -msingle-pic-base");
3509 arm_pic_register = (TARGET_APCS_STACK || TARGET_AAPCS_BASED) ? 9 : 10;
3512 if (flag_pic && TARGET_VXWORKS_RTP)
3513 arm_pic_register = 9;
3515 if (arm_pic_register_string != NULL)
3517 int pic_register = decode_reg_name (arm_pic_register_string);
3519 if (!flag_pic)
3520 warning (0, "-mpic-register= is useless without -fpic");
3522 /* Prevent the user from choosing an obviously stupid PIC register. */
3523 else if (pic_register < 0 || call_used_regs[pic_register]
3524 || pic_register == HARD_FRAME_POINTER_REGNUM
3525 || pic_register == STACK_POINTER_REGNUM
3526 || pic_register >= PC_REGNUM
3527 || (TARGET_VXWORKS_RTP
3528 && (unsigned int) pic_register != arm_pic_register))
3529 error ("unable to use '%s' for PIC register", arm_pic_register_string);
3530 else
3531 arm_pic_register = pic_register;
3534 /* Enable -mfix-cortex-m3-ldrd by default for Cortex-M3 cores. */
3535 if (fix_cm3_ldrd == 2)
3537 if (bitmap_bit_p (arm_active_target.isa, isa_quirk_cm3_ldrd))
3538 fix_cm3_ldrd = 1;
3539 else
3540 fix_cm3_ldrd = 0;
3543 /* Hot/Cold partitioning is not currently supported, since we can't
3544 handle literal pool placement in that case. */
3545 if (flag_reorder_blocks_and_partition)
3547 inform (input_location,
3548 "-freorder-blocks-and-partition not supported on this architecture");
3549 flag_reorder_blocks_and_partition = 0;
3550 flag_reorder_blocks = 1;
3553 if (flag_pic)
3554 /* Hoisting PIC address calculations more aggressively provides a small,
3555 but measurable, size reduction for PIC code. Therefore, we decrease
3556 the bar for unrestricted expression hoisting to the cost of PIC address
3557 calculation, which is 2 instructions. */
3558 maybe_set_param_value (PARAM_GCSE_UNRESTRICTED_COST, 2,
3559 global_options.x_param_values,
3560 global_options_set.x_param_values);
3562 /* ARM EABI defaults to strict volatile bitfields. */
3563 if (TARGET_AAPCS_BASED && flag_strict_volatile_bitfields < 0
3564 && abi_version_at_least(2))
3565 flag_strict_volatile_bitfields = 1;
3567 /* Enable software prefetching at -O3 for CPUs that have prefetch, and where
3568 we have deemed it beneficial (signified by setting
3569 prefetch.num_slots to 1 or more). */
3570 if (flag_prefetch_loop_arrays < 0
3571 && HAVE_prefetch
3572 && optimize >= 3
3573 && current_tune->prefetch.num_slots > 0)
3574 flag_prefetch_loop_arrays = 1;
3576 /* Set up parameters to be used in prefetching algorithm. Do not
3577 override the defaults unless we are tuning for a core we have
3578 researched values for. */
3579 if (current_tune->prefetch.num_slots > 0)
3580 maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES,
3581 current_tune->prefetch.num_slots,
3582 global_options.x_param_values,
3583 global_options_set.x_param_values);
3584 if (current_tune->prefetch.l1_cache_line_size >= 0)
3585 maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE,
3586 current_tune->prefetch.l1_cache_line_size,
3587 global_options.x_param_values,
3588 global_options_set.x_param_values);
3589 if (current_tune->prefetch.l1_cache_size >= 0)
3590 maybe_set_param_value (PARAM_L1_CACHE_SIZE,
3591 current_tune->prefetch.l1_cache_size,
3592 global_options.x_param_values,
3593 global_options_set.x_param_values);
3595 /* Use Neon to perform 64-bit operations rather than core
3596 registers. */
3597 prefer_neon_for_64bits = current_tune->prefer_neon_for_64bits;
3598 if (use_neon_for_64bits == 1)
3599 prefer_neon_for_64bits = true;
3601 /* Use the alternative scheduling-pressure algorithm by default. */
3602 maybe_set_param_value (PARAM_SCHED_PRESSURE_ALGORITHM, SCHED_PRESSURE_MODEL,
3603 global_options.x_param_values,
3604 global_options_set.x_param_values);
3606 /* Look through ready list and all of queue for instructions
3607 relevant for L2 auto-prefetcher. */
3608 int param_sched_autopref_queue_depth;
3610 switch (current_tune->sched_autopref)
3612 case tune_params::SCHED_AUTOPREF_OFF:
3613 param_sched_autopref_queue_depth = -1;
3614 break;
3616 case tune_params::SCHED_AUTOPREF_RANK:
3617 param_sched_autopref_queue_depth = 0;
3618 break;
3620 case tune_params::SCHED_AUTOPREF_FULL:
3621 param_sched_autopref_queue_depth = max_insn_queue_index + 1;
3622 break;
3624 default:
3625 gcc_unreachable ();
3628 maybe_set_param_value (PARAM_SCHED_AUTOPREF_QUEUE_DEPTH,
3629 param_sched_autopref_queue_depth,
3630 global_options.x_param_values,
3631 global_options_set.x_param_values);
3633 /* Currently, for slow flash data, we just disable literal pools. We also
3634 disable it for pure-code. */
3635 if (target_slow_flash_data || target_pure_code)
3636 arm_disable_literal_pool = true;
3638 if (use_cmse && !arm_arch_cmse)
3639 error ("target CPU does not support ARMv8-M Security Extensions");
3641 /* Disable scheduling fusion by default if it's not an ARMv7 processor
3642 or it doesn't prefer ldrd/strd. */
3643 if (flag_schedule_fusion == 2
3644 && (!arm_arch7 || !current_tune->prefer_ldrd_strd))
3645 flag_schedule_fusion = 0;
3647 /* Need to remember initial options before they are overridden. */
3648 init_optimize = build_optimization_node (&global_options);
3650 arm_option_override_internal (&global_options, &global_options_set);
3651 arm_option_check_internal (&global_options);
3652 arm_option_params_internal ();
3654 /* Create the default target_options structure. */
3655 target_option_default_node = target_option_current_node
3656 = build_target_option_node (&global_options);
3658 /* Register global variables with the garbage collector. */
3659 arm_add_gc_roots ();
3661 /* Init initial mode for testing. */
3662 thumb_flipper = TARGET_THUMB;
3665 static void
3666 arm_add_gc_roots (void)
3668 gcc_obstack_init(&minipool_obstack);
3669 minipool_startobj = (char *) obstack_alloc (&minipool_obstack, 0);
3672 /* A table of known ARM exception types.
3673 For use with the interrupt function attribute. */
3675 typedef struct
3677 const char *const arg;
3678 const unsigned long return_value;
3680 isr_attribute_arg;
3682 static const isr_attribute_arg isr_attribute_args [] =
3684 { "IRQ", ARM_FT_ISR },
3685 { "irq", ARM_FT_ISR },
3686 { "FIQ", ARM_FT_FIQ },
3687 { "fiq", ARM_FT_FIQ },
3688 { "ABORT", ARM_FT_ISR },
3689 { "abort", ARM_FT_ISR },
3690 { "ABORT", ARM_FT_ISR },
3691 { "abort", ARM_FT_ISR },
3692 { "UNDEF", ARM_FT_EXCEPTION },
3693 { "undef", ARM_FT_EXCEPTION },
3694 { "SWI", ARM_FT_EXCEPTION },
3695 { "swi", ARM_FT_EXCEPTION },
3696 { NULL, ARM_FT_NORMAL }
3699 /* Returns the (interrupt) function type of the current
3700 function, or ARM_FT_UNKNOWN if the type cannot be determined. */
3702 static unsigned long
3703 arm_isr_value (tree argument)
3705 const isr_attribute_arg * ptr;
3706 const char * arg;
3708 if (!arm_arch_notm)
3709 return ARM_FT_NORMAL | ARM_FT_STACKALIGN;
3711 /* No argument - default to IRQ. */
3712 if (argument == NULL_TREE)
3713 return ARM_FT_ISR;
3715 /* Get the value of the argument. */
3716 if (TREE_VALUE (argument) == NULL_TREE
3717 || TREE_CODE (TREE_VALUE (argument)) != STRING_CST)
3718 return ARM_FT_UNKNOWN;
3720 arg = TREE_STRING_POINTER (TREE_VALUE (argument));
3722 /* Check it against the list of known arguments. */
3723 for (ptr = isr_attribute_args; ptr->arg != NULL; ptr++)
3724 if (streq (arg, ptr->arg))
3725 return ptr->return_value;
3727 /* An unrecognized interrupt type. */
3728 return ARM_FT_UNKNOWN;
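/* Illustrative use (not part of the original sources): a handler declared as

       void handler (void) __attribute__ ((interrupt ("IRQ")));

   reaches arm_isr_value with the string "IRQ" and is classified as
   ARM_FT_ISR; an unrecognized string yields ARM_FT_UNKNOWN, and a bare
   attribute with no argument defaults to ARM_FT_ISR as well.  */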
3731 /* Computes the type of the current function. */
3733 static unsigned long
3734 arm_compute_func_type (void)
3736 unsigned long type = ARM_FT_UNKNOWN;
3737 tree a;
3738 tree attr;
3740 gcc_assert (TREE_CODE (current_function_decl) == FUNCTION_DECL);
3742 /* Decide if the current function is volatile. Such functions
3743 never return, and many memory cycles can be saved by not storing
3744 register values that will never be needed again. This optimization
3745 was added to speed up context switching in a kernel application. */
3746 if (optimize > 0
3747 && (TREE_NOTHROW (current_function_decl)
3748 || !(flag_unwind_tables
3749 || (flag_exceptions
3750 && arm_except_unwind_info (&global_options) != UI_SJLJ)))
3751 && TREE_THIS_VOLATILE (current_function_decl))
3752 type |= ARM_FT_VOLATILE;
3754 if (cfun->static_chain_decl != NULL)
3755 type |= ARM_FT_NESTED;
3757 attr = DECL_ATTRIBUTES (current_function_decl);
3759 a = lookup_attribute ("naked", attr);
3760 if (a != NULL_TREE)
3761 type |= ARM_FT_NAKED;
3763 a = lookup_attribute ("isr", attr);
3764 if (a == NULL_TREE)
3765 a = lookup_attribute ("interrupt", attr);
3767 if (a == NULL_TREE)
3768 type |= TARGET_INTERWORK ? ARM_FT_INTERWORKED : ARM_FT_NORMAL;
3769 else
3770 type |= arm_isr_value (TREE_VALUE (a));
3772 if (lookup_attribute ("cmse_nonsecure_entry", attr))
3773 type |= ARM_FT_CMSE_ENTRY;
3775 return type;
3778 /* Returns the type of the current function. */
3780 unsigned long
3781 arm_current_func_type (void)
3783 if (ARM_FUNC_TYPE (cfun->machine->func_type) == ARM_FT_UNKNOWN)
3784 cfun->machine->func_type = arm_compute_func_type ();
3786 return cfun->machine->func_type;
3789 bool
3790 arm_allocate_stack_slots_for_args (void)
3792 /* Naked functions should not allocate stack slots for arguments. */
3793 return !IS_NAKED (arm_current_func_type ());
3796 static bool
3797 arm_warn_func_return (tree decl)
3799 /* Naked functions are implemented entirely in assembly, including the
3800 return sequence, so suppress warnings about this. */
3801 return lookup_attribute ("naked", DECL_ATTRIBUTES (decl)) == NULL_TREE;
3805 /* Output assembler code for a block containing the constant parts
3806 of a trampoline, leaving space for the variable parts.
3808 On the ARM, (if r8 is the static chain regnum, and remembering that
3809 referencing pc adds an offset of 8) the trampoline looks like:
3810 ldr r8, [pc, #0]
3811 ldr pc, [pc]
3812 .word static chain value
3813 .word function's address
3814 XXX FIXME: When the trampoline returns, r8 will be clobbered. */
3816 static void
3817 arm_asm_trampoline_template (FILE *f)
3819 fprintf (f, "\t.syntax unified\n");
3821 if (TARGET_ARM)
3823 fprintf (f, "\t.arm\n");
3824 asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", STATIC_CHAIN_REGNUM, PC_REGNUM);
3825 asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", PC_REGNUM, PC_REGNUM);
3827 else if (TARGET_THUMB2)
3829 fprintf (f, "\t.thumb\n");
3830 /* The Thumb-2 trampoline is similar to the ARM implementation.
3831 Unlike 16-bit Thumb, we enter the stub in Thumb mode.
3832 asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n",
3833 STATIC_CHAIN_REGNUM, PC_REGNUM);
3834 asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n", PC_REGNUM, PC_REGNUM);
3836 else
3838 ASM_OUTPUT_ALIGN (f, 2);
3839 fprintf (f, "\t.code\t16\n");
3840 fprintf (f, ".Ltrampoline_start:\n");
3841 asm_fprintf (f, "\tpush\t{r0, r1}\n");
3842 asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
3843 asm_fprintf (f, "\tmov\t%r, r0\n", STATIC_CHAIN_REGNUM);
3844 asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
3845 asm_fprintf (f, "\tstr\tr0, [%r, #4]\n", SP_REGNUM);
3846 asm_fprintf (f, "\tpop\t{r0, %r}\n", PC_REGNUM);
3848 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
3849 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
3852 /* Emit RTL insns to initialize the variable parts of a trampoline. */
3854 static void
3855 arm_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
3857 rtx fnaddr, mem, a_tramp;
3859 emit_block_move (m_tramp, assemble_trampoline_template (),
3860 GEN_INT (TRAMPOLINE_SIZE), BLOCK_OP_NORMAL);
3862 mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 8 : 12);
3863 emit_move_insn (mem, chain_value);
3865 mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 12 : 16);
3866 fnaddr = XEXP (DECL_RTL (fndecl), 0);
3867 emit_move_insn (mem, fnaddr);
3869 a_tramp = XEXP (m_tramp, 0);
3870 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__clear_cache"),
3871 LCT_NORMAL, VOIDmode, 2, a_tramp, Pmode,
3872 plus_constant (Pmode, a_tramp, TRAMPOLINE_SIZE), Pmode);
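/* For illustration (following the template and offsets above): on a 32-bit
   (ARM or Thumb-2) target the finished trampoline is laid out as

       words 0-1: the two load instructions from the template
       word 2 (offset  8): static chain value
       word 3 (offset 12): target function address

   while the longer Thumb-1 stub places the chain and address at offsets
   12 and 16 respectively, matching the adjust_address calls above.  */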
3875 /* Thumb trampolines should be entered in thumb mode, so set
3876 the bottom bit of the address. */
3878 static rtx
3879 arm_trampoline_adjust_address (rtx addr)
3881 if (TARGET_THUMB)
3882 addr = expand_simple_binop (Pmode, IOR, addr, const1_rtx,
3883 NULL, 0, OPTAB_LIB_WIDEN);
3884 return addr;
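/* E.g. (illustrative address) a trampoline at 0x20001000 is returned as
   0x20001001 when targeting Thumb, so an indirect call through the
   adjusted address enters the stub in Thumb state.  */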
3887 /* Return 1 if it is possible to return using a single instruction.
3888 If SIBLING is non-null, this is a test for a return before a sibling
3889 call. SIBLING is the call insn, so we can examine its register usage. */
3892 use_return_insn (int iscond, rtx sibling)
3894 int regno;
3895 unsigned int func_type;
3896 unsigned long saved_int_regs;
3897 unsigned HOST_WIDE_INT stack_adjust;
3898 arm_stack_offsets *offsets;
3900 /* Never use a return instruction before reload has run. */
3901 if (!reload_completed)
3902 return 0;
3904 func_type = arm_current_func_type ();
3906 /* Naked, volatile and stack alignment functions need special
3907 consideration. */
3908 if (func_type & (ARM_FT_VOLATILE | ARM_FT_NAKED | ARM_FT_STACKALIGN))
3909 return 0;
3911 /* So do interrupt functions that use the frame pointer and Thumb
3912 interrupt functions. */
3913 if (IS_INTERRUPT (func_type) && (frame_pointer_needed || TARGET_THUMB))
3914 return 0;
3916 if (TARGET_LDRD && current_tune->prefer_ldrd_strd
3917 && !optimize_function_for_size_p (cfun))
3918 return 0;
3920 offsets = arm_get_frame_offsets ();
3921 stack_adjust = offsets->outgoing_args - offsets->saved_regs;
3923 /* As do variadic functions. */
3924 if (crtl->args.pretend_args_size
3925 || cfun->machine->uses_anonymous_args
3926 /* Or if the function calls __builtin_eh_return () */
3927 || crtl->calls_eh_return
3928 /* Or if the function calls alloca */
3929 || cfun->calls_alloca
3930 /* Or if there is a stack adjustment. However, if the stack pointer
3931 is saved on the stack, we can use a pre-incrementing stack load. */
3932 || !(stack_adjust == 0 || (TARGET_APCS_FRAME && frame_pointer_needed
3933 && stack_adjust == 4))
3934 /* Or if the static chain register was saved above the frame, under the
3935 assumption that the stack pointer isn't saved on the stack. */
3936 || (!(TARGET_APCS_FRAME && frame_pointer_needed)
3937 && arm_compute_static_chain_stack_bytes() != 0))
3938 return 0;
3940 saved_int_regs = offsets->saved_regs_mask;
3942 /* Unfortunately, the insn
3944 ldmib sp, {..., sp, ...}
3946 triggers a bug on most SA-110 based devices, such that the stack
3947 pointer won't be correctly restored if the instruction takes a
3948 page fault. We work around this problem by popping r3 along with
3949 the other registers, since that is never slower than executing
3950 another instruction.
3952 We test for !arm_arch5 here, because code for any architecture
3953 less than this could potentially be run on one of the buggy
3954 chips. */
3955 if (stack_adjust == 4 && !arm_arch5 && TARGET_ARM)
3957 /* Validate that r3 is a call-clobbered register (always true in
3958 the default abi) ... */
3959 if (!call_used_regs[3])
3960 return 0;
3962 /* ... that it isn't being used for a return value ... */
3963 if (arm_size_return_regs () >= (4 * UNITS_PER_WORD))
3964 return 0;
3966 /* ... or for a tail-call argument ... */
3967 if (sibling)
3969 gcc_assert (CALL_P (sibling));
3971 if (find_regno_fusage (sibling, USE, 3))
3972 return 0;
3975 /* ... and that there are no call-saved registers in r0-r2
3976 (always true in the default ABI). */
3977 if (saved_int_regs & 0x7)
3978 return 0;
3981 /* Can't be done if interworking with Thumb, and any registers have been
3982 stacked. */
3983 if (TARGET_INTERWORK && saved_int_regs != 0 && !IS_INTERRUPT(func_type))
3984 return 0;
3986 /* On StrongARM, conditional returns are expensive if they aren't
3987 taken and multiple registers have been stacked. */
3988 if (iscond && arm_tune_strongarm)
3990 /* Conditional return when just the LR is stored is a simple
3991 conditional-load instruction, that's not expensive. */
3992 if (saved_int_regs != 0 && saved_int_regs != (1 << LR_REGNUM))
3993 return 0;
3995 if (flag_pic
3996 && arm_pic_register != INVALID_REGNUM
3997 && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM))
3998 return 0;
4001 /* ARMv8-M non-secure entry functions need to use bxns to return and thus need
4002 several instructions if anything needs to be popped. */
4003 if (saved_int_regs && IS_CMSE_ENTRY (func_type))
4004 return 0;
4006 /* If there are saved registers but the LR isn't saved, then we need
4007 two instructions for the return. */
4008 if (saved_int_regs && !(saved_int_regs & (1 << LR_REGNUM)))
4009 return 0;
4011 /* Can't be done if any of the VFP regs are pushed,
4012 since this also requires an insn. */
4013 if (TARGET_HARD_FLOAT)
4014 for (regno = FIRST_VFP_REGNUM; regno <= LAST_VFP_REGNUM; regno++)
4015 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
4016 return 0;
4018 if (TARGET_REALLY_IWMMXT)
4019 for (regno = FIRST_IWMMXT_REGNUM; regno <= LAST_IWMMXT_REGNUM; regno++)
4020 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
4021 return 0;
4023 return 1;
4026 /* Return TRUE if we should try to use a simple_return insn, i.e. perform
4027 shrink-wrapping if possible. This is the case if we need to emit a
4028 prologue, which we can test by looking at the offsets. */
4029 bool
4030 use_simple_return_p (void)
4032 arm_stack_offsets *offsets;
4034 offsets = arm_get_frame_offsets ();
4035 return offsets->outgoing_args != 0;
4038 /* Return TRUE if int I is a valid immediate ARM constant. */
4041 const_ok_for_arm (HOST_WIDE_INT i)
4043 int lowbit;
4045 /* For machines with >32 bit HOST_WIDE_INT, the bits above bit 31 must
4046 be all zero, or all one. */
4047 if ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff) != 0
4048 && ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff)
4049 != ((~(unsigned HOST_WIDE_INT) 0)
4050 & ~(unsigned HOST_WIDE_INT) 0xffffffff)))
4051 return FALSE;
4053 i &= (unsigned HOST_WIDE_INT) 0xffffffff;
4055 /* Fast return for 0 and small values. We must do this for zero, since
4056 the code below can't handle that one case. */
4057 if ((i & ~(unsigned HOST_WIDE_INT) 0xff) == 0)
4058 return TRUE;
4060 /* Get the number of trailing zeros. */
4061 lowbit = ffs((int) i) - 1;
4063 /* Only even shifts are allowed in ARM mode so round down to the
4064 nearest even number. */
4065 if (TARGET_ARM)
4066 lowbit &= ~1;
4068 if ((i & ~(((unsigned HOST_WIDE_INT) 0xff) << lowbit)) == 0)
4069 return TRUE;
4071 if (TARGET_ARM)
4073 /* Allow rotated constants in ARM mode. */
4074 if (lowbit <= 4
4075 && ((i & ~0xc000003f) == 0
4076 || (i & ~0xf000000f) == 0
4077 || (i & ~0xfc000003) == 0))
4078 return TRUE;
4080 else if (TARGET_THUMB2)
4082 HOST_WIDE_INT v;
4084 /* Allow repeated patterns 0x00XY00XY or 0xXYXYXYXY. */
4085 v = i & 0xff;
4086 v |= v << 16;
4087 if (i == v || i == (v | (v << 8)))
4088 return TRUE;
4090 /* Allow repeated pattern 0xXY00XY00. */
4091 v = i & 0xff00;
4092 v |= v << 16;
4093 if (i == v)
4094 return TRUE;
4096 else if (TARGET_HAVE_MOVT)
4098 /* Thumb-1 Targets with MOVT. */
4099 if (i > 0xffff)
4100 return FALSE;
4101 else
4102 return TRUE;
4105 return FALSE;
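/* Illustrative values (not exhaustive): in ARM state an immediate is an
   8-bit value rotated right by an even amount, so 0xff, 0x3fc and
   0xff000000 are all accepted, while 0xff01 is not (it needs nine
   significant bits).  Thumb-2 additionally accepts replicated patterns
   such as 0x00ff00ff and 0x01010101.  */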
4108 /* Return true if I is a valid constant for the operation CODE. */
4110 const_ok_for_op (HOST_WIDE_INT i, enum rtx_code code)
4112 if (const_ok_for_arm (i))
4113 return 1;
4115 switch (code)
4117 case SET:
4118 /* See if we can use movw. */
4119 if (TARGET_HAVE_MOVT && (i & 0xffff0000) == 0)
4120 return 1;
4121 else
4122 /* Otherwise, try mvn. */
4123 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
4125 case PLUS:
4126 /* See if we can use addw or subw. */
4127 if (TARGET_THUMB2
4128 && ((i & 0xfffff000) == 0
4129 || ((-i) & 0xfffff000) == 0))
4130 return 1;
4131 /* Fall through. */
4132 case COMPARE:
4133 case EQ:
4134 case NE:
4135 case GT:
4136 case LE:
4137 case LT:
4138 case GE:
4139 case GEU:
4140 case LTU:
4141 case GTU:
4142 case LEU:
4143 case UNORDERED:
4144 case ORDERED:
4145 case UNEQ:
4146 case UNGE:
4147 case UNLT:
4148 case UNGT:
4149 case UNLE:
4150 return const_ok_for_arm (ARM_SIGN_EXTEND (-i));
4152 case MINUS: /* Should only occur with (MINUS I reg) => rsb */
4153 case XOR:
4154 return 0;
4156 case IOR:
4157 if (TARGET_THUMB2)
4158 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
4159 return 0;
4161 case AND:
4162 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
4164 default:
4165 gcc_unreachable ();
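/* Worked example (illustrative): for (AND, 0xffffff00) the constant itself
   is not a valid immediate, but ARM_SIGN_EXTEND (~0xffffff00) == 0xff is,
   so the operation can be emitted as a single BIC and the function
   returns nonzero.  */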
4169 /* Return true if I is a valid di mode constant for the operation CODE. */
4171 const_ok_for_dimode_op (HOST_WIDE_INT i, enum rtx_code code)
4173 HOST_WIDE_INT hi_val = (i >> 32) & 0xFFFFFFFF;
4174 HOST_WIDE_INT lo_val = i & 0xFFFFFFFF;
4175 rtx hi = GEN_INT (hi_val);
4176 rtx lo = GEN_INT (lo_val);
4178 if (TARGET_THUMB1)
4179 return 0;
4181 switch (code)
4183 case AND:
4184 case IOR:
4185 case XOR:
4186 return (const_ok_for_op (hi_val, code) || hi_val == 0xFFFFFFFF)
4187 && (const_ok_for_op (lo_val, code) || lo_val == 0xFFFFFFFF);
4188 case PLUS:
4189 return arm_not_operand (hi, SImode) && arm_add_operand (lo, SImode);
4191 default:
4192 return 0;
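/* Worked example (illustrative): for IOR with 0xffffffff000000ff the high
   word is 0xffffffff and the low word 0xff is a valid immediate, so both
   halves can be handled and the function returns nonzero (except for
   Thumb-1, which always returns 0 here).  */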
4196 /* Emit a sequence of insns to handle a large constant.
4197 CODE is the code of the operation required, it can be any of SET, PLUS,
4198 IOR, AND, XOR, MINUS;
4199 MODE is the mode in which the operation is being performed;
4200 VAL is the integer to operate on;
4201 SOURCE is the other operand (a register, or a null-pointer for SET);
4202 SUBTARGETS means it is safe to create scratch registers if that will
4203 either produce a simpler sequence, or we will want to cse the values.
4204 Return value is the number of insns emitted. */
4206 /* ??? Tweak this for thumb2. */
4208 arm_split_constant (enum rtx_code code, machine_mode mode, rtx insn,
4209 HOST_WIDE_INT val, rtx target, rtx source, int subtargets)
4211 rtx cond;
4213 if (insn && GET_CODE (PATTERN (insn)) == COND_EXEC)
4214 cond = COND_EXEC_TEST (PATTERN (insn));
4215 else
4216 cond = NULL_RTX;
4218 if (subtargets || code == SET
4219 || (REG_P (target) && REG_P (source)
4220 && REGNO (target) != REGNO (source)))
4222 /* After arm_reorg has been called, we can't fix up expensive
4223 constants by pushing them into memory so we must synthesize
4224 them in-line, regardless of the cost. This is only likely to
4225 be more costly on chips that have load delay slots and we are
4226 compiling without running the scheduler (so no splitting
4227 occurred before the final instruction emission).
4229 Ref: gcc -O1 -mcpu=strongarm gcc.c-torture/compile/980506-2.c
4231 if (!cfun->machine->after_arm_reorg
4232 && !cond
4233 && (arm_gen_constant (code, mode, NULL_RTX, val, target, source,
4234 1, 0)
4235 > (arm_constant_limit (optimize_function_for_size_p (cfun))
4236 + (code != SET))))
4238 if (code == SET)
4240 /* Currently SET is the only monadic value for CODE; all
4241 the rest are dyadic. */
4242 if (TARGET_USE_MOVT)
4243 arm_emit_movpair (target, GEN_INT (val));
4244 else
4245 emit_set_insn (target, GEN_INT (val));
4247 return 1;
4249 else
4251 rtx temp = subtargets ? gen_reg_rtx (mode) : target;
4253 if (TARGET_USE_MOVT)
4254 arm_emit_movpair (temp, GEN_INT (val));
4255 else
4256 emit_set_insn (temp, GEN_INT (val));
4258 /* For MINUS, the value is subtracted from, since we never
4259 have subtraction of a constant. */
4260 if (code == MINUS)
4261 emit_set_insn (target, gen_rtx_MINUS (mode, temp, source));
4262 else
4263 emit_set_insn (target,
4264 gen_rtx_fmt_ee (code, mode, source, temp));
4265 return 2;
4270 return arm_gen_constant (code, mode, cond, val, target, source, subtargets,
4274 /* Return a sequence of integers, in RETURN_SEQUENCE, that fit into
4275 ARM/Thumb-2 immediates and add up to VAL.
4276 The function's return value gives the number of insns required. */
4277 static int
4278 optimal_immediate_sequence (enum rtx_code code, unsigned HOST_WIDE_INT val,
4279 struct four_ints *return_sequence)
4281 int best_consecutive_zeros = 0;
4282 int i;
4283 int best_start = 0;
4284 int insns1, insns2;
4285 struct four_ints tmp_sequence;
4287 /* If we aren't targeting ARM, the best place to start is always at
4288 the bottom, otherwise look more closely. */
4289 if (TARGET_ARM)
4291 for (i = 0; i < 32; i += 2)
4293 int consecutive_zeros = 0;
4295 if (!(val & (3 << i)))
4297 while ((i < 32) && !(val & (3 << i)))
4299 consecutive_zeros += 2;
4300 i += 2;
4302 if (consecutive_zeros > best_consecutive_zeros)
4304 best_consecutive_zeros = consecutive_zeros;
4305 best_start = i - consecutive_zeros;
4307 i -= 2;
4312 /* So long as it won't require any more insns to do so, it's
4313 desirable to emit a small constant (in bits 0...9) in the last
4314 insn. This way there is more chance that it can be combined with
4315 a later addressing insn to form a pre-indexed load or store
4316 operation. Consider:
4318 *((volatile int *)0xe0000100) = 1;
4319 *((volatile int *)0xe0000110) = 2;
4321 We want this to wind up as:
4323 mov rA, #0xe0000000
4324 mov rB, #1
4325 str rB, [rA, #0x100]
4326 mov rB, #2
4327 str rB, [rA, #0x110]
4329 rather than having to synthesize both large constants from scratch.
4331 Therefore, we calculate how many insns would be required to emit
4332 the constant starting from `best_start', and also starting from
4333 zero (i.e. with bit 31 first to be output). If `best_start' doesn't
4334 yield a shorter sequence, we may as well use zero. */
4335 insns1 = optimal_immediate_sequence_1 (code, val, return_sequence, best_start);
4336 if (best_start != 0
4337 && ((HOST_WIDE_INT_1U << best_start) < val))
4339 insns2 = optimal_immediate_sequence_1 (code, val, &tmp_sequence, 0);
4340 if (insns2 <= insns1)
4342 *return_sequence = tmp_sequence;
4343 insns1 = insns2;
4347 return insns1;
4350 /* As for optimal_immediate_sequence, but starting at bit-position I. */
4351 static int
4352 optimal_immediate_sequence_1 (enum rtx_code code, unsigned HOST_WIDE_INT val,
4353 struct four_ints *return_sequence, int i)
4355 int remainder = val & 0xffffffff;
4356 int insns = 0;
4358 /* Try and find a way of doing the job in either two or three
4359 instructions.
4361 In ARM mode we can use 8-bit constants, rotated to any 2-bit aligned
4362 location. We start at position I. This may be the MSB, or
4363 optimal_immediate_sequence may have positioned it at the largest block
4364 of zeros that are aligned on a 2-bit boundary. We then fill up the temps,
4365 wrapping around to the top of the word when we drop off the bottom.
4366 In the worst case this code should produce no more than four insns.
4368 In Thumb2 mode, we can use 32/16-bit replicated constants, and 8-bit
4369 constants, shifted to any arbitrary location. We should always start
4370 at the MSB. */
4373 int end;
4374 unsigned int b1, b2, b3, b4;
4375 unsigned HOST_WIDE_INT result;
4376 int loc;
4378 gcc_assert (insns < 4);
4380 if (i <= 0)
4381 i += 32;
4383 /* First, find the next normal 12/8-bit shifted/rotated immediate. */
4384 if (remainder & ((TARGET_ARM ? (3 << (i - 2)) : (1 << (i - 1)))))
4386 loc = i;
4387 if (i <= 12 && TARGET_THUMB2 && code == PLUS)
4388 /* We can use addw/subw for the last 12 bits. */
4389 result = remainder;
4390 else
4392 /* Use an 8-bit shifted/rotated immediate. */
4393 end = i - 8;
4394 if (end < 0)
4395 end += 32;
4396 result = remainder & ((0x0ff << end)
4397 | ((i < end) ? (0xff >> (32 - end))
4398 : 0));
4399 i -= 8;
4402 else
4404 /* Arm allows rotates by a multiple of two. Thumb-2 allows
4405 arbitrary shifts. */
4406 i -= TARGET_ARM ? 2 : 1;
4407 continue;
4410 /* Next, see if we can do a better job with a thumb2 replicated
4411 constant.
4413 We do it this way around to catch the cases like 0x01F001E0 where
4414 two 8-bit immediates would work, but a replicated constant would
4415 make it worse.
4417 TODO: 16-bit constants that don't clear all the bits, but still win.
4418 TODO: Arithmetic splitting for set/add/sub, rather than bitwise. */
4419 if (TARGET_THUMB2)
4421 b1 = (remainder & 0xff000000) >> 24;
4422 b2 = (remainder & 0x00ff0000) >> 16;
4423 b3 = (remainder & 0x0000ff00) >> 8;
4424 b4 = remainder & 0xff;
4426 if (loc > 24)
4428 /* The 8-bit immediate already found clears b1 (and maybe b2),
4429 but must leave b3 and b4 alone. */
4431 /* First try to find a 32-bit replicated constant that clears
4432 almost everything. We can assume that we can't do it in one,
4433 or else we wouldn't be here. */
4434 unsigned int tmp = b1 & b2 & b3 & b4;
4435 unsigned int tmp2 = tmp + (tmp << 8) + (tmp << 16)
4436 + (tmp << 24);
4437 unsigned int matching_bytes = (tmp == b1) + (tmp == b2)
4438 + (tmp == b3) + (tmp == b4);
4439 if (tmp
4440 && (matching_bytes >= 3
4441 || (matching_bytes == 2
4442 && const_ok_for_op (remainder & ~tmp2, code))))
4444 /* At least 3 of the bytes match, and the fourth has at
4445 least as many bits set, or two of the bytes match
4446 and it will only require one more insn to finish. */
4447 result = tmp2;
4448 i = tmp != b1 ? 32
4449 : tmp != b2 ? 24
4450 : tmp != b3 ? 16
4451 : 8;
4454 /* Second, try to find a 16-bit replicated constant that can
4455 leave three of the bytes clear. If b2 or b4 is already
4456 zero, then we can. If the 8-bit from above would not
4457 clear b2 anyway, then we still win. */
4458 else if (b1 == b3 && (!b2 || !b4
4459 || (remainder & 0x00ff0000 & ~result)))
4461 result = remainder & 0xff00ff00;
4462 i = 24;
4465 else if (loc > 16)
4467 /* The 8-bit immediate already found clears b2 (and maybe b3)
4468 and we don't get here unless b1 is already clear, but it will
4469 leave b4 unchanged. */
4471 /* If we can clear b2 and b4 at once, then we win, since the
4472 8-bit immediate couldn't possibly reach that far. */
4473 if (b2 == b4)
4475 result = remainder & 0x00ff00ff;
4476 i = 16;
4481 return_sequence->i[insns++] = result;
4482 remainder &= ~result;
4484 if (code == SET || code == MINUS)
4485 code = PLUS;
4487 while (remainder);
4489 return insns;
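/* Worked example (illustrative, ARM state): the constant 0xff0000ff is not
   a valid immediate by itself, but it splits into the two valid immediates
   0xff000000 and 0x000000ff, so RETURN_SEQUENCE receives those two values
   and the function returns 2.  As the comment above notes, no 32-bit value
   should ever need more than four such steps.  */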
4492 /* Emit an instruction with the indicated PATTERN. If COND is
4493 non-NULL, conditionalize the execution of the instruction on COND
4494 being true. */
4496 static void
4497 emit_constant_insn (rtx cond, rtx pattern)
4499 if (cond)
4500 pattern = gen_rtx_COND_EXEC (VOIDmode, copy_rtx (cond), pattern);
4501 emit_insn (pattern);
4504 /* As above, but extra parameter GENERATE which, if clear, suppresses
4505 RTL generation. */
4507 static int
4508 arm_gen_constant (enum rtx_code code, machine_mode mode, rtx cond,
4509 unsigned HOST_WIDE_INT val, rtx target, rtx source,
4510 int subtargets, int generate)
4512 int can_invert = 0;
4513 int can_negate = 0;
4514 int final_invert = 0;
4515 int i;
4516 int set_sign_bit_copies = 0;
4517 int clear_sign_bit_copies = 0;
4518 int clear_zero_bit_copies = 0;
4519 int set_zero_bit_copies = 0;
4520 int insns = 0, neg_insns, inv_insns;
4521 unsigned HOST_WIDE_INT temp1, temp2;
4522 unsigned HOST_WIDE_INT remainder = val & 0xffffffff;
4523 struct four_ints *immediates;
4524 struct four_ints pos_immediates, neg_immediates, inv_immediates;
4526 /* Find out which operations are safe for a given CODE. Also do a quick
4527 check for degenerate cases; these can occur when DImode operations
4528 are split. */
4529 switch (code)
4531 case SET:
4532 can_invert = 1;
4533 break;
4535 case PLUS:
4536 can_negate = 1;
4537 break;
4539 case IOR:
4540 if (remainder == 0xffffffff)
4542 if (generate)
4543 emit_constant_insn (cond,
4544 gen_rtx_SET (target,
4545 GEN_INT (ARM_SIGN_EXTEND (val))));
4546 return 1;
4549 if (remainder == 0)
4551 if (reload_completed && rtx_equal_p (target, source))
4552 return 0;
4554 if (generate)
4555 emit_constant_insn (cond, gen_rtx_SET (target, source));
4556 return 1;
4558 break;
4560 case AND:
4561 if (remainder == 0)
4563 if (generate)
4564 emit_constant_insn (cond, gen_rtx_SET (target, const0_rtx));
4565 return 1;
4567 if (remainder == 0xffffffff)
4569 if (reload_completed && rtx_equal_p (target, source))
4570 return 0;
4571 if (generate)
4572 emit_constant_insn (cond, gen_rtx_SET (target, source));
4573 return 1;
4575 can_invert = 1;
4576 break;
4578 case XOR:
4579 if (remainder == 0)
4581 if (reload_completed && rtx_equal_p (target, source))
4582 return 0;
4583 if (generate)
4584 emit_constant_insn (cond, gen_rtx_SET (target, source));
4585 return 1;
4588 if (remainder == 0xffffffff)
4590 if (generate)
4591 emit_constant_insn (cond,
4592 gen_rtx_SET (target,
4593 gen_rtx_NOT (mode, source)));
4594 return 1;
4596 final_invert = 1;
4597 break;
4599 case MINUS:
4600 /* We treat MINUS as (val - source), since (source - val) is always
4601 passed as (source + (-val)). */
4602 if (remainder == 0)
4604 if (generate)
4605 emit_constant_insn (cond,
4606 gen_rtx_SET (target,
4607 gen_rtx_NEG (mode, source)));
4608 return 1;
4610 if (const_ok_for_arm (val))
4612 if (generate)
4613 emit_constant_insn (cond,
4614 gen_rtx_SET (target,
4615 gen_rtx_MINUS (mode, GEN_INT (val),
4616 source)));
4617 return 1;
4620 break;
4622 default:
4623 gcc_unreachable ();
4626 /* If we can do it in one insn get out quickly. */
4627 if (const_ok_for_op (val, code))
4629 if (generate)
4630 emit_constant_insn (cond,
4631 gen_rtx_SET (target,
4632 (source
4633 ? gen_rtx_fmt_ee (code, mode, source,
4634 GEN_INT (val))
4635 : GEN_INT (val))));
4636 return 1;
4639 /* On targets with UXTH/UBFX, we can deal with AND (2^N)-1 in a single
4640 insn. */
4641 if (code == AND && (i = exact_log2 (remainder + 1)) > 0
4642 && (arm_arch_thumb2 || (i == 16 && arm_arch6 && mode == SImode)))
4644 if (generate)
4646 if (mode == SImode && i == 16)
4647 /* Use UXTH in preference to UBFX, since on Thumb2 it's a
4648 smaller insn. */
4649 emit_constant_insn (cond,
4650 gen_zero_extendhisi2
4651 (target, gen_lowpart (HImode, source)));
4652 else
4653 /* Extz only supports SImode, but we can coerce the operands
4654 into that mode. */
4655 emit_constant_insn (cond,
4656 gen_extzv_t2 (gen_lowpart (SImode, target),
4657 gen_lowpart (SImode, source),
4658 GEN_INT (i), const0_rtx));
4661 return 1;
4664 /* Calculate a few attributes that may be useful for specific
4665 optimizations. */
4666 /* Count number of leading zeros. */
4667 for (i = 31; i >= 0; i--)
4669 if ((remainder & (1 << i)) == 0)
4670 clear_sign_bit_copies++;
4671 else
4672 break;
4675 /* Count number of leading 1's. */
4676 for (i = 31; i >= 0; i--)
4678 if ((remainder & (1 << i)) != 0)
4679 set_sign_bit_copies++;
4680 else
4681 break;
4685 /* Count number of trailing zeros. */
4685 for (i = 0; i <= 31; i++)
4687 if ((remainder & (1 << i)) == 0)
4688 clear_zero_bit_copies++;
4689 else
4690 break;
4693 /* Count number of trailing 1's. */
4694 for (i = 0; i <= 31; i++)
4696 if ((remainder & (1 << i)) != 0)
4697 set_zero_bit_copies++;
4698 else
4699 break;
4702 switch (code)
4704 case SET:
4705 /* See if we can do this by sign_extending a constant that is known
4706 to be negative. This is a good way of doing it, since the shift
4707 may well merge into a subsequent insn. */
4708 if (set_sign_bit_copies > 1)
4710 if (const_ok_for_arm
4711 (temp1 = ARM_SIGN_EXTEND (remainder
4712 << (set_sign_bit_copies - 1))))
4714 if (generate)
4716 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4717 emit_constant_insn (cond,
4718 gen_rtx_SET (new_src, GEN_INT (temp1)));
4719 emit_constant_insn (cond,
4720 gen_ashrsi3 (target, new_src,
4721 GEN_INT (set_sign_bit_copies - 1)));
4723 return 2;
4725 /* For an inverted constant, we will need to set the low bits,
4726 these will be shifted out of harm's way. */
4727 temp1 |= (1 << (set_sign_bit_copies - 1)) - 1;
4728 if (const_ok_for_arm (~temp1))
4730 if (generate)
4732 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4733 emit_constant_insn (cond,
4734 gen_rtx_SET (new_src, GEN_INT (temp1)));
4735 emit_constant_insn (cond,
4736 gen_ashrsi3 (target, new_src,
4737 GEN_INT (set_sign_bit_copies - 1)));
4739 return 2;
4743 /* See if we can calculate the value as the difference between two
4744 valid immediates. */
4745 if (clear_sign_bit_copies + clear_zero_bit_copies <= 16)
4747 int topshift = clear_sign_bit_copies & ~1;
4749 temp1 = ARM_SIGN_EXTEND ((remainder + (0x00800000 >> topshift))
4750 & (0xff000000 >> topshift));
4752 /* If temp1 is zero, then that means the 9 most significant
4753 bits of remainder were 1 and we've caused it to overflow.
4754 When topshift is 0 we don't need to do anything since we
4755 can borrow from 'bit 32'. */
4756 if (temp1 == 0 && topshift != 0)
4757 temp1 = 0x80000000 >> (topshift - 1);
4759 temp2 = ARM_SIGN_EXTEND (temp1 - remainder);
4761 if (const_ok_for_arm (temp2))
4763 if (generate)
4765 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4766 emit_constant_insn (cond,
4767 gen_rtx_SET (new_src, GEN_INT (temp1)));
4768 emit_constant_insn (cond,
4769 gen_addsi3 (target, new_src,
4770 GEN_INT (-temp2)));
4773 return 2;
4777 /* See if we can generate this by setting the bottom (or the top)
4778 16 bits, and then shifting these into the other half of the
4779 word. We only look for the simplest cases, to do more would cost
4780 too much. Be careful, however, not to generate this when the
4781 alternative would take fewer insns. */
4782 if (val & 0xffff0000)
4784 temp1 = remainder & 0xffff0000;
4785 temp2 = remainder & 0x0000ffff;
4787 /* Overlaps outside this range are best done using other methods. */
4788 for (i = 9; i < 24; i++)
4790 if ((((temp2 | (temp2 << i)) & 0xffffffff) == remainder)
4791 && !const_ok_for_arm (temp2))
4793 rtx new_src = (subtargets
4794 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
4795 : target);
4796 insns = arm_gen_constant (code, mode, cond, temp2, new_src,
4797 source, subtargets, generate);
4798 source = new_src;
4799 if (generate)
4800 emit_constant_insn
4801 (cond,
4802 gen_rtx_SET
4803 (target,
4804 gen_rtx_IOR (mode,
4805 gen_rtx_ASHIFT (mode, source,
4806 GEN_INT (i)),
4807 source)));
4808 return insns + 1;
4812 /* Don't duplicate cases already considered. */
4813 for (i = 17; i < 24; i++)
4815 if (((temp1 | (temp1 >> i)) == remainder)
4816 && !const_ok_for_arm (temp1))
4818 rtx new_src = (subtargets
4819 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
4820 : target);
4821 insns = arm_gen_constant (code, mode, cond, temp1, new_src,
4822 source, subtargets, generate);
4823 source = new_src;
4824 if (generate)
4825 emit_constant_insn
4826 (cond,
4827 gen_rtx_SET (target,
4828 gen_rtx_IOR
4829 (mode,
4830 gen_rtx_LSHIFTRT (mode, source,
4831 GEN_INT (i)),
4832 source)));
4833 return insns + 1;
4837 break;
4839 case IOR:
4840 case XOR:
4841 /* If we have IOR or XOR, and the constant can be loaded in a
4842 single instruction, and we can find a temporary to put it in,
4843 then this can be done in two instructions instead of 3-4. */
4844 if (subtargets
4845 /* TARGET can't be NULL if SUBTARGETS is 0 */
4846 || (reload_completed && !reg_mentioned_p (target, source)))
4848 if (const_ok_for_arm (ARM_SIGN_EXTEND (~val)))
4850 if (generate)
4852 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
4854 emit_constant_insn (cond,
4855 gen_rtx_SET (sub, GEN_INT (val)));
4856 emit_constant_insn (cond,
4857 gen_rtx_SET (target,
4858 gen_rtx_fmt_ee (code, mode,
4859 source, sub)));
4861 return 2;
4865 if (code == XOR)
4866 break;
4868 /* Convert.
4869 x = y | constant (which is composed of set_sign_bit_copies leading 1s
4870 with the remaining bits 0, e.g. 0xfff00000)
4871 x = ~(~(y ashift set_sign_bit_copies) lshiftrt set_sign_bit_copies)
4873 This can be done in 2 instructions by using shifts with mov or mvn.
4874 e.g. for
4875 x = x | 0xfff00000;
4876 we generate:
4877 mvn r0, r0, asl #12
4878 mvn r0, r0, lsr #12 */
4879 if (set_sign_bit_copies > 8
4880 && (val & (HOST_WIDE_INT_M1U << (32 - set_sign_bit_copies))) == val)
4882 if (generate)
4884 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
4885 rtx shift = GEN_INT (set_sign_bit_copies);
4887 emit_constant_insn
4888 (cond,
4889 gen_rtx_SET (sub,
4890 gen_rtx_NOT (mode,
4891 gen_rtx_ASHIFT (mode,
4892 source,
4893 shift))));
4894 emit_constant_insn
4895 (cond,
4896 gen_rtx_SET (target,
4897 gen_rtx_NOT (mode,
4898 gen_rtx_LSHIFTRT (mode, sub,
4899 shift))));
4901 return 2;
4904 /* Convert
4905 x = y | constant (which has set_zero_bit_copies number of trailing ones).
4907 x = ~((~y lshiftrt set_zero_bit_copies) ashift set_zero_bit_copies).
4909 E.g. for r0 = r0 | 0xfff
4910 mvn r0, r0, lsr #12
4911 mvn r0, r0, asl #12
4914 if (set_zero_bit_copies > 8
4915 && (remainder & ((1 << set_zero_bit_copies) - 1)) == remainder)
4917 if (generate)
4919 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
4920 rtx shift = GEN_INT (set_zero_bit_copies);
4922 emit_constant_insn
4923 (cond,
4924 gen_rtx_SET (sub,
4925 gen_rtx_NOT (mode,
4926 gen_rtx_LSHIFTRT (mode,
4927 source,
4928 shift))));
4929 emit_constant_insn
4930 (cond,
4931 gen_rtx_SET (target,
4932 gen_rtx_NOT (mode,
4933 gen_rtx_ASHIFT (mode, sub,
4934 shift))));
4936 return 2;
4939 /* This will never be reached for Thumb2 because orn is a valid
4940 instruction. This is for Thumb1 and the ARM 32 bit cases.
4942 x = y | constant (such that ~constant is a valid constant)
4943 Transform this to
4944 x = ~(~y & ~constant).
4946 if (const_ok_for_arm (temp1 = ARM_SIGN_EXTEND (~val)))
4948 if (generate)
4950 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
4951 emit_constant_insn (cond,
4952 gen_rtx_SET (sub,
4953 gen_rtx_NOT (mode, source)));
4954 source = sub;
4955 if (subtargets)
4956 sub = gen_reg_rtx (mode);
4957 emit_constant_insn (cond,
4958 gen_rtx_SET (sub,
4959 gen_rtx_AND (mode, source,
4960 GEN_INT (temp1))));
4961 emit_constant_insn (cond,
4962 gen_rtx_SET (target,
4963 gen_rtx_NOT (mode, sub)));
4965 return 3;
4967 break;
4969 case AND:
4970 /* See if two shifts will do 2 or more insn's worth of work. */
4971 if (clear_sign_bit_copies >= 16 && clear_sign_bit_copies < 24)
4973 HOST_WIDE_INT shift_mask = ((0xffffffff
4974 << (32 - clear_sign_bit_copies))
4975 & 0xffffffff);
4977 if ((remainder | shift_mask) != 0xffffffff)
4979 HOST_WIDE_INT new_val
4980 = ARM_SIGN_EXTEND (remainder | shift_mask);
4982 if (generate)
4984 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4985 insns = arm_gen_constant (AND, SImode, cond, new_val,
4986 new_src, source, subtargets, 1);
4987 source = new_src;
4989 else
4991 rtx targ = subtargets ? NULL_RTX : target;
4992 insns = arm_gen_constant (AND, mode, cond, new_val,
4993 targ, source, subtargets, 0);
4997 if (generate)
4999 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
5000 rtx shift = GEN_INT (clear_sign_bit_copies);
5002 emit_insn (gen_ashlsi3 (new_src, source, shift));
5003 emit_insn (gen_lshrsi3 (target, new_src, shift));
5006 return insns + 2;
5009 if (clear_zero_bit_copies >= 16 && clear_zero_bit_copies < 24)
5011 HOST_WIDE_INT shift_mask = (1 << clear_zero_bit_copies) - 1;
5013 if ((remainder | shift_mask) != 0xffffffff)
5015 HOST_WIDE_INT new_val
5016 = ARM_SIGN_EXTEND (remainder | shift_mask);
5017 if (generate)
5019 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
5021 insns = arm_gen_constant (AND, mode, cond, new_val,
5022 new_src, source, subtargets, 1);
5023 source = new_src;
5025 else
5027 rtx targ = subtargets ? NULL_RTX : target;
5029 insns = arm_gen_constant (AND, mode, cond, new_val,
5030 targ, source, subtargets, 0);
5034 if (generate)
5036 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
5037 rtx shift = GEN_INT (clear_zero_bit_copies);
5039 emit_insn (gen_lshrsi3 (new_src, source, shift));
5040 emit_insn (gen_ashlsi3 (target, new_src, shift));
5043 return insns + 2;
5046 break;
5048 default:
5049 break;
5052 /* Calculate what the instruction sequences would be if we generated it
5053 normally, negated, or inverted. */
5054 if (code == AND)
5055 /* AND cannot be split into multiple insns, so invert and use BIC. */
5056 insns = 99;
5057 else
5058 insns = optimal_immediate_sequence (code, remainder, &pos_immediates);
5060 if (can_negate)
5061 neg_insns = optimal_immediate_sequence (code, (-remainder) & 0xffffffff,
5062 &neg_immediates);
5063 else
5064 neg_insns = 99;
5066 if (can_invert || final_invert)
5067 inv_insns = optimal_immediate_sequence (code, remainder ^ 0xffffffff,
5068 &inv_immediates);
5069 else
5070 inv_insns = 99;
5072 immediates = &pos_immediates;
5074 /* Is the negated immediate sequence more efficient? */
5075 if (neg_insns < insns && neg_insns <= inv_insns)
5077 insns = neg_insns;
5078 immediates = &neg_immediates;
5080 else
5081 can_negate = 0;
5083 /* Is the inverted immediate sequence more efficient?
5084 We must allow for an extra NOT instruction for XOR operations, although
5085 there is some chance that the final 'mvn' will get optimized later. */
5086 if ((inv_insns + 1) < insns || (!final_invert && inv_insns < insns))
5088 insns = inv_insns;
5089 immediates = &inv_immediates;
5091 else
5093 can_invert = 0;
5094 final_invert = 0;
5097 /* Now output the chosen sequence as instructions. */
5098 if (generate)
5100 for (i = 0; i < insns; i++)
5102 rtx new_src, temp1_rtx;
5104 temp1 = immediates->i[i];
5106 if (code == SET || code == MINUS)
5107 new_src = (subtargets ? gen_reg_rtx (mode) : target);
5108 else if ((final_invert || i < (insns - 1)) && subtargets)
5109 new_src = gen_reg_rtx (mode);
5110 else
5111 new_src = target;
5113 if (can_invert)
5114 temp1 = ~temp1;
5115 else if (can_negate)
5116 temp1 = -temp1;
5118 temp1 = trunc_int_for_mode (temp1, mode);
5119 temp1_rtx = GEN_INT (temp1);
5121 if (code == SET)
5123 else if (code == MINUS)
5124 temp1_rtx = gen_rtx_MINUS (mode, temp1_rtx, source);
5125 else
5126 temp1_rtx = gen_rtx_fmt_ee (code, mode, source, temp1_rtx);
5128 emit_constant_insn (cond, gen_rtx_SET (new_src, temp1_rtx));
5129 source = new_src;
5131 if (code == SET)
5133 can_negate = can_invert;
5134 can_invert = 0;
5135 code = PLUS;
5137 else if (code == MINUS)
5138 code = PLUS;
5142 if (final_invert)
5144 if (generate)
5145 emit_constant_insn (cond, gen_rtx_SET (target,
5146 gen_rtx_NOT (mode, source)));
5147 insns++;
5150 return insns;
5153 /* Canonicalize a comparison so that we are more likely to recognize it.
5154 This can be done for a few constant compares, where we can make the
5155 immediate value easier to load. */
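/* For example (illustrative): (GT x 0x1fff) cannot use 0x1fff as an
   immediate, but 0x2000 is representable, so the comparison is rewritten
   below as (GE x 0x2000).  */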
5157 static void
5158 arm_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
5159 bool op0_preserve_value)
5161 machine_mode mode;
5162 unsigned HOST_WIDE_INT i, maxval;
5164 mode = GET_MODE (*op0);
5165 if (mode == VOIDmode)
5166 mode = GET_MODE (*op1);
5168 maxval = (HOST_WIDE_INT_1U << (GET_MODE_BITSIZE (mode) - 1)) - 1;
5170 /* For DImode, we have GE/LT/GEU/LTU comparisons. In ARM mode
5171 we can also use cmp/cmpeq for GTU/LEU. GT/LE must be either
5172 reversed or (for constant OP1) adjusted to GE/LT. Similarly
5173 for GTU/LEU in Thumb mode. */
5174 if (mode == DImode)
5177 if (*code == GT || *code == LE
5178 || (!TARGET_ARM && (*code == GTU || *code == LEU)))
5180 /* Missing comparison. First try to use an available
5181 comparison. */
5182 if (CONST_INT_P (*op1))
5184 i = INTVAL (*op1);
5185 switch (*code)
5187 case GT:
5188 case LE:
5189 if (i != maxval
5190 && arm_const_double_by_immediates (GEN_INT (i + 1)))
5192 *op1 = GEN_INT (i + 1);
5193 *code = *code == GT ? GE : LT;
5194 return;
5196 break;
5197 case GTU:
5198 case LEU:
5199 if (i != ~((unsigned HOST_WIDE_INT) 0)
5200 && arm_const_double_by_immediates (GEN_INT (i + 1)))
5202 *op1 = GEN_INT (i + 1);
5203 *code = *code == GTU ? GEU : LTU;
5204 return;
5206 break;
5207 default:
5208 gcc_unreachable ();
5212 /* If that did not work, reverse the condition. */
5213 if (!op0_preserve_value)
5215 std::swap (*op0, *op1);
5216 *code = (int)swap_condition ((enum rtx_code)*code);
5219 return;
5222 /* If *op0 is (zero_extend:SI (subreg:QI (reg:SI) 0)) and comparing
5223 with const0_rtx, change it to (and:SI (reg:SI) (const_int 255)),
5224 to facilitate possible combining with a cmp into 'ands'. */
5225 if (mode == SImode
5226 && GET_CODE (*op0) == ZERO_EXTEND
5227 && GET_CODE (XEXP (*op0, 0)) == SUBREG
5228 && GET_MODE (XEXP (*op0, 0)) == QImode
5229 && GET_MODE (SUBREG_REG (XEXP (*op0, 0))) == SImode
5230 && subreg_lowpart_p (XEXP (*op0, 0))
5231 && *op1 == const0_rtx)
5232 *op0 = gen_rtx_AND (SImode, SUBREG_REG (XEXP (*op0, 0)),
5233 GEN_INT (255));
5235 /* Comparisons smaller than DImode. Only adjust comparisons against
5236 an out-of-range constant. */
5237 if (!CONST_INT_P (*op1)
5238 || const_ok_for_arm (INTVAL (*op1))
5239 || const_ok_for_arm (- INTVAL (*op1)))
5240 return;
5242 i = INTVAL (*op1);
5244 switch (*code)
5246 case EQ:
5247 case NE:
5248 return;
5250 case GT:
5251 case LE:
5252 if (i != maxval
5253 && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
5255 *op1 = GEN_INT (ARM_SIGN_EXTEND (i + 1));
5256 *code = *code == GT ? GE : LT;
5257 return;
5259 break;
5261 case GE:
5262 case LT:
5263 if (i != ~maxval
5264 && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
5266 *op1 = GEN_INT (i - 1);
5267 *code = *code == GE ? GT : LE;
5268 return;
5270 break;
5272 case GTU:
5273 case LEU:
5274 if (i != ~((unsigned HOST_WIDE_INT) 0)
5275 && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
5277 *op1 = GEN_INT (ARM_SIGN_EXTEND (i + 1));
5278 *code = *code == GTU ? GEU : LTU;
5279 return;
5281 break;
5283 case GEU:
5284 case LTU:
5285 if (i != 0
5286 && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
5288 *op1 = GEN_INT (i - 1);
5289 *code = *code == GEU ? GTU : LEU;
5290 return;
5292 break;
5294 default:
5295 gcc_unreachable ();
5300 /* Define how to find the value returned by a function. */
5302 static rtx
5303 arm_function_value(const_tree type, const_tree func,
5304 bool outgoing ATTRIBUTE_UNUSED)
5306 machine_mode mode;
5307 int unsignedp ATTRIBUTE_UNUSED;
5308 rtx r ATTRIBUTE_UNUSED;
5310 mode = TYPE_MODE (type);
5312 if (TARGET_AAPCS_BASED)
5313 return aapcs_allocate_return_reg (mode, type, func);
5315 /* Promote integer types. */
5316 if (INTEGRAL_TYPE_P (type))
5317 mode = arm_promote_function_mode (type, mode, &unsignedp, func, 1);
5319 /* Promote small structs returned in a register to full-word size
5320 for big-endian AAPCS. */
5321 if (arm_return_in_msb (type))
5323 HOST_WIDE_INT size = int_size_in_bytes (type);
5324 if (size % UNITS_PER_WORD != 0)
5326 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
5327 mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
5331 return arm_libcall_value_1 (mode);
5334 /* libcall hashtable helpers. */
5336 struct libcall_hasher : nofree_ptr_hash <const rtx_def>
5338 static inline hashval_t hash (const rtx_def *);
5339 static inline bool equal (const rtx_def *, const rtx_def *);
5340 static inline void remove (rtx_def *);
5343 inline bool
5344 libcall_hasher::equal (const rtx_def *p1, const rtx_def *p2)
5346 return rtx_equal_p (p1, p2);
5349 inline hashval_t
5350 libcall_hasher::hash (const rtx_def *p1)
5352 return hash_rtx (p1, VOIDmode, NULL, NULL, FALSE);
5355 typedef hash_table<libcall_hasher> libcall_table_type;
5357 static void
5358 add_libcall (libcall_table_type *htab, rtx libcall)
5360 *htab->find_slot (libcall, INSERT) = libcall;
5363 static bool
5364 arm_libcall_uses_aapcs_base (const_rtx libcall)
5366 static bool init_done = false;
5367 static libcall_table_type *libcall_htab = NULL;
5369 if (!init_done)
5371 init_done = true;
5373 libcall_htab = new libcall_table_type (31);
5374 add_libcall (libcall_htab,
5375 convert_optab_libfunc (sfloat_optab, SFmode, SImode));
5376 add_libcall (libcall_htab,
5377 convert_optab_libfunc (sfloat_optab, DFmode, SImode));
5378 add_libcall (libcall_htab,
5379 convert_optab_libfunc (sfloat_optab, SFmode, DImode));
5380 add_libcall (libcall_htab,
5381 convert_optab_libfunc (sfloat_optab, DFmode, DImode));
5383 add_libcall (libcall_htab,
5384 convert_optab_libfunc (ufloat_optab, SFmode, SImode));
5385 add_libcall (libcall_htab,
5386 convert_optab_libfunc (ufloat_optab, DFmode, SImode));
5387 add_libcall (libcall_htab,
5388 convert_optab_libfunc (ufloat_optab, SFmode, DImode));
5389 add_libcall (libcall_htab,
5390 convert_optab_libfunc (ufloat_optab, DFmode, DImode));
5392 add_libcall (libcall_htab,
5393 convert_optab_libfunc (sext_optab, SFmode, HFmode));
5394 add_libcall (libcall_htab,
5395 convert_optab_libfunc (trunc_optab, HFmode, SFmode));
5396 add_libcall (libcall_htab,
5397 convert_optab_libfunc (sfix_optab, SImode, DFmode));
5398 add_libcall (libcall_htab,
5399 convert_optab_libfunc (ufix_optab, SImode, DFmode));
5400 add_libcall (libcall_htab,
5401 convert_optab_libfunc (sfix_optab, DImode, DFmode));
5402 add_libcall (libcall_htab,
5403 convert_optab_libfunc (ufix_optab, DImode, DFmode));
5404 add_libcall (libcall_htab,
5405 convert_optab_libfunc (sfix_optab, DImode, SFmode));
5406 add_libcall (libcall_htab,
5407 convert_optab_libfunc (ufix_optab, DImode, SFmode));
5409 /* Values from double-precision helper functions are returned in core
5410 registers if the selected core only supports single-precision
5411 arithmetic, even if we are using the hard-float ABI. The same is
5412 true for single-precision helpers, but we will never be using the
5413 hard-float ABI on a CPU which doesn't support single-precision
5414 operations in hardware. */
5415 add_libcall (libcall_htab, optab_libfunc (add_optab, DFmode));
5416 add_libcall (libcall_htab, optab_libfunc (sdiv_optab, DFmode));
5417 add_libcall (libcall_htab, optab_libfunc (smul_optab, DFmode));
5418 add_libcall (libcall_htab, optab_libfunc (neg_optab, DFmode));
5419 add_libcall (libcall_htab, optab_libfunc (sub_optab, DFmode));
5420 add_libcall (libcall_htab, optab_libfunc (eq_optab, DFmode));
5421 add_libcall (libcall_htab, optab_libfunc (lt_optab, DFmode));
5422 add_libcall (libcall_htab, optab_libfunc (le_optab, DFmode));
5423 add_libcall (libcall_htab, optab_libfunc (ge_optab, DFmode));
5424 add_libcall (libcall_htab, optab_libfunc (gt_optab, DFmode));
5425 add_libcall (libcall_htab, optab_libfunc (unord_optab, DFmode));
5426 add_libcall (libcall_htab, convert_optab_libfunc (sext_optab, DFmode,
5427 SFmode));
5428 add_libcall (libcall_htab, convert_optab_libfunc (trunc_optab, SFmode,
5429 DFmode));
5430 add_libcall (libcall_htab,
5431 convert_optab_libfunc (trunc_optab, HFmode, DFmode));
5434 return libcall && libcall_htab->find (libcall) != NULL;
5437 static rtx
5438 arm_libcall_value_1 (machine_mode mode)
5440 if (TARGET_AAPCS_BASED)
5441 return aapcs_libcall_value (mode);
5442 else if (TARGET_IWMMXT_ABI
5443 && arm_vector_mode_supported_p (mode))
5444 return gen_rtx_REG (mode, FIRST_IWMMXT_REGNUM);
5445 else
5446 return gen_rtx_REG (mode, ARG_REGISTER (1));
5449 /* Define how to find the value returned by a library function
5450 assuming the value has mode MODE. */
5452 static rtx
5453 arm_libcall_value (machine_mode mode, const_rtx libcall)
5455 if (TARGET_AAPCS_BASED && arm_pcs_default != ARM_PCS_AAPCS
5456 && GET_MODE_CLASS (mode) == MODE_FLOAT)
5458 /* The following libcalls return their result in integer registers,
5459 even though they return a floating point value. */
5460 if (arm_libcall_uses_aapcs_base (libcall))
5461 return gen_rtx_REG (mode, ARG_REGISTER(1));
5465 return arm_libcall_value_1 (mode);
5468 /* Implement TARGET_FUNCTION_VALUE_REGNO_P. */
5470 static bool
5471 arm_function_value_regno_p (const unsigned int regno)
5473 if (regno == ARG_REGISTER (1)
5474 || (TARGET_32BIT
5475 && TARGET_AAPCS_BASED
5476 && TARGET_HARD_FLOAT
5477 && regno == FIRST_VFP_REGNUM)
5478 || (TARGET_IWMMXT_ABI
5479 && regno == FIRST_IWMMXT_REGNUM))
5480 return true;
5482 return false;
5485 /* Determine the amount of memory needed to store the possible return
5486 registers of an untyped call. */
5487 int
5488 arm_apply_result_size (void)
5490 int size = 16;
5492 if (TARGET_32BIT)
5494 if (TARGET_HARD_FLOAT_ABI)
5495 size += 32;
5496 if (TARGET_IWMMXT_ABI)
5497 size += 8;
5500 return size;
5503 /* Decide whether TYPE should be returned in memory (true)
5504 or in a register (false). FNTYPE is the type of the function making
5505 the call. */
5506 static bool
5507 arm_return_in_memory (const_tree type, const_tree fntype)
5509 HOST_WIDE_INT size;
5511 size = int_size_in_bytes (type); /* Negative if not fixed size. */
5513 if (TARGET_AAPCS_BASED)
5515 /* Simple, non-aggregate types (i.e. not including vectors and
5516 complex) are always returned in a register (or registers).
5517 We don't care about which register here, so we can short-cut
5518 some of the detail. */
5519 if (!AGGREGATE_TYPE_P (type)
5520 && TREE_CODE (type) != VECTOR_TYPE
5521 && TREE_CODE (type) != COMPLEX_TYPE)
5522 return false;
5524 /* Any return value that is no larger than one word can be
5525 returned in r0. */
5526 if (((unsigned HOST_WIDE_INT) size) <= UNITS_PER_WORD)
5527 return false;
5529 /* Check any available co-processors to see if they accept the
5530 type as a register candidate (VFP, for example, can return
5531 some aggregates in consecutive registers). These aren't
5532 available if the call is variadic. */
5533 if (aapcs_select_return_coproc (type, fntype) >= 0)
5534 return false;
5536 /* Vector values should be returned using ARM registers, not
5537 memory (unless they're over 16 bytes, which will break since
5538 we only have four call-clobbered registers to play with). */
5539 if (TREE_CODE (type) == VECTOR_TYPE)
5540 return (size < 0 || size > (4 * UNITS_PER_WORD));
5542 /* The rest go in memory. */
5543 return true;
5546 if (TREE_CODE (type) == VECTOR_TYPE)
5547 return (size < 0 || size > (4 * UNITS_PER_WORD));
5549 if (!AGGREGATE_TYPE_P (type) &&
5550 (TREE_CODE (type) != VECTOR_TYPE))
5551 /* All simple types are returned in registers. */
5552 return false;
5554 if (arm_abi != ARM_ABI_APCS)
5556 /* ATPCS and later return aggregate types in memory only if they are
5557 larger than a word (or are variable size). */
5558 return (size < 0 || size > UNITS_PER_WORD);
5561 /* For the arm-wince targets we choose to be compatible with Microsoft's
5562 ARM and Thumb compilers, which always return aggregates in memory. */
5563 #ifndef ARM_WINCE
5564 /* All structures/unions bigger than one word are returned in memory.
5565 Also catch the case where int_size_in_bytes returns -1. In this case
5566 the aggregate is either huge or of variable size, and in either case
5567 we will want to return it via memory and not in a register. */
5568 if (size < 0 || size > UNITS_PER_WORD)
5569 return true;
5571 if (TREE_CODE (type) == RECORD_TYPE)
5573 tree field;
5575 /* For a struct the APCS says that we only return in a register
5576 if the type is 'integer like' and every addressable element
5577 has an offset of zero. For practical purposes this means
5578 that the structure can have at most one non bit-field element
5579 and that this element must be the first one in the structure. */
5581 /* Find the first field, ignoring non FIELD_DECL things which will
5582 have been created by C++. */
5583 for (field = TYPE_FIELDS (type);
5584 field && TREE_CODE (field) != FIELD_DECL;
5585 field = DECL_CHAIN (field))
5586 continue;
5588 if (field == NULL)
5589 return false; /* An empty structure. Allowed by an extension to ANSI C. */
5591 /* Check that the first field is valid for returning in a register. */
5593 /* ... Floats are not allowed */
5594 if (FLOAT_TYPE_P (TREE_TYPE (field)))
5595 return true;
5597 /* ... Aggregates that are not themselves valid for returning in
5598 a register are not allowed. */
5599 if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
5600 return true;
5602 /* Now check the remaining fields, if any. Only bitfields are allowed,
5603 since they are not addressable. */
5604 for (field = DECL_CHAIN (field);
5605 field;
5606 field = DECL_CHAIN (field))
5608 if (TREE_CODE (field) != FIELD_DECL)
5609 continue;
5611 if (!DECL_BIT_FIELD_TYPE (field))
5612 return true;
5615 return false;
5618 if (TREE_CODE (type) == UNION_TYPE)
5620 tree field;
5622 /* Unions can be returned in registers if every element is
5623 integral, or can be returned in an integer register. */
5624 for (field = TYPE_FIELDS (type);
5625 field;
5626 field = DECL_CHAIN (field))
5628 if (TREE_CODE (field) != FIELD_DECL)
5629 continue;
5631 if (FLOAT_TYPE_P (TREE_TYPE (field)))
5632 return true;
5634 if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
5635 return true;
5638 return false;
5640 #endif /* not ARM_WINCE */
5642 /* Return all other types in memory. */
5643 return true;
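/* Illustrative cases (a sketch assuming the legacy APCS path above and a
   non-WinCE target; under AAPCS all four fit in one word and stay in r0):

     struct r0_a  { int x; };                    // one member: r0
     struct r0_b  { unsigned a : 8;
                    unsigned b : 24; };          // only bit-fields: r0
     struct mem_a { char a; char b; };           // second addressable
                                                 // member: memory
     struct mem_b { float f; };                  // float member: memory
*/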
5646 const struct pcs_attribute_arg
5648 const char *arg;
5649 enum arm_pcs value;
5650 } pcs_attribute_args[] =
5652 {"aapcs", ARM_PCS_AAPCS},
5653 {"aapcs-vfp", ARM_PCS_AAPCS_VFP},
5654 #if 0
5655 /* We could recognize these, but changes would be needed elsewhere
5656 * to implement them. */
5657 {"aapcs-iwmmxt", ARM_PCS_AAPCS_IWMMXT},
5658 {"atpcs", ARM_PCS_ATPCS},
5659 {"apcs", ARM_PCS_APCS},
5660 #endif
5661 {NULL, ARM_PCS_UNKNOWN}
5664 static enum arm_pcs
5665 arm_pcs_from_attribute (tree attr)
5667 const struct pcs_attribute_arg *ptr;
5668 const char *arg;
5670 /* Get the value of the argument. */
5671 if (TREE_VALUE (attr) == NULL_TREE
5672 || TREE_CODE (TREE_VALUE (attr)) != STRING_CST)
5673 return ARM_PCS_UNKNOWN;
5675 arg = TREE_STRING_POINTER (TREE_VALUE (attr));
5677 /* Check it against the list of known arguments. */
5678 for (ptr = pcs_attribute_args; ptr->arg != NULL; ptr++)
5679 if (streq (arg, ptr->arg))
5680 return ptr->value;
5682 /* An unrecognized PCS variant. */
5683 return ARM_PCS_UNKNOWN;
5686 /* Get the PCS variant to use for this call. TYPE is the function's type
5687 specification, DECL is the specific declaration. DECL may be null if
5688 the call could be indirect or if this is a library call. */
5689 static enum arm_pcs
5690 arm_get_pcs_model (const_tree type, const_tree decl)
5692 bool user_convention = false;
5693 enum arm_pcs user_pcs = arm_pcs_default;
5694 tree attr;
5696 gcc_assert (type);
5698 attr = lookup_attribute ("pcs", TYPE_ATTRIBUTES (type));
5699 if (attr)
5701 user_pcs = arm_pcs_from_attribute (TREE_VALUE (attr));
5702 user_convention = true;
5705 if (TARGET_AAPCS_BASED)
5707 /* Detect varargs functions. These always use the base rules
5708 (no argument is ever a candidate for a co-processor
5709 register). */
5710 bool base_rules = stdarg_p (type);
5712 if (user_convention)
5714 if (user_pcs > ARM_PCS_AAPCS_LOCAL)
5715 sorry ("non-AAPCS derived PCS variant");
5716 else if (base_rules && user_pcs != ARM_PCS_AAPCS)
5717 error ("variadic functions must use the base AAPCS variant");
5720 if (base_rules)
5721 return ARM_PCS_AAPCS;
5722 else if (user_convention)
5723 return user_pcs;
5724 else if (decl && flag_unit_at_a_time)
5726 /* Local functions never leak outside this compilation unit,
5727 so we are free to use whatever conventions are
5728 appropriate. */
5729 /* FIXME: remove CONST_CAST_TREE when cgraph is constified. */
5730 cgraph_local_info *i = cgraph_node::local_info (CONST_CAST_TREE(decl));
5731 if (i && i->local)
5732 return ARM_PCS_AAPCS_LOCAL;
5735 else if (user_convention && user_pcs != arm_pcs_default)
5736 sorry ("PCS variant");
5738 /* For everything else we use the target's default. */
5739 return arm_pcs_default;
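/* Usage sketch (illustrative only): the "pcs" attribute parsed above is
   written on function types in user code, e.g.

     double f (double) __attribute__ ((pcs ("aapcs-vfp")));
     double g (double) __attribute__ ((pcs ("aapcs")));

   On an AAPCS-based target that accepts both variants, f passes and returns
   its double in VFP registers while g uses core registers.  */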
5743 static void
5744 aapcs_vfp_cum_init (CUMULATIVE_ARGS *pcum ATTRIBUTE_UNUSED,
5745 const_tree fntype ATTRIBUTE_UNUSED,
5746 rtx libcall ATTRIBUTE_UNUSED,
5747 const_tree fndecl ATTRIBUTE_UNUSED)
5749 /* Record the unallocated VFP registers. */
5750 pcum->aapcs_vfp_regs_free = (1 << NUM_VFP_ARG_REGS) - 1;
5751 pcum->aapcs_vfp_reg_alloc = 0;
5754 /* Walk down the type tree of TYPE counting consecutive base elements.
5755 If *MODEP is VOIDmode, then set it to the first valid floating point
5756 type. If a non-floating point type is found, or if a floating point
5757 type that doesn't match a non-VOIDmode *MODEP is found, then return -1,
5758 otherwise return the count in the sub-tree. */
5759 static int
5760 aapcs_vfp_sub_candidate (const_tree type, machine_mode *modep)
5762 machine_mode mode;
5763 HOST_WIDE_INT size;
5765 switch (TREE_CODE (type))
5767 case REAL_TYPE:
5768 mode = TYPE_MODE (type);
5769 if (mode != DFmode && mode != SFmode && mode != HFmode)
5770 return -1;
5772 if (*modep == VOIDmode)
5773 *modep = mode;
5775 if (*modep == mode)
5776 return 1;
5778 break;
5780 case COMPLEX_TYPE:
5781 mode = TYPE_MODE (TREE_TYPE (type));
5782 if (mode != DFmode && mode != SFmode)
5783 return -1;
5785 if (*modep == VOIDmode)
5786 *modep = mode;
5788 if (*modep == mode)
5789 return 2;
5791 break;
5793 case VECTOR_TYPE:
5794 /* Use V2SImode and V4SImode as representatives of all 64-bit
5795 and 128-bit vector types, whether or not those modes are
5796 supported with the present options. */
5797 size = int_size_in_bytes (type);
5798 switch (size)
5800 case 8:
5801 mode = V2SImode;
5802 break;
5803 case 16:
5804 mode = V4SImode;
5805 break;
5806 default:
5807 return -1;
5810 if (*modep == VOIDmode)
5811 *modep = mode;
5813 /* Vector modes are considered to be opaque: two vectors are
5814 equivalent for the purposes of being homogeneous aggregates
5815 if they are the same size. */
5816 if (*modep == mode)
5817 return 1;
5819 break;
5821 case ARRAY_TYPE:
5823 int count;
5824 tree index = TYPE_DOMAIN (type);
5826 /* Can't handle incomplete types nor sizes that are not
5827 fixed. */
5828 if (!COMPLETE_TYPE_P (type)
5829 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
5830 return -1;
5832 count = aapcs_vfp_sub_candidate (TREE_TYPE (type), modep);
5833 if (count == -1
5834 || !index
5835 || !TYPE_MAX_VALUE (index)
5836 || !tree_fits_uhwi_p (TYPE_MAX_VALUE (index))
5837 || !TYPE_MIN_VALUE (index)
5838 || !tree_fits_uhwi_p (TYPE_MIN_VALUE (index))
5839 || count < 0)
5840 return -1;
5842 count *= (1 + tree_to_uhwi (TYPE_MAX_VALUE (index))
5843 - tree_to_uhwi (TYPE_MIN_VALUE (index)));
5845 /* There must be no padding. */
5846 if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
5847 return -1;
5849 return count;
5852 case RECORD_TYPE:
5854 int count = 0;
5855 int sub_count;
5856 tree field;
5858 /* Can't handle incomplete types nor sizes that are not
5859 fixed. */
5860 if (!COMPLETE_TYPE_P (type)
5861 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
5862 return -1;
5864 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
5866 if (TREE_CODE (field) != FIELD_DECL)
5867 continue;
5869 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
5870 if (sub_count < 0)
5871 return -1;
5872 count += sub_count;
5875 /* There must be no padding. */
5876 if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
5877 return -1;
5879 return count;
5882 case UNION_TYPE:
5883 case QUAL_UNION_TYPE:
5885 /* These aren't very interesting except in a degenerate case. */
5886 int count = 0;
5887 int sub_count;
5888 tree field;
5890 /* Can't handle incomplete types nor sizes that are not
5891 fixed. */
5892 if (!COMPLETE_TYPE_P (type)
5893 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
5894 return -1;
5896 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
5898 if (TREE_CODE (field) != FIELD_DECL)
5899 continue;
5901 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
5902 if (sub_count < 0)
5903 return -1;
5904 count = count > sub_count ? count : sub_count;
5907 /* There must be no padding. */
5908 if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
5909 return -1;
5911 return count;
5914 default:
5915 break;
5918 return -1;
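/* Illustrative cases (a sketch of what the walk above computes):

     struct hfa2 { double d[2]; };            // 2 elements of DFmode
     struct hfa3 { float a, b, c; };          // 3 elements of SFmode
     struct no1  { float a; double b; };      // mixed base modes: -1
     struct no2  { float a[5]; };             // 5 elements; accepted here
                                              // but rejected by the caller
                                              // because the count exceeds 4
*/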
5921 /* Return true if PCS_VARIANT should use VFP registers. */
5922 static bool
5923 use_vfp_abi (enum arm_pcs pcs_variant, bool is_double)
5925 if (pcs_variant == ARM_PCS_AAPCS_VFP)
5927 static bool seen_thumb1_vfp = false;
5929 if (TARGET_THUMB1 && !seen_thumb1_vfp)
5931 sorry ("Thumb-1 hard-float VFP ABI");
5932 /* sorry() is not immediately fatal, so only display this once. */
5933 seen_thumb1_vfp = true;
5936 return true;
5939 if (pcs_variant != ARM_PCS_AAPCS_LOCAL)
5940 return false;
5942 return (TARGET_32BIT && TARGET_HARD_FLOAT &&
5943 (TARGET_VFP_DOUBLE || !is_double));
5946 /* Return true if an argument whose type is TYPE, or mode is MODE, is
5947 suitable for passing or returning in VFP registers for the PCS
5948 variant selected. If it is, then *BASE_MODE is updated to contain
5949 a machine mode describing each element of the argument's type and
5950 *COUNT to hold the number of such elements. */
5951 static bool
5952 aapcs_vfp_is_call_or_return_candidate (enum arm_pcs pcs_variant,
5953 machine_mode mode, const_tree type,
5954 machine_mode *base_mode, int *count)
5956 machine_mode new_mode = VOIDmode;
5958 /* If we have the type information, prefer that to working things
5959 out from the mode. */
5960 if (type)
5962 int ag_count = aapcs_vfp_sub_candidate (type, &new_mode);
5964 if (ag_count > 0 && ag_count <= 4)
5965 *count = ag_count;
5966 else
5967 return false;
5969 else if (GET_MODE_CLASS (mode) == MODE_FLOAT
5970 || GET_MODE_CLASS (mode) == MODE_VECTOR_INT
5971 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
5973 *count = 1;
5974 new_mode = mode;
5976 else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
5978 *count = 2;
5979 new_mode = (mode == DCmode ? DFmode : SFmode);
5981 else
5982 return false;
5985 if (!use_vfp_abi (pcs_variant, ARM_NUM_REGS (new_mode) > 1))
5986 return false;
5988 *base_mode = new_mode;
5989 return true;
5992 static bool
5993 aapcs_vfp_is_return_candidate (enum arm_pcs pcs_variant,
5994 machine_mode mode, const_tree type)
5996 int count ATTRIBUTE_UNUSED;
5997 machine_mode ag_mode ATTRIBUTE_UNUSED;
5999 if (!use_vfp_abi (pcs_variant, false))
6000 return false;
6001 return aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type,
6002 &ag_mode, &count);
6005 static bool
6006 aapcs_vfp_is_call_candidate (CUMULATIVE_ARGS *pcum, machine_mode mode,
6007 const_tree type)
6009 if (!use_vfp_abi (pcum->pcs_variant, false))
6010 return false;
6012 return aapcs_vfp_is_call_or_return_candidate (pcum->pcs_variant, mode, type,
6013 &pcum->aapcs_vfp_rmode,
6014 &pcum->aapcs_vfp_rcount);
6017 /* Implement the allocate field in aapcs_cp_arg_layout. See the comment there
6018 for the behaviour of this function. */
6020 static bool
6021 aapcs_vfp_allocate (CUMULATIVE_ARGS *pcum, machine_mode mode,
6022 const_tree type ATTRIBUTE_UNUSED)
6024 int rmode_size
6025 = MAX (GET_MODE_SIZE (pcum->aapcs_vfp_rmode), GET_MODE_SIZE (SFmode));
6026 int shift = rmode_size / GET_MODE_SIZE (SFmode);
6027 unsigned mask = (1 << (shift * pcum->aapcs_vfp_rcount)) - 1;
6028 int regno;
6030 for (regno = 0; regno < NUM_VFP_ARG_REGS; regno += shift)
6031 if (((pcum->aapcs_vfp_regs_free >> regno) & mask) == mask)
6033 pcum->aapcs_vfp_reg_alloc = mask << regno;
6034 if (mode == BLKmode
6035 || (mode == TImode && ! TARGET_NEON)
6036 || ! arm_hard_regno_mode_ok (FIRST_VFP_REGNUM + regno, mode))
6038 int i;
6039 int rcount = pcum->aapcs_vfp_rcount;
6040 int rshift = shift;
6041 machine_mode rmode = pcum->aapcs_vfp_rmode;
6042 rtx par;
6043 if (!TARGET_NEON)
6045 /* Avoid using unsupported vector modes. */
6046 if (rmode == V2SImode)
6047 rmode = DImode;
6048 else if (rmode == V4SImode)
6050 rmode = DImode;
6051 rcount *= 2;
6052 rshift /= 2;
6055 par = gen_rtx_PARALLEL (mode, rtvec_alloc (rcount));
6056 for (i = 0; i < rcount; i++)
6058 rtx tmp = gen_rtx_REG (rmode,
6059 FIRST_VFP_REGNUM + regno + i * rshift);
6060 tmp = gen_rtx_EXPR_LIST
6061 (VOIDmode, tmp,
6062 GEN_INT (i * GET_MODE_SIZE (rmode)));
6063 XVECEXP (par, 0, i) = tmp;
6066 pcum->aapcs_reg = par;
6068 else
6069 pcum->aapcs_reg = gen_rtx_REG (mode, FIRST_VFP_REGNUM + regno);
6070 return true;
6072 return false;
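/* Worked example (a sketch assuming a VFP-using PCS variant such as
   -mfloat-abi=hard and no earlier argument already spilled to the stack):
   for

     void f (float a, double b, float c);

   a is allocated s0; b needs an aligned pair and gets d1 (s2/s3); c then
   back-fills the still-free s1.  This is the AAPCS VFP back-filling
   behaviour implemented by the free-register mask scan above.  */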
6075 /* Implement the allocate_return_reg field in aapcs_cp_arg_layout. See the
6076 comment there for the behaviour of this function. */
6078 static rtx
6079 aapcs_vfp_allocate_return_reg (enum arm_pcs pcs_variant ATTRIBUTE_UNUSED,
6080 machine_mode mode,
6081 const_tree type ATTRIBUTE_UNUSED)
6083 if (!use_vfp_abi (pcs_variant, false))
6084 return NULL;
6086 if (mode == BLKmode
6087 || (GET_MODE_CLASS (mode) == MODE_INT
6088 && GET_MODE_SIZE (mode) >= GET_MODE_SIZE (TImode)
6089 && !TARGET_NEON))
6091 int count;
6092 machine_mode ag_mode;
6093 int i;
6094 rtx par;
6095 int shift;
6097 aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type,
6098 &ag_mode, &count);
6100 if (!TARGET_NEON)
6102 if (ag_mode == V2SImode)
6103 ag_mode = DImode;
6104 else if (ag_mode == V4SImode)
6106 ag_mode = DImode;
6107 count *= 2;
6110 shift = GET_MODE_SIZE(ag_mode) / GET_MODE_SIZE(SFmode);
6111 par = gen_rtx_PARALLEL (mode, rtvec_alloc (count));
6112 for (i = 0; i < count; i++)
6114 rtx tmp = gen_rtx_REG (ag_mode, FIRST_VFP_REGNUM + i * shift);
6115 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
6116 GEN_INT (i * GET_MODE_SIZE (ag_mode)));
6117 XVECEXP (par, 0, i) = tmp;
6120 return par;
6123 return gen_rtx_REG (mode, FIRST_VFP_REGNUM);
6126 static void
6127 aapcs_vfp_advance (CUMULATIVE_ARGS *pcum ATTRIBUTE_UNUSED,
6128 machine_mode mode ATTRIBUTE_UNUSED,
6129 const_tree type ATTRIBUTE_UNUSED)
6131 pcum->aapcs_vfp_regs_free &= ~pcum->aapcs_vfp_reg_alloc;
6132 pcum->aapcs_vfp_reg_alloc = 0;
6133 return;
6136 #define AAPCS_CP(X) \
6138 aapcs_ ## X ## _cum_init, \
6139 aapcs_ ## X ## _is_call_candidate, \
6140 aapcs_ ## X ## _allocate, \
6141 aapcs_ ## X ## _is_return_candidate, \
6142 aapcs_ ## X ## _allocate_return_reg, \
6143 aapcs_ ## X ## _advance \
6146 /* Table of co-processors that can be used to pass arguments in
6147 registers. Ideally no argument should be a candidate for more than
6148 one co-processor table entry, but the table is processed in order
6149 and stops after the first match. If that entry then fails to put
6150 the argument into a co-processor register, the argument will go on
6151 the stack. */
6152 static struct
6154 /* Initialize co-processor related state in CUMULATIVE_ARGS structure. */
6155 void (*cum_init) (CUMULATIVE_ARGS *, const_tree, rtx, const_tree);
6157 /* Return true if an argument of mode MODE (or type TYPE if MODE is
6158 BLKmode) is a candidate for this co-processor's registers; this
6159 function should ignore any position-dependent state in
6160 CUMULATIVE_ARGS and only use call-type dependent information. */
6161 bool (*is_call_candidate) (CUMULATIVE_ARGS *, machine_mode, const_tree);
6163 /* Return true if the argument does get a co-processor register; it
6164 should set aapcs_reg to an RTX of the register allocated as is
6165 required for a return from FUNCTION_ARG. */
6166 bool (*allocate) (CUMULATIVE_ARGS *, machine_mode, const_tree);
6168 /* Return true if a result of mode MODE (or type TYPE if MODE is BLKmode) can
6169 be returned in this co-processor's registers. */
6170 bool (*is_return_candidate) (enum arm_pcs, machine_mode, const_tree);
6172 /* Allocate and return an RTX element to hold the return type of a call. This
6173 routine must not fail and will only be called if is_return_candidate
6174 returned true with the same parameters. */
6175 rtx (*allocate_return_reg) (enum arm_pcs, machine_mode, const_tree);
6177 /* Finish processing this argument and prepare to start processing
6178 the next one. */
6179 void (*advance) (CUMULATIVE_ARGS *, machine_mode, const_tree);
6180 } aapcs_cp_arg_layout[ARM_NUM_COPROC_SLOTS] =
6182 AAPCS_CP(vfp)
6185 #undef AAPCS_CP
6187 static int
6188 aapcs_select_call_coproc (CUMULATIVE_ARGS *pcum, machine_mode mode,
6189 const_tree type)
6191 int i;
6193 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
6194 if (aapcs_cp_arg_layout[i].is_call_candidate (pcum, mode, type))
6195 return i;
6197 return -1;
6200 static int
6201 aapcs_select_return_coproc (const_tree type, const_tree fntype)
6203 /* We aren't passed a decl, so we can't check that a call is local.
6204 However, it isn't clear that that would be a win anyway, since it
6205 might limit some tail-calling opportunities. */
6206 enum arm_pcs pcs_variant;
6208 if (fntype)
6210 const_tree fndecl = NULL_TREE;
6212 if (TREE_CODE (fntype) == FUNCTION_DECL)
6214 fndecl = fntype;
6215 fntype = TREE_TYPE (fntype);
6218 pcs_variant = arm_get_pcs_model (fntype, fndecl);
6220 else
6221 pcs_variant = arm_pcs_default;
6223 if (pcs_variant != ARM_PCS_AAPCS)
6225 int i;
6227 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
6228 if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant,
6229 TYPE_MODE (type),
6230 type))
6231 return i;
6233 return -1;
6236 static rtx
6237 aapcs_allocate_return_reg (machine_mode mode, const_tree type,
6238 const_tree fntype)
6240 /* We aren't passed a decl, so we can't check that a call is local.
6241 However, it isn't clear that that would be a win anyway, since it
6242 might limit some tail-calling opportunities. */
6243 enum arm_pcs pcs_variant;
6244 int unsignedp ATTRIBUTE_UNUSED;
6246 if (fntype)
6248 const_tree fndecl = NULL_TREE;
6250 if (TREE_CODE (fntype) == FUNCTION_DECL)
6252 fndecl = fntype;
6253 fntype = TREE_TYPE (fntype);
6256 pcs_variant = arm_get_pcs_model (fntype, fndecl);
6258 else
6259 pcs_variant = arm_pcs_default;
6261 /* Promote integer types. */
6262 if (type && INTEGRAL_TYPE_P (type))
6263 mode = arm_promote_function_mode (type, mode, &unsignedp, fntype, 1);
6265 if (pcs_variant != ARM_PCS_AAPCS)
6267 int i;
6269 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
6270 if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant, mode,
6271 type))
6272 return aapcs_cp_arg_layout[i].allocate_return_reg (pcs_variant,
6273 mode, type);
6276 /* Promotes small structs returned in a register to full-word size
6277 for big-endian AAPCS. */
6278 if (type && arm_return_in_msb (type))
6280 HOST_WIDE_INT size = int_size_in_bytes (type);
6281 if (size % UNITS_PER_WORD != 0)
6283 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
6284 mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
6288 return gen_rtx_REG (mode, R0_REGNUM);
6291 static rtx
6292 aapcs_libcall_value (machine_mode mode)
6294 if (BYTES_BIG_ENDIAN && ALL_FIXED_POINT_MODE_P (mode)
6295 && GET_MODE_SIZE (mode) <= 4)
6296 mode = SImode;
6298 return aapcs_allocate_return_reg (mode, NULL_TREE, NULL_TREE);
6301 /* Lay out a function argument using the AAPCS rules. The rule
6302 numbers referred to here are those in the AAPCS. */
6303 static void
6304 aapcs_layout_arg (CUMULATIVE_ARGS *pcum, machine_mode mode,
6305 const_tree type, bool named)
6307 int nregs, nregs2;
6308 int ncrn;
6310 /* We only need to do this once per argument. */
6311 if (pcum->aapcs_arg_processed)
6312 return;
6314 pcum->aapcs_arg_processed = true;
6316 /* Special case: if named is false then we are handling an incoming
6317 anonymous argument which is on the stack. */
6318 if (!named)
6319 return;
6321 /* Is this a potential co-processor register candidate? */
6322 if (pcum->pcs_variant != ARM_PCS_AAPCS)
6324 int slot = aapcs_select_call_coproc (pcum, mode, type);
6325 pcum->aapcs_cprc_slot = slot;
6327 /* We don't have to apply any of the rules from part B of the
6328 preparation phase, these are handled elsewhere in the
6329 compiler. */
6331 if (slot >= 0)
6333 /* A Co-processor register candidate goes either in its own
6334 class of registers or on the stack. */
6335 if (!pcum->aapcs_cprc_failed[slot])
6337 /* C1.cp - Try to allocate the argument to co-processor
6338 registers. */
6339 if (aapcs_cp_arg_layout[slot].allocate (pcum, mode, type))
6340 return;
6342 /* C2.cp - Put the argument on the stack and note that we
6343 can't assign any more candidates in this slot. We also
6344 need to note that we have allocated stack space, so that
6345 we won't later try to split a non-cprc candidate between
6346 core registers and the stack. */
6347 pcum->aapcs_cprc_failed[slot] = true;
6348 pcum->can_split = false;
6351 /* We didn't get a register, so this argument goes on the
6352 stack. */
6353 gcc_assert (pcum->can_split == false);
6354 return;
6358 /* C3 - For double-word aligned arguments, round the NCRN up to the
6359 next even number. */
6360 ncrn = pcum->aapcs_ncrn;
6361 if (ncrn & 1)
6363 int res = arm_needs_doubleword_align (mode, type);
6364 /* Only warn during RTL expansion of call stmts, otherwise we would
6365 warn e.g. during gimplification even on functions that will be
6366 always inlined, and we'd warn multiple times. Don't warn when
6367 called in expand_function_start either, as we warn instead in
6368 arm_function_arg_boundary in that case. */
6369 if (res < 0 && warn_psabi && currently_expanding_gimple_stmt)
6370 inform (input_location, "parameter passing for argument of type "
6371 "%qT changed in GCC 7.1", type);
6372 else if (res > 0)
6373 ncrn++;
6376 nregs = ARM_NUM_REGS2(mode, type);
6378 /* Sigh, this test should really assert that nregs > 0, but a GCC
6379 extension allows empty structs and then gives them empty size; it
6380 then allows such a structure to be passed by value. For some of
6381 the code below we have to pretend that such an argument has
6382 non-zero size so that we 'locate' it correctly either in
6383 registers or on the stack. */
6384 gcc_assert (nregs >= 0);
6386 nregs2 = nregs ? nregs : 1;
6388 /* C4 - Argument fits entirely in core registers. */
6389 if (ncrn + nregs2 <= NUM_ARG_REGS)
6391 pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
6392 pcum->aapcs_next_ncrn = ncrn + nregs;
6393 return;
6396 /* C5 - Some core registers left and there are no arguments already
6397 on the stack: split this argument between the remaining core
6398 registers and the stack. */
6399 if (ncrn < NUM_ARG_REGS && pcum->can_split)
6401 pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
6402 pcum->aapcs_next_ncrn = NUM_ARG_REGS;
6403 pcum->aapcs_partial = (NUM_ARG_REGS - ncrn) * UNITS_PER_WORD;
6404 return;
6407 /* C6 - NCRN is set to 4. */
6408 pcum->aapcs_next_ncrn = NUM_ARG_REGS;
6410 /* C7,C8 - argument goes on the stack. We have nothing to do here. */
6411 return;
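/* Worked example (a sketch assuming the AAPCS base variant and no
   co-processor candidate): for

     void f (int a, long long b, int c);

   a is passed in r0; rule C3 rounds the NCRN up to 2 before b, so r1 is
   skipped and b occupies r2/r3; c then finds no core register left and goes
   on the stack under rules C6-C8.  */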
6414 /* Initialize a variable CUM of type CUMULATIVE_ARGS
6415 for a call to a function whose data type is FNTYPE.
6416 For a library call, FNTYPE is NULL. */
6417 void
6418 arm_init_cumulative_args (CUMULATIVE_ARGS *pcum, tree fntype,
6419 rtx libname,
6420 tree fndecl ATTRIBUTE_UNUSED)
6422 /* Determine which PCS variant (calling convention) is in use. */
6423 if (fntype)
6424 pcum->pcs_variant = arm_get_pcs_model (fntype, fndecl);
6425 else
6426 pcum->pcs_variant = arm_pcs_default;
6428 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
6430 if (arm_libcall_uses_aapcs_base (libname))
6431 pcum->pcs_variant = ARM_PCS_AAPCS;
6433 pcum->aapcs_ncrn = pcum->aapcs_next_ncrn = 0;
6434 pcum->aapcs_reg = NULL_RTX;
6435 pcum->aapcs_partial = 0;
6436 pcum->aapcs_arg_processed = false;
6437 pcum->aapcs_cprc_slot = -1;
6438 pcum->can_split = true;
6440 if (pcum->pcs_variant != ARM_PCS_AAPCS)
6442 int i;
6444 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
6446 pcum->aapcs_cprc_failed[i] = false;
6447 aapcs_cp_arg_layout[i].cum_init (pcum, fntype, libname, fndecl);
6450 return;
6453 /* Legacy ABIs */
6455 /* On the ARM, the offset starts at 0. */
6456 pcum->nregs = 0;
6457 pcum->iwmmxt_nregs = 0;
6458 pcum->can_split = true;
6460 /* Varargs vectors are treated the same as long long.
6461 named_count avoids having to change the way arm handles 'named' */
6462 pcum->named_count = 0;
6463 pcum->nargs = 0;
6465 if (TARGET_REALLY_IWMMXT && fntype)
6467 tree fn_arg;
6469 for (fn_arg = TYPE_ARG_TYPES (fntype);
6470 fn_arg;
6471 fn_arg = TREE_CHAIN (fn_arg))
6472 pcum->named_count += 1;
6474 if (! pcum->named_count)
6475 pcum->named_count = INT_MAX;
6479 /* Return 1 if double word alignment is required for argument passing.
6480 Return -1 if double word alignment used to be required for argument
6481 passing before PR77728 ABI fix, but is not required anymore.
6482 Return 0 if double word alignment is not required and wasn't required
6483 before either. */
6484 static int
6485 arm_needs_doubleword_align (machine_mode mode, const_tree type)
6487 if (!type)
6488 return GET_MODE_ALIGNMENT (mode) > PARM_BOUNDARY;
6490 /* Scalar and vector types: Use natural alignment, i.e. of base type. */
6491 if (!AGGREGATE_TYPE_P (type))
6492 return TYPE_ALIGN (TYPE_MAIN_VARIANT (type)) > PARM_BOUNDARY;
6494 /* Array types: Use member alignment of element type. */
6495 if (TREE_CODE (type) == ARRAY_TYPE)
6496 return TYPE_ALIGN (TREE_TYPE (type)) > PARM_BOUNDARY;
6498 int ret = 0;
6499 /* Record/aggregate types: Use greatest member alignment of any member. */
6500 for (tree field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6501 if (DECL_ALIGN (field) > PARM_BOUNDARY)
6503 if (TREE_CODE (field) == FIELD_DECL)
6504 return 1;
6505 else
6506 /* Before PR77728 fix, we were incorrectly considering also
6507 other aggregate fields, like VAR_DECLs, TYPE_DECLs etc.
6508 Make sure we can warn about that with -Wpsabi. */
6509 ret = -1;
6512 return ret;
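/* Illustrative cases (a sketch of the rules above):

     int                                         -> 0
     long long                                   -> 1 (natural alignment 8)
     struct s1 { long long x; }                  -> 1 (member alignment 8)
     struct s2 { int x
                 __attribute__ ((aligned (8))); } -> 1

   A return of -1 needs an aggregate whose only over-aligned entries are not
   FIELD_DECLs (for instance a C++ static data member), which is the
   pre-PR77728 behaviour that now only triggers the -Wpsabi note.  */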
6516 /* Determine where to put an argument to a function.
6517 Value is zero to push the argument on the stack,
6518 or a hard register in which to store the argument.
6520 MODE is the argument's machine mode.
6521 TYPE is the data type of the argument (as a tree).
6522 This is null for libcalls where that information may
6523 not be available.
6524 CUM is a variable of type CUMULATIVE_ARGS which gives info about
6525 the preceding args and about the function being called.
6526 NAMED is nonzero if this argument is a named parameter
6527 (otherwise it is an extra parameter matching an ellipsis).
6529 On the ARM, normally the first 16 bytes are passed in registers r0-r3; all
6530 other arguments are passed on the stack. If (NAMED == 0) (which happens
6531 only in assign_parms, since TARGET_SETUP_INCOMING_VARARGS is
6532 defined), say it is passed on the stack (function_prologue will
6533 indeed make it pass on the stack if necessary). */
6535 static rtx
6536 arm_function_arg (cumulative_args_t pcum_v, machine_mode mode,
6537 const_tree type, bool named)
6539 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
6540 int nregs;
6542 /* Handle the special case quickly. Pick an arbitrary value for op2 of
6543 a call insn (op3 of a call_value insn). */
6544 if (mode == VOIDmode)
6545 return const0_rtx;
6547 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
6549 aapcs_layout_arg (pcum, mode, type, named);
6550 return pcum->aapcs_reg;
6553 /* Varargs vectors are treated the same as long long.
6554 named_count avoids having to change the way arm handles 'named' */
6555 if (TARGET_IWMMXT_ABI
6556 && arm_vector_mode_supported_p (mode)
6557 && pcum->named_count > pcum->nargs + 1)
6559 if (pcum->iwmmxt_nregs <= 9)
6560 return gen_rtx_REG (mode, pcum->iwmmxt_nregs + FIRST_IWMMXT_REGNUM);
6561 else
6563 pcum->can_split = false;
6564 return NULL_RTX;
6568 /* Put doubleword aligned quantities in even register pairs. */
6569 if ((pcum->nregs & 1) && ARM_DOUBLEWORD_ALIGN)
6571 int res = arm_needs_doubleword_align (mode, type);
6572 if (res < 0 && warn_psabi)
6573 inform (input_location, "parameter passing for argument of type "
6574 "%qT changed in GCC 7.1", type);
6575 else if (res > 0)
6576 pcum->nregs++;
6579 /* Only allow splitting an arg between regs and memory if all preceding
6580 args were allocated to regs. For args passed by reference we only count
6581 the reference pointer. */
6582 if (pcum->can_split)
6583 nregs = 1;
6584 else
6585 nregs = ARM_NUM_REGS2 (mode, type);
6587 if (!named || pcum->nregs + nregs > NUM_ARG_REGS)
6588 return NULL_RTX;
6590 return gen_rtx_REG (mode, pcum->nregs);
6593 static unsigned int
6594 arm_function_arg_boundary (machine_mode mode, const_tree type)
6596 if (!ARM_DOUBLEWORD_ALIGN)
6597 return PARM_BOUNDARY;
6599 int res = arm_needs_doubleword_align (mode, type);
6600 if (res < 0 && warn_psabi)
6601 inform (input_location, "parameter passing for argument of type %qT "
6602 "changed in GCC 7.1", type);
6604 return res > 0 ? DOUBLEWORD_ALIGNMENT : PARM_BOUNDARY;
6607 static int
6608 arm_arg_partial_bytes (cumulative_args_t pcum_v, machine_mode mode,
6609 tree type, bool named)
6611 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
6612 int nregs = pcum->nregs;
6614 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
6616 aapcs_layout_arg (pcum, mode, type, named);
6617 return pcum->aapcs_partial;
6620 if (TARGET_IWMMXT_ABI && arm_vector_mode_supported_p (mode))
6621 return 0;
6623 if (NUM_ARG_REGS > nregs
6624 && (NUM_ARG_REGS < nregs + ARM_NUM_REGS2 (mode, type))
6625 && pcum->can_split)
6626 return (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
6628 return 0;
6631 /* Update the data in PCUM to advance over an argument
6632 of mode MODE and data type TYPE.
6633 (TYPE is null for libcalls where that information may not be available.) */
6635 static void
6636 arm_function_arg_advance (cumulative_args_t pcum_v, machine_mode mode,
6637 const_tree type, bool named)
6639 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
6641 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
6643 aapcs_layout_arg (pcum, mode, type, named);
6645 if (pcum->aapcs_cprc_slot >= 0)
6647 aapcs_cp_arg_layout[pcum->aapcs_cprc_slot].advance (pcum, mode,
6648 type);
6649 pcum->aapcs_cprc_slot = -1;
6652 /* Generic stuff. */
6653 pcum->aapcs_arg_processed = false;
6654 pcum->aapcs_ncrn = pcum->aapcs_next_ncrn;
6655 pcum->aapcs_reg = NULL_RTX;
6656 pcum->aapcs_partial = 0;
6658 else
6660 pcum->nargs += 1;
6661 if (arm_vector_mode_supported_p (mode)
6662 && pcum->named_count > pcum->nargs
6663 && TARGET_IWMMXT_ABI)
6664 pcum->iwmmxt_nregs += 1;
6665 else
6666 pcum->nregs += ARM_NUM_REGS2 (mode, type);
6670 /* Variable sized types are passed by reference. This is a GCC
6671 extension to the ARM ABI. */
6673 static bool
6674 arm_pass_by_reference (cumulative_args_t cum ATTRIBUTE_UNUSED,
6675 machine_mode mode ATTRIBUTE_UNUSED,
6676 const_tree type, bool named ATTRIBUTE_UNUSED)
6678 return type && TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST;
6681 /* Encode the current state of the #pragma [no_]long_calls. */
6682 typedef enum
6684 OFF, /* No #pragma [no_]long_calls is in effect. */
6685 LONG, /* #pragma long_calls is in effect. */
6686 SHORT /* #pragma no_long_calls is in effect. */
6687 } arm_pragma_enum;
6689 static arm_pragma_enum arm_pragma_long_calls = OFF;
6691 void
6692 arm_pr_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
6694 arm_pragma_long_calls = LONG;
6697 void
6698 arm_pr_no_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
6700 arm_pragma_long_calls = SHORT;
6703 void
6704 arm_pr_long_calls_off (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
6706 arm_pragma_long_calls = OFF;
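/* Usage sketch (illustrative; the pragma registration itself lives
   elsewhere in the back end, and the spelling below follows the documented
   ARM target pragmas):

     #pragma long_calls
     void far_away (void);      // subsequent functions get long_call
     #pragma no_long_calls
     void nearby (void);        // subsequent functions get short_call
     #pragma long_calls_off
     void as_before (void);     // back to the command-line default
*/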
6709 /* Handle an attribute requiring a FUNCTION_DECL;
6710 arguments as in struct attribute_spec.handler. */
6711 static tree
6712 arm_handle_fndecl_attribute (tree *node, tree name, tree args ATTRIBUTE_UNUSED,
6713 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
6715 if (TREE_CODE (*node) != FUNCTION_DECL)
6717 warning (OPT_Wattributes, "%qE attribute only applies to functions",
6718 name);
6719 *no_add_attrs = true;
6722 return NULL_TREE;
6725 /* Handle an "interrupt" or "isr" attribute;
6726 arguments as in struct attribute_spec.handler. */
6727 static tree
6728 arm_handle_isr_attribute (tree *node, tree name, tree args, int flags,
6729 bool *no_add_attrs)
6731 if (DECL_P (*node))
6733 if (TREE_CODE (*node) != FUNCTION_DECL)
6735 warning (OPT_Wattributes, "%qE attribute only applies to functions",
6736 name);
6737 *no_add_attrs = true;
6739 /* FIXME: the argument if any is checked for type attributes;
6740 should it be checked for decl ones? */
6742 else
6744 if (TREE_CODE (*node) == FUNCTION_TYPE
6745 || TREE_CODE (*node) == METHOD_TYPE)
6747 if (arm_isr_value (args) == ARM_FT_UNKNOWN)
6749 warning (OPT_Wattributes, "%qE attribute ignored",
6750 name);
6751 *no_add_attrs = true;
6754 else if (TREE_CODE (*node) == POINTER_TYPE
6755 && (TREE_CODE (TREE_TYPE (*node)) == FUNCTION_TYPE
6756 || TREE_CODE (TREE_TYPE (*node)) == METHOD_TYPE)
6757 && arm_isr_value (args) != ARM_FT_UNKNOWN)
6759 *node = build_variant_type_copy (*node);
6760 TREE_TYPE (*node) = build_type_attribute_variant
6761 (TREE_TYPE (*node),
6762 tree_cons (name, args, TYPE_ATTRIBUTES (TREE_TYPE (*node))));
6763 *no_add_attrs = true;
6765 else
6767 /* Possibly pass this attribute on from the type to a decl. */
6768 if (flags & ((int) ATTR_FLAG_DECL_NEXT
6769 | (int) ATTR_FLAG_FUNCTION_NEXT
6770 | (int) ATTR_FLAG_ARRAY_NEXT))
6772 *no_add_attrs = true;
6773 return tree_cons (name, args, NULL_TREE);
6775 else
6777 warning (OPT_Wattributes, "%qE attribute ignored",
6778 name);
6783 return NULL_TREE;
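/* Usage sketch (illustrative only): the attribute handled above is written
   on interrupt service routines, e.g.

     void irq_handler (void) __attribute__ ((interrupt ("IRQ")));
     void fiq_handler (void) __attribute__ ((isr ("FIQ")));

   An argument string that arm_isr_value does not recognize yields
   ARM_FT_UNKNOWN and the attribute is dropped with a warning.  */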
6786 /* Handle a "pcs" attribute; arguments as in struct
6787 attribute_spec.handler. */
6788 static tree
6789 arm_handle_pcs_attribute (tree *node ATTRIBUTE_UNUSED, tree name, tree args,
6790 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
6792 if (arm_pcs_from_attribute (args) == ARM_PCS_UNKNOWN)
6794 warning (OPT_Wattributes, "%qE attribute ignored", name);
6795 *no_add_attrs = true;
6797 return NULL_TREE;
6800 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
6801 /* Handle the "notshared" attribute. This attribute is another way of
6802 requesting hidden visibility. ARM's compiler supports
6803 "__declspec(notshared)"; we support the same thing via an
6804 attribute. */
6806 static tree
6807 arm_handle_notshared_attribute (tree *node,
6808 tree name ATTRIBUTE_UNUSED,
6809 tree args ATTRIBUTE_UNUSED,
6810 int flags ATTRIBUTE_UNUSED,
6811 bool *no_add_attrs)
6813 tree decl = TYPE_NAME (*node);
6815 if (decl)
6817 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
6818 DECL_VISIBILITY_SPECIFIED (decl) = 1;
6819 *no_add_attrs = false;
6821 return NULL_TREE;
6823 #endif
6825 /* This function returns true if a function with declaration FNDECL and type
6826 FNTYPE uses the stack to pass arguments or return variables and false
6827 otherwise. This is used for functions with the attributes
6828 'cmse_nonsecure_call' or 'cmse_nonsecure_entry' and this function will issue
6829 diagnostic messages if the stack is used. NAME is the name of the attribute
6830 used. */
6832 static bool
6833 cmse_func_args_or_return_in_stack (tree fndecl, tree name, tree fntype)
6835 function_args_iterator args_iter;
6836 CUMULATIVE_ARGS args_so_far_v;
6837 cumulative_args_t args_so_far;
6838 bool first_param = true;
6839 tree arg_type, prev_arg_type = NULL_TREE, ret_type;
6841 /* Error out if any argument is passed on the stack. */
6842 arm_init_cumulative_args (&args_so_far_v, fntype, NULL_RTX, fndecl);
6843 args_so_far = pack_cumulative_args (&args_so_far_v);
6844 FOREACH_FUNCTION_ARGS (fntype, arg_type, args_iter)
6846 rtx arg_rtx;
6847 machine_mode arg_mode = TYPE_MODE (arg_type);
6849 prev_arg_type = arg_type;
6850 if (VOID_TYPE_P (arg_type))
6851 continue;
6853 if (!first_param)
6854 arm_function_arg_advance (args_so_far, arg_mode, arg_type, true);
6855 arg_rtx = arm_function_arg (args_so_far, arg_mode, arg_type, true);
6856 if (!arg_rtx
6857 || arm_arg_partial_bytes (args_so_far, arg_mode, arg_type, true))
6859 error ("%qE attribute not available to functions with arguments "
6860 "passed on the stack", name);
6861 return true;
6863 first_param = false;
6866 /* Error out for variadic functions since we cannot control how many
6867 arguments will be passed and thus stack could be used. stdarg_p () is not
6868 used for the checking to avoid browsing arguments twice. */
6869 if (prev_arg_type != NULL_TREE && !VOID_TYPE_P (prev_arg_type))
6871 error ("%qE attribute not available to functions with variable number "
6872 "of arguments", name);
6873 return true;
6876 /* Error out if return value is passed on the stack. */
6877 ret_type = TREE_TYPE (fntype);
6878 if (arm_return_in_memory (ret_type, fntype))
6880 error ("%qE attribute not available to functions that return value on "
6881 "the stack", name);
6882 return true;
6884 return false;
6887 /* Called upon detection of the use of the cmse_nonsecure_entry attribute, this
6888 function will check whether the attribute is allowed here and will add the
6889 attribute to the function declaration tree or otherwise issue a warning. */
6891 static tree
6892 arm_handle_cmse_nonsecure_entry (tree *node, tree name,
6893 tree /* args */,
6894 int /* flags */,
6895 bool *no_add_attrs)
6897 tree fndecl;
6899 if (!use_cmse)
6901 *no_add_attrs = true;
6902 warning (OPT_Wattributes, "%qE attribute ignored without -mcmse option",
6903 name);
6904 return NULL_TREE;
6907 /* Ignore attribute for function types. */
6908 if (TREE_CODE (*node) != FUNCTION_DECL)
6910 warning (OPT_Wattributes, "%qE attribute only applies to functions",
6911 name);
6912 *no_add_attrs = true;
6913 return NULL_TREE;
6916 fndecl = *node;
6918 /* Warn for static linkage functions. */
6919 if (!TREE_PUBLIC (fndecl))
6921 warning (OPT_Wattributes, "%qE attribute has no effect on functions "
6922 "with static linkage", name);
6923 *no_add_attrs = true;
6924 return NULL_TREE;
6927 *no_add_attrs |= cmse_func_args_or_return_in_stack (fndecl, name,
6928 TREE_TYPE (fndecl));
6929 return NULL_TREE;
6933 /* Called upon detection of the use of the cmse_nonsecure_call attribute, this
6934 function will check whether the attribute is allowed here and will add the
6935 attribute to the function type tree or otherwise issue a diagnostic. The
6936 reason we check this at declaration time is to only allow the use of the
6937 attribute with declarations of function pointers and not function
6938 declarations. This function checks NODE is of the expected type and issues
6939 diagnostics otherwise using NAME. If it is not of the expected type
6940 *NO_ADD_ATTRS will be set to true. */
6942 static tree
6943 arm_handle_cmse_nonsecure_call (tree *node, tree name,
6944 tree /* args */,
6945 int /* flags */,
6946 bool *no_add_attrs)
6948 tree decl = NULL_TREE, fntype = NULL_TREE;
6949 tree type;
6951 if (!use_cmse)
6953 *no_add_attrs = true;
6954 warning (OPT_Wattributes, "%qE attribute ignored without -mcmse option",
6955 name);
6956 return NULL_TREE;
6959 if (TREE_CODE (*node) == VAR_DECL || TREE_CODE (*node) == TYPE_DECL)
6961 decl = *node;
6962 fntype = TREE_TYPE (decl);
6965 while (fntype != NULL_TREE && TREE_CODE (fntype) == POINTER_TYPE)
6966 fntype = TREE_TYPE (fntype);
6968 if (!decl || TREE_CODE (fntype) != FUNCTION_TYPE)
6970 warning (OPT_Wattributes, "%qE attribute only applies to base type of a "
6971 "function pointer", name);
6972 *no_add_attrs = true;
6973 return NULL_TREE;
6976 *no_add_attrs |= cmse_func_args_or_return_in_stack (NULL, name, fntype);
6978 if (*no_add_attrs)
6979 return NULL_TREE;
6981 /* Prevent trees being shared among function types with and without
6982 cmse_nonsecure_call attribute. */
6983 type = TREE_TYPE (decl);
6985 type = build_distinct_type_copy (type);
6986 TREE_TYPE (decl) = type;
6987 fntype = type;
6989 while (TREE_CODE (fntype) != FUNCTION_TYPE)
6991 type = fntype;
6992 fntype = TREE_TYPE (fntype);
6993 fntype = build_distinct_type_copy (fntype);
6994 TREE_TYPE (type) = fntype;
6997 /* Construct a type attribute and add it to the function type. */
6998 tree attrs = tree_cons (get_identifier ("cmse_nonsecure_call"), NULL_TREE,
6999 TYPE_ATTRIBUTES (fntype));
7000 TYPE_ATTRIBUTES (fntype) = attrs;
7001 return NULL_TREE;
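/* Usage sketch (illustrative, assuming -mcmse): the two handlers above
   correspond to declarations such as

     int entry (int x) __attribute__ ((cmse_nonsecure_entry));

     typedef void __attribute__ ((cmse_nonsecure_call)) ns_call_t (int);
     ns_call_t *ns_callback;

   cmse_nonsecure_entry marks secure entry functions, while
   cmse_nonsecure_call is only accepted on the base function type of a
   pointer used to call into non-secure code.  */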
7004 /* Return 0 if the attributes for two types are incompatible, 1 if they
7005 are compatible, and 2 if they are nearly compatible (which causes a
7006 warning to be generated). */
7007 static int
7008 arm_comp_type_attributes (const_tree type1, const_tree type2)
7010 int l1, l2, s1, s2;
7012 /* Check for mismatch of non-default calling convention. */
7013 if (TREE_CODE (type1) != FUNCTION_TYPE)
7014 return 1;
7016 /* Check for mismatched call attributes. */
7017 l1 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type1)) != NULL;
7018 l2 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type2)) != NULL;
7019 s1 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type1)) != NULL;
7020 s2 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type2)) != NULL;
7022 /* Only bother to check if an attribute is defined. */
7023 if (l1 | l2 | s1 | s2)
7025 /* If one type has an attribute, the other must have the same attribute. */
7026 if ((l1 != l2) || (s1 != s2))
7027 return 0;
7029 /* Disallow mixed attributes. */
7030 if ((l1 & s2) || (l2 & s1))
7031 return 0;
7034 /* Check for mismatched ISR attribute. */
7035 l1 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type1)) != NULL;
7036 if (! l1)
7037 l1 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type1)) != NULL;
7038 l2 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type2)) != NULL;
7039 if (! l2)
7040 l2 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type2)) != NULL;
7041 if (l1 != l2)
7042 return 0;
7044 l1 = lookup_attribute ("cmse_nonsecure_call",
7045 TYPE_ATTRIBUTES (type1)) != NULL;
7046 l2 = lookup_attribute ("cmse_nonsecure_call",
7047 TYPE_ATTRIBUTES (type2)) != NULL;
7049 if (l1 != l2)
7050 return 0;
7052 return 1;
7055 /* Assigns default attributes to newly defined type. This is used to
7056 set short_call/long_call attributes for function types of
7057 functions defined inside corresponding #pragma scopes. */
7058 static void
7059 arm_set_default_type_attributes (tree type)
7061 /* Add __attribute__ ((long_call)) to all functions, when
7062 inside #pragma long_calls or __attribute__ ((short_call)),
7063 when inside #pragma no_long_calls. */
7064 if (TREE_CODE (type) == FUNCTION_TYPE || TREE_CODE (type) == METHOD_TYPE)
7066 tree type_attr_list, attr_name;
7067 type_attr_list = TYPE_ATTRIBUTES (type);
7069 if (arm_pragma_long_calls == LONG)
7070 attr_name = get_identifier ("long_call");
7071 else if (arm_pragma_long_calls == SHORT)
7072 attr_name = get_identifier ("short_call");
7073 else
7074 return;
7076 type_attr_list = tree_cons (attr_name, NULL_TREE, type_attr_list);
7077 TYPE_ATTRIBUTES (type) = type_attr_list;
7081 /* Return true if DECL is known to be linked into section SECTION. */
7083 static bool
7084 arm_function_in_section_p (tree decl, section *section)
7086 /* We can only be certain about the prevailing symbol definition. */
7087 if (!decl_binds_to_current_def_p (decl))
7088 return false;
7090 /* If DECL_SECTION_NAME is set, assume it is trustworthy. */
7091 if (!DECL_SECTION_NAME (decl))
7093 /* Make sure that we will not create a unique section for DECL. */
7094 if (flag_function_sections || DECL_COMDAT_GROUP (decl))
7095 return false;
7098 return function_section (decl) == section;
7101 /* Return nonzero if a 32-bit "long_call" should be generated for
7102 a call from the current function to DECL. We generate a long_call
7103 if the function:
7105 a. has an __attribute__((long_call))
7106 or b. is within the scope of a #pragma long_calls
7107 or c. the -mlong-calls command line switch has been specified
7109 However we do not generate a long call if the function:
7111 d. has an __attribute__ ((short_call))
7112 or e. is inside the scope of a #pragma no_long_calls
7113 or f. is defined in the same section as the current function. */
7115 bool
7116 arm_is_long_call_p (tree decl)
7118 tree attrs;
7120 if (!decl)
7121 return TARGET_LONG_CALLS;
7123 attrs = TYPE_ATTRIBUTES (TREE_TYPE (decl));
7124 if (lookup_attribute ("short_call", attrs))
7125 return false;
7127 /* For "f", be conservative, and only cater for cases in which the
7128 whole of the current function is placed in the same section. */
7129 if (!flag_reorder_blocks_and_partition
7130 && TREE_CODE (decl) == FUNCTION_DECL
7131 && arm_function_in_section_p (decl, current_function_section ()))
7132 return false;
7134 if (lookup_attribute ("long_call", attrs))
7135 return true;
7137 return TARGET_LONG_CALLS;
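/* Illustrative declarations (a sketch of rules a and d above):

     void far_func (void)  __attribute__ ((long_call));   // always long
     void near_func (void) __attribute__ ((short_call));  // never long

   -mlong-calls (rule c) only changes the default for functions carrying
   neither attribute, and rule f still suppresses the long call when the
   callee is known to end up in the same section as the caller.  */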
7140 /* Return nonzero if it is ok to make a tail-call to DECL. */
7141 static bool
7142 arm_function_ok_for_sibcall (tree decl, tree exp)
7144 unsigned long func_type;
7146 if (cfun->machine->sibcall_blocked)
7147 return false;
7149 /* Never tailcall something if we are generating code for Thumb-1. */
7150 if (TARGET_THUMB1)
7151 return false;
7153 /* The PIC register is live on entry to VxWorks PLT entries, so we
7154 must make the call before restoring the PIC register. */
7155 if (TARGET_VXWORKS_RTP && flag_pic && decl && !targetm.binds_local_p (decl))
7156 return false;
7158 /* ??? Cannot tail-call to long calls with APCS frame and VFP, because IP
7159 may be used both as target of the call and base register for restoring
7160 the VFP registers. */
7161 if (TARGET_APCS_FRAME && TARGET_ARM
7162 && TARGET_HARD_FLOAT
7163 && decl && arm_is_long_call_p (decl))
7164 return false;
7166 /* If we are interworking and the function is not declared static
7167 then we can't tail-call it unless we know that it exists in this
7168 compilation unit (since it might be a Thumb routine). */
7169 if (TARGET_INTERWORK && decl && TREE_PUBLIC (decl)
7170 && !TREE_ASM_WRITTEN (decl))
7171 return false;
7173 func_type = arm_current_func_type ();
7174 /* Never tailcall from an ISR routine - it needs a special exit sequence. */
7175 if (IS_INTERRUPT (func_type))
7176 return false;
7178 /* ARMv8-M non-secure entry functions need to return with bxns which is only
7179 generated for entry functions themselves. */
7180 if (IS_CMSE_ENTRY (arm_current_func_type ()))
7181 return false;
7183 /* We do not allow ARMv8-M non-secure calls to be turned into sibling calls,
7184 this would complicate matters for later code generation. */
7185 if (TREE_CODE (exp) == CALL_EXPR)
7187 tree fntype = TREE_TYPE (TREE_TYPE (CALL_EXPR_FN (exp)));
7188 if (lookup_attribute ("cmse_nonsecure_call", TYPE_ATTRIBUTES (fntype)))
7189 return false;
7192 if (!VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
7194 /* Check that the return value locations are the same. For
7195 example that we aren't returning a value from the sibling in
7196 a VFP register but then need to transfer it to a core
7197 register. */
7198 rtx a, b;
7199 tree decl_or_type = decl;
7201 /* If it is an indirect function pointer, get the function type. */
7202 if (!decl)
7203 decl_or_type = TREE_TYPE (TREE_TYPE (CALL_EXPR_FN (exp)));
7205 a = arm_function_value (TREE_TYPE (exp), decl_or_type, false);
7206 b = arm_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
7207 cfun->decl, false);
7208 if (!rtx_equal_p (a, b))
7209 return false;
7212 /* Never tailcall if function may be called with a misaligned SP. */
7213 if (IS_STACKALIGN (func_type))
7214 return false;
7216 /* The AAPCS says that, on bare-metal, calls to unresolved weak
7217 references should become a NOP. Don't convert such calls into
7218 sibling calls. */
7219 if (TARGET_AAPCS_BASED
7220 && arm_abi == ARM_ABI_AAPCS
7221 && decl
7222 && DECL_WEAK (decl))
7223 return false;
7225 /* We cannot do a tailcall for an indirect call by descriptor if all the
7226 argument registers are used because the only register left to load the
7227 address is IP and it will already contain the static chain. */
7228 if (!decl && CALL_EXPR_BY_DESCRIPTOR (exp) && !flag_trampolines)
7230 tree fntype = TREE_TYPE (TREE_TYPE (CALL_EXPR_FN (exp)));
7231 CUMULATIVE_ARGS cum;
7232 cumulative_args_t cum_v;
7234 arm_init_cumulative_args (&cum, fntype, NULL_RTX, NULL_TREE);
7235 cum_v = pack_cumulative_args (&cum);
7237 for (tree t = TYPE_ARG_TYPES (fntype); t; t = TREE_CHAIN (t))
7239 tree type = TREE_VALUE (t);
7240 if (!VOID_TYPE_P (type))
7241 arm_function_arg_advance (cum_v, TYPE_MODE (type), type, true);
7244 if (!arm_function_arg (cum_v, SImode, integer_type_node, true))
7245 return false;
7248 /* Everything else is ok. */
7249 return true;
7253 /* Addressing mode support functions. */
7255 /* Return nonzero if X is a legitimate immediate operand when compiling
7256 for PIC. We know that X satisfies CONSTANT_P and flag_pic is true. */
7257 int
7258 legitimate_pic_operand_p (rtx x)
7260 if (GET_CODE (x) == SYMBOL_REF
7261 || (GET_CODE (x) == CONST
7262 && GET_CODE (XEXP (x, 0)) == PLUS
7263 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF))
7264 return 0;
7266 return 1;
7269 /* Record that the current function needs a PIC register. Initialize
7270 cfun->machine->pic_reg if we have not already done so. */
7272 static void
7273 require_pic_register (void)
7275 /* A lot of the logic here is made obscure by the fact that this
7276 routine gets called as part of the rtx cost estimation process.
7277 We don't want those calls to affect any assumptions about the real
7278 function; and further, we can't call entry_of_function() until we
7279 start the real expansion process. */
7280 if (!crtl->uses_pic_offset_table)
7282 gcc_assert (can_create_pseudo_p ());
7283 if (arm_pic_register != INVALID_REGNUM
7284 && !(TARGET_THUMB1 && arm_pic_register > LAST_LO_REGNUM))
7286 if (!cfun->machine->pic_reg)
7287 cfun->machine->pic_reg = gen_rtx_REG (Pmode, arm_pic_register);
7289 /* Play games to avoid marking the function as needing pic
7290 if we are being called as part of the cost-estimation
7291 process. */
7292 if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
7293 crtl->uses_pic_offset_table = 1;
7295 else
7297 rtx_insn *seq, *insn;
7299 if (!cfun->machine->pic_reg)
7300 cfun->machine->pic_reg = gen_reg_rtx (Pmode);
7302 /* Play games to avoid marking the function as needing pic
7303 if we are being called as part of the cost-estimation
7304 process. */
7305 if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
7307 crtl->uses_pic_offset_table = 1;
7308 start_sequence ();
7310 if (TARGET_THUMB1 && arm_pic_register != INVALID_REGNUM
7311 && arm_pic_register > LAST_LO_REGNUM)
7312 emit_move_insn (cfun->machine->pic_reg,
7313 gen_rtx_REG (Pmode, arm_pic_register));
7314 else
7315 arm_load_pic_register (0UL);
7317 seq = get_insns ();
7318 end_sequence ();
7320 for (insn = seq; insn; insn = NEXT_INSN (insn))
7321 if (INSN_P (insn))
7322 INSN_LOCATION (insn) = prologue_location;
7324 /* We can be called during expansion of PHI nodes, where
7325 we can't yet emit instructions directly in the final
7326 insn stream. Queue the insns on the entry edge, they will
7327 be committed after everything else is expanded. */
7328 insert_insn_on_edge (seq,
7329 single_succ_edge (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
7336 legitimize_pic_address (rtx orig, machine_mode mode, rtx reg)
7338 if (GET_CODE (orig) == SYMBOL_REF
7339 || GET_CODE (orig) == LABEL_REF)
7341 if (reg == 0)
7343 gcc_assert (can_create_pseudo_p ());
7344 reg = gen_reg_rtx (Pmode);
7347 /* VxWorks does not impose a fixed gap between segments; the run-time
7348 gap can be different from the object-file gap. We therefore can't
7349 use GOTOFF unless we are absolutely sure that the symbol is in the
7350 same segment as the GOT. Unfortunately, the flexibility of linker
7351 scripts means that we can't be sure of that in general, so assume
7352 that GOTOFF is never valid on VxWorks. */
7353 /* References to weak symbols cannot be resolved locally: they
7354 may be overridden by a non-weak definition at link time. */
7355 rtx_insn *insn;
7356 if ((GET_CODE (orig) == LABEL_REF
7357 || (GET_CODE (orig) == SYMBOL_REF
7358 && SYMBOL_REF_LOCAL_P (orig)
7359 && (SYMBOL_REF_DECL (orig)
7360 ? !DECL_WEAK (SYMBOL_REF_DECL (orig)) : 1)))
7361 && NEED_GOT_RELOC
7362 && arm_pic_data_is_text_relative)
7363 insn = arm_pic_static_addr (orig, reg);
7364 else
7366 rtx pat;
7367 rtx mem;
7369 /* If this function doesn't have a pic register, create one now. */
7370 require_pic_register ();
7372 pat = gen_calculate_pic_address (reg, cfun->machine->pic_reg, orig);
7374 /* Make the MEM as close to a constant as possible. */
7375 mem = SET_SRC (pat);
7376 gcc_assert (MEM_P (mem) && !MEM_VOLATILE_P (mem));
7377 MEM_READONLY_P (mem) = 1;
7378 MEM_NOTRAP_P (mem) = 1;
7380 insn = emit_insn (pat);
7383 /* Put a REG_EQUAL note on this insn, so that it can be optimized
7384 by the loop optimizer. */
7385 set_unique_reg_note (insn, REG_EQUAL, orig);
7387 return reg;
7389 else if (GET_CODE (orig) == CONST)
7391 rtx base, offset;
7393 if (GET_CODE (XEXP (orig, 0)) == PLUS
7394 && XEXP (XEXP (orig, 0), 0) == cfun->machine->pic_reg)
7395 return orig;
7397 /* Handle the case where we have: const (UNSPEC_TLS). */
7398 if (GET_CODE (XEXP (orig, 0)) == UNSPEC
7399 && XINT (XEXP (orig, 0), 1) == UNSPEC_TLS)
7400 return orig;
7402 /* Handle the case where we have:
7403 const (plus (UNSPEC_TLS) (ADDEND)). The ADDEND must be a
7404 CONST_INT. */
7405 if (GET_CODE (XEXP (orig, 0)) == PLUS
7406 && GET_CODE (XEXP (XEXP (orig, 0), 0)) == UNSPEC
7407 && XINT (XEXP (XEXP (orig, 0), 0), 1) == UNSPEC_TLS)
7409 gcc_assert (CONST_INT_P (XEXP (XEXP (orig, 0), 1)));
7410 return orig;
7413 if (reg == 0)
7415 gcc_assert (can_create_pseudo_p ());
7416 reg = gen_reg_rtx (Pmode);
7419 gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);
7421 base = legitimize_pic_address (XEXP (XEXP (orig, 0), 0), Pmode, reg);
7422 offset = legitimize_pic_address (XEXP (XEXP (orig, 0), 1), Pmode,
7423 base == reg ? 0 : reg);
7425 if (CONST_INT_P (offset))
7427 /* The base register doesn't really matter; we only want to
7428 test the index for the appropriate mode. */
7429 if (!arm_legitimate_index_p (mode, offset, SET, 0))
7431 gcc_assert (can_create_pseudo_p ());
7432 offset = force_reg (Pmode, offset);
7435 if (CONST_INT_P (offset))
7436 return plus_constant (Pmode, base, INTVAL (offset));
7439 if (GET_MODE_SIZE (mode) > 4
7440 && (GET_MODE_CLASS (mode) == MODE_INT
7441 || TARGET_SOFT_FLOAT))
7443 emit_insn (gen_addsi3 (reg, base, offset));
7444 return reg;
7447 return gen_rtx_PLUS (Pmode, base, offset);
7450 return orig;
7454 /* Find a spare register to use during the prolog of a function. */
7456 static int
7457 thumb_find_work_register (unsigned long pushed_regs_mask)
7459 int reg;
7461 /* Check the argument registers first as these are call-used. The
7462 register allocation order means that sometimes r3 might be used
7463 but earlier argument registers might not, so check them all. */
7464 for (reg = LAST_ARG_REGNUM; reg >= 0; reg --)
7465 if (!df_regs_ever_live_p (reg))
7466 return reg;
7468 /* Before going on to check the call-saved registers we can try a couple
7469 more ways of deducing that r3 is available. The first is when we are
7470 pushing anonymous arguments onto the stack and we have fewer than 4
7471 registers' worth of fixed arguments (*). In this case r3 will be part of
7472 the variable argument list and so we can be sure that it will be
7473 pushed right at the start of the function. Hence it will be available
7474 for the rest of the prologue.
7475 (*): i.e. crtl->args.pretend_args_size is greater than 0. */
7476 if (cfun->machine->uses_anonymous_args
7477 && crtl->args.pretend_args_size > 0)
7478 return LAST_ARG_REGNUM;
7480 /* The other case is when we have fixed arguments but fewer than 4 registers'
7481 worth. In this case r3 might be used in the body of the function, but
7482 it is not being used to convey an argument into the function. In theory
7483 we could just check crtl->args.size to see how many bytes are
7484 being passed in argument registers, but it seems that it is unreliable.
7485 Sometimes it will have the value 0 when in fact arguments are being
7486 passed. (See testcase execute/20021111-1.c for an example). So we also
7487 check the args_info.nregs field as well. The problem with this field is
7488 that it makes no allowances for arguments that are passed to the
7489 function but which are not used. Hence we could miss an opportunity
7490 when a function has an unused argument in r3. But it is better to be
7491 safe than to be sorry. */
7492 if (! cfun->machine->uses_anonymous_args
7493 && crtl->args.size >= 0
7494 && crtl->args.size <= (LAST_ARG_REGNUM * UNITS_PER_WORD)
7495 && (TARGET_AAPCS_BASED
7496 ? crtl->args.info.aapcs_ncrn < 4
7497 : crtl->args.info.nregs < 4))
7498 return LAST_ARG_REGNUM;
7500 /* Otherwise look for a call-saved register that is going to be pushed. */
7501 for (reg = LAST_LO_REGNUM; reg > LAST_ARG_REGNUM; reg --)
7502 if (pushed_regs_mask & (1 << reg))
7503 return reg;
7505 if (TARGET_THUMB2)
7507 /* Thumb-2 can use high regs. */
7508 for (reg = FIRST_HI_REGNUM; reg < 15; reg ++)
7509 if (pushed_regs_mask & (1 << reg))
7510 return reg;
7512 /* Something went wrong - thumb_compute_save_reg_mask()
7513 should have arranged for a suitable register to be pushed. */
7514 gcc_unreachable ();
7517 static GTY(()) int pic_labelno;
7519 /* Generate code to load the PIC register. In thumb mode SCRATCH is a
7520 low register. */
7522 void
7523 arm_load_pic_register (unsigned long saved_regs ATTRIBUTE_UNUSED)
7525 rtx l1, labelno, pic_tmp, pic_rtx, pic_reg;
7527 if (crtl->uses_pic_offset_table == 0 || TARGET_SINGLE_PIC_BASE)
7528 return;
7530 gcc_assert (flag_pic);
7532 pic_reg = cfun->machine->pic_reg;
7533 if (TARGET_VXWORKS_RTP)
7535 pic_rtx = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE);
7536 pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
7537 emit_insn (gen_pic_load_addr_32bit (pic_reg, pic_rtx));
7539 emit_insn (gen_rtx_SET (pic_reg, gen_rtx_MEM (Pmode, pic_reg)));
7541 pic_tmp = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
7542 emit_insn (gen_pic_offset_arm (pic_reg, pic_reg, pic_tmp));
7544 else
7546 /* We use an UNSPEC rather than a LABEL_REF because this label
7547 never appears in the code stream. */
7549 labelno = GEN_INT (pic_labelno++);
7550 l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
7551 l1 = gen_rtx_CONST (VOIDmode, l1);
7553 /* On the ARM the PC register contains 'dot + 8' at the time of the
7554 addition, on the Thumb it is 'dot + 4'. */
7555 pic_rtx = plus_constant (Pmode, l1, TARGET_ARM ? 8 : 4);
7556 pic_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, pic_rtx),
7557 UNSPEC_GOTSYM_OFF);
7558 pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
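 /* For illustration (a sketch of the arithmetic, assuming the usual ELF GOT
    scheme): the constant built above is roughly
    _GLOBAL_OFFSET_TABLE_ - (.LPICn + 8) in ARM state, so adding the PC value
    sampled at .LPICn (which reads as .LPICn + 8) yields the address of the
    GOT; in Thumb state the PC reads as .LPICn + 4, hence the smaller
    addend.  */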
7560 if (TARGET_32BIT)
7562 emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
7564 else /* TARGET_THUMB1 */
7566 if (arm_pic_register != INVALID_REGNUM
7567 && REGNO (pic_reg) > LAST_LO_REGNUM)
7569 /* We will have pushed the pic register, so we should always be
7570 able to find a work register. */
7571 pic_tmp = gen_rtx_REG (SImode,
7572 thumb_find_work_register (saved_regs));
7573 emit_insn (gen_pic_load_addr_thumb1 (pic_tmp, pic_rtx));
7574 emit_insn (gen_movsi (pic_offset_table_rtx, pic_tmp));
7575 emit_insn (gen_pic_add_dot_plus_four (pic_reg, pic_reg, labelno));
7577 else if (arm_pic_register != INVALID_REGNUM
7578 && arm_pic_register > LAST_LO_REGNUM
7579 && REGNO (pic_reg) <= LAST_LO_REGNUM)
7581 emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
7582 emit_move_insn (gen_rtx_REG (Pmode, arm_pic_register), pic_reg);
7583 emit_use (gen_rtx_REG (Pmode, arm_pic_register));
7585 else
7586 emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
7590 /* Need to emit this whether or not we obey regdecls,
7591 since setjmp/longjmp can cause life info to screw up. */
7592 emit_use (pic_reg);
7595 /* Generate code to load the address of a static var when flag_pic is set. */
7596 static rtx_insn *
7597 arm_pic_static_addr (rtx orig, rtx reg)
7599 rtx l1, labelno, offset_rtx;
7601 gcc_assert (flag_pic);
7603 /* We use an UNSPEC rather than a LABEL_REF because this label
7604 never appears in the code stream. */
7605 labelno = GEN_INT (pic_labelno++);
7606 l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
7607 l1 = gen_rtx_CONST (VOIDmode, l1);
7609 /* On the ARM the PC register contains 'dot + 8' at the time of the
7610 addition, on the Thumb it is 'dot + 4'. */
7611 offset_rtx = plus_constant (Pmode, l1, TARGET_ARM ? 8 : 4);
7612 offset_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, orig, offset_rtx),
7613 UNSPEC_SYMBOL_OFFSET);
7614 offset_rtx = gen_rtx_CONST (Pmode, offset_rtx);
7616 return emit_insn (gen_pic_load_addr_unified (reg, offset_rtx, labelno));
7619 /* Return nonzero if X is valid as an ARM state addressing register. */
7620 static int
7621 arm_address_register_rtx_p (rtx x, int strict_p)
7623 int regno;
7625 if (!REG_P (x))
7626 return 0;
7628 regno = REGNO (x);
7630 if (strict_p)
7631 return ARM_REGNO_OK_FOR_BASE_P (regno);
7633 return (regno <= LAST_ARM_REGNUM
7634 || regno >= FIRST_PSEUDO_REGISTER
7635 || regno == FRAME_POINTER_REGNUM
7636 || regno == ARG_POINTER_REGNUM);
7639 /* Return TRUE if this rtx is the difference of a symbol and a label,
7640 and will reduce to a PC-relative relocation in the object file.
7641 Expressions like this can be left alone when generating PIC, rather
7642 than forced through the GOT. */
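 /* For illustration: a hypothetical (minus (symbol_ref "sym") (label_ref L))
    is such an expression; the assembler can resolve it to the fixed
    displacement sym - L, so no GOT entry is required.  */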
7643 static int
7644 pcrel_constant_p (rtx x)
7646 if (GET_CODE (x) == MINUS)
7647 return symbol_mentioned_p (XEXP (x, 0)) && label_mentioned_p (XEXP (x, 1));
7649 return FALSE;
7652 /* Return true if X will surely end up in an index register after next
7653 splitting pass. */
7654 static bool
7655 will_be_in_index_register (const_rtx x)
7657 /* arm.md: calculate_pic_address will split this into a register. */
7658 return GET_CODE (x) == UNSPEC && (XINT (x, 1) == UNSPEC_PIC_SYM);
7661 /* Return nonzero if X is a valid ARM state address operand. */
7662 int
7663 arm_legitimate_address_outer_p (machine_mode mode, rtx x, RTX_CODE outer,
7664 int strict_p)
7666 bool use_ldrd;
7667 enum rtx_code code = GET_CODE (x);
7669 if (arm_address_register_rtx_p (x, strict_p))
7670 return 1;
7672 use_ldrd = (TARGET_LDRD
7673 && (mode == DImode || mode == DFmode));
7675 if (code == POST_INC || code == PRE_DEC
7676 || ((code == PRE_INC || code == POST_DEC)
7677 && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
7678 return arm_address_register_rtx_p (XEXP (x, 0), strict_p);
7680 else if ((code == POST_MODIFY || code == PRE_MODIFY)
7681 && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
7682 && GET_CODE (XEXP (x, 1)) == PLUS
7683 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
7685 rtx addend = XEXP (XEXP (x, 1), 1);
7687 /* Don't allow ldrd post increment by register because it's hard
7688 to fixup invalid register choices. */
7689 if (use_ldrd
7690 && GET_CODE (x) == POST_MODIFY
7691 && REG_P (addend))
7692 return 0;
7694 return ((use_ldrd || GET_MODE_SIZE (mode) <= 4)
7695 && arm_legitimate_index_p (mode, addend, outer, strict_p));
7698 /* After reload constants split into minipools will have addresses
7699 from a LABEL_REF. */
7700 else if (reload_completed
7701 && (code == LABEL_REF
7702 || (code == CONST
7703 && GET_CODE (XEXP (x, 0)) == PLUS
7704 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
7705 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
7706 return 1;
7708 else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))
7709 return 0;
7711 else if (code == PLUS)
7713 rtx xop0 = XEXP (x, 0);
7714 rtx xop1 = XEXP (x, 1);
7716 return ((arm_address_register_rtx_p (xop0, strict_p)
7717 && ((CONST_INT_P (xop1)
7718 && arm_legitimate_index_p (mode, xop1, outer, strict_p))
7719 || (!strict_p && will_be_in_index_register (xop1))))
7720 || (arm_address_register_rtx_p (xop1, strict_p)
7721 && arm_legitimate_index_p (mode, xop0, outer, strict_p)));
7724 #if 0
7725 /* Reload currently can't handle MINUS, so disable this for now */
7726 else if (GET_CODE (x) == MINUS)
7728 rtx xop0 = XEXP (x, 0);
7729 rtx xop1 = XEXP (x, 1);
7731 return (arm_address_register_rtx_p (xop0, strict_p)
7732 && arm_legitimate_index_p (mode, xop1, outer, strict_p));
7734 #endif
7736 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
7737 && code == SYMBOL_REF
7738 && CONSTANT_POOL_ADDRESS_P (x)
7739 && ! (flag_pic
7740 && symbol_mentioned_p (get_pool_constant (x))
7741 && ! pcrel_constant_p (get_pool_constant (x))))
7742 return 1;
7744 return 0;
7747 /* Return true if we can avoid creating a constant pool entry for x. */
7748 static bool
7749 can_avoid_literal_pool_for_label_p (rtx x)
7751 /* Normally we can assign constant values to target registers without
7752 the help of the constant pool. But there are cases where we have to use
7753 the constant pool, such as:
7754 1) assigning a label to a register;
7755 2) sign-extending an 8-bit value to 32 bits and then assigning it to a register.
7757 A constant pool access of the form:
7758 (set (reg r0) (mem (symbol_ref (".LC0"))))
7759 will cause the use of the literal pool (later, in function arm_reorg).
7760 So here we mark such a form as invalid; the compiler will then
7761 adjust it into:
7762 (set (reg r0) (symbol_ref (".LC0")))
7763 (set (reg r0) (mem (reg r0))).
7764 No extra register is required, and (mem (reg r0)) won't cause the use
7765 of literal pools. */
7766 if (arm_disable_literal_pool && GET_CODE (x) == SYMBOL_REF
7767 && CONSTANT_POOL_ADDRESS_P (x))
7768 return 1;
7769 return 0;
7773 /* Return nonzero if X is a valid Thumb-2 address operand. */
7774 static int
7775 thumb2_legitimate_address_p (machine_mode mode, rtx x, int strict_p)
7777 bool use_ldrd;
7778 enum rtx_code code = GET_CODE (x);
7780 if (arm_address_register_rtx_p (x, strict_p))
7781 return 1;
7783 use_ldrd = (TARGET_LDRD
7784 && (mode == DImode || mode == DFmode));
7786 if (code == POST_INC || code == PRE_DEC
7787 || ((code == PRE_INC || code == POST_DEC)
7788 && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
7789 return arm_address_register_rtx_p (XEXP (x, 0), strict_p);
7791 else if ((code == POST_MODIFY || code == PRE_MODIFY)
7792 && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
7793 && GET_CODE (XEXP (x, 1)) == PLUS
7794 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
7796 /* Thumb-2 only has autoincrement by constant. */
7797 rtx addend = XEXP (XEXP (x, 1), 1);
7798 HOST_WIDE_INT offset;
7800 if (!CONST_INT_P (addend))
7801 return 0;
7803 offset = INTVAL(addend);
7804 if (GET_MODE_SIZE (mode) <= 4)
7805 return (offset > -256 && offset < 256);
7807 return (use_ldrd && offset > -1024 && offset < 1024
7808 && (offset & 3) == 0);
7811 /* After reload constants split into minipools will have addresses
7812 from a LABEL_REF. */
7813 else if (reload_completed
7814 && (code == LABEL_REF
7815 || (code == CONST
7816 && GET_CODE (XEXP (x, 0)) == PLUS
7817 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
7818 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
7819 return 1;
7821 else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))
7822 return 0;
7824 else if (code == PLUS)
7826 rtx xop0 = XEXP (x, 0);
7827 rtx xop1 = XEXP (x, 1);
7829 return ((arm_address_register_rtx_p (xop0, strict_p)
7830 && (thumb2_legitimate_index_p (mode, xop1, strict_p)
7831 || (!strict_p && will_be_in_index_register (xop1))))
7832 || (arm_address_register_rtx_p (xop1, strict_p)
7833 && thumb2_legitimate_index_p (mode, xop0, strict_p)));
7836 else if (can_avoid_literal_pool_for_label_p (x))
7837 return 0;
7839 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
7840 && code == SYMBOL_REF
7841 && CONSTANT_POOL_ADDRESS_P (x)
7842 && ! (flag_pic
7843 && symbol_mentioned_p (get_pool_constant (x))
7844 && ! pcrel_constant_p (get_pool_constant (x))))
7845 return 1;
7847 return 0;
7850 /* Return nonzero if INDEX is valid for an address index operand in
7851 ARM state. */
7852 static int
7853 arm_legitimate_index_p (machine_mode mode, rtx index, RTX_CODE outer,
7854 int strict_p)
7856 HOST_WIDE_INT range;
7857 enum rtx_code code = GET_CODE (index);
7859 /* Standard coprocessor addressing modes. */
7860 if (TARGET_HARD_FLOAT
7861 && (mode == SFmode || mode == DFmode))
7862 return (code == CONST_INT && INTVAL (index) < 1024
7863 && INTVAL (index) > -1024
7864 && (INTVAL (index) & 3) == 0);
7866 /* For quad modes, we restrict the constant offset to be slightly less
7867 than what the instruction format permits. We do this because for
7868 quad mode moves, we will actually decompose them into two separate
7869 double-mode reads or writes. INDEX must therefore be a valid
7870 (double-mode) offset and so should INDEX+8. */
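 /* For illustration: an offset of 1016 is rejected below even though a single
    D-register access at 1016 would be encodable, because the second half of
    the split access would land at 1016 + 8 = 1024, which is out of range;
    1012 (second half at 1020) is the largest offset accepted.  */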
7871 if (TARGET_NEON && VALID_NEON_QREG_MODE (mode))
7872 return (code == CONST_INT
7873 && INTVAL (index) < 1016
7874 && INTVAL (index) > -1024
7875 && (INTVAL (index) & 3) == 0);
7877 /* We have no such constraint on double mode offsets, so we permit the
7878 full range of the instruction format. */
7879 if (TARGET_NEON && VALID_NEON_DREG_MODE (mode))
7880 return (code == CONST_INT
7881 && INTVAL (index) < 1024
7882 && INTVAL (index) > -1024
7883 && (INTVAL (index) & 3) == 0);
7885 if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
7886 return (code == CONST_INT
7887 && INTVAL (index) < 1024
7888 && INTVAL (index) > -1024
7889 && (INTVAL (index) & 3) == 0);
7891 if (arm_address_register_rtx_p (index, strict_p)
7892 && (GET_MODE_SIZE (mode) <= 4))
7893 return 1;
7895 if (mode == DImode || mode == DFmode)
7897 if (code == CONST_INT)
7899 HOST_WIDE_INT val = INTVAL (index);
7901 if (TARGET_LDRD)
7902 return val > -256 && val < 256;
7903 else
7904 return val > -4096 && val < 4092;
7907 return TARGET_LDRD && arm_address_register_rtx_p (index, strict_p);
7910 if (GET_MODE_SIZE (mode) <= 4
7911 && ! (arm_arch4
7912 && (mode == HImode
7913 || mode == HFmode
7914 || (mode == QImode && outer == SIGN_EXTEND))))
7916 if (code == MULT)
7918 rtx xiop0 = XEXP (index, 0);
7919 rtx xiop1 = XEXP (index, 1);
7921 return ((arm_address_register_rtx_p (xiop0, strict_p)
7922 && power_of_two_operand (xiop1, SImode))
7923 || (arm_address_register_rtx_p (xiop1, strict_p)
7924 && power_of_two_operand (xiop0, SImode)));
7926 else if (code == LSHIFTRT || code == ASHIFTRT
7927 || code == ASHIFT || code == ROTATERT)
7929 rtx op = XEXP (index, 1);
7931 return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
7932 && CONST_INT_P (op)
7933 && INTVAL (op) > 0
7934 && INTVAL (op) <= 31);
7938 /* For ARM v4 we may be doing a sign-extend operation during the
7939 load. */
7940 if (arm_arch4)
7942 if (mode == HImode
7943 || mode == HFmode
7944 || (outer == SIGN_EXTEND && mode == QImode))
7945 range = 256;
7946 else
7947 range = 4096;
7949 else
7950 range = (mode == HImode || mode == HFmode) ? 4095 : 4096;
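 /* For illustration: on arm_arch4 targets an SImode access thus accepts
    immediate offsets in [-4095, +4095], while HImode, HFmode and
    sign-extended QImode accesses are limited to [-255, +255].  */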
7952 return (code == CONST_INT
7953 && INTVAL (index) < range
7954 && INTVAL (index) > -range);
7957 /* Return true if OP is a valid index scaling factor for Thumb-2 address
7958 index operand. i.e. 1, 2, 4 or 8. */
7959 static bool
7960 thumb2_index_mul_operand (rtx op)
7962 HOST_WIDE_INT val;
7964 if (!CONST_INT_P (op))
7965 return false;
7967 val = INTVAL(op);
7968 return (val == 1 || val == 2 || val == 4 || val == 8);
7971 /* Return nonzero if INDEX is a valid Thumb-2 address index operand. */
7972 static int
7973 thumb2_legitimate_index_p (machine_mode mode, rtx index, int strict_p)
7975 enum rtx_code code = GET_CODE (index);
7977 /* ??? Combine arm and thumb2 coprocessor addressing modes. */
7978 /* Standard coprocessor addressing modes. */
7979 if (TARGET_HARD_FLOAT
7980 && (mode == SFmode || mode == DFmode))
7981 return (code == CONST_INT && INTVAL (index) < 1024
7982 /* Thumb-2 allows only > -256 index range for its core register
7983 load/stores. Since we allow SF/DF in core registers, we have
7984 to use the intersection between -256~4096 (core) and -1024~1024
7985 (coprocessor). */
7986 && INTVAL (index) > -256
7987 && (INTVAL (index) & 3) == 0);
7989 if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
7991 /* For DImode assume values will usually live in core regs
7992 and only allow LDRD addressing modes. */
7993 if (!TARGET_LDRD || mode != DImode)
7994 return (code == CONST_INT
7995 && INTVAL (index) < 1024
7996 && INTVAL (index) > -1024
7997 && (INTVAL (index) & 3) == 0);
8000 /* For quad modes, we restrict the constant offset to be slightly less
8001 than what the instruction format permits. We do this because for
8002 quad mode moves, we will actually decompose them into two separate
8003 double-mode reads or writes. INDEX must therefore be a valid
8004 (double-mode) offset and so should INDEX+8. */
8005 if (TARGET_NEON && VALID_NEON_QREG_MODE (mode))
8006 return (code == CONST_INT
8007 && INTVAL (index) < 1016
8008 && INTVAL (index) > -1024
8009 && (INTVAL (index) & 3) == 0);
8011 /* We have no such constraint on double mode offsets, so we permit the
8012 full range of the instruction format. */
8013 if (TARGET_NEON && VALID_NEON_DREG_MODE (mode))
8014 return (code == CONST_INT
8015 && INTVAL (index) < 1024
8016 && INTVAL (index) > -1024
8017 && (INTVAL (index) & 3) == 0);
8019 if (arm_address_register_rtx_p (index, strict_p)
8020 && (GET_MODE_SIZE (mode) <= 4))
8021 return 1;
8023 if (mode == DImode || mode == DFmode)
8025 if (code == CONST_INT)
8027 HOST_WIDE_INT val = INTVAL (index);
8028 /* ??? Can we assume ldrd for thumb2? */
8029 /* Thumb-2 ldrd only has reg+const addressing modes. */
8030 /* ldrd supports offsets of +-1020.
8031 However the ldr fallback does not. */
8032 return val > -256 && val < 256 && (val & 3) == 0;
8034 else
8035 return 0;
8038 if (code == MULT)
8040 rtx xiop0 = XEXP (index, 0);
8041 rtx xiop1 = XEXP (index, 1);
8043 return ((arm_address_register_rtx_p (xiop0, strict_p)
8044 && thumb2_index_mul_operand (xiop1))
8045 || (arm_address_register_rtx_p (xiop1, strict_p)
8046 && thumb2_index_mul_operand (xiop0)));
8048 else if (code == ASHIFT)
8050 rtx op = XEXP (index, 1);
8052 return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
8053 && CONST_INT_P (op)
8054 && INTVAL (op) > 0
8055 && INTVAL (op) <= 3);
8058 return (code == CONST_INT
8059 && INTVAL (index) < 4096
8060 && INTVAL (index) > -256);
8063 /* Return nonzero if X is valid as a 16-bit Thumb state base register. */
8064 static int
8065 thumb1_base_register_rtx_p (rtx x, machine_mode mode, int strict_p)
8067 int regno;
8069 if (!REG_P (x))
8070 return 0;
8072 regno = REGNO (x);
8074 if (strict_p)
8075 return THUMB1_REGNO_MODE_OK_FOR_BASE_P (regno, mode);
8077 return (regno <= LAST_LO_REGNUM
8078 || regno > LAST_VIRTUAL_REGISTER
8079 || regno == FRAME_POINTER_REGNUM
8080 || (GET_MODE_SIZE (mode) >= 4
8081 && (regno == STACK_POINTER_REGNUM
8082 || regno >= FIRST_PSEUDO_REGISTER
8083 || x == hard_frame_pointer_rtx
8084 || x == arg_pointer_rtx)));
8087 /* Return nonzero if x is a legitimate index register. This is the case
8088 for any base register that can access a QImode object. */
8089 inline static int
8090 thumb1_index_register_rtx_p (rtx x, int strict_p)
8092 return thumb1_base_register_rtx_p (x, QImode, strict_p);
8095 /* Return nonzero if x is a legitimate 16-bit Thumb-state address.
8097 The AP may be eliminated to either the SP or the FP, so we use the
8098 least common denominator, e.g. SImode, and offsets from 0 to 64.
8100 ??? Verify whether the above is the right approach.
8102 ??? Also, the FP may be eliminated to the SP, so perhaps that
8103 needs special handling also.
8105 ??? Look at how the mips16 port solves this problem. It probably uses
8106 better ways to solve some of these problems.
8108 Although it is not incorrect, we don't accept QImode and HImode
8109 addresses based on the frame pointer or arg pointer until the
8110 reload pass starts. This is so that eliminating such addresses
8111 into stack based ones won't produce impossible code. */
8112 int
8113 thumb1_legitimate_address_p (machine_mode mode, rtx x, int strict_p)
8115 if (TARGET_HAVE_MOVT && can_avoid_literal_pool_for_label_p (x))
8116 return 0;
8118 /* ??? Not clear if this is right. Experiment. */
8119 if (GET_MODE_SIZE (mode) < 4
8120 && !(reload_in_progress || reload_completed)
8121 && (reg_mentioned_p (frame_pointer_rtx, x)
8122 || reg_mentioned_p (arg_pointer_rtx, x)
8123 || reg_mentioned_p (virtual_incoming_args_rtx, x)
8124 || reg_mentioned_p (virtual_outgoing_args_rtx, x)
8125 || reg_mentioned_p (virtual_stack_dynamic_rtx, x)
8126 || reg_mentioned_p (virtual_stack_vars_rtx, x)))
8127 return 0;
8129 /* Accept any base register. SP only in SImode or larger. */
8130 else if (thumb1_base_register_rtx_p (x, mode, strict_p))
8131 return 1;
8133 /* This is PC relative data before arm_reorg runs. */
8134 else if (GET_MODE_SIZE (mode) >= 4 && CONSTANT_P (x)
8135 && GET_CODE (x) == SYMBOL_REF
8136 && CONSTANT_POOL_ADDRESS_P (x) && !flag_pic)
8137 return 1;
8139 /* This is PC relative data after arm_reorg runs. */
8140 else if ((GET_MODE_SIZE (mode) >= 4 || mode == HFmode)
8141 && reload_completed
8142 && (GET_CODE (x) == LABEL_REF
8143 || (GET_CODE (x) == CONST
8144 && GET_CODE (XEXP (x, 0)) == PLUS
8145 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
8146 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
8147 return 1;
8149 /* Post-inc indexing only supported for SImode and larger. */
8150 else if (GET_CODE (x) == POST_INC && GET_MODE_SIZE (mode) >= 4
8151 && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p))
8152 return 1;
8154 else if (GET_CODE (x) == PLUS)
8156 /* REG+REG address can be any two index registers. */
8157 /* We disallow FRAME+REG addressing since we know that FRAME
8158 will be replaced with STACK, and SP relative addressing only
8159 permits SP+OFFSET. */
8160 if (GET_MODE_SIZE (mode) <= 4
8161 && XEXP (x, 0) != frame_pointer_rtx
8162 && XEXP (x, 1) != frame_pointer_rtx
8163 && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
8164 && (thumb1_index_register_rtx_p (XEXP (x, 1), strict_p)
8165 || (!strict_p && will_be_in_index_register (XEXP (x, 1)))))
8166 return 1;
8168 /* REG+const has 5-7 bit offset for non-SP registers. */
8169 else if ((thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
8170 || XEXP (x, 0) == arg_pointer_rtx)
8171 && CONST_INT_P (XEXP (x, 1))
8172 && thumb_legitimate_offset_p (mode, INTVAL (XEXP (x, 1))))
8173 return 1;
8175 /* REG+const has 10-bit offset for SP, but only SImode and
8176 larger is supported. */
8177 /* ??? Should probably check for DI/DFmode overflow here
8178 just like GO_IF_LEGITIMATE_OFFSET does. */
8179 else if (REG_P (XEXP (x, 0))
8180 && REGNO (XEXP (x, 0)) == STACK_POINTER_REGNUM
8181 && GET_MODE_SIZE (mode) >= 4
8182 && CONST_INT_P (XEXP (x, 1))
8183 && INTVAL (XEXP (x, 1)) >= 0
8184 && INTVAL (XEXP (x, 1)) + GET_MODE_SIZE (mode) <= 1024
8185 && (INTVAL (XEXP (x, 1)) & 3) == 0)
8186 return 1;
8188 else if (REG_P (XEXP (x, 0))
8189 && (REGNO (XEXP (x, 0)) == FRAME_POINTER_REGNUM
8190 || REGNO (XEXP (x, 0)) == ARG_POINTER_REGNUM
8191 || (REGNO (XEXP (x, 0)) >= FIRST_VIRTUAL_REGISTER
8192 && REGNO (XEXP (x, 0))
8193 <= LAST_VIRTUAL_POINTER_REGISTER))
8194 && GET_MODE_SIZE (mode) >= 4
8195 && CONST_INT_P (XEXP (x, 1))
8196 && (INTVAL (XEXP (x, 1)) & 3) == 0)
8197 return 1;
8200 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
8201 && GET_MODE_SIZE (mode) == 4
8202 && GET_CODE (x) == SYMBOL_REF
8203 && CONSTANT_POOL_ADDRESS_P (x)
8204 && ! (flag_pic
8205 && symbol_mentioned_p (get_pool_constant (x))
8206 && ! pcrel_constant_p (get_pool_constant (x))))
8207 return 1;
8209 return 0;
8212 /* Return nonzero if VAL can be used as an offset in a Thumb-state address
8213 instruction of mode MODE. */
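 /* For illustration: QImode accepts offsets 0..31, HImode accepts even
    offsets 0..62, and SImode (or larger) accepts word-aligned offsets up to
    128 minus the mode size, e.g. 0..124 for SImode.  */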
8214 int
8215 thumb_legitimate_offset_p (machine_mode mode, HOST_WIDE_INT val)
8217 switch (GET_MODE_SIZE (mode))
8219 case 1:
8220 return val >= 0 && val < 32;
8222 case 2:
8223 return val >= 0 && val < 64 && (val & 1) == 0;
8225 default:
8226 return (val >= 0
8227 && (val + GET_MODE_SIZE (mode)) <= 128
8228 && (val & 3) == 0);
8232 bool
8233 arm_legitimate_address_p (machine_mode mode, rtx x, bool strict_p)
8235 if (TARGET_ARM)
8236 return arm_legitimate_address_outer_p (mode, x, SET, strict_p);
8237 else if (TARGET_THUMB2)
8238 return thumb2_legitimate_address_p (mode, x, strict_p);
8239 else /* if (TARGET_THUMB1) */
8240 return thumb1_legitimate_address_p (mode, x, strict_p);
8243 /* Worker function for TARGET_PREFERRED_RELOAD_CLASS.
8245 Given an rtx X being reloaded into a reg required to be
8246 in class CLASS, return the class of reg to actually use.
8247 In general this is just CLASS, but for the Thumb core registers and
8248 immediate constants we prefer a LO_REGS class or a subset. */
8250 static reg_class_t
8251 arm_preferred_reload_class (rtx x ATTRIBUTE_UNUSED, reg_class_t rclass)
8253 if (TARGET_32BIT)
8254 return rclass;
8255 else
8257 if (rclass == GENERAL_REGS)
8258 return LO_REGS;
8259 else
8260 return rclass;
8264 /* Build the SYMBOL_REF for __tls_get_addr. */
8266 static GTY(()) rtx tls_get_addr_libfunc;
8268 static rtx
8269 get_tls_get_addr (void)
8271 if (!tls_get_addr_libfunc)
8272 tls_get_addr_libfunc = init_one_libfunc ("__tls_get_addr");
8273 return tls_get_addr_libfunc;
8277 arm_load_tp (rtx target)
8279 if (!target)
8280 target = gen_reg_rtx (SImode);
8282 if (TARGET_HARD_TP)
8284 /* Can return in any reg. */
8285 emit_insn (gen_load_tp_hard (target));
8287 else
8289 /* Always returned in r0. Immediately copy the result into a pseudo;
8290 otherwise other uses of r0 (e.g. setting up function arguments) may
8291 clobber the value. */
8293 rtx tmp;
8295 emit_insn (gen_load_tp_soft ());
8297 tmp = gen_rtx_REG (SImode, R0_REGNUM);
8298 emit_move_insn (target, tmp);
8300 return target;
8303 static rtx
8304 load_tls_operand (rtx x, rtx reg)
8306 rtx tmp;
8308 if (reg == NULL_RTX)
8309 reg = gen_reg_rtx (SImode);
8311 tmp = gen_rtx_CONST (SImode, x);
8313 emit_move_insn (reg, tmp);
8315 return reg;
8318 static rtx_insn *
8319 arm_call_tls_get_addr (rtx x, rtx reg, rtx *valuep, int reloc)
8321 rtx label, labelno, sum;
8323 gcc_assert (reloc != TLS_DESCSEQ);
8324 start_sequence ();
8326 labelno = GEN_INT (pic_labelno++);
8327 label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
8328 label = gen_rtx_CONST (VOIDmode, label);
8330 sum = gen_rtx_UNSPEC (Pmode,
8331 gen_rtvec (4, x, GEN_INT (reloc), label,
8332 GEN_INT (TARGET_ARM ? 8 : 4)),
8333 UNSPEC_TLS);
8334 reg = load_tls_operand (sum, reg);
8336 if (TARGET_ARM)
8337 emit_insn (gen_pic_add_dot_plus_eight (reg, reg, labelno));
8338 else
8339 emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
8341 *valuep = emit_library_call_value (get_tls_get_addr (), NULL_RTX,
8342 LCT_PURE, /* LCT_CONST? */
8343 Pmode, 1, reg, Pmode);
8345 rtx_insn *insns = get_insns ();
8346 end_sequence ();
8348 return insns;
8351 static rtx
8352 arm_tls_descseq_addr (rtx x, rtx reg)
8354 rtx labelno = GEN_INT (pic_labelno++);
8355 rtx label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
8356 rtx sum = gen_rtx_UNSPEC (Pmode,
8357 gen_rtvec (4, x, GEN_INT (TLS_DESCSEQ),
8358 gen_rtx_CONST (VOIDmode, label),
8359 GEN_INT (!TARGET_ARM)),
8360 UNSPEC_TLS);
8361 rtx reg0 = load_tls_operand (sum, gen_rtx_REG (SImode, R0_REGNUM));
8363 emit_insn (gen_tlscall (x, labelno));
8364 if (!reg)
8365 reg = gen_reg_rtx (SImode);
8366 else
8367 gcc_assert (REGNO (reg) != R0_REGNUM);
8369 emit_move_insn (reg, reg0);
8371 return reg;
8375 legitimize_tls_address (rtx x, rtx reg)
8377 rtx dest, tp, label, labelno, sum, ret, eqv, addend;
8378 rtx_insn *insns;
8379 unsigned int model = SYMBOL_REF_TLS_MODEL (x);
8381 switch (model)
8383 case TLS_MODEL_GLOBAL_DYNAMIC:
8384 if (TARGET_GNU2_TLS)
8386 reg = arm_tls_descseq_addr (x, reg);
8388 tp = arm_load_tp (NULL_RTX);
8390 dest = gen_rtx_PLUS (Pmode, tp, reg);
8392 else
8394 /* Original scheme */
8395 insns = arm_call_tls_get_addr (x, reg, &ret, TLS_GD32);
8396 dest = gen_reg_rtx (Pmode);
8397 emit_libcall_block (insns, dest, ret, x);
8399 return dest;
8401 case TLS_MODEL_LOCAL_DYNAMIC:
8402 if (TARGET_GNU2_TLS)
8404 reg = arm_tls_descseq_addr (x, reg);
8406 tp = arm_load_tp (NULL_RTX);
8408 dest = gen_rtx_PLUS (Pmode, tp, reg);
8410 else
8412 insns = arm_call_tls_get_addr (x, reg, &ret, TLS_LDM32);
8414 /* Attach a unique REG_EQUIV, to allow the RTL optimizers to
8415 share the LDM result with other LD model accesses. */
8416 eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const1_rtx),
8417 UNSPEC_TLS);
8418 dest = gen_reg_rtx (Pmode);
8419 emit_libcall_block (insns, dest, ret, eqv);
8421 /* Load the addend. */
8422 addend = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, x,
8423 GEN_INT (TLS_LDO32)),
8424 UNSPEC_TLS);
8425 addend = force_reg (SImode, gen_rtx_CONST (SImode, addend));
8426 dest = gen_rtx_PLUS (Pmode, dest, addend);
8428 return dest;
8430 case TLS_MODEL_INITIAL_EXEC:
8431 labelno = GEN_INT (pic_labelno++);
8432 label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
8433 label = gen_rtx_CONST (VOIDmode, label);
8434 sum = gen_rtx_UNSPEC (Pmode,
8435 gen_rtvec (4, x, GEN_INT (TLS_IE32), label,
8436 GEN_INT (TARGET_ARM ? 8 : 4)),
8437 UNSPEC_TLS);
8438 reg = load_tls_operand (sum, reg);
8440 if (TARGET_ARM)
8441 emit_insn (gen_tls_load_dot_plus_eight (reg, reg, labelno));
8442 else if (TARGET_THUMB2)
8443 emit_insn (gen_tls_load_dot_plus_four (reg, NULL, reg, labelno));
8444 else
8446 emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
8447 emit_move_insn (reg, gen_const_mem (SImode, reg));
8450 tp = arm_load_tp (NULL_RTX);
8452 return gen_rtx_PLUS (Pmode, tp, reg);
8454 case TLS_MODEL_LOCAL_EXEC:
8455 tp = arm_load_tp (NULL_RTX);
8457 reg = gen_rtx_UNSPEC (Pmode,
8458 gen_rtvec (2, x, GEN_INT (TLS_LE32)),
8459 UNSPEC_TLS);
8460 reg = force_reg (SImode, gen_rtx_CONST (SImode, reg));
8462 return gen_rtx_PLUS (Pmode, tp, reg);
8464 default:
8465 abort ();
8469 /* Try machine-dependent ways of modifying an illegitimate address
8470 to be legitimate. If we find one, return the new, valid address. */
8472 arm_legitimize_address (rtx x, rtx orig_x, machine_mode mode)
8474 if (arm_tls_referenced_p (x))
8476 rtx addend = NULL;
8478 if (GET_CODE (x) == CONST && GET_CODE (XEXP (x, 0)) == PLUS)
8480 addend = XEXP (XEXP (x, 0), 1);
8481 x = XEXP (XEXP (x, 0), 0);
8484 if (GET_CODE (x) != SYMBOL_REF)
8485 return x;
8487 gcc_assert (SYMBOL_REF_TLS_MODEL (x) != 0);
8489 x = legitimize_tls_address (x, NULL_RTX);
8491 if (addend)
8493 x = gen_rtx_PLUS (SImode, x, addend);
8494 orig_x = x;
8496 else
8497 return x;
8500 if (!TARGET_ARM)
8502 /* TODO: legitimize_address for Thumb2. */
8503 if (TARGET_THUMB2)
8504 return x;
8505 return thumb_legitimize_address (x, orig_x, mode);
8508 if (GET_CODE (x) == PLUS)
8510 rtx xop0 = XEXP (x, 0);
8511 rtx xop1 = XEXP (x, 1);
8513 if (CONSTANT_P (xop0) && !symbol_mentioned_p (xop0))
8514 xop0 = force_reg (SImode, xop0);
8516 if (CONSTANT_P (xop1) && !CONST_INT_P (xop1)
8517 && !symbol_mentioned_p (xop1))
8518 xop1 = force_reg (SImode, xop1);
8520 if (ARM_BASE_REGISTER_RTX_P (xop0)
8521 && CONST_INT_P (xop1))
8523 HOST_WIDE_INT n, low_n;
8524 rtx base_reg, val;
8525 n = INTVAL (xop1);
8527 /* VFP addressing modes actually allow greater offsets, but for
8528 now we just stick with the lowest common denominator. */
8529 if (mode == DImode || mode == DFmode)
8531 low_n = n & 0x0f;
8532 n &= ~0x0f;
8533 if (low_n > 4)
8535 n += 16;
8536 low_n -= 16;
8539 else
8541 low_n = ((mode) == TImode ? 0
8542 : n >= 0 ? (n & 0xfff) : -((-n) & 0xfff));
8543 n -= low_n;
8546 base_reg = gen_reg_rtx (SImode);
8547 val = force_operand (plus_constant (Pmode, xop0, n), NULL_RTX);
8548 emit_move_insn (base_reg, val);
8549 x = plus_constant (Pmode, base_reg, low_n);
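 /* For illustration: with this splitting an offset of 12 is rebased as
    (xop0 + 16) - 4 and an offset of 4084 as (xop0 + 4080) + 4; the bulk of
    the offset goes into the new base register and only a small residual is
    left in the final address.  */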
8551 else if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
8552 x = gen_rtx_PLUS (SImode, xop0, xop1);
8555 /* XXX We don't allow MINUS any more -- see comment in
8556 arm_legitimate_address_outer_p (). */
8557 else if (GET_CODE (x) == MINUS)
8559 rtx xop0 = XEXP (x, 0);
8560 rtx xop1 = XEXP (x, 1);
8562 if (CONSTANT_P (xop0))
8563 xop0 = force_reg (SImode, xop0);
8565 if (CONSTANT_P (xop1) && ! symbol_mentioned_p (xop1))
8566 xop1 = force_reg (SImode, xop1);
8568 if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
8569 x = gen_rtx_MINUS (SImode, xop0, xop1);
8572 /* Make sure to take full advantage of the pre-indexed addressing mode
8573 with absolute addresses which often allows for the base register to
8574 be factorized for multiple adjacent memory references, and it might
8575 even allow the minipool to be avoided entirely. */
8576 else if (CONST_INT_P (x) && optimize > 0)
8578 unsigned int bits;
8579 HOST_WIDE_INT mask, base, index;
8580 rtx base_reg;
8582 /* ldr and ldrb can use a 12-bit index, ldrsb and the rest can only
8583 use an 8-bit index. So let's use a 12-bit index for SImode only and
8584 hope that arm_gen_constant will enable ldrb to use more bits. */
8585 bits = (mode == SImode) ? 12 : 8;
8586 mask = (1 << bits) - 1;
8587 base = INTVAL (x) & ~mask;
8588 index = INTVAL (x) & mask;
8589 if (bit_count (base & 0xffffffff) > (32 - bits)/2)
8591 /* It'll most probably be more efficient to generate the base
8592 with more bits set and use a negative index instead. */
8593 base |= mask;
8594 index -= mask;
8596 base_reg = force_reg (SImode, GEN_INT (base));
8597 x = plus_constant (Pmode, base_reg, index);
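 /* For illustration (SImode, so a 12-bit index): the hypothetical constant
    address 0x12345678 is split into a base of 0x12345000, which is forced
    into a register, plus an index of 0x678.  If the base had more than
    (32 - 12)/2 = 10 bits set, it would instead be rounded up to 0x12345fff
    and the index adjusted to -0x987.  */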
8600 if (flag_pic)
8602 /* We need to find and carefully transform any SYMBOL and LABEL
8603 references; so go back to the original address expression. */
8604 rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX);
8606 if (new_x != orig_x)
8607 x = new_x;
8610 return x;
8614 /* Try machine-dependent ways of modifying an illegitimate Thumb address
8615 to be legitimate. If we find one, return the new, valid address. */
8617 thumb_legitimize_address (rtx x, rtx orig_x, machine_mode mode)
8619 if (GET_CODE (x) == PLUS
8620 && CONST_INT_P (XEXP (x, 1))
8621 && (INTVAL (XEXP (x, 1)) >= 32 * GET_MODE_SIZE (mode)
8622 || INTVAL (XEXP (x, 1)) < 0))
8624 rtx xop0 = XEXP (x, 0);
8625 rtx xop1 = XEXP (x, 1);
8626 HOST_WIDE_INT offset = INTVAL (xop1);
8628 /* Try and fold the offset into a biasing of the base register and
8629 then offsetting that. Don't do this when optimizing for space
8630 since it can cause too many CSEs. */
8631 if (optimize_size && offset >= 0
8632 && offset < 256 + 31 * GET_MODE_SIZE (mode))
8634 HOST_WIDE_INT delta;
8636 if (offset >= 256)
8637 delta = offset - (256 - GET_MODE_SIZE (mode));
8638 else if (offset < 32 * GET_MODE_SIZE (mode) + 8)
8639 delta = 31 * GET_MODE_SIZE (mode);
8640 else
8641 delta = offset & (~31 * GET_MODE_SIZE (mode));
8643 xop0 = force_operand (plus_constant (Pmode, xop0, offset - delta),
8644 NULL_RTX);
8645 x = plus_constant (Pmode, xop0, delta);
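 /* For illustration (SImode, when optimizing for size): an offset of 300 is
    handled as (xop0 + 252) + 48, so the residual offset of 48 fits the
    word-aligned 0..124 range of the Thumb-1 reg+imm5 form.  */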
8647 else if (offset < 0 && offset > -256)
8648 /* Small negative offsets are best done with a subtract before the
8649 dereference; forcing these into a register normally takes two
8650 instructions. */
8651 x = force_operand (x, NULL_RTX);
8652 else
8654 /* For the remaining cases, force the constant into a register. */
8655 xop1 = force_reg (SImode, xop1);
8656 x = gen_rtx_PLUS (SImode, xop0, xop1);
8659 else if (GET_CODE (x) == PLUS
8660 && s_register_operand (XEXP (x, 1), SImode)
8661 && !s_register_operand (XEXP (x, 0), SImode))
8663 rtx xop0 = force_operand (XEXP (x, 0), NULL_RTX);
8665 x = gen_rtx_PLUS (SImode, xop0, XEXP (x, 1));
8668 if (flag_pic)
8670 /* We need to find and carefully transform any SYMBOL and LABEL
8671 references; so go back to the original address expression. */
8672 rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX);
8674 if (new_x != orig_x)
8675 x = new_x;
8678 return x;
8681 /* Return TRUE if X contains any TLS symbol references. */
8683 bool
8684 arm_tls_referenced_p (rtx x)
8686 if (! TARGET_HAVE_TLS)
8687 return false;
8689 subrtx_iterator::array_type array;
8690 FOR_EACH_SUBRTX (iter, array, x, ALL)
8692 const_rtx x = *iter;
8693 if (GET_CODE (x) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (x) != 0)
8695 /* ARM currently does not provide relocations to encode TLS variables
8696 into AArch32 instructions, only into data, so there is currently no
8697 way to implement these if the literal pool is disabled. */
8698 if (arm_disable_literal_pool)
8699 sorry ("accessing thread-local storage is not currently supported "
8700 "with -mpure-code or -mslow-flash-data");
8702 return true;
8705 /* Don't recurse into UNSPEC_TLS looking for TLS symbols; these are
8706 TLS offsets, not real symbol references. */
8707 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
8708 iter.skip_subrtxes ();
8710 return false;
8713 /* Implement TARGET_LEGITIMATE_CONSTANT_P.
8715 On the ARM, allow any integer (invalid ones are removed later by insn
8716 patterns), nice doubles and symbol_refs which refer to the function's
8717 constant pool XXX.
8719 When generating PIC, allow anything. */
8721 static bool
8722 arm_legitimate_constant_p_1 (machine_mode, rtx x)
8724 return flag_pic || !label_mentioned_p (x);
8727 static bool
8728 thumb_legitimate_constant_p (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
8730 /* Splitters for TARGET_USE_MOVT call arm_emit_movpair which creates high
8731 RTX. These RTX must therefore be allowed for Thumb-1 so that when run
8732 for ARMv8-M Baseline or later the result is valid. */
8733 if (TARGET_HAVE_MOVT && GET_CODE (x) == HIGH)
8734 x = XEXP (x, 0);
8736 return (CONST_INT_P (x)
8737 || CONST_DOUBLE_P (x)
8738 || CONSTANT_ADDRESS_P (x)
8739 || (TARGET_HAVE_MOVT && GET_CODE (x) == SYMBOL_REF)
8740 || flag_pic);
8743 static bool
8744 arm_legitimate_constant_p (machine_mode mode, rtx x)
8746 return (!arm_cannot_force_const_mem (mode, x)
8747 && (TARGET_32BIT
8748 ? arm_legitimate_constant_p_1 (mode, x)
8749 : thumb_legitimate_constant_p (mode, x)));
8752 /* Implement TARGET_CANNOT_FORCE_CONST_MEM. */
8754 static bool
8755 arm_cannot_force_const_mem (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
8757 rtx base, offset;
8759 if (ARM_OFFSETS_MUST_BE_WITHIN_SECTIONS_P)
8761 split_const (x, &base, &offset);
8762 if (GET_CODE (base) == SYMBOL_REF
8763 && !offset_within_block_p (base, INTVAL (offset)))
8764 return true;
8766 return arm_tls_referenced_p (x);
8769 #define REG_OR_SUBREG_REG(X) \
8770 (REG_P (X) \
8771 || (GET_CODE (X) == SUBREG && REG_P (SUBREG_REG (X))))
8773 #define REG_OR_SUBREG_RTX(X) \
8774 (REG_P (X) ? (X) : SUBREG_REG (X))
8776 static inline int
8777 thumb1_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
8779 machine_mode mode = GET_MODE (x);
8780 int total, words;
8782 switch (code)
8784 case ASHIFT:
8785 case ASHIFTRT:
8786 case LSHIFTRT:
8787 case ROTATERT:
8788 return (mode == SImode) ? COSTS_N_INSNS (1) : COSTS_N_INSNS (2);
8790 case PLUS:
8791 case MINUS:
8792 case COMPARE:
8793 case NEG:
8794 case NOT:
8795 return COSTS_N_INSNS (1);
8797 case MULT:
8798 if (arm_arch6m && arm_m_profile_small_mul)
8799 return COSTS_N_INSNS (32);
8801 if (CONST_INT_P (XEXP (x, 1)))
8803 int cycles = 0;
8804 unsigned HOST_WIDE_INT i = INTVAL (XEXP (x, 1));
8806 while (i)
8808 i >>= 2;
8809 cycles++;
8811 return COSTS_N_INSNS (2) + cycles;
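 /* For illustration: a constant multiplier of 100 takes four iterations of
    the loop above (100 -> 25 -> 6 -> 1 -> 0), giving a cost of
    COSTS_N_INSNS (2) + 4.  */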
8813 return COSTS_N_INSNS (1) + 16;
8815 case SET:
8816 /* A SET doesn't have a mode, so let's look at the SET_DEST to get
8817 the mode. */
8818 words = ARM_NUM_INTS (GET_MODE_SIZE (GET_MODE (SET_DEST (x))));
8819 return (COSTS_N_INSNS (words)
8820 + 4 * ((MEM_P (SET_SRC (x)))
8821 + MEM_P (SET_DEST (x))));
8823 case CONST_INT:
8824 if (outer == SET)
8826 if (UINTVAL (x) < 256
8827 /* 16-bit constant. */
8828 || (TARGET_HAVE_MOVT && !(INTVAL (x) & 0xffff0000)))
8829 return 0;
8830 if (thumb_shiftable_const (INTVAL (x)))
8831 return COSTS_N_INSNS (2);
8832 return COSTS_N_INSNS (3);
8834 else if ((outer == PLUS || outer == COMPARE)
8835 && INTVAL (x) < 256 && INTVAL (x) > -256)
8836 return 0;
8837 else if ((outer == IOR || outer == XOR || outer == AND)
8838 && INTVAL (x) < 256 && INTVAL (x) >= -256)
8839 return COSTS_N_INSNS (1);
8840 else if (outer == AND)
8842 int i;
8843 /* This duplicates the tests in the andsi3 expander. */
8844 for (i = 9; i <= 31; i++)
8845 if ((HOST_WIDE_INT_1 << i) - 1 == INTVAL (x)
8846 || (HOST_WIDE_INT_1 << i) - 1 == ~INTVAL (x))
8847 return COSTS_N_INSNS (2);
8849 else if (outer == ASHIFT || outer == ASHIFTRT
8850 || outer == LSHIFTRT)
8851 return 0;
8852 return COSTS_N_INSNS (2);
8854 case CONST:
8855 case CONST_DOUBLE:
8856 case LABEL_REF:
8857 case SYMBOL_REF:
8858 return COSTS_N_INSNS (3);
8860 case UDIV:
8861 case UMOD:
8862 case DIV:
8863 case MOD:
8864 return 100;
8866 case TRUNCATE:
8867 return 99;
8869 case AND:
8870 case XOR:
8871 case IOR:
8872 /* XXX guess. */
8873 return 8;
8875 case MEM:
8876 /* XXX another guess. */
8877 /* Memory costs quite a lot for the first word, but subsequent words
8878 load at the equivalent of a single insn each. */
8879 return (10 + 4 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
8880 + ((GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
8881 ? 4 : 0));
8883 case IF_THEN_ELSE:
8884 /* XXX a guess. */
8885 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
8886 return 14;
8887 return 2;
8889 case SIGN_EXTEND:
8890 case ZERO_EXTEND:
8891 total = mode == DImode ? COSTS_N_INSNS (1) : 0;
8892 total += thumb1_rtx_costs (XEXP (x, 0), GET_CODE (XEXP (x, 0)), code);
8894 if (mode == SImode)
8895 return total;
8897 if (arm_arch6)
8898 return total + COSTS_N_INSNS (1);
8900 /* Assume a two-shift sequence. Increase the cost slightly so
8901 we prefer actual shifts over an extend operation. */
8902 return total + 1 + COSTS_N_INSNS (2);
8904 default:
8905 return 99;
8909 /* Estimates the size cost of thumb1 instructions.
8910 For now most of the code is copied from thumb1_rtx_costs. We need
8911 finer-grained tuning when we have more related test cases. */
8912 static inline int
8913 thumb1_size_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
8915 machine_mode mode = GET_MODE (x);
8916 int words, cost;
8918 switch (code)
8920 case ASHIFT:
8921 case ASHIFTRT:
8922 case LSHIFTRT:
8923 case ROTATERT:
8924 return (mode == SImode) ? COSTS_N_INSNS (1) : COSTS_N_INSNS (2);
8926 case PLUS:
8927 case MINUS:
8928 /* Thumb-1 needs two instructions to implement the shiftadd/shiftsub0/shiftsub1
8929 patterns created by RTL expansion, especially during the expansion of
8930 multiplication. */
8931 if ((GET_CODE (XEXP (x, 0)) == MULT
8932 && power_of_two_operand (XEXP (XEXP (x,0),1), SImode))
8933 || (GET_CODE (XEXP (x, 1)) == MULT
8934 && power_of_two_operand (XEXP (XEXP (x, 1), 1), SImode)))
8935 return COSTS_N_INSNS (2);
8936 /* Fall through. */
8937 case COMPARE:
8938 case NEG:
8939 case NOT:
8940 return COSTS_N_INSNS (1);
8942 case MULT:
8943 if (CONST_INT_P (XEXP (x, 1)))
8945 /* The Thumb-1 mul instruction can't operate on a constant. We must load it
8946 into a register first. */
8947 int const_size = thumb1_size_rtx_costs (XEXP (x, 1), CONST_INT, SET);
8948 /* For targets that have a very small, high-latency multiply
8949 unit, we prefer to synthesize the mult with up to 5 instructions,
8950 giving a good balance between size and performance. */
8951 if (arm_arch6m && arm_m_profile_small_mul)
8952 return COSTS_N_INSNS (5);
8953 else
8954 return COSTS_N_INSNS (1) + const_size;
8956 return COSTS_N_INSNS (1);
8958 case SET:
8959 /* A SET doesn't have a mode, so let's look at the SET_DEST to get
8960 the mode. */
8961 words = ARM_NUM_INTS (GET_MODE_SIZE (GET_MODE (SET_DEST (x))));
8962 cost = COSTS_N_INSNS (words);
8963 if (satisfies_constraint_J (SET_SRC (x))
8964 || satisfies_constraint_K (SET_SRC (x))
8965 /* Too big an immediate for a 2-byte mov, using MOVT. */
8966 || (CONST_INT_P (SET_SRC (x))
8967 && UINTVAL (SET_SRC (x)) >= 256
8968 && TARGET_HAVE_MOVT
8969 && satisfies_constraint_j (SET_SRC (x)))
8970 /* thumb1_movdi_insn. */
8971 || ((words > 1) && MEM_P (SET_SRC (x))))
8972 cost += COSTS_N_INSNS (1);
8973 return cost;
8975 case CONST_INT:
8976 if (outer == SET)
8978 if (UINTVAL (x) < 256)
8979 return COSTS_N_INSNS (1);
8980 /* movw is 4 bytes long. */
8981 if (TARGET_HAVE_MOVT && !(INTVAL (x) & 0xffff0000))
8982 return COSTS_N_INSNS (2);
8983 /* See split "TARGET_THUMB1 && satisfies_constraint_J". */
8984 if (INTVAL (x) >= -255 && INTVAL (x) <= -1)
8985 return COSTS_N_INSNS (2);
8986 /* See split "TARGET_THUMB1 && satisfies_constraint_K". */
8987 if (thumb_shiftable_const (INTVAL (x)))
8988 return COSTS_N_INSNS (2);
8989 return COSTS_N_INSNS (3);
8991 else if ((outer == PLUS || outer == COMPARE)
8992 && INTVAL (x) < 256 && INTVAL (x) > -256)
8993 return 0;
8994 else if ((outer == IOR || outer == XOR || outer == AND)
8995 && INTVAL (x) < 256 && INTVAL (x) >= -256)
8996 return COSTS_N_INSNS (1);
8997 else if (outer == AND)
8999 int i;
9000 /* This duplicates the tests in the andsi3 expander. */
9001 for (i = 9; i <= 31; i++)
9002 if ((HOST_WIDE_INT_1 << i) - 1 == INTVAL (x)
9003 || (HOST_WIDE_INT_1 << i) - 1 == ~INTVAL (x))
9004 return COSTS_N_INSNS (2);
9006 else if (outer == ASHIFT || outer == ASHIFTRT
9007 || outer == LSHIFTRT)
9008 return 0;
9009 return COSTS_N_INSNS (2);
9011 case CONST:
9012 case CONST_DOUBLE:
9013 case LABEL_REF:
9014 case SYMBOL_REF:
9015 return COSTS_N_INSNS (3);
9017 case UDIV:
9018 case UMOD:
9019 case DIV:
9020 case MOD:
9021 return 100;
9023 case TRUNCATE:
9024 return 99;
9026 case AND:
9027 case XOR:
9028 case IOR:
9029 return COSTS_N_INSNS (1);
9031 case MEM:
9032 return (COSTS_N_INSNS (1)
9033 + COSTS_N_INSNS (1)
9034 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
9035 + ((GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
9036 ? COSTS_N_INSNS (1) : 0));
9038 case IF_THEN_ELSE:
9039 /* XXX a guess. */
9040 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
9041 return 14;
9042 return 2;
9044 case ZERO_EXTEND:
9045 /* XXX still guessing. */
9046 switch (GET_MODE (XEXP (x, 0)))
9048 case QImode:
9049 return (1 + (mode == DImode ? 4 : 0)
9050 + (MEM_P (XEXP (x, 0)) ? 10 : 0));
9052 case HImode:
9053 return (4 + (mode == DImode ? 4 : 0)
9054 + (MEM_P (XEXP (x, 0)) ? 10 : 0));
9056 case SImode:
9057 return (1 + (MEM_P (XEXP (x, 0)) ? 10 : 0));
9059 default:
9060 return 99;
9063 default:
9064 return 99;
9068 /* Helper function for arm_rtx_costs. If the operand is a valid shift
9069 operand, then return the operand that is being shifted. If the shift
9070 is not by a constant, then set SHIFT_REG to point to the operand.
9071 Return NULL if OP is not a shifter operand. */
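 /* For illustration: for (mult (reg A) (const_int 4)), the canonical form of
    a left shift by 2 when it appears inside another operation, this returns
    (reg A); for (ashift (reg A) (reg B)) it returns (reg A) and sets
    *SHIFT_REG to (reg B).  */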
9072 static rtx
9073 shifter_op_p (rtx op, rtx *shift_reg)
9075 enum rtx_code code = GET_CODE (op);
9077 if (code == MULT && CONST_INT_P (XEXP (op, 1))
9078 && exact_log2 (INTVAL (XEXP (op, 1))) > 0)
9079 return XEXP (op, 0);
9080 else if (code == ROTATE && CONST_INT_P (XEXP (op, 1)))
9081 return XEXP (op, 0);
9082 else if (code == ROTATERT || code == ASHIFT || code == LSHIFTRT
9083 || code == ASHIFTRT)
9085 if (!CONST_INT_P (XEXP (op, 1)))
9086 *shift_reg = XEXP (op, 1);
9087 return XEXP (op, 0);
9090 return NULL;
9093 static bool
9094 arm_unspec_cost (rtx x, enum rtx_code /* outer_code */, bool speed_p, int *cost)
9096 const struct cpu_cost_table *extra_cost = current_tune->insn_extra_cost;
9097 rtx_code code = GET_CODE (x);
9098 gcc_assert (code == UNSPEC || code == UNSPEC_VOLATILE);
9100 switch (XINT (x, 1))
9102 case UNSPEC_UNALIGNED_LOAD:
9103 /* We can only do unaligned loads into the integer unit, and we can't
9104 use LDM or LDRD. */
9105 *cost = COSTS_N_INSNS (ARM_NUM_REGS (GET_MODE (x)));
9106 if (speed_p)
9107 *cost += (ARM_NUM_REGS (GET_MODE (x)) * extra_cost->ldst.load
9108 + extra_cost->ldst.load_unaligned);
9110 #ifdef NOT_YET
9111 *cost += arm_address_cost (XEXP (XVECEXP (x, 0, 0), 0), GET_MODE (x),
9112 ADDR_SPACE_GENERIC, speed_p);
9113 #endif
9114 return true;
9116 case UNSPEC_UNALIGNED_STORE:
9117 *cost = COSTS_N_INSNS (ARM_NUM_REGS (GET_MODE (x)));
9118 if (speed_p)
9119 *cost += (ARM_NUM_REGS (GET_MODE (x)) * extra_cost->ldst.store
9120 + extra_cost->ldst.store_unaligned);
9122 *cost += rtx_cost (XVECEXP (x, 0, 0), VOIDmode, UNSPEC, 0, speed_p);
9123 #ifdef NOT_YET
9124 *cost += arm_address_cost (XEXP (XVECEXP (x, 0, 0), 0), GET_MODE (x),
9125 ADDR_SPACE_GENERIC, speed_p);
9126 #endif
9127 return true;
9129 case UNSPEC_VRINTZ:
9130 case UNSPEC_VRINTP:
9131 case UNSPEC_VRINTM:
9132 case UNSPEC_VRINTR:
9133 case UNSPEC_VRINTX:
9134 case UNSPEC_VRINTA:
9135 if (speed_p)
9136 *cost += extra_cost->fp[GET_MODE (x) == DFmode].roundint;
9138 return true;
9139 default:
9140 *cost = COSTS_N_INSNS (2);
9141 break;
9143 return true;
9146 /* Cost of a libcall. We assume one insn per argument, an amount for the
9147 call (one insn for -Os) and then one for processing the result. */
9148 #define LIBCALL_COST(N) COSTS_N_INSNS (N + (speed_p ? 18 : 2))
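 /* For illustration: LIBCALL_COST (2) evaluates to COSTS_N_INSNS (20) when
    optimizing for speed and to COSTS_N_INSNS (4) when optimizing for
    size.  */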
9150 #define HANDLE_NARROW_SHIFT_ARITH(OP, IDX) \
9151 do \
9153 shift_op = shifter_op_p (XEXP (x, IDX), &shift_reg); \
9154 if (shift_op != NULL \
9155 && arm_rtx_shift_left_p (XEXP (x, IDX))) \
9157 if (shift_reg) \
9159 if (speed_p) \
9160 *cost += extra_cost->alu.arith_shift_reg; \
9161 *cost += rtx_cost (shift_reg, GET_MODE (shift_reg), \
9162 ASHIFT, 1, speed_p); \
9164 else if (speed_p) \
9165 *cost += extra_cost->alu.arith_shift; \
9167 *cost += (rtx_cost (shift_op, GET_MODE (shift_op), \
9168 ASHIFT, 0, speed_p) \
9169 + rtx_cost (XEXP (x, 1 - IDX), \
9170 GET_MODE (shift_op), \
9171 OP, 1, speed_p)); \
9172 return true; \
9175 while (0);
9177 /* RTX costs. Make an estimate of the cost of executing the operation
9178 X, which is contained within an operation with code OUTER_CODE.
9179 SPEED_P indicates whether the cost desired is the performance cost,
9180 or the size cost. The estimate is stored in COST and the return
9181 value is TRUE if the cost calculation is final, or FALSE if the
9182 caller should recurse through the operands of X to add additional
9183 costs.
9185 We currently make no attempt to model the size savings of Thumb-2
9186 16-bit instructions. At the normal points in compilation where
9187 this code is called we have no measure of whether the condition
9188 flags are live or not, and thus no realistic way to determine what
9189 the size will eventually be. */
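/* For example (illustrative), costing (plus:SI (reg) (reg)) for speed adds
   extra_cost->alu.arith and returns false so that the caller recurses into
   the two register operands, whereas costing (plus:SI (reg) (const_int N))
   returns true because the cost of synthesizing the constant has already
   been folded in via arm_gen_constant.  */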
9190 static bool
9191 arm_rtx_costs_internal (rtx x, enum rtx_code code, enum rtx_code outer_code,
9192 const struct cpu_cost_table *extra_cost,
9193 int *cost, bool speed_p)
9195 machine_mode mode = GET_MODE (x);
9197 *cost = COSTS_N_INSNS (1);
9199 if (TARGET_THUMB1)
9201 if (speed_p)
9202 *cost = thumb1_rtx_costs (x, code, outer_code);
9203 else
9204 *cost = thumb1_size_rtx_costs (x, code, outer_code);
9205 return true;
9208 switch (code)
9210 case SET:
9211 *cost = 0;
9212 /* SET RTXs don't have a mode so we get it from the destination. */
9213 mode = GET_MODE (SET_DEST (x));
9215 if (REG_P (SET_SRC (x))
9216 && REG_P (SET_DEST (x)))
9218 /* Assume that most copies can be done with a single insn,
9219 unless we don't have HW FP, in which case everything
9220 larger than word mode will require two insns. */
9221 *cost = COSTS_N_INSNS (((!TARGET_HARD_FLOAT
9222 && GET_MODE_SIZE (mode) > 4)
9223 || mode == DImode)
9224 ? 2 : 1);
9225 /* Conditional register moves can be encoded
9226 in 16 bits in Thumb mode. */
9227 if (!speed_p && TARGET_THUMB && outer_code == COND_EXEC)
9228 *cost >>= 1;
9230 return true;
9233 if (CONST_INT_P (SET_SRC (x)))
9235 /* Handle CONST_INT here, since the value doesn't have a mode
9236 and we would otherwise be unable to work out the true cost. */
9237 *cost = rtx_cost (SET_DEST (x), GET_MODE (SET_DEST (x)), SET,
9238 0, speed_p);
9239 outer_code = SET;
9240 /* Slightly lower the cost of setting a core reg to a constant.
9241 This helps break up chains and allows for better scheduling. */
9242 if (REG_P (SET_DEST (x))
9243 && REGNO (SET_DEST (x)) <= LR_REGNUM)
9244 *cost -= 1;
9245 x = SET_SRC (x);
9246 /* Immediate moves with an immediate in the range [0, 255] can be
9247 encoded in 16 bits in Thumb mode. */
9248 if (!speed_p && TARGET_THUMB && GET_MODE (x) == SImode
9249 && INTVAL (x) >= 0 && INTVAL (x) <= 255)
9250 *cost >>= 1;
9251 goto const_int_cost;
9254 return false;
9256 case MEM:
9257 /* A memory access costs 1 insn if the mode is small or the address is
9258 a single register; otherwise it costs one insn per word. */
9259 if (REG_P (XEXP (x, 0)))
9260 *cost = COSTS_N_INSNS (1);
9261 else if (flag_pic
9262 && GET_CODE (XEXP (x, 0)) == PLUS
9263 && will_be_in_index_register (XEXP (XEXP (x, 0), 1)))
9264 /* This will be split into two instructions.
9265 See arm.md:calculate_pic_address. */
9266 *cost = COSTS_N_INSNS (2);
9267 else
9268 *cost = COSTS_N_INSNS (ARM_NUM_REGS (mode));
9270 /* For speed optimizations, add the costs of the address and
9271 accessing memory. */
9272 if (speed_p)
9273 #ifdef NOT_YET
9274 *cost += (extra_cost->ldst.load
9275 + arm_address_cost (XEXP (x, 0), mode,
9276 ADDR_SPACE_GENERIC, speed_p));
9277 #else
9278 *cost += extra_cost->ldst.load;
9279 #endif
9280 return true;
9282 case PARALLEL:
9284 /* Calculations of LDM costs are complex. We assume an initial cost
9285 (ldm_1st) which will load the number of registers mentioned in
9286 ldm_regs_per_insn_1st registers; then each additional
9287 ldm_regs_per_insn_subsequent registers cost one more insn. The
9288 formula for N regs is thus:
9290 ldm_1st + COSTS_N_INSNS ((max (N - ldm_regs_per_insn_1st, 0)
9291 + ldm_regs_per_insn_subsequent - 1)
9292 / ldm_regs_per_insn_subsequent).
9294 Additional costs may also be added for addressing. A similar
9295 formula is used for STM. */
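/* Worked example with assumed tuning values: for an LDM of N = 5 registers
   with ldm_regs_per_insn_1st = 3 and ldm_regs_per_insn_subsequent = 2, the
   extra cost is ldm_1st + COSTS_N_INSNS ((MAX (5 - 3, 0) + 2 - 1) / 2)
   = ldm_1st + COSTS_N_INSNS (1).  */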
9297 bool is_ldm = load_multiple_operation (x, SImode);
9298 bool is_stm = store_multiple_operation (x, SImode);
9300 if (is_ldm || is_stm)
9302 if (speed_p)
9304 HOST_WIDE_INT nregs = XVECLEN (x, 0);
9305 HOST_WIDE_INT regs_per_insn_1st = is_ldm
9306 ? extra_cost->ldst.ldm_regs_per_insn_1st
9307 : extra_cost->ldst.stm_regs_per_insn_1st;
9308 HOST_WIDE_INT regs_per_insn_sub = is_ldm
9309 ? extra_cost->ldst.ldm_regs_per_insn_subsequent
9310 : extra_cost->ldst.stm_regs_per_insn_subsequent;
9312 *cost += regs_per_insn_1st
9313 + COSTS_N_INSNS (((MAX (nregs - regs_per_insn_1st, 0))
9314 + regs_per_insn_sub - 1)
9315 / regs_per_insn_sub);
9316 return true;
9320 return false;
9322 case DIV:
9323 case UDIV:
9324 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9325 && (mode == SFmode || !TARGET_VFP_SINGLE))
9326 *cost += COSTS_N_INSNS (speed_p
9327 ? extra_cost->fp[mode != SFmode].div : 0);
9328 else if (mode == SImode && TARGET_IDIV)
9329 *cost += COSTS_N_INSNS (speed_p ? extra_cost->mult[0].idiv : 0);
9330 else
9331 *cost = LIBCALL_COST (2);
9332 return false; /* All arguments must be in registers. */
9334 case MOD:
9335 /* MOD by a power of 2 can be expanded as:
9336 rsbs r1, r0, #0
9337 and r0, r0, #(n - 1)
9338 and r1, r1, #(n - 1)
9339 rsbpl r0, r1, #0. */
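/* For instance, x % 8 uses n = 8, so the two ANDs above mask with
   #(n - 1) = #7; the code below charges COSTS_N_INSNS (3) for the extra
   insns plus, when costing for speed, two logical operations and one
   arithmetic operation.  */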
9340 if (CONST_INT_P (XEXP (x, 1))
9341 && exact_log2 (INTVAL (XEXP (x, 1))) > 0
9342 && mode == SImode)
9344 *cost += COSTS_N_INSNS (3);
9346 if (speed_p)
9347 *cost += 2 * extra_cost->alu.logical
9348 + extra_cost->alu.arith;
9349 return true;
9352 /* Fall-through. */
9353 case UMOD:
9354 *cost = LIBCALL_COST (2);
9355 return false; /* All arguments must be in registers. */
9357 case ROTATE:
9358 if (mode == SImode && REG_P (XEXP (x, 1)))
9360 *cost += (COSTS_N_INSNS (1)
9361 + rtx_cost (XEXP (x, 0), mode, code, 0, speed_p));
9362 if (speed_p)
9363 *cost += extra_cost->alu.shift_reg;
9364 return true;
9366 /* Fall through */
9367 case ROTATERT:
9368 case ASHIFT:
9369 case LSHIFTRT:
9370 case ASHIFTRT:
9371 if (mode == DImode && CONST_INT_P (XEXP (x, 1)))
9373 *cost += (COSTS_N_INSNS (2)
9374 + rtx_cost (XEXP (x, 0), mode, code, 0, speed_p));
9375 if (speed_p)
9376 *cost += 2 * extra_cost->alu.shift;
9377 return true;
9379 else if (mode == SImode)
9381 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
9382 /* Slightly disparage register shifts at -Os, but not by much. */
9383 if (!CONST_INT_P (XEXP (x, 1)))
9384 *cost += (speed_p ? extra_cost->alu.shift_reg : 1
9385 + rtx_cost (XEXP (x, 1), mode, code, 1, speed_p));
9386 return true;
9388 else if (GET_MODE_CLASS (mode) == MODE_INT
9389 && GET_MODE_SIZE (mode) < 4)
9391 if (code == ASHIFT)
9393 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
9394 /* Slightly disparage register shifts at -Os, but not by
9395 much. */
9396 if (!CONST_INT_P (XEXP (x, 1)))
9397 *cost += (speed_p ? extra_cost->alu.shift_reg : 1
9398 + rtx_cost (XEXP (x, 1), mode, code, 1, speed_p));
9400 else if (code == LSHIFTRT || code == ASHIFTRT)
9402 if (arm_arch_thumb2 && CONST_INT_P (XEXP (x, 1)))
9404 /* Can use SBFX/UBFX. */
9405 if (speed_p)
9406 *cost += extra_cost->alu.bfx;
9407 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
9409 else
9411 *cost += COSTS_N_INSNS (1);
9412 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
9413 if (speed_p)
9415 if (CONST_INT_P (XEXP (x, 1)))
9416 *cost += 2 * extra_cost->alu.shift;
9417 else
9418 *cost += (extra_cost->alu.shift
9419 + extra_cost->alu.shift_reg);
9421 else
9422 /* Slightly disparage register shifts. */
9423 *cost += !CONST_INT_P (XEXP (x, 1));
9426 else /* Rotates. */
9428 *cost = COSTS_N_INSNS (2 + !CONST_INT_P (XEXP (x, 1)));
9429 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
9430 if (speed_p)
9432 if (CONST_INT_P (XEXP (x, 1)))
9433 *cost += (2 * extra_cost->alu.shift
9434 + extra_cost->alu.log_shift);
9435 else
9436 *cost += (extra_cost->alu.shift
9437 + extra_cost->alu.shift_reg
9438 + extra_cost->alu.log_shift_reg);
9441 return true;
9444 *cost = LIBCALL_COST (2);
9445 return false;
9447 case BSWAP:
9448 if (arm_arch6)
9450 if (mode == SImode)
9452 if (speed_p)
9453 *cost += extra_cost->alu.rev;
9455 return false;
9458 else
9460 /* No rev instruction available. Look at arm_legacy_rev
9461 and thumb_legacy_rev for the form of RTL used then. */
9462 if (TARGET_THUMB)
9464 *cost += COSTS_N_INSNS (9);
9466 if (speed_p)
9468 *cost += 6 * extra_cost->alu.shift;
9469 *cost += 3 * extra_cost->alu.logical;
9472 else
9474 *cost += COSTS_N_INSNS (4);
9476 if (speed_p)
9478 *cost += 2 * extra_cost->alu.shift;
9479 *cost += extra_cost->alu.arith_shift;
9480 *cost += 2 * extra_cost->alu.logical;
9483 return true;
9485 return false;
9487 case MINUS:
9488 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9489 && (mode == SFmode || !TARGET_VFP_SINGLE))
9491 if (GET_CODE (XEXP (x, 0)) == MULT
9492 || GET_CODE (XEXP (x, 1)) == MULT)
9494 rtx mul_op0, mul_op1, sub_op;
9496 if (speed_p)
9497 *cost += extra_cost->fp[mode != SFmode].mult_addsub;
9499 if (GET_CODE (XEXP (x, 0)) == MULT)
9501 mul_op0 = XEXP (XEXP (x, 0), 0);
9502 mul_op1 = XEXP (XEXP (x, 0), 1);
9503 sub_op = XEXP (x, 1);
9505 else
9507 mul_op0 = XEXP (XEXP (x, 1), 0);
9508 mul_op1 = XEXP (XEXP (x, 1), 1);
9509 sub_op = XEXP (x, 0);
9512 /* The first operand of the multiply may be optionally
9513 negated. */
9514 if (GET_CODE (mul_op0) == NEG)
9515 mul_op0 = XEXP (mul_op0, 0);
9517 *cost += (rtx_cost (mul_op0, mode, code, 0, speed_p)
9518 + rtx_cost (mul_op1, mode, code, 0, speed_p)
9519 + rtx_cost (sub_op, mode, code, 0, speed_p));
9521 return true;
9524 if (speed_p)
9525 *cost += extra_cost->fp[mode != SFmode].addsub;
9526 return false;
9529 if (mode == SImode)
9531 rtx shift_by_reg = NULL;
9532 rtx shift_op;
9533 rtx non_shift_op;
9535 shift_op = shifter_op_p (XEXP (x, 0), &shift_by_reg);
9536 if (shift_op == NULL)
9538 shift_op = shifter_op_p (XEXP (x, 1), &shift_by_reg);
9539 non_shift_op = XEXP (x, 0);
9541 else
9542 non_shift_op = XEXP (x, 1);
9544 if (shift_op != NULL)
9546 if (shift_by_reg != NULL)
9548 if (speed_p)
9549 *cost += extra_cost->alu.arith_shift_reg;
9550 *cost += rtx_cost (shift_by_reg, mode, code, 0, speed_p);
9552 else if (speed_p)
9553 *cost += extra_cost->alu.arith_shift;
9555 *cost += rtx_cost (shift_op, mode, code, 0, speed_p);
9556 *cost += rtx_cost (non_shift_op, mode, code, 0, speed_p);
9557 return true;
9560 if (arm_arch_thumb2
9561 && GET_CODE (XEXP (x, 1)) == MULT)
9563 /* MLS. */
9564 if (speed_p)
9565 *cost += extra_cost->mult[0].add;
9566 *cost += rtx_cost (XEXP (x, 0), mode, MINUS, 0, speed_p);
9567 *cost += rtx_cost (XEXP (XEXP (x, 1), 0), mode, MULT, 0, speed_p);
9568 *cost += rtx_cost (XEXP (XEXP (x, 1), 1), mode, MULT, 1, speed_p);
9569 return true;
9572 if (CONST_INT_P (XEXP (x, 0)))
9574 int insns = arm_gen_constant (MINUS, SImode, NULL_RTX,
9575 INTVAL (XEXP (x, 0)), NULL_RTX,
9576 NULL_RTX, 1, 0);
9577 *cost = COSTS_N_INSNS (insns);
9578 if (speed_p)
9579 *cost += insns * extra_cost->alu.arith;
9580 *cost += rtx_cost (XEXP (x, 1), mode, code, 1, speed_p);
9581 return true;
9583 else if (speed_p)
9584 *cost += extra_cost->alu.arith;
9586 return false;
9589 if (GET_MODE_CLASS (mode) == MODE_INT
9590 && GET_MODE_SIZE (mode) < 4)
9592 rtx shift_op, shift_reg;
9593 shift_reg = NULL;
9595 /* We check both sides of the MINUS for shifter operands since,
9596 unlike PLUS, it's not commutative. */
9598 HANDLE_NARROW_SHIFT_ARITH (MINUS, 0)
9599 HANDLE_NARROW_SHIFT_ARITH (MINUS, 1)
9601 /* Slightly disparage, as we might need to widen the result. */
9602 *cost += 1;
9603 if (speed_p)
9604 *cost += extra_cost->alu.arith;
9606 if (CONST_INT_P (XEXP (x, 0)))
9608 *cost += rtx_cost (XEXP (x, 1), mode, code, 1, speed_p);
9609 return true;
9612 return false;
9615 if (mode == DImode)
9617 *cost += COSTS_N_INSNS (1);
9619 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND)
9621 rtx op1 = XEXP (x, 1);
9623 if (speed_p)
9624 *cost += 2 * extra_cost->alu.arith;
9626 if (GET_CODE (op1) == ZERO_EXTEND)
9627 *cost += rtx_cost (XEXP (op1, 0), VOIDmode, ZERO_EXTEND,
9628 0, speed_p);
9629 else
9630 *cost += rtx_cost (op1, mode, MINUS, 1, speed_p);
9631 *cost += rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode, ZERO_EXTEND,
9632 0, speed_p);
9633 return true;
9635 else if (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
9637 if (speed_p)
9638 *cost += extra_cost->alu.arith + extra_cost->alu.arith_shift;
9639 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode, SIGN_EXTEND,
9640 0, speed_p)
9641 + rtx_cost (XEXP (x, 1), mode, MINUS, 1, speed_p));
9642 return true;
9644 else if (GET_CODE (XEXP (x, 1)) == ZERO_EXTEND
9645 || GET_CODE (XEXP (x, 1)) == SIGN_EXTEND)
9647 if (speed_p)
9648 *cost += (extra_cost->alu.arith
9649 + (GET_CODE (XEXP (x, 1)) == ZERO_EXTEND
9650 ? extra_cost->alu.arith
9651 : extra_cost->alu.arith_shift));
9652 *cost += (rtx_cost (XEXP (x, 0), mode, MINUS, 0, speed_p)
9653 + rtx_cost (XEXP (XEXP (x, 1), 0), VOIDmode,
9654 GET_CODE (XEXP (x, 1)), 0, speed_p));
9655 return true;
9658 if (speed_p)
9659 *cost += 2 * extra_cost->alu.arith;
9660 return false;
9663 /* Vector mode? */
9665 *cost = LIBCALL_COST (2);
9666 return false;
9668 case PLUS:
9669 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9670 && (mode == SFmode || !TARGET_VFP_SINGLE))
9672 if (GET_CODE (XEXP (x, 0)) == MULT)
9674 rtx mul_op0, mul_op1, add_op;
9676 if (speed_p)
9677 *cost += extra_cost->fp[mode != SFmode].mult_addsub;
9679 mul_op0 = XEXP (XEXP (x, 0), 0);
9680 mul_op1 = XEXP (XEXP (x, 0), 1);
9681 add_op = XEXP (x, 1);
9683 *cost += (rtx_cost (mul_op0, mode, code, 0, speed_p)
9684 + rtx_cost (mul_op1, mode, code, 0, speed_p)
9685 + rtx_cost (add_op, mode, code, 0, speed_p));
9687 return true;
9690 if (speed_p)
9691 *cost += extra_cost->fp[mode != SFmode].addsub;
9692 return false;
9694 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
9696 *cost = LIBCALL_COST (2);
9697 return false;
9700 /* Narrow modes can be synthesized in SImode, but the range
9701 of useful sub-operations is limited. Check for shift operations
9702 on one of the operands. Only left shifts can be used in the
9703 narrow modes. */
9704 if (GET_MODE_CLASS (mode) == MODE_INT
9705 && GET_MODE_SIZE (mode) < 4)
9707 rtx shift_op, shift_reg;
9708 shift_reg = NULL;
9710 HANDLE_NARROW_SHIFT_ARITH (PLUS, 0)
9712 if (CONST_INT_P (XEXP (x, 1)))
9714 int insns = arm_gen_constant (PLUS, SImode, NULL_RTX,
9715 INTVAL (XEXP (x, 1)), NULL_RTX,
9716 NULL_RTX, 1, 0);
9717 *cost = COSTS_N_INSNS (insns);
9718 if (speed_p)
9719 *cost += insns * extra_cost->alu.arith;
9720 /* Slightly penalize a narrow operation as the result may
9721 need widening. */
9722 *cost += 1 + rtx_cost (XEXP (x, 0), mode, PLUS, 0, speed_p);
9723 return true;
9726 /* Slightly penalize a narrow operation as the result may
9727 need widening. */
9728 *cost += 1;
9729 if (speed_p)
9730 *cost += extra_cost->alu.arith;
9732 return false;
9735 if (mode == SImode)
9737 rtx shift_op, shift_reg;
9739 if (TARGET_INT_SIMD
9740 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
9741 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
9743 /* UXTA[BH] or SXTA[BH]. */
9744 if (speed_p)
9745 *cost += extra_cost->alu.extend_arith;
9746 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode, ZERO_EXTEND,
9747 0, speed_p)
9748 + rtx_cost (XEXP (x, 1), mode, PLUS, 0, speed_p));
9749 return true;
9752 shift_reg = NULL;
9753 shift_op = shifter_op_p (XEXP (x, 0), &shift_reg);
9754 if (shift_op != NULL)
9756 if (shift_reg)
9758 if (speed_p)
9759 *cost += extra_cost->alu.arith_shift_reg;
9760 *cost += rtx_cost (shift_reg, mode, ASHIFT, 1, speed_p);
9762 else if (speed_p)
9763 *cost += extra_cost->alu.arith_shift;
9765 *cost += (rtx_cost (shift_op, mode, ASHIFT, 0, speed_p)
9766 + rtx_cost (XEXP (x, 1), mode, PLUS, 1, speed_p));
9767 return true;
9769 if (GET_CODE (XEXP (x, 0)) == MULT)
9771 rtx mul_op = XEXP (x, 0);
9773 if (TARGET_DSP_MULTIPLY
9774 && ((GET_CODE (XEXP (mul_op, 0)) == SIGN_EXTEND
9775 && (GET_CODE (XEXP (mul_op, 1)) == SIGN_EXTEND
9776 || (GET_CODE (XEXP (mul_op, 1)) == ASHIFTRT
9777 && CONST_INT_P (XEXP (XEXP (mul_op, 1), 1))
9778 && INTVAL (XEXP (XEXP (mul_op, 1), 1)) == 16)))
9779 || (GET_CODE (XEXP (mul_op, 0)) == ASHIFTRT
9780 && CONST_INT_P (XEXP (XEXP (mul_op, 0), 1))
9781 && INTVAL (XEXP (XEXP (mul_op, 0), 1)) == 16
9782 && (GET_CODE (XEXP (mul_op, 1)) == SIGN_EXTEND
9783 || (GET_CODE (XEXP (mul_op, 1)) == ASHIFTRT
9784 && CONST_INT_P (XEXP (XEXP (mul_op, 1), 1))
9785 && (INTVAL (XEXP (XEXP (mul_op, 1), 1))
9786 == 16))))))
9788 /* SMLA[BT][BT]. */
9789 if (speed_p)
9790 *cost += extra_cost->mult[0].extend_add;
9791 *cost += (rtx_cost (XEXP (XEXP (mul_op, 0), 0), mode,
9792 SIGN_EXTEND, 0, speed_p)
9793 + rtx_cost (XEXP (XEXP (mul_op, 1), 0), mode,
9794 SIGN_EXTEND, 0, speed_p)
9795 + rtx_cost (XEXP (x, 1), mode, PLUS, 1, speed_p));
9796 return true;
9799 if (speed_p)
9800 *cost += extra_cost->mult[0].add;
9801 *cost += (rtx_cost (XEXP (mul_op, 0), mode, MULT, 0, speed_p)
9802 + rtx_cost (XEXP (mul_op, 1), mode, MULT, 1, speed_p)
9803 + rtx_cost (XEXP (x, 1), mode, PLUS, 1, speed_p));
9804 return true;
9806 if (CONST_INT_P (XEXP (x, 1)))
9808 int insns = arm_gen_constant (PLUS, SImode, NULL_RTX,
9809 INTVAL (XEXP (x, 1)), NULL_RTX,
9810 NULL_RTX, 1, 0);
9811 *cost = COSTS_N_INSNS (insns);
9812 if (speed_p)
9813 *cost += insns * extra_cost->alu.arith;
9814 *cost += rtx_cost (XEXP (x, 0), mode, PLUS, 0, speed_p);
9815 return true;
9817 else if (speed_p)
9818 *cost += extra_cost->alu.arith;
9820 return false;
9823 if (mode == DImode)
9825 if (arm_arch3m
9826 && GET_CODE (XEXP (x, 0)) == MULT
9827 && ((GET_CODE (XEXP (XEXP (x, 0), 0)) == ZERO_EXTEND
9828 && GET_CODE (XEXP (XEXP (x, 0), 1)) == ZERO_EXTEND)
9829 || (GET_CODE (XEXP (XEXP (x, 0), 0)) == SIGN_EXTEND
9830 && GET_CODE (XEXP (XEXP (x, 0), 1)) == SIGN_EXTEND)))
9832 if (speed_p)
9833 *cost += extra_cost->mult[1].extend_add;
9834 *cost += (rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0), mode,
9835 ZERO_EXTEND, 0, speed_p)
9836 + rtx_cost (XEXP (XEXP (XEXP (x, 0), 1), 0), mode,
9837 ZERO_EXTEND, 0, speed_p)
9838 + rtx_cost (XEXP (x, 1), mode, PLUS, 1, speed_p));
9839 return true;
9842 *cost += COSTS_N_INSNS (1);
9844 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
9845 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
9847 if (speed_p)
9848 *cost += (extra_cost->alu.arith
9849 + (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
9850 ? extra_cost->alu.arith
9851 : extra_cost->alu.arith_shift));
9853 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode, ZERO_EXTEND,
9854 0, speed_p)
9855 + rtx_cost (XEXP (x, 1), mode, PLUS, 1, speed_p));
9856 return true;
9859 if (speed_p)
9860 *cost += 2 * extra_cost->alu.arith;
9861 return false;
9864 /* Vector mode? */
9865 *cost = LIBCALL_COST (2);
9866 return false;
9867 case IOR:
9868 if (mode == SImode && arm_arch6 && aarch_rev16_p (x))
9870 if (speed_p)
9871 *cost += extra_cost->alu.rev;
9873 return true;
9875 /* Fall through. */
9876 case AND: case XOR:
9877 if (mode == SImode)
9879 enum rtx_code subcode = GET_CODE (XEXP (x, 0));
9880 rtx op0 = XEXP (x, 0);
9881 rtx shift_op, shift_reg;
9883 if (subcode == NOT
9884 && (code == AND
9885 || (code == IOR && TARGET_THUMB2)))
9886 op0 = XEXP (op0, 0);
9888 shift_reg = NULL;
9889 shift_op = shifter_op_p (op0, &shift_reg);
9890 if (shift_op != NULL)
9892 if (shift_reg)
9894 if (speed_p)
9895 *cost += extra_cost->alu.log_shift_reg;
9896 *cost += rtx_cost (shift_reg, mode, ASHIFT, 1, speed_p);
9898 else if (speed_p)
9899 *cost += extra_cost->alu.log_shift;
9901 *cost += (rtx_cost (shift_op, mode, ASHIFT, 0, speed_p)
9902 + rtx_cost (XEXP (x, 1), mode, code, 1, speed_p));
9903 return true;
9906 if (CONST_INT_P (XEXP (x, 1)))
9908 int insns = arm_gen_constant (code, SImode, NULL_RTX,
9909 INTVAL (XEXP (x, 1)), NULL_RTX,
9910 NULL_RTX, 1, 0);
9912 *cost = COSTS_N_INSNS (insns);
9913 if (speed_p)
9914 *cost += insns * extra_cost->alu.logical;
9915 *cost += rtx_cost (op0, mode, code, 0, speed_p);
9916 return true;
9919 if (speed_p)
9920 *cost += extra_cost->alu.logical;
9921 *cost += (rtx_cost (op0, mode, code, 0, speed_p)
9922 + rtx_cost (XEXP (x, 1), mode, code, 1, speed_p));
9923 return true;
9926 if (mode == DImode)
9928 rtx op0 = XEXP (x, 0);
9929 enum rtx_code subcode = GET_CODE (op0);
9931 *cost += COSTS_N_INSNS (1);
9933 if (subcode == NOT
9934 && (code == AND
9935 || (code == IOR && TARGET_THUMB2)))
9936 op0 = XEXP (op0, 0);
9938 if (GET_CODE (op0) == ZERO_EXTEND)
9940 if (speed_p)
9941 *cost += 2 * extra_cost->alu.logical;
9943 *cost += (rtx_cost (XEXP (op0, 0), VOIDmode, ZERO_EXTEND,
9944 0, speed_p)
9945 + rtx_cost (XEXP (x, 1), mode, code, 0, speed_p));
9946 return true;
9948 else if (GET_CODE (op0) == SIGN_EXTEND)
9950 if (speed_p)
9951 *cost += extra_cost->alu.logical + extra_cost->alu.log_shift;
9953 *cost += (rtx_cost (XEXP (op0, 0), VOIDmode, SIGN_EXTEND,
9954 0, speed_p)
9955 + rtx_cost (XEXP (x, 1), mode, code, 0, speed_p));
9956 return true;
9959 if (speed_p)
9960 *cost += 2 * extra_cost->alu.logical;
9962 return true;
9964 /* Vector mode? */
9966 *cost = LIBCALL_COST (2);
9967 return false;
9969 case MULT:
9970 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9971 && (mode == SFmode || !TARGET_VFP_SINGLE))
9973 rtx op0 = XEXP (x, 0);
9975 if (GET_CODE (op0) == NEG && !flag_rounding_math)
9976 op0 = XEXP (op0, 0);
9978 if (speed_p)
9979 *cost += extra_cost->fp[mode != SFmode].mult;
9981 *cost += (rtx_cost (op0, mode, MULT, 0, speed_p)
9982 + rtx_cost (XEXP (x, 1), mode, MULT, 1, speed_p));
9983 return true;
9985 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
9987 *cost = LIBCALL_COST (2);
9988 return false;
9991 if (mode == SImode)
9993 if (TARGET_DSP_MULTIPLY
9994 && ((GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
9995 && (GET_CODE (XEXP (x, 1)) == SIGN_EXTEND
9996 || (GET_CODE (XEXP (x, 1)) == ASHIFTRT
9997 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
9998 && INTVAL (XEXP (XEXP (x, 1), 1)) == 16)))
9999 || (GET_CODE (XEXP (x, 0)) == ASHIFTRT
10000 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
10001 && INTVAL (XEXP (XEXP (x, 0), 1)) == 16
10002 && (GET_CODE (XEXP (x, 1)) == SIGN_EXTEND
10003 || (GET_CODE (XEXP (x, 1)) == ASHIFTRT
10004 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
10005 && (INTVAL (XEXP (XEXP (x, 1), 1))
10006 == 16))))))
10008 /* SMUL[TB][TB]. */
10009 if (speed_p)
10010 *cost += extra_cost->mult[0].extend;
10011 *cost += rtx_cost (XEXP (XEXP (x, 0), 0), mode,
10012 SIGN_EXTEND, 0, speed_p);
10013 *cost += rtx_cost (XEXP (XEXP (x, 1), 0), mode,
10014 SIGN_EXTEND, 1, speed_p);
10015 return true;
10017 if (speed_p)
10018 *cost += extra_cost->mult[0].simple;
10019 return false;
10022 if (mode == DImode)
10024 if (arm_arch3m
10025 && ((GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
10026 && GET_CODE (XEXP (x, 1)) == ZERO_EXTEND)
10027 || (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
10028 && GET_CODE (XEXP (x, 1)) == SIGN_EXTEND)))
10030 if (speed_p)
10031 *cost += extra_cost->mult[1].extend;
10032 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode,
10033 ZERO_EXTEND, 0, speed_p)
10034 + rtx_cost (XEXP (XEXP (x, 1), 0), VOIDmode,
10035 ZERO_EXTEND, 0, speed_p));
10036 return true;
10039 *cost = LIBCALL_COST (2);
10040 return false;
10043 /* Vector mode? */
10044 *cost = LIBCALL_COST (2);
10045 return false;
10047 case NEG:
10048 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10049 && (mode == SFmode || !TARGET_VFP_SINGLE))
10051 if (GET_CODE (XEXP (x, 0)) == MULT)
10053 /* VNMUL. */
10054 *cost = rtx_cost (XEXP (x, 0), mode, NEG, 0, speed_p);
10055 return true;
10058 if (speed_p)
10059 *cost += extra_cost->fp[mode != SFmode].neg;
10061 return false;
10063 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
10065 *cost = LIBCALL_COST (1);
10066 return false;
10069 if (mode == SImode)
10071 if (GET_CODE (XEXP (x, 0)) == ABS)
10073 *cost += COSTS_N_INSNS (1);
10074 /* Assume the non-flag-changing variant. */
10075 if (speed_p)
10076 *cost += (extra_cost->alu.log_shift
10077 + extra_cost->alu.arith_shift);
10078 *cost += rtx_cost (XEXP (XEXP (x, 0), 0), mode, ABS, 0, speed_p);
10079 return true;
10082 if (GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMPARE
10083 || GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMM_COMPARE)
10085 *cost += COSTS_N_INSNS (1);
10086 /* No extra cost for MOV imm and MVN imm. */
10087 /* If the comparison op is using the flags, there's no further
10088 cost; otherwise we need to add the cost of the comparison. */
10089 if (!(REG_P (XEXP (XEXP (x, 0), 0))
10090 && REGNO (XEXP (XEXP (x, 0), 0)) == CC_REGNUM
10091 && XEXP (XEXP (x, 0), 1) == const0_rtx))
10093 mode = GET_MODE (XEXP (XEXP (x, 0), 0));
10094 *cost += (COSTS_N_INSNS (1)
10095 + rtx_cost (XEXP (XEXP (x, 0), 0), mode, COMPARE,
10096 0, speed_p)
10097 + rtx_cost (XEXP (XEXP (x, 0), 1), mode, COMPARE,
10098 1, speed_p));
10099 if (speed_p)
10100 *cost += extra_cost->alu.arith;
10102 return true;
10105 if (speed_p)
10106 *cost += extra_cost->alu.arith;
10107 return false;
10110 if (GET_MODE_CLASS (mode) == MODE_INT
10111 && GET_MODE_SIZE (mode) < 4)
10113 /* Slightly disparage, as we might need an extend operation. */
10114 *cost += 1;
10115 if (speed_p)
10116 *cost += extra_cost->alu.arith;
10117 return false;
10120 if (mode == DImode)
10122 *cost += COSTS_N_INSNS (1);
10123 if (speed_p)
10124 *cost += 2 * extra_cost->alu.arith;
10125 return false;
10128 /* Vector mode? */
10129 *cost = LIBCALL_COST (1);
10130 return false;
10132 case NOT:
10133 if (mode == SImode)
10135 rtx shift_op;
10136 rtx shift_reg = NULL;
10138 shift_op = shifter_op_p (XEXP (x, 0), &shift_reg);
10140 if (shift_op)
10142 if (shift_reg != NULL)
10144 if (speed_p)
10145 *cost += extra_cost->alu.log_shift_reg;
10146 *cost += rtx_cost (shift_reg, mode, ASHIFT, 1, speed_p);
10148 else if (speed_p)
10149 *cost += extra_cost->alu.log_shift;
10150 *cost += rtx_cost (shift_op, mode, ASHIFT, 0, speed_p);
10151 return true;
10154 if (speed_p)
10155 *cost += extra_cost->alu.logical;
10156 return false;
10158 if (mode == DImode)
10160 *cost += COSTS_N_INSNS (1);
10161 return false;
10164 /* Vector mode? */
10166 *cost += LIBCALL_COST (1);
10167 return false;
10169 case IF_THEN_ELSE:
10171 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
10173 *cost += COSTS_N_INSNS (3);
10174 return true;
10176 int op1cost = rtx_cost (XEXP (x, 1), mode, SET, 1, speed_p);
10177 int op2cost = rtx_cost (XEXP (x, 2), mode, SET, 1, speed_p);
10179 *cost = rtx_cost (XEXP (x, 0), mode, IF_THEN_ELSE, 0, speed_p);
10180 /* Assume that if one arm of the if_then_else is a register,
10181 it will be tied with the result and eliminate the
10182 conditional insn. */
10183 if (REG_P (XEXP (x, 1)))
10184 *cost += op2cost;
10185 else if (REG_P (XEXP (x, 2)))
10186 *cost += op1cost;
10187 else
10189 if (speed_p)
10191 if (extra_cost->alu.non_exec_costs_exec)
10192 *cost += op1cost + op2cost + extra_cost->alu.non_exec;
10193 else
10194 *cost += MAX (op1cost, op2cost) + extra_cost->alu.non_exec;
10196 else
10197 *cost += op1cost + op2cost;
10200 return true;
10202 case COMPARE:
10203 if (cc_register (XEXP (x, 0), VOIDmode) && XEXP (x, 1) == const0_rtx)
10204 *cost = 0;
10205 else
10207 machine_mode op0mode;
10208 /* We'll mostly assume that the cost of a compare is the cost of the
10209 LHS. However, there are some notable exceptions. */
10211 /* Floating point compares are never done as side-effects. */
10212 op0mode = GET_MODE (XEXP (x, 0));
10213 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (op0mode) == MODE_FLOAT
10214 && (op0mode == SFmode || !TARGET_VFP_SINGLE))
10216 if (speed_p)
10217 *cost += extra_cost->fp[op0mode != SFmode].compare;
10219 if (XEXP (x, 1) == CONST0_RTX (op0mode))
10221 *cost += rtx_cost (XEXP (x, 0), op0mode, code, 0, speed_p);
10222 return true;
10225 return false;
10227 else if (GET_MODE_CLASS (op0mode) == MODE_FLOAT)
10229 *cost = LIBCALL_COST (2);
10230 return false;
10233 /* DImode compares normally take two insns. */
10234 if (op0mode == DImode)
10236 *cost += COSTS_N_INSNS (1);
10237 if (speed_p)
10238 *cost += 2 * extra_cost->alu.arith;
10239 return false;
10242 if (op0mode == SImode)
10244 rtx shift_op;
10245 rtx shift_reg;
10247 if (XEXP (x, 1) == const0_rtx
10248 && !(REG_P (XEXP (x, 0))
10249 || (GET_CODE (XEXP (x, 0)) == SUBREG
10250 && REG_P (SUBREG_REG (XEXP (x, 0))))))
10252 *cost = rtx_cost (XEXP (x, 0), op0mode, COMPARE, 0, speed_p);
10254 /* Multiply operations that set the flags are often
10255 significantly more expensive. */
10256 if (speed_p
10257 && GET_CODE (XEXP (x, 0)) == MULT
10258 && !power_of_two_operand (XEXP (XEXP (x, 0), 1), mode))
10259 *cost += extra_cost->mult[0].flag_setting;
10261 if (speed_p
10262 && GET_CODE (XEXP (x, 0)) == PLUS
10263 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
10264 && !power_of_two_operand (XEXP (XEXP (XEXP (x, 0),
10265 0), 1), mode))
10266 *cost += extra_cost->mult[0].flag_setting;
10267 return true;
10270 shift_reg = NULL;
10271 shift_op = shifter_op_p (XEXP (x, 0), &shift_reg);
10272 if (shift_op != NULL)
10274 if (shift_reg != NULL)
10276 *cost += rtx_cost (shift_reg, op0mode, ASHIFT,
10277 1, speed_p);
10278 if (speed_p)
10279 *cost += extra_cost->alu.arith_shift_reg;
10281 else if (speed_p)
10282 *cost += extra_cost->alu.arith_shift;
10283 *cost += rtx_cost (shift_op, op0mode, ASHIFT, 0, speed_p);
10284 *cost += rtx_cost (XEXP (x, 1), op0mode, COMPARE, 1, speed_p);
10285 return true;
10288 if (speed_p)
10289 *cost += extra_cost->alu.arith;
10290 if (CONST_INT_P (XEXP (x, 1))
10291 && const_ok_for_op (INTVAL (XEXP (x, 1)), COMPARE))
10293 *cost += rtx_cost (XEXP (x, 0), op0mode, COMPARE, 0, speed_p);
10294 return true;
10296 return false;
10299 /* Vector mode? */
10301 *cost = LIBCALL_COST (2);
10302 return false;
10304 return true;
10306 case EQ:
10307 case NE:
10308 case LT:
10309 case LE:
10310 case GT:
10311 case GE:
10312 case LTU:
10313 case LEU:
10314 case GEU:
10315 case GTU:
10316 case ORDERED:
10317 case UNORDERED:
10318 case UNEQ:
10319 case UNLE:
10320 case UNLT:
10321 case UNGE:
10322 case UNGT:
10323 case LTGT:
10324 if (outer_code == SET)
10326 /* Is it a store-flag operation? */
10327 if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM
10328 && XEXP (x, 1) == const0_rtx)
10330 /* Thumb also needs an IT insn. */
10331 *cost += COSTS_N_INSNS (TARGET_THUMB ? 2 : 1);
10332 return true;
10334 if (XEXP (x, 1) == const0_rtx)
10336 switch (code)
10338 case LT:
10339 /* LSR Rd, Rn, #31. */
10340 if (speed_p)
10341 *cost += extra_cost->alu.shift;
10342 break;
10344 case EQ:
10345 /* RSBS T1, Rn, #0
10346 ADC Rd, Rn, T1. */
10348 case NE:
10349 /* SUBS T1, Rn, #1
10350 SBC Rd, Rn, T1. */
10351 *cost += COSTS_N_INSNS (1);
10352 break;
10354 case LE:
10355 /* RSBS T1, Rn, Rn, LSR #31
10356 ADC Rd, Rn, T1. */
10357 *cost += COSTS_N_INSNS (1);
10358 if (speed_p)
10359 *cost += extra_cost->alu.arith_shift;
10360 break;
10362 case GT:
10363 /* RSB Rd, Rn, Rn, ASR #1
10364 LSR Rd, Rd, #31. */
10365 *cost += COSTS_N_INSNS (1);
10366 if (speed_p)
10367 *cost += (extra_cost->alu.arith_shift
10368 + extra_cost->alu.shift);
10369 break;
10371 case GE:
10372 /* ASR Rd, Rn, #31
10373 ADD Rd, Rn, #1. */
10374 *cost += COSTS_N_INSNS (1);
10375 if (speed_p)
10376 *cost += extra_cost->alu.shift;
10377 break;
10379 default:
10380 /* Remaining cases are either meaningless or would take
10381 three insns anyway. */
10382 *cost = COSTS_N_INSNS (3);
10383 break;
10385 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
10386 return true;
10388 else
10390 *cost += COSTS_N_INSNS (TARGET_THUMB ? 3 : 2);
10391 if (CONST_INT_P (XEXP (x, 1))
10392 && const_ok_for_op (INTVAL (XEXP (x, 1)), COMPARE))
10394 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
10395 return true;
10398 return false;
10401 /* Not directly inside a set. If it involves the condition code
10402 register it must be the condition for a branch, cond_exec or
10403 I_T_E operation. Since the comparison is performed elsewhere
10404 this is just the control part which has no additional
10405 cost. */
10406 else if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM
10407 && XEXP (x, 1) == const0_rtx)
10409 *cost = 0;
10410 return true;
10412 return false;
10414 case ABS:
10415 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10416 && (mode == SFmode || !TARGET_VFP_SINGLE))
10418 if (speed_p)
10419 *cost += extra_cost->fp[mode != SFmode].neg;
10421 return false;
10423 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
10425 *cost = LIBCALL_COST (1);
10426 return false;
10429 if (mode == SImode)
10431 if (speed_p)
10432 *cost += extra_cost->alu.log_shift + extra_cost->alu.arith_shift;
10433 return false;
10435 /* Vector mode? */
10436 *cost = LIBCALL_COST (1);
10437 return false;
10439 case SIGN_EXTEND:
10440 if ((arm_arch4 || GET_MODE (XEXP (x, 0)) == SImode)
10441 && MEM_P (XEXP (x, 0)))
10443 if (mode == DImode)
10444 *cost += COSTS_N_INSNS (1);
10446 if (!speed_p)
10447 return true;
10449 if (GET_MODE (XEXP (x, 0)) == SImode)
10450 *cost += extra_cost->ldst.load;
10451 else
10452 *cost += extra_cost->ldst.load_sign_extend;
10454 if (mode == DImode)
10455 *cost += extra_cost->alu.shift;
10457 return true;
10460 /* Widening from less than 32-bits requires an extend operation. */
10461 if (GET_MODE (XEXP (x, 0)) != SImode && arm_arch6)
10463 /* We have SXTB/SXTH. */
10464 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
10465 if (speed_p)
10466 *cost += extra_cost->alu.extend;
10468 else if (GET_MODE (XEXP (x, 0)) != SImode)
10470 /* Needs two shifts. */
10471 *cost += COSTS_N_INSNS (1);
10472 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
10473 if (speed_p)
10474 *cost += 2 * extra_cost->alu.shift;
10477 /* Widening beyond 32-bits requires one more insn. */
10478 if (mode == DImode)
10480 *cost += COSTS_N_INSNS (1);
10481 if (speed_p)
10482 *cost += extra_cost->alu.shift;
10485 return true;
10487 case ZERO_EXTEND:
10488 if ((arm_arch4
10489 || GET_MODE (XEXP (x, 0)) == SImode
10490 || GET_MODE (XEXP (x, 0)) == QImode)
10491 && MEM_P (XEXP (x, 0)))
10493 *cost = rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
10495 if (mode == DImode)
10496 *cost += COSTS_N_INSNS (1); /* No speed penalty. */
10498 return true;
10501 /* Widening from less than 32-bits requires an extend operation. */
10502 if (GET_MODE (XEXP (x, 0)) == QImode)
10504 /* UXTB can be a shorter instruction in Thumb2, but it might
10505 be slower than the AND Rd, Rn, #255 alternative. When
10506 optimizing for speed it should never be slower to use
10507 AND, and we don't really model 16-bit vs 32-bit insns
10508 here. */
10509 if (speed_p)
10510 *cost += extra_cost->alu.logical;
10512 else if (GET_MODE (XEXP (x, 0)) != SImode && arm_arch6)
10514 /* We have UXTB/UXTH. */
10515 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
10516 if (speed_p)
10517 *cost += extra_cost->alu.extend;
10519 else if (GET_MODE (XEXP (x, 0)) != SImode)
10521 /* Needs two shifts. It's marginally preferable to use
10522 shifts rather than two BIC instructions as the second
10523 shift may merge with a subsequent insn as a shifter
10524 op. */
10525 *cost = COSTS_N_INSNS (2);
10526 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
10527 if (speed_p)
10528 *cost += 2 * extra_cost->alu.shift;
10531 /* Widening beyond 32-bits requires one more insn. */
10532 if (mode == DImode)
10534 *cost += COSTS_N_INSNS (1); /* No speed penalty. */
10537 return true;
10539 case CONST_INT:
10540 *cost = 0;
10541 /* CONST_INT has no mode, so we cannot tell for sure how many
10542 insns are really going to be needed. The best we can do is
10543 look at the value passed. If it fits in SImode, then assume
10544 that's the mode it will be used for. Otherwise assume it
10545 will be used in DImode. */
10546 if (INTVAL (x) == trunc_int_for_mode (INTVAL (x), SImode))
10547 mode = SImode;
10548 else
10549 mode = DImode;
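/* Illustrative: a value such as 0x1234 is unchanged by
   trunc_int_for_mode (..., SImode) and is costed as an SImode constant,
   whereas 0x123456789 is not and is costed as the two SImode halves of a
   DImode constant.  */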
10551 /* Avoid blowing up in arm_gen_constant (). */
10552 if (!(outer_code == PLUS
10553 || outer_code == AND
10554 || outer_code == IOR
10555 || outer_code == XOR
10556 || outer_code == MINUS))
10557 outer_code = SET;
10559 const_int_cost:
10560 if (mode == SImode)
10562 *cost += COSTS_N_INSNS (arm_gen_constant (outer_code, SImode, NULL,
10563 INTVAL (x), NULL, NULL,
10564 0, 0));
10565 /* Extra costs? */
10567 else
10569 *cost += COSTS_N_INSNS (arm_gen_constant
10570 (outer_code, SImode, NULL,
10571 trunc_int_for_mode (INTVAL (x), SImode),
10572 NULL, NULL, 0, 0)
10573 + arm_gen_constant (outer_code, SImode, NULL,
10574 INTVAL (x) >> 32, NULL,
10575 NULL, 0, 0));
10576 /* Extra costs? */
10579 return true;
10581 case CONST:
10582 case LABEL_REF:
10583 case SYMBOL_REF:
10584 if (speed_p)
10586 if (arm_arch_thumb2 && !flag_pic)
10587 *cost += COSTS_N_INSNS (1);
10588 else
10589 *cost += extra_cost->ldst.load;
10591 else
10592 *cost += COSTS_N_INSNS (1);
10594 if (flag_pic)
10596 *cost += COSTS_N_INSNS (1);
10597 if (speed_p)
10598 *cost += extra_cost->alu.arith;
10601 return true;
10603 case CONST_FIXED:
10604 *cost = COSTS_N_INSNS (4);
10605 /* Fixme. */
10606 return true;
10608 case CONST_DOUBLE:
10609 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10610 && (mode == SFmode || !TARGET_VFP_SINGLE))
10612 if (vfp3_const_double_rtx (x))
10614 if (speed_p)
10615 *cost += extra_cost->fp[mode == DFmode].fpconst;
10616 return true;
10619 if (speed_p)
10621 if (mode == DFmode)
10622 *cost += extra_cost->ldst.loadd;
10623 else
10624 *cost += extra_cost->ldst.loadf;
10626 else
10627 *cost += COSTS_N_INSNS (1 + (mode == DFmode));
10629 return true;
10631 *cost = COSTS_N_INSNS (4);
10632 return true;
10634 case CONST_VECTOR:
10635 /* Fixme. */
10636 if (TARGET_NEON
10637 && TARGET_HARD_FLOAT
10638 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
10639 && neon_immediate_valid_for_move (x, mode, NULL, NULL))
10640 *cost = COSTS_N_INSNS (1);
10641 else
10642 *cost = COSTS_N_INSNS (4);
10643 return true;
10645 case HIGH:
10646 case LO_SUM:
10647 /* When optimizing for size, we prefer constant pool entries to
10648 MOVW/MOVT pairs, so bump the cost of these slightly. */
10649 if (!speed_p)
10650 *cost += 1;
10651 return true;
10653 case CLZ:
10654 if (speed_p)
10655 *cost += extra_cost->alu.clz;
10656 return false;
10658 case SMIN:
10659 if (XEXP (x, 1) == const0_rtx)
10661 if (speed_p)
10662 *cost += extra_cost->alu.log_shift;
10663 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
10664 return true;
10666 /* Fall through. */
10667 case SMAX:
10668 case UMIN:
10669 case UMAX:
10670 *cost += COSTS_N_INSNS (1);
10671 return false;
10673 case TRUNCATE:
10674 if (GET_CODE (XEXP (x, 0)) == ASHIFTRT
10675 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
10676 && INTVAL (XEXP (XEXP (x, 0), 1)) == 32
10677 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
10678 && ((GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == SIGN_EXTEND
10679 && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == SIGN_EXTEND)
10680 || (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == ZERO_EXTEND
10681 && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1))
10682 == ZERO_EXTEND))))
10684 if (speed_p)
10685 *cost += extra_cost->mult[1].extend;
10686 *cost += (rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0), VOIDmode,
10687 ZERO_EXTEND, 0, speed_p)
10688 + rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 1), VOIDmode,
10689 ZERO_EXTEND, 0, speed_p));
10690 return true;
10692 *cost = LIBCALL_COST (1);
10693 return false;
10695 case UNSPEC_VOLATILE:
10696 case UNSPEC:
10697 return arm_unspec_cost (x, outer_code, speed_p, cost);
10699 case PC:
10700 /* Reading the PC is like reading any other register. Writing it
10701 is more expensive, but we take that into account elsewhere. */
10702 *cost = 0;
10703 return true;
10705 case ZERO_EXTRACT:
10706 /* TODO: Simple zero_extract of bottom bits using AND. */
10707 /* Fall through. */
10708 case SIGN_EXTRACT:
10709 if (arm_arch6
10710 && mode == SImode
10711 && CONST_INT_P (XEXP (x, 1))
10712 && CONST_INT_P (XEXP (x, 2)))
10714 if (speed_p)
10715 *cost += extra_cost->alu.bfx;
10716 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
10717 return true;
10719 /* Without UBFX/SBFX, need to resort to shift operations. */
10720 *cost += COSTS_N_INSNS (1);
10721 if (speed_p)
10722 *cost += 2 * extra_cost->alu.shift;
10723 *cost += rtx_cost (XEXP (x, 0), mode, ASHIFT, 0, speed_p);
10724 return true;
10726 case FLOAT_EXTEND:
10727 if (TARGET_HARD_FLOAT)
10729 if (speed_p)
10730 *cost += extra_cost->fp[mode == DFmode].widen;
10731 if (!TARGET_FPU_ARMV8
10732 && GET_MODE (XEXP (x, 0)) == HFmode)
10734 /* Pre v8, widening HF->DF is a two-step process, first
10735 widening to SFmode. */
10736 *cost += COSTS_N_INSNS (1);
10737 if (speed_p)
10738 *cost += extra_cost->fp[0].widen;
10740 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
10741 return true;
10744 *cost = LIBCALL_COST (1);
10745 return false;
10747 case FLOAT_TRUNCATE:
10748 if (TARGET_HARD_FLOAT)
10750 if (speed_p)
10751 *cost += extra_cost->fp[mode == DFmode].narrow;
10752 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
10753 return true;
10754 /* Vector modes? */
10756 *cost = LIBCALL_COST (1);
10757 return false;
10759 case FMA:
10760 if (TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_FMA)
10762 rtx op0 = XEXP (x, 0);
10763 rtx op1 = XEXP (x, 1);
10764 rtx op2 = XEXP (x, 2);
10767 /* vfms or vfnma. */
10768 if (GET_CODE (op0) == NEG)
10769 op0 = XEXP (op0, 0);
10771 /* vfnms or vfnma. */
10772 if (GET_CODE (op2) == NEG)
10773 op2 = XEXP (op2, 0);
10775 *cost += rtx_cost (op0, mode, FMA, 0, speed_p);
10776 *cost += rtx_cost (op1, mode, FMA, 1, speed_p);
10777 *cost += rtx_cost (op2, mode, FMA, 2, speed_p);
10779 if (speed_p)
10780 *cost += extra_cost->fp[mode == DFmode].fma;
10782 return true;
10785 *cost = LIBCALL_COST (3);
10786 return false;
10788 case FIX:
10789 case UNSIGNED_FIX:
10790 if (TARGET_HARD_FLOAT)
10792 /* The *combine_vcvtf2i reduces a vmul+vcvt into
10793 a vcvt fixed-point conversion. */
10794 if (code == FIX && mode == SImode
10795 && GET_CODE (XEXP (x, 0)) == FIX
10796 && GET_MODE (XEXP (x, 0)) == SFmode
10797 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
10798 && vfp3_const_double_for_bits (XEXP (XEXP (XEXP (x, 0), 0), 1))
10799 > 0)
10801 if (speed_p)
10802 *cost += extra_cost->fp[0].toint;
10804 *cost += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0), mode,
10805 code, 0, speed_p);
10806 return true;
10809 if (GET_MODE_CLASS (mode) == MODE_INT)
10811 mode = GET_MODE (XEXP (x, 0));
10812 if (speed_p)
10813 *cost += extra_cost->fp[mode == DFmode].toint;
10814 /* Strip off the 'cost' of rounding towards zero. */
10815 if (GET_CODE (XEXP (x, 0)) == FIX)
10816 *cost += rtx_cost (XEXP (XEXP (x, 0), 0), mode, code,
10817 0, speed_p);
10818 else
10819 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
10820 /* ??? Increase the cost to deal with transferring from
10821 FP -> CORE registers? */
10822 return true;
10824 else if (GET_MODE_CLASS (mode) == MODE_FLOAT
10825 && TARGET_FPU_ARMV8)
10827 if (speed_p)
10828 *cost += extra_cost->fp[mode == DFmode].roundint;
10829 return false;
10831 /* Vector costs? */
10833 *cost = LIBCALL_COST (1);
10834 return false;
10836 case FLOAT:
10837 case UNSIGNED_FLOAT:
10838 if (TARGET_HARD_FLOAT)
10840 /* ??? Increase the cost to deal with transferring from CORE
10841 -> FP registers? */
10842 if (speed_p)
10843 *cost += extra_cost->fp[mode == DFmode].fromint;
10844 return false;
10846 *cost = LIBCALL_COST (1);
10847 return false;
10849 case CALL:
10850 return true;
10852 case ASM_OPERANDS:
10854 /* Just a guess: estimate the number of instructions in the asm
10855 plus one insn per input, with a minimum of COSTS_N_INSNS (1)
10856 (see PR60663). */
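/* For example, an asm with a three-instruction template and two input
   operands is costed as COSTS_N_INSNS (5).  */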
10857 int asm_length = MAX (1, asm_str_count (ASM_OPERANDS_TEMPLATE (x)));
10858 int num_operands = ASM_OPERANDS_INPUT_LENGTH (x);
10860 *cost = COSTS_N_INSNS (asm_length + num_operands);
10861 return true;
10863 default:
10864 if (mode != VOIDmode)
10865 *cost = COSTS_N_INSNS (ARM_NUM_REGS (mode));
10866 else
10867 *cost = COSTS_N_INSNS (4); /* Who knows? */
10868 return false;
10872 #undef HANDLE_NARROW_SHIFT_ARITH
10874 /* RTX costs entry point. */
10876 static bool
10877 arm_rtx_costs (rtx x, machine_mode mode ATTRIBUTE_UNUSED, int outer_code,
10878 int opno ATTRIBUTE_UNUSED, int *total, bool speed)
10880 bool result;
10881 int code = GET_CODE (x);
10882 gcc_assert (current_tune->insn_extra_cost);
10884 result = arm_rtx_costs_internal (x, (enum rtx_code) code,
10885 (enum rtx_code) outer_code,
10886 current_tune->insn_extra_cost,
10887 total, speed);
10889 if (dump_file && (dump_flags & TDF_DETAILS))
10891 print_rtl_single (dump_file, x);
10892 fprintf (dump_file, "\n%s cost: %d (%s)\n", speed ? "Hot" : "Cold",
10893 *total, result ? "final" : "partial");
10895 return result;
10898 /* All address computations that the addressing modes can express are free,
10899 but rtx_cost returns much the same value for practically all of them. So we
10900 weight the different types of address here in order of preference (most preferred first):
10901 PRE/POST_INC/DEC, SHIFT or NON-INT sum, INT sum, REG, MEM or LABEL. */
10902 static inline int
10903 arm_arm_address_cost (rtx x)
10905 enum rtx_code c = GET_CODE (x);
10907 if (c == PRE_INC || c == PRE_DEC || c == POST_INC || c == POST_DEC)
10908 return 0;
10909 if (c == MEM || c == LABEL_REF || c == SYMBOL_REF)
10910 return 10;
10912 if (c == PLUS)
10914 if (CONST_INT_P (XEXP (x, 1)))
10915 return 2;
10917 if (ARITHMETIC_P (XEXP (x, 0)) || ARITHMETIC_P (XEXP (x, 1)))
10918 return 3;
10920 return 4;
10923 return 6;
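/* Illustrative values from the function above: 0 for an auto-increment
   address, 2 for [Rn, #imm], 3 for [Rn, Rm, LSL #2] (an arithmetic index),
   4 for other register sums, 10 for a bare label or symbol, and 6 for
   anything else such as a plain register.  */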
10926 static inline int
10927 arm_thumb_address_cost (rtx x)
10929 enum rtx_code c = GET_CODE (x);
10931 if (c == REG)
10932 return 1;
10933 if (c == PLUS
10934 && REG_P (XEXP (x, 0))
10935 && CONST_INT_P (XEXP (x, 1)))
10936 return 1;
10938 return 2;
10941 static int
10942 arm_address_cost (rtx x, machine_mode mode ATTRIBUTE_UNUSED,
10943 addr_space_t as ATTRIBUTE_UNUSED, bool speed ATTRIBUTE_UNUSED)
10945 return TARGET_32BIT ? arm_arm_address_cost (x) : arm_thumb_address_cost (x);
10948 /* Adjust cost hook for XScale. */
10949 static bool
10950 xscale_sched_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep,
10951 int * cost)
10953 /* Some true dependencies can have a higher cost depending
10954 on precisely how certain input operands are used. */
10955 if (dep_type == 0
10956 && recog_memoized (insn) >= 0
10957 && recog_memoized (dep) >= 0)
10959 int shift_opnum = get_attr_shift (insn);
10960 enum attr_type attr_type = get_attr_type (dep);
10962 /* If nonzero, SHIFT_OPNUM contains the operand number of a shifted
10963 operand for INSN. If we have a shifted input operand and the
10964 instruction we depend on is another ALU instruction, then we may
10965 have to account for an additional stall. */
10966 if (shift_opnum != 0
10967 && (attr_type == TYPE_ALU_SHIFT_IMM
10968 || attr_type == TYPE_ALUS_SHIFT_IMM
10969 || attr_type == TYPE_LOGIC_SHIFT_IMM
10970 || attr_type == TYPE_LOGICS_SHIFT_IMM
10971 || attr_type == TYPE_ALU_SHIFT_REG
10972 || attr_type == TYPE_ALUS_SHIFT_REG
10973 || attr_type == TYPE_LOGIC_SHIFT_REG
10974 || attr_type == TYPE_LOGICS_SHIFT_REG
10975 || attr_type == TYPE_MOV_SHIFT
10976 || attr_type == TYPE_MVN_SHIFT
10977 || attr_type == TYPE_MOV_SHIFT_REG
10978 || attr_type == TYPE_MVN_SHIFT_REG))
10980 rtx shifted_operand;
10981 int opno;
10983 /* Get the shifted operand. */
10984 extract_insn (insn);
10985 shifted_operand = recog_data.operand[shift_opnum];
10987 /* Iterate over all the operands in DEP. If we write an operand
10988 that overlaps with SHIFTED_OPERAND, then we have to increase the
10989 cost of this dependency. */
10990 extract_insn (dep);
10991 preprocess_constraints (dep);
10992 for (opno = 0; opno < recog_data.n_operands; opno++)
10994 /* We can ignore strict inputs. */
10995 if (recog_data.operand_type[opno] == OP_IN)
10996 continue;
10998 if (reg_overlap_mentioned_p (recog_data.operand[opno],
10999 shifted_operand))
11001 *cost = 2;
11002 return false;
11007 return true;
11010 /* Adjust cost hook for Cortex A9. */
11011 static bool
11012 cortex_a9_sched_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep,
11013 int * cost)
11015 switch (dep_type)
11017 case REG_DEP_ANTI:
11018 *cost = 0;
11019 return false;
11021 case REG_DEP_TRUE:
11022 case REG_DEP_OUTPUT:
11023 if (recog_memoized (insn) >= 0
11024 && recog_memoized (dep) >= 0)
11026 if (GET_CODE (PATTERN (insn)) == SET)
11028 if (GET_MODE_CLASS
11029 (GET_MODE (SET_DEST (PATTERN (insn)))) == MODE_FLOAT
11030 || GET_MODE_CLASS
11031 (GET_MODE (SET_SRC (PATTERN (insn)))) == MODE_FLOAT)
11033 enum attr_type attr_type_insn = get_attr_type (insn);
11034 enum attr_type attr_type_dep = get_attr_type (dep);
11036 /* By default all dependencies of the form
11037 s0 = s0 <op> s1
11038 s0 = s0 <op> s2
11039 have an extra latency of 1 cycle because
11040 of the input and output dependency in this
11041 case. However this gets modeled as a true
11042 dependency and hence all these checks. */
11043 if (REG_P (SET_DEST (PATTERN (insn)))
11044 && reg_set_p (SET_DEST (PATTERN (insn)), dep))
11046 /* FMACS is a special case where the dependent
11047 instruction can be issued 3 cycles before
11048 the normal latency in case of an output
11049 dependency. */
11050 if ((attr_type_insn == TYPE_FMACS
11051 || attr_type_insn == TYPE_FMACD)
11052 && (attr_type_dep == TYPE_FMACS
11053 || attr_type_dep == TYPE_FMACD))
11055 if (dep_type == REG_DEP_OUTPUT)
11056 *cost = insn_default_latency (dep) - 3;
11057 else
11058 *cost = insn_default_latency (dep);
11059 return false;
11061 else
11063 if (dep_type == REG_DEP_OUTPUT)
11064 *cost = insn_default_latency (dep) + 1;
11065 else
11066 *cost = insn_default_latency (dep);
11068 return false;
11073 break;
11075 default:
11076 gcc_unreachable ();
11079 return true;
11082 /* Adjust cost hook for FA726TE. */
11083 static bool
11084 fa726te_sched_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep,
11085 int * cost)
11087 /* For FA726TE, a true dependency on CPSR (i.e. a flag-setting insn followed
11088 by a predicated one) has a penalty of 3. */
11089 if (dep_type == REG_DEP_TRUE
11090 && recog_memoized (insn) >= 0
11091 && recog_memoized (dep) >= 0
11092 && get_attr_conds (dep) == CONDS_SET)
11094 /* Use of carry (e.g. 64-bit arithmetic) in ALU: 3-cycle latency. */
11095 if (get_attr_conds (insn) == CONDS_USE
11096 && get_attr_type (insn) != TYPE_BRANCH)
11098 *cost = 3;
11099 return false;
11102 if (GET_CODE (PATTERN (insn)) == COND_EXEC
11103 || get_attr_conds (insn) == CONDS_USE)
11105 *cost = 0;
11106 return false;
11110 return true;
11113 /* Implement TARGET_REGISTER_MOVE_COST.
11115 Moves between VFP_REGS and GENERAL_REGS are a single insn, but
11116 such a move is typically more expensive than a single memory access. We set
11117 the cost to less than that of two memory accesses so that floating
11118 point to integer conversion does not go through memory. */
11121 arm_register_move_cost (machine_mode mode ATTRIBUTE_UNUSED,
11122 reg_class_t from, reg_class_t to)
11124 if (TARGET_32BIT)
11126 if ((IS_VFP_CLASS (from) && !IS_VFP_CLASS (to))
11127 || (!IS_VFP_CLASS (from) && IS_VFP_CLASS (to)))
11128 return 15;
11129 else if ((from == IWMMXT_REGS && to != IWMMXT_REGS)
11130 || (from != IWMMXT_REGS && to == IWMMXT_REGS))
11131 return 4;
11132 else if (from == IWMMXT_GR_REGS || to == IWMMXT_GR_REGS)
11133 return 20;
11134 else
11135 return 2;
11137 else
11139 if (from == HI_REGS || to == HI_REGS)
11140 return 4;
11141 else
11142 return 2;
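/* Illustrative: on a 32-bit target a core <-> VFP register move is costed
   at 15, i.e. less than two of the memory accesses costed at 10 each by
   arm_memory_move_cost below, while a core-to-core move costs 2.  */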
11146 /* Implement TARGET_MEMORY_MOVE_COST. */
11149 arm_memory_move_cost (machine_mode mode, reg_class_t rclass,
11150 bool in ATTRIBUTE_UNUSED)
11152 if (TARGET_32BIT)
11153 return 10;
11154 else
11156 if (GET_MODE_SIZE (mode) < 4)
11157 return 8;
11158 else
11159 return ((2 * GET_MODE_SIZE (mode)) * (rclass == LO_REGS ? 1 : 2));
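/* Illustrative: for Thumb-1, an SImode move to or from LO_REGS costs
   (2 * 4) * 1 = 8 and to or from any other class (2 * 4) * 2 = 16;
   any 32-bit target simply returns 10.  */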
11163 /* Vectorizer cost model implementation. */
11165 /* Implement targetm.vectorize.builtin_vectorization_cost. */
11166 static int
11167 arm_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
11168 tree vectype,
11169 int misalign ATTRIBUTE_UNUSED)
11171 unsigned elements;
11173 switch (type_of_cost)
11175 case scalar_stmt:
11176 return current_tune->vec_costs->scalar_stmt_cost;
11178 case scalar_load:
11179 return current_tune->vec_costs->scalar_load_cost;
11181 case scalar_store:
11182 return current_tune->vec_costs->scalar_store_cost;
11184 case vector_stmt:
11185 return current_tune->vec_costs->vec_stmt_cost;
11187 case vector_load:
11188 return current_tune->vec_costs->vec_align_load_cost;
11190 case vector_store:
11191 return current_tune->vec_costs->vec_store_cost;
11193 case vec_to_scalar:
11194 return current_tune->vec_costs->vec_to_scalar_cost;
11196 case scalar_to_vec:
11197 return current_tune->vec_costs->scalar_to_vec_cost;
11199 case unaligned_load:
11200 return current_tune->vec_costs->vec_unalign_load_cost;
11202 case unaligned_store:
11203 return current_tune->vec_costs->vec_unalign_store_cost;
11205 case cond_branch_taken:
11206 return current_tune->vec_costs->cond_taken_branch_cost;
11208 case cond_branch_not_taken:
11209 return current_tune->vec_costs->cond_not_taken_branch_cost;
11211 case vec_perm:
11212 case vec_promote_demote:
11213 return current_tune->vec_costs->vec_stmt_cost;
11215 case vec_construct:
11216 elements = TYPE_VECTOR_SUBPARTS (vectype);
11217 return elements / 2 + 1;
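/* Illustrative: constructing a V4SI vector gives elements = 4, so the
   cost is 4 / 2 + 1 = 3.  */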
11219 default:
11220 gcc_unreachable ();
11224 /* Implement targetm.vectorize.add_stmt_cost. */
11226 static unsigned
11227 arm_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
11228 struct _stmt_vec_info *stmt_info, int misalign,
11229 enum vect_cost_model_location where)
11231 unsigned *cost = (unsigned *) data;
11232 unsigned retval = 0;
11234 if (flag_vect_cost_model)
11236 tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
11237 int stmt_cost = arm_builtin_vectorization_cost (kind, vectype, misalign);
11239 /* Statements in an inner loop relative to the loop being
11240 vectorized are weighted more heavily. The value here is
11241 arbitrary and could potentially be improved with analysis. */
11242 if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info))
11243 count *= 50; /* FIXME. */
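/* Illustrative: a statement with per-copy cost C in such an inner loop
   therefore contributes 50 * count * C to cost[where] instead of
   count * C.  */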
11245 retval = (unsigned) (count * stmt_cost);
11246 cost[where] += retval;
11249 return retval;
11252 /* Return true if and only if this insn can dual-issue only as older. */
11253 static bool
11254 cortexa7_older_only (rtx_insn *insn)
11256 if (recog_memoized (insn) < 0)
11257 return false;
11259 switch (get_attr_type (insn))
11261 case TYPE_ALU_DSP_REG:
11262 case TYPE_ALU_SREG:
11263 case TYPE_ALUS_SREG:
11264 case TYPE_LOGIC_REG:
11265 case TYPE_LOGICS_REG:
11266 case TYPE_ADC_REG:
11267 case TYPE_ADCS_REG:
11268 case TYPE_ADR:
11269 case TYPE_BFM:
11270 case TYPE_REV:
11271 case TYPE_MVN_REG:
11272 case TYPE_SHIFT_IMM:
11273 case TYPE_SHIFT_REG:
11274 case TYPE_LOAD_BYTE:
11275 case TYPE_LOAD1:
11276 case TYPE_STORE1:
11277 case TYPE_FFARITHS:
11278 case TYPE_FADDS:
11279 case TYPE_FFARITHD:
11280 case TYPE_FADDD:
11281 case TYPE_FMOV:
11282 case TYPE_F_CVT:
11283 case TYPE_FCMPS:
11284 case TYPE_FCMPD:
11285 case TYPE_FCONSTS:
11286 case TYPE_FCONSTD:
11287 case TYPE_FMULS:
11288 case TYPE_FMACS:
11289 case TYPE_FMULD:
11290 case TYPE_FMACD:
11291 case TYPE_FDIVS:
11292 case TYPE_FDIVD:
11293 case TYPE_F_MRC:
11294 case TYPE_F_MRRC:
11295 case TYPE_F_FLAG:
11296 case TYPE_F_LOADS:
11297 case TYPE_F_STORES:
11298 return true;
11299 default:
11300 return false;
11304 /* Return true if and only if this insn can dual-issue as younger. */
11305 static bool
11306 cortexa7_younger (FILE *file, int verbose, rtx_insn *insn)
11308 if (recog_memoized (insn) < 0)
11310 if (verbose > 5)
11311 fprintf (file, ";; not cortexa7_younger %d\n", INSN_UID (insn));
11312 return false;
11315 switch (get_attr_type (insn))
11317 case TYPE_ALU_IMM:
11318 case TYPE_ALUS_IMM:
11319 case TYPE_LOGIC_IMM:
11320 case TYPE_LOGICS_IMM:
11321 case TYPE_EXTEND:
11322 case TYPE_MVN_IMM:
11323 case TYPE_MOV_IMM:
11324 case TYPE_MOV_REG:
11325 case TYPE_MOV_SHIFT:
11326 case TYPE_MOV_SHIFT_REG:
11327 case TYPE_BRANCH:
11328 case TYPE_CALL:
11329 return true;
11330 default:
11331 return false;
11336 /* Look for an instruction that can dual issue only as an older
11337 instruction, and move it in front of any instructions that can
11338 dual-issue as younger, while preserving the relative order of all
11339 other instructions in the ready list. This is a heuristic to help
11340 dual-issue in later cycles, by postponing issue of more flexible
11341 instructions. This heuristic may affect dual issue opportunities
11342 in the current cycle. */
11343 static void
11344 cortexa7_sched_reorder (FILE *file, int verbose, rtx_insn **ready,
11345 int *n_readyp, int clock)
11347 int i;
11348 int first_older_only = -1, first_younger = -1;
11350 if (verbose > 5)
11351 fprintf (file,
11352 ";; sched_reorder for cycle %d with %d insns in ready list\n",
11353 clock,
11354 *n_readyp);
11356 /* Traverse the ready list from the head (the instruction to issue
11357 first), looking for the first instruction that can issue as
11358 younger and the first instruction that can dual-issue only as
11359 older. */
11360 for (i = *n_readyp - 1; i >= 0; i--)
11362 rtx_insn *insn = ready[i];
11363 if (cortexa7_older_only (insn))
11365 first_older_only = i;
11366 if (verbose > 5)
11367 fprintf (file, ";; reorder older found %d\n", INSN_UID (insn));
11368 break;
11370 else if (cortexa7_younger (file, verbose, insn) && first_younger == -1)
11371 first_younger = i;
11374 /* Nothing to reorder because either no younger insn found or insn
11375 that can dual-issue only as older appears before any insn that
11376 can dual-issue as younger. */
11377 if (first_younger == -1)
11379 if (verbose > 5)
11380 fprintf (file, ";; sched_reorder nothing to reorder as no younger\n");
11381 return;
11384 /* Nothing to reorder because no older-only insn in the ready list. */
11385 if (first_older_only == -1)
11387 if (verbose > 5)
11388 fprintf (file, ";; sched_reorder nothing to reorder as no older_only\n");
11389 return;
11392 /* Move first_older_only insn before first_younger. */
11393 if (verbose > 5)
11394 fprintf (file, ";; cortexa7_sched_reorder insn %d before %d\n",
11395 INSN_UID(ready [first_older_only]),
11396 INSN_UID(ready [first_younger]));
11397 rtx_insn *first_older_only_insn = ready [first_older_only];
11398 for (i = first_older_only; i < first_younger; i++)
11400 ready[i] = ready[i+1];
11403 ready[i] = first_older_only_insn;
11404 return;
11407 /* Implement TARGET_SCHED_REORDER. */
11408 static int
11409 arm_sched_reorder (FILE *file, int verbose, rtx_insn **ready, int *n_readyp,
11410 int clock)
11412 switch (arm_tune)
11414 case TARGET_CPU_cortexa7:
11415 cortexa7_sched_reorder (file, verbose, ready, n_readyp, clock);
11416 break;
11417 default:
11418 /* Do nothing for other cores. */
11419 break;
11422 return arm_issue_rate ();
11425 /* This function implements the target macro TARGET_SCHED_ADJUST_COST.
11426 It corrects the value of COST based on the relationship between
11427 INSN and DEP through the dependence LINK. It returns the new
11428 value. There is a per-core adjust_cost hook to adjust scheduler costs
11429 and the per-core hook can choose to completely override the generic
11430 adjust_cost function. Only put bits of code into arm_adjust_cost that
11431 are common across all cores. */
11432 static int
11433 arm_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep, int cost,
11434 unsigned int)
11436 rtx i_pat, d_pat;
11438 /* When generating Thumb-1 code, we want to place flag-setting operations
11439 close to a conditional branch which depends on them, so that we can
11440 omit the comparison. */
11441 if (TARGET_THUMB1
11442 && dep_type == 0
11443 && recog_memoized (insn) == CODE_FOR_cbranchsi4_insn
11444 && recog_memoized (dep) >= 0
11445 && get_attr_conds (dep) == CONDS_SET)
11446 return 0;
11448 if (current_tune->sched_adjust_cost != NULL)
11450 if (!current_tune->sched_adjust_cost (insn, dep_type, dep, &cost))
11451 return cost;
11454 /* XXX Is this strictly true? */
11455 if (dep_type == REG_DEP_ANTI
11456 || dep_type == REG_DEP_OUTPUT)
11457 return 0;
11459 /* Call insns don't incur a stall, even if they follow a load. */
11460 if (dep_type == 0
11461 && CALL_P (insn))
11462 return 1;
11464 if ((i_pat = single_set (insn)) != NULL
11465 && MEM_P (SET_SRC (i_pat))
11466 && (d_pat = single_set (dep)) != NULL
11467 && MEM_P (SET_DEST (d_pat)))
11469 rtx src_mem = XEXP (SET_SRC (i_pat), 0);
12470 /* This is a load after a store; there is no conflict if the load reads
11471 from a cached area. Assume that loads from the stack, and from the
11472 constant pool are cached, and that others will miss. This is a
11473 hack. */
11475 if ((GET_CODE (src_mem) == SYMBOL_REF
11476 && CONSTANT_POOL_ADDRESS_P (src_mem))
11477 || reg_mentioned_p (stack_pointer_rtx, src_mem)
11478 || reg_mentioned_p (frame_pointer_rtx, src_mem)
11479 || reg_mentioned_p (hard_frame_pointer_rtx, src_mem))
11480 return 1;
11483 return cost;
11487 arm_max_conditional_execute (void)
11489 return max_insns_skipped;
11492 static int
11493 arm_default_branch_cost (bool speed_p, bool predictable_p ATTRIBUTE_UNUSED)
11495 if (TARGET_32BIT)
11496 return (TARGET_THUMB2 && !speed_p) ? 1 : 4;
11497 else
11498 return (optimize > 0) ? 2 : 0;
11501 static int
11502 arm_cortex_a5_branch_cost (bool speed_p, bool predictable_p)
11504 return speed_p ? 0 : arm_default_branch_cost (speed_p, predictable_p);
11507 /* Thumb-2 branches are relatively cheap on Cortex-M processors ("1 + P cycles"
11508 on Cortex-M4, where P varies from 1 to 3 according to some criteria), since
11509 sequences of non-executed instructions in IT blocks probably take the same
11510 amount of time as executed instructions (and the IT instruction itself takes
11511 space in icache). This function was experimentally determined to give good
11512 results on a popular embedded benchmark. */
11514 static int
11515 arm_cortex_m_branch_cost (bool speed_p, bool predictable_p)
11517 return (TARGET_32BIT && speed_p) ? 1
11518 : arm_default_branch_cost (speed_p, predictable_p);
11521 static int
11522 arm_cortex_m7_branch_cost (bool speed_p, bool predictable_p)
11524 return speed_p ? 0 : arm_default_branch_cost (speed_p, predictable_p);
11527 static bool fp_consts_inited = false;
11529 static REAL_VALUE_TYPE value_fp0;
11531 static void
11532 init_fp_table (void)
11534 REAL_VALUE_TYPE r;
11536 r = REAL_VALUE_ATOF ("0", DFmode);
11537 value_fp0 = r;
11538 fp_consts_inited = true;
11541 /* Return TRUE if rtx X is a valid immediate FP constant. */
11543 arm_const_double_rtx (rtx x)
11545 const REAL_VALUE_TYPE *r;
11547 if (!fp_consts_inited)
11548 init_fp_table ();
11550 r = CONST_DOUBLE_REAL_VALUE (x);
11551 if (REAL_VALUE_MINUS_ZERO (*r))
11552 return 0;
11554 if (real_equal (r, &value_fp0))
11555 return 1;
11557 return 0;
11560 /* VFPv3 has a fairly wide range of representable immediates, formed from
11561 "quarter-precision" floating-point values. These can be evaluated using this
11562 formula (with ^ for exponentiation):
11564 (-1)^s * n * 2^-r
11566 Where 's' is a sign bit (0/1), 'n' and 'r' are integers such that
11567 16 <= n <= 31 and 0 <= r <= 7.
11569 These values are mapped onto an 8-bit integer ABCDEFGH s.t.
11571 - A (most-significant) is the sign bit.
11572 - BCD are the exponent (encoded as r XOR 3).
11573 - EFGH are the mantissa (encoded as n - 16).
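/* Illustrative worked example (not part of the compiler): 1.0 satisfies the
   formula with s = 0, n = 16, r = 4 (16 * 2^-4 = 1.0), so its encoding is
   A = 0, BCD = 4 ^ 3 = 7, EFGH = 16 - 16 = 0, i.e. index 0x70 (112).
   A minimal decoder sketch for the 8-bit layout described above follows;
   it is not used anywhere and is excluded from the build.  */
#if 0
static double
vfp3_decode_quarter_precision_example (unsigned char abcdefgh)
{
  int s = (abcdefgh >> 7) & 1;	     /* A: sign bit.  */
  int r = ((abcdefgh >> 4) & 7) ^ 3; /* BCD: exponent, stored as r XOR 3.  */
  int n = (abcdefgh & 0xf) + 16;     /* EFGH: mantissa, stored as n - 16.  */
  return (s ? -1.0 : 1.0) * (double) n / (double) (1 << r);
}
#endif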
11576 /* Return an integer index for a VFPv3 immediate operand X suitable for the
11577 fconst[sd] instruction, or -1 if X isn't suitable. */
11578 static int
11579 vfp3_const_double_index (rtx x)
11581 REAL_VALUE_TYPE r, m;
11582 int sign, exponent;
11583 unsigned HOST_WIDE_INT mantissa, mant_hi;
11584 unsigned HOST_WIDE_INT mask;
11585 int point_pos = 2 * HOST_BITS_PER_WIDE_INT - 1;
11586 bool fail;
11588 if (!TARGET_VFP3 || !CONST_DOUBLE_P (x))
11589 return -1;
11591 r = *CONST_DOUBLE_REAL_VALUE (x);
11593 /* We can't represent these things, so detect them first. */
11594 if (REAL_VALUE_ISINF (r) || REAL_VALUE_ISNAN (r) || REAL_VALUE_MINUS_ZERO (r))
11595 return -1;
11597 /* Extract sign, exponent and mantissa. */
11598 sign = REAL_VALUE_NEGATIVE (r) ? 1 : 0;
11599 r = real_value_abs (&r);
11600 exponent = REAL_EXP (&r);
11601 /* For the mantissa, we expand into two HOST_WIDE_INTS, apart from the
11602 highest (sign) bit, with a fixed binary point at bit point_pos.
11603 WARNING: If there's ever a VFP version which uses more than 2 * H_W_I - 1
11604 bits for the mantissa, this may fail (low bits would be lost). */
11605 real_ldexp (&m, &r, point_pos - exponent);
11606 wide_int w = real_to_integer (&m, &fail, HOST_BITS_PER_WIDE_INT * 2);
11607 mantissa = w.elt (0);
11608 mant_hi = w.elt (1);
11610 /* If there are bits set in the low part of the mantissa, we can't
11611 represent this value. */
11612 if (mantissa != 0)
11613 return -1;
11615 /* Now make it so that mantissa contains the most-significant bits, and move
11616 the point_pos to indicate that the least-significant bits have been
11617 discarded. */
11618 point_pos -= HOST_BITS_PER_WIDE_INT;
11619 mantissa = mant_hi;
11621 /* We can permit four significant bits of mantissa only, plus a high bit
11622 which is always 1. */
11623 mask = (HOST_WIDE_INT_1U << (point_pos - 5)) - 1;
11624 if ((mantissa & mask) != 0)
11625 return -1;
11627 /* Now we know the mantissa is in range, chop off the unneeded bits. */
11628 mantissa >>= point_pos - 5;
11630 /* The mantissa may be zero. Disallow that case. (It's possible to load the
11631 floating-point immediate zero with Neon using an integer-zero load, but
11632 that case is handled elsewhere.) */
11633 if (mantissa == 0)
11634 return -1;
11636 gcc_assert (mantissa >= 16 && mantissa <= 31);
11638 /* The value of 5 here would be 4 if GCC used IEEE754-like encoding (where
11639 normalized significands are in the range [1, 2). (Our mantissa is shifted
11640 left 4 places at this point relative to normalized IEEE754 values). GCC
11641 internally uses [0.5, 1) (see real.c), so the exponent returned from
11642 REAL_EXP must be altered. */
11643 exponent = 5 - exponent;
11645 if (exponent < 0 || exponent > 7)
11646 return -1;
11648 /* Sign, mantissa and exponent are now in the correct form to plug into the
11649 formula described in the comment above. */
11650 return (sign << 7) | ((exponent ^ 3) << 4) | (mantissa - 16);
11653 /* Return TRUE if rtx X is a valid immediate VFPv3 constant. */
11655 vfp3_const_double_rtx (rtx x)
11657 if (!TARGET_VFP3)
11658 return 0;
11660 return vfp3_const_double_index (x) != -1;
11663 /* Recognize immediates which can be used in various Neon instructions. Legal
11664 immediates are described by the following table (for VMVN variants, the
11665 bitwise inverse of the constant shown is recognized. In either case, VMOV
11666 is output and the correct instruction to use for a given constant is chosen
11667 by the assembler). The constant shown is replicated across all elements of
11668 the destination vector.
11670 insn elems variant constant (binary)
11671 ---- ----- ------- -----------------
11672 vmov i32 0 00000000 00000000 00000000 abcdefgh
11673 vmov i32 1 00000000 00000000 abcdefgh 00000000
11674 vmov i32 2 00000000 abcdefgh 00000000 00000000
11675 vmov i32 3 abcdefgh 00000000 00000000 00000000
11676 vmov i16 4 00000000 abcdefgh
11677 vmov i16 5 abcdefgh 00000000
11678 vmvn i32 6 00000000 00000000 00000000 abcdefgh
11679 vmvn i32 7 00000000 00000000 abcdefgh 00000000
11680 vmvn i32 8 00000000 abcdefgh 00000000 00000000
11681 vmvn i32 9 abcdefgh 00000000 00000000 00000000
11682 vmvn i16 10 00000000 abcdefgh
11683 vmvn i16 11 abcdefgh 00000000
11684 vmov i32 12 00000000 00000000 abcdefgh 11111111
11685 vmvn i32 13 00000000 00000000 abcdefgh 11111111
11686 vmov i32 14 00000000 abcdefgh 11111111 11111111
11687 vmvn i32 15 00000000 abcdefgh 11111111 11111111
11688 vmov i8 16 abcdefgh
11689 vmov i64 17 aaaaaaaa bbbbbbbb cccccccc dddddddd
11690 eeeeeeee ffffffff gggggggg hhhhhhhh
11691 vmov f32 18 aBbbbbbc defgh000 00000000 00000000
11692 vmov f32 19 00000000 00000000 00000000 00000000
11694 For case 18, B = !b. Representable values are exactly those accepted by
11695 vfp3_const_double_index, but are output as floating-point numbers rather
11696 than indices.
11698 For case 19, we will change it to vmov.i32 when assembling.
11700 Variants 0-5 (inclusive) may also be used as immediates for the second
11701 operand of VORR/VBIC instructions.
11703 The INVERSE argument causes the bitwise inverse of the given operand to be
11704 recognized instead (used for recognizing legal immediates for the VAND/VORN
11705 pseudo-instructions). If INVERSE is true, the value placed in *MODCONST is
11706 *not* inverted (i.e. the pseudo-instruction forms vand/vorn should still be
11707 output, rather than the real insns vbic/vorr).
11709 INVERSE makes no difference to the recognition of float vectors.
11711 The return value is the variant of immediate as shown in the above table, or
11712 -1 if the given value doesn't match any of the listed patterns.
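/* Illustrative worked example (not part of the recognizer): a V4SImode
   constant whose four elements are all 0x000000ab splats, in little-endian
   byte order, to ab 00 00 00 repeated, so it matches variant 0 with an
   element width of 32.  Its bitwise inverse, elements of 0xffffff54, splats
   to 54 ff ff ff and matches the VMVN pattern of variant 6.  */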
11714 static int
11715 neon_valid_immediate (rtx op, machine_mode mode, int inverse,
11716 rtx *modconst, int *elementwidth)
11718 #define CHECK(STRIDE, ELSIZE, CLASS, TEST) \
11719 matches = 1; \
11720 for (i = 0; i < idx; i += (STRIDE)) \
11721 if (!(TEST)) \
11722 matches = 0; \
11723 if (matches) \
11725 immtype = (CLASS); \
11726 elsize = (ELSIZE); \
11727 break; \
11730 unsigned int i, elsize = 0, idx = 0, n_elts;
11731 unsigned int innersize;
11732 unsigned char bytes[16];
11733 int immtype = -1, matches;
11734 unsigned int invmask = inverse ? 0xff : 0;
11735 bool vector = GET_CODE (op) == CONST_VECTOR;
11737 if (vector)
11738 n_elts = CONST_VECTOR_NUNITS (op);
11739 else
11741 n_elts = 1;
11742 if (mode == VOIDmode)
11743 mode = DImode;
11746 innersize = GET_MODE_UNIT_SIZE (mode);
11748 /* Vectors of float constants. */
11749 if (GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
11751 rtx el0 = CONST_VECTOR_ELT (op, 0);
11753 if (!vfp3_const_double_rtx (el0) && el0 != CONST0_RTX (GET_MODE (el0)))
11754 return -1;
11756 /* FP16 vectors cannot be represented. */
11757 if (GET_MODE_INNER (mode) == HFmode)
11758 return -1;
11760 /* All elements in the vector must be the same. Note that 0.0 and -0.0
11761 are distinct in this context. */
11762 if (!const_vec_duplicate_p (op))
11763 return -1;
11765 if (modconst)
11766 *modconst = CONST_VECTOR_ELT (op, 0);
11768 if (elementwidth)
11769 *elementwidth = 0;
11771 if (el0 == CONST0_RTX (GET_MODE (el0)))
11772 return 19;
11773 else
11774 return 18;
11777 /* The tricks done in the code below apply for little-endian vector layout.
11778 For big-endian vectors only allow vectors of the form { a, a, a..., a }.
11779 FIXME: Implement logic for big-endian vectors. */
11780 if (BYTES_BIG_ENDIAN && vector && !const_vec_duplicate_p (op))
11781 return -1;
11783 /* Splat vector constant out into a byte vector. */
11784 for (i = 0; i < n_elts; i++)
11786 rtx el = vector ? CONST_VECTOR_ELT (op, i) : op;
11787 unsigned HOST_WIDE_INT elpart;
11789 gcc_assert (CONST_INT_P (el));
11790 elpart = INTVAL (el);
11792 for (unsigned int byte = 0; byte < innersize; byte++)
11794 bytes[idx++] = (elpart & 0xff) ^ invmask;
11795 elpart >>= BITS_PER_UNIT;
11799 /* Sanity check. */
11800 gcc_assert (idx == GET_MODE_SIZE (mode));
11804 CHECK (4, 32, 0, bytes[i] == bytes[0] && bytes[i + 1] == 0
11805 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
11807 CHECK (4, 32, 1, bytes[i] == 0 && bytes[i + 1] == bytes[1]
11808 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
11810 CHECK (4, 32, 2, bytes[i] == 0 && bytes[i + 1] == 0
11811 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);
11813 CHECK (4, 32, 3, bytes[i] == 0 && bytes[i + 1] == 0
11814 && bytes[i + 2] == 0 && bytes[i + 3] == bytes[3]);
11816 CHECK (2, 16, 4, bytes[i] == bytes[0] && bytes[i + 1] == 0);
11818 CHECK (2, 16, 5, bytes[i] == 0 && bytes[i + 1] == bytes[1]);
11820 CHECK (4, 32, 6, bytes[i] == bytes[0] && bytes[i + 1] == 0xff
11821 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
11823 CHECK (4, 32, 7, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
11824 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
11826 CHECK (4, 32, 8, bytes[i] == 0xff && bytes[i + 1] == 0xff
11827 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);
11829 CHECK (4, 32, 9, bytes[i] == 0xff && bytes[i + 1] == 0xff
11830 && bytes[i + 2] == 0xff && bytes[i + 3] == bytes[3]);
11832 CHECK (2, 16, 10, bytes[i] == bytes[0] && bytes[i + 1] == 0xff);
11834 CHECK (2, 16, 11, bytes[i] == 0xff && bytes[i + 1] == bytes[1]);
11836 CHECK (4, 32, 12, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
11837 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
11839 CHECK (4, 32, 13, bytes[i] == 0 && bytes[i + 1] == bytes[1]
11840 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
11842 CHECK (4, 32, 14, bytes[i] == 0xff && bytes[i + 1] == 0xff
11843 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);
11845 CHECK (4, 32, 15, bytes[i] == 0 && bytes[i + 1] == 0
11846 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);
11848 CHECK (1, 8, 16, bytes[i] == bytes[0]);
11850 CHECK (1, 64, 17, (bytes[i] == 0 || bytes[i] == 0xff)
11851 && bytes[i] == bytes[(i + 8) % idx]);
11853 while (0);
11855 if (immtype == -1)
11856 return -1;
11858 if (elementwidth)
11859 *elementwidth = elsize;
11861 if (modconst)
11863 unsigned HOST_WIDE_INT imm = 0;
11865 /* Un-invert bytes of recognized vector, if necessary. */
11866 if (invmask != 0)
11867 for (i = 0; i < idx; i++)
11868 bytes[i] ^= invmask;
11870 if (immtype == 17)
11872 /* FIXME: Broken on 32-bit H_W_I hosts. */
11873 gcc_assert (sizeof (HOST_WIDE_INT) == 8);
11875 for (i = 0; i < 8; i++)
11876 imm |= (unsigned HOST_WIDE_INT) (bytes[i] ? 0xff : 0)
11877 << (i * BITS_PER_UNIT);
11879 *modconst = GEN_INT (imm);
11881 else
11883 unsigned HOST_WIDE_INT imm = 0;
11885 for (i = 0; i < elsize / BITS_PER_UNIT; i++)
11886 imm |= (unsigned HOST_WIDE_INT) bytes[i] << (i * BITS_PER_UNIT);
11888 *modconst = GEN_INT (imm);
11892 return immtype;
11893 #undef CHECK
11896 /* Return TRUE if rtx X is legal for use as either a Neon VMOV (or, implicitly,
11897 VMVN) immediate. Write back width per element to *ELEMENTWIDTH (or zero for
11898 float elements), and a modified constant (whatever should be output for a
11899 VMOV) in *MODCONST. */
11902 neon_immediate_valid_for_move (rtx op, machine_mode mode,
11903 rtx *modconst, int *elementwidth)
11905 rtx tmpconst;
11906 int tmpwidth;
11907 int retval = neon_valid_immediate (op, mode, 0, &tmpconst, &tmpwidth);
11909 if (retval == -1)
11910 return 0;
11912 if (modconst)
11913 *modconst = tmpconst;
11915 if (elementwidth)
11916 *elementwidth = tmpwidth;
11918 return 1;
11921 /* Return TRUE if rtx X is legal for use in a VORR or VBIC instruction. If
11922 the immediate is valid, write a constant suitable for use as an operand
11923 to VORR/VBIC/VAND/VORN to *MODCONST and the corresponding element width to
11924 *ELEMENTWIDTH. See neon_valid_immediate for description of INVERSE. */
11927 neon_immediate_valid_for_logic (rtx op, machine_mode mode, int inverse,
11928 rtx *modconst, int *elementwidth)
11930 rtx tmpconst;
11931 int tmpwidth;
11932 int retval = neon_valid_immediate (op, mode, inverse, &tmpconst, &tmpwidth);
11934 if (retval < 0 || retval > 5)
11935 return 0;
11937 if (modconst)
11938 *modconst = tmpconst;
11940 if (elementwidth)
11941 *elementwidth = tmpwidth;
11943 return 1;
11946 /* Return TRUE if rtx OP is legal for use in a VSHR or VSHL instruction. If
11947 the immediate is valid, write a constant suitable for use as an operand
11948 to VSHR/VSHL to *MODCONST and the corresponding element width to
11949 *ELEMENTWIDTH. ISLEFTSHIFT selects a left shift rather than a right shift,
11950 because they have different limitations. */
11953 neon_immediate_valid_for_shift (rtx op, machine_mode mode,
11954 rtx *modconst, int *elementwidth,
11955 bool isleftshift)
11957 unsigned int innersize = GET_MODE_UNIT_SIZE (mode);
11958 unsigned int n_elts = CONST_VECTOR_NUNITS (op), i;
11959 unsigned HOST_WIDE_INT last_elt = 0;
11960 unsigned HOST_WIDE_INT maxshift;
11962 /* All elements of the vector constant must be identical; extract that common shift amount. */
11963 for (i = 0; i < n_elts; i++)
11965 rtx el = CONST_VECTOR_ELT (op, i);
11966 unsigned HOST_WIDE_INT elpart;
11968 if (CONST_INT_P (el))
11969 elpart = INTVAL (el);
11970 else if (CONST_DOUBLE_P (el))
11971 return 0;
11972 else
11973 gcc_unreachable ();
11975 if (i != 0 && elpart != last_elt)
11976 return 0;
11978 last_elt = elpart;
11981 /* Shift less than element size. */
11982 maxshift = innersize * 8;
11984 if (isleftshift)
11986 /* Left shift immediate value can be from 0 to <size>-1. */
11987 if (last_elt >= maxshift)
11988 return 0;
11990 else
11992 /* Right shift immediate value can be from 1 to <size>. */
11993 if (last_elt == 0 || last_elt > maxshift)
11994 return 0;
11997 if (elementwidth)
11998 *elementwidth = innersize * 8;
12000 if (modconst)
12001 *modconst = CONST_VECTOR_ELT (op, 0);
12003 return 1;
12006 /* Return a string suitable for output of Neon immediate logic operation
12007 MNEM. */
12009 char *
12010 neon_output_logic_immediate (const char *mnem, rtx *op2, machine_mode mode,
12011 int inverse, int quad)
12013 int width, is_valid;
12014 static char templ[40];
12016 is_valid = neon_immediate_valid_for_logic (*op2, mode, inverse, op2, &width);
12018 gcc_assert (is_valid != 0);
12020 if (quad)
12021 sprintf (templ, "%s.i%d\t%%q0, %%2", mnem, width);
12022 else
12023 sprintf (templ, "%s.i%d\t%%P0, %%2", mnem, width);
12025 return templ;
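/* For example (illustrative), with a quad destination and an immediate whose
   recognized element width is 32, the template built above for MNEM "vorr"
   is "vorr.i32\t%q0, %2"; the operand references are filled in later by the
   output machinery.  */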
12028 /* Return a string suitable for output of Neon immediate shift operation
12029 (VSHR or VSHL) MNEM. */
12031 char *
12032 neon_output_shift_immediate (const char *mnem, char sign, rtx *op2,
12033 machine_mode mode, int quad,
12034 bool isleftshift)
12036 int width, is_valid;
12037 static char templ[40];
12039 is_valid = neon_immediate_valid_for_shift (*op2, mode, op2, &width, isleftshift);
12040 gcc_assert (is_valid != 0);
12042 if (quad)
12043 sprintf (templ, "%s.%c%d\t%%q0, %%q1, %%2", mnem, sign, width);
12044 else
12045 sprintf (templ, "%s.%c%d\t%%P0, %%P1, %%2", mnem, sign, width);
12047 return templ;
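/* For example (illustrative), a signed right shift on a quad register with a
   recognized element width of 32 produces the template
   "vshr.s32\t%q0, %q1, %2".  */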
12050 /* Output a sequence of pairwise operations to implement a reduction.
12051 NOTE: We do "too much work" here, because pairwise operations work on two
12052 registers' worth of operands in one go. Unfortunately we can't exploit those
12053 extra calculations to do the full operation in fewer steps, as far as I can tell.
12054 Although all vector elements of the result but the first are ignored, we
12055 actually calculate the same result in each of the elements. An alternative
12056 such as initially loading a vector with zero to use as each of the second
12057 operands would use up an additional register and take an extra instruction,
12058 for no particular gain. */
12060 void
12061 neon_pairwise_reduce (rtx op0, rtx op1, machine_mode mode,
12062 rtx (*reduc) (rtx, rtx, rtx))
12064 unsigned int i, parts = GET_MODE_SIZE (mode) / GET_MODE_UNIT_SIZE (mode);
12065 rtx tmpsum = op1;
12067 for (i = parts / 2; i >= 1; i /= 2)
12069 rtx dest = (i == 1) ? op0 : gen_reg_rtx (mode);
12070 emit_insn (reduc (dest, tmpsum, tmpsum));
12071 tmpsum = dest;
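/* As an illustration, for a V4SFmode reduction PARTS is 4, so the loop above
   emits two pairwise operations: the first into a fresh register, and the
   second (when I reaches 1) directly into OP0.  */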
12075 /* If VALS is a vector constant that can be loaded into a register
12076 using VDUP, generate instructions to do so and return an RTX to
12077 assign to the register. Otherwise return NULL_RTX. */
12079 static rtx
12080 neon_vdup_constant (rtx vals)
12082 machine_mode mode = GET_MODE (vals);
12083 machine_mode inner_mode = GET_MODE_INNER (mode);
12084 rtx x;
12086 if (GET_CODE (vals) != CONST_VECTOR || GET_MODE_SIZE (inner_mode) > 4)
12087 return NULL_RTX;
12089 if (!const_vec_duplicate_p (vals, &x))
12090 /* The elements are not all the same. We could handle repeating
12091 patterns of a mode larger than INNER_MODE here (e.g. int8x8_t
12092 {0, C, 0, C, 0, C, 0, C} which can be loaded using
12093 vdup.i16). */
12094 return NULL_RTX;
12096 /* We can load this constant by using VDUP and a constant in a
12097 single ARM register. This will be cheaper than a vector
12098 load. */
12100 x = copy_to_mode_reg (inner_mode, x);
12101 return gen_rtx_VEC_DUPLICATE (mode, x);
12104 /* Generate code to load VALS, which is a PARALLEL containing only
12105 constants (for vec_init) or CONST_VECTOR, efficiently into a
12106 register. Returns an RTX to copy into the register, or NULL_RTX
12107 for a PARALLEL that cannot be converted into a CONST_VECTOR. */
12110 neon_make_constant (rtx vals)
12112 machine_mode mode = GET_MODE (vals);
12113 rtx target;
12114 rtx const_vec = NULL_RTX;
12115 int n_elts = GET_MODE_NUNITS (mode);
12116 int n_const = 0;
12117 int i;
12119 if (GET_CODE (vals) == CONST_VECTOR)
12120 const_vec = vals;
12121 else if (GET_CODE (vals) == PARALLEL)
12123 /* A CONST_VECTOR must contain only CONST_INTs and
12124 CONST_DOUBLEs, but CONSTANT_P allows more (e.g. SYMBOL_REF).
12125 Only store valid constants in a CONST_VECTOR. */
12126 for (i = 0; i < n_elts; ++i)
12128 rtx x = XVECEXP (vals, 0, i);
12129 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
12130 n_const++;
12132 if (n_const == n_elts)
12133 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
12135 else
12136 gcc_unreachable ();
12138 if (const_vec != NULL
12139 && neon_immediate_valid_for_move (const_vec, mode, NULL, NULL))
12140 /* Load using VMOV. On Cortex-A8 this takes one cycle. */
12141 return const_vec;
12142 else if ((target = neon_vdup_constant (vals)) != NULL_RTX)
12143 /* Loaded using VDUP. On Cortex-A8 the VDUP takes one NEON
12144 pipeline cycle; creating the constant takes one or two ARM
12145 pipeline cycles. */
12146 return target;
12147 else if (const_vec != NULL_RTX)
12148 /* Load from constant pool. On Cortex-A8 this takes two cycles
12149 (for either double or quad vectors). We cannot take advantage
12150 of single-cycle VLD1 because we need a PC-relative addressing
12151 mode. */
12152 return const_vec;
12153 else
12154 /* A PARALLEL containing something not valid inside CONST_VECTOR.
12155 We cannot construct an initializer. */
12156 return NULL_RTX;
12159 /* Initialize vector TARGET to VALS. */
12161 void
12162 neon_expand_vector_init (rtx target, rtx vals)
12164 machine_mode mode = GET_MODE (target);
12165 machine_mode inner_mode = GET_MODE_INNER (mode);
12166 int n_elts = GET_MODE_NUNITS (mode);
12167 int n_var = 0, one_var = -1;
12168 bool all_same = true;
12169 rtx x, mem;
12170 int i;
12172 for (i = 0; i < n_elts; ++i)
12174 x = XVECEXP (vals, 0, i);
12175 if (!CONSTANT_P (x))
12176 ++n_var, one_var = i;
12178 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
12179 all_same = false;
12182 if (n_var == 0)
12184 rtx constant = neon_make_constant (vals);
12185 if (constant != NULL_RTX)
12187 emit_move_insn (target, constant);
12188 return;
12192 /* Splat a single non-constant element if we can. */
12193 if (all_same && GET_MODE_SIZE (inner_mode) <= 4)
12195 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
12196 emit_insn (gen_rtx_SET (target, gen_rtx_VEC_DUPLICATE (mode, x)));
12197 return;
12200 /* One field is non-constant. Load constant then overwrite varying
12201 field. This is more efficient than using the stack. */
12202 if (n_var == 1)
12204 rtx copy = copy_rtx (vals);
12205 rtx index = GEN_INT (one_var);
12207 /* Load constant part of vector, substitute neighboring value for
12208 varying element. */
12209 XVECEXP (copy, 0, one_var) = XVECEXP (vals, 0, (one_var + 1) % n_elts);
12210 neon_expand_vector_init (target, copy);
12212 /* Insert variable. */
12213 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, one_var));
12214 switch (mode)
12216 case V8QImode:
12217 emit_insn (gen_neon_vset_lanev8qi (target, x, target, index));
12218 break;
12219 case V16QImode:
12220 emit_insn (gen_neon_vset_lanev16qi (target, x, target, index));
12221 break;
12222 case V4HImode:
12223 emit_insn (gen_neon_vset_lanev4hi (target, x, target, index));
12224 break;
12225 case V8HImode:
12226 emit_insn (gen_neon_vset_lanev8hi (target, x, target, index));
12227 break;
12228 case V2SImode:
12229 emit_insn (gen_neon_vset_lanev2si (target, x, target, index));
12230 break;
12231 case V4SImode:
12232 emit_insn (gen_neon_vset_lanev4si (target, x, target, index));
12233 break;
12234 case V2SFmode:
12235 emit_insn (gen_neon_vset_lanev2sf (target, x, target, index));
12236 break;
12237 case V4SFmode:
12238 emit_insn (gen_neon_vset_lanev4sf (target, x, target, index));
12239 break;
12240 case V2DImode:
12241 emit_insn (gen_neon_vset_lanev2di (target, x, target, index));
12242 break;
12243 default:
12244 gcc_unreachable ();
12246 return;
12249 /* Construct the vector in memory one field at a time
12250 and load the whole vector. */
12251 mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
12252 for (i = 0; i < n_elts; i++)
12253 emit_move_insn (adjust_address_nv (mem, inner_mode,
12254 i * GET_MODE_SIZE (inner_mode)),
12255 XVECEXP (vals, 0, i));
12256 emit_move_insn (target, mem);
12259 /* Ensure OPERAND lies between LOW (inclusive) and HIGH (exclusive). Raise
12260 an error if it doesn't. EXP indicates the source location, which includes the
12261 inlining history for intrinsics. */
12263 static void
12264 bounds_check (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high,
12265 const_tree exp, const char *desc)
12267 HOST_WIDE_INT lane;
12269 gcc_assert (CONST_INT_P (operand));
12271 lane = INTVAL (operand);
12273 if (lane < low || lane >= high)
12275 if (exp)
12276 error ("%K%s %wd out of range %wd - %wd",
12277 exp, desc, lane, low, high - 1);
12278 else
12279 error ("%s %wd out of range %wd - %wd", desc, lane, low, high - 1);
12283 /* Bounds-check lanes. */
12285 void
12286 neon_lane_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high,
12287 const_tree exp)
12289 bounds_check (operand, low, high, exp, "lane");
12292 /* Bounds-check constants. */
12294 void
12295 arm_const_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
12297 bounds_check (operand, low, high, NULL_TREE, "constant");
12300 HOST_WIDE_INT
12301 neon_element_bits (machine_mode mode)
12303 return GET_MODE_UNIT_BITSIZE (mode);
12307 /* Predicates for `match_operand' and `match_operator'. */
12309 /* Return TRUE if OP is a valid coprocessor memory address pattern.
12310 WB is true if full writeback address modes are allowed and is false
12311 if limited writeback address modes (POST_INC and PRE_DEC) are
12312 allowed. */
12315 arm_coproc_mem_operand (rtx op, bool wb)
12317 rtx ind;
12319 /* Reject eliminable registers. */
12320 if (! (reload_in_progress || reload_completed || lra_in_progress)
12321 && ( reg_mentioned_p (frame_pointer_rtx, op)
12322 || reg_mentioned_p (arg_pointer_rtx, op)
12323 || reg_mentioned_p (virtual_incoming_args_rtx, op)
12324 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
12325 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
12326 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
12327 return FALSE;
12329 /* Constants are converted into offsets from labels. */
12330 if (!MEM_P (op))
12331 return FALSE;
12333 ind = XEXP (op, 0);
12335 if (reload_completed
12336 && (GET_CODE (ind) == LABEL_REF
12337 || (GET_CODE (ind) == CONST
12338 && GET_CODE (XEXP (ind, 0)) == PLUS
12339 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
12340 && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
12341 return TRUE;
12343 /* Match: (mem (reg)). */
12344 if (REG_P (ind))
12345 return arm_address_register_rtx_p (ind, 0);
12347 /* Autoincrement addressing modes. POST_INC and PRE_DEC are
12348 acceptable in any case (subject to verification by
12349 arm_address_register_rtx_p). We need WB to be true to accept
12350 PRE_INC and POST_DEC. */
12351 if (GET_CODE (ind) == POST_INC
12352 || GET_CODE (ind) == PRE_DEC
12353 || (wb
12354 && (GET_CODE (ind) == PRE_INC
12355 || GET_CODE (ind) == POST_DEC)))
12356 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
12358 if (wb
12359 && (GET_CODE (ind) == POST_MODIFY || GET_CODE (ind) == PRE_MODIFY)
12360 && arm_address_register_rtx_p (XEXP (ind, 0), 0)
12361 && GET_CODE (XEXP (ind, 1)) == PLUS
12362 && rtx_equal_p (XEXP (XEXP (ind, 1), 0), XEXP (ind, 0)))
12363 ind = XEXP (ind, 1);
12365 /* Match:
12366 (plus (reg)
12367 (const)). */
12368 if (GET_CODE (ind) == PLUS
12369 && REG_P (XEXP (ind, 0))
12370 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
12371 && CONST_INT_P (XEXP (ind, 1))
12372 && INTVAL (XEXP (ind, 1)) > -1024
12373 && INTVAL (XEXP (ind, 1)) < 1024
12374 && (INTVAL (XEXP (ind, 1)) & 3) == 0)
12375 return TRUE;
12377 return FALSE;
12380 /* Return TRUE if OP is a memory operand which we can load or store a vector
12381 to/from. TYPE is one of the following values:
12382 0 - Vector load/store (vldr)
12383 1 - Core registers (ldm)
12384 2 - Element/structure loads (vld1)
12387 neon_vector_mem_operand (rtx op, int type, bool strict)
12389 rtx ind;
12391 /* Reject eliminable registers. */
12392 if (strict && ! (reload_in_progress || reload_completed)
12393 && (reg_mentioned_p (frame_pointer_rtx, op)
12394 || reg_mentioned_p (arg_pointer_rtx, op)
12395 || reg_mentioned_p (virtual_incoming_args_rtx, op)
12396 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
12397 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
12398 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
12399 return FALSE;
12401 /* Constants are converted into offsets from labels. */
12402 if (!MEM_P (op))
12403 return FALSE;
12405 ind = XEXP (op, 0);
12407 if (reload_completed
12408 && (GET_CODE (ind) == LABEL_REF
12409 || (GET_CODE (ind) == CONST
12410 && GET_CODE (XEXP (ind, 0)) == PLUS
12411 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
12412 && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
12413 return TRUE;
12415 /* Match: (mem (reg)). */
12416 if (REG_P (ind))
12417 return arm_address_register_rtx_p (ind, 0);
12419 /* Allow post-increment with Neon registers. */
12420 if ((type != 1 && GET_CODE (ind) == POST_INC)
12421 || (type == 0 && GET_CODE (ind) == PRE_DEC))
12422 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
12424 /* Allow post-increment by register for VLDn. */
12425 if (type == 2 && GET_CODE (ind) == POST_MODIFY
12426 && GET_CODE (XEXP (ind, 1)) == PLUS
12427 && REG_P (XEXP (XEXP (ind, 1), 1)))
12428 return true;
12430 /* Match:
12431 (plus (reg)
12432 (const)). */
12433 if (type == 0
12434 && GET_CODE (ind) == PLUS
12435 && REG_P (XEXP (ind, 0))
12436 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
12437 && CONST_INT_P (XEXP (ind, 1))
12438 && INTVAL (XEXP (ind, 1)) > -1024
12439 /* For quad modes, we restrict the constant offset to be slightly less
12440 than what the instruction format permits. We have no such constraint
12441 on double mode offsets. (This must match arm_legitimate_index_p.) */
12442 && (INTVAL (XEXP (ind, 1))
12443 < (VALID_NEON_QREG_MODE (GET_MODE (op))? 1016 : 1024))
12444 && (INTVAL (XEXP (ind, 1)) & 3) == 0)
12445 return TRUE;
12447 return FALSE;
12450 /* Return TRUE if OP is a mem suitable for loading/storing a Neon struct
12451 type. */
12453 neon_struct_mem_operand (rtx op)
12455 rtx ind;
12457 /* Reject eliminable registers. */
12458 if (! (reload_in_progress || reload_completed)
12459 && ( reg_mentioned_p (frame_pointer_rtx, op)
12460 || reg_mentioned_p (arg_pointer_rtx, op)
12461 || reg_mentioned_p (virtual_incoming_args_rtx, op)
12462 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
12463 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
12464 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
12465 return FALSE;
12467 /* Constants are converted into offsets from labels. */
12468 if (!MEM_P (op))
12469 return FALSE;
12471 ind = XEXP (op, 0);
12473 if (reload_completed
12474 && (GET_CODE (ind) == LABEL_REF
12475 || (GET_CODE (ind) == CONST
12476 && GET_CODE (XEXP (ind, 0)) == PLUS
12477 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
12478 && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
12479 return TRUE;
12481 /* Match: (mem (reg)). */
12482 if (REG_P (ind))
12483 return arm_address_register_rtx_p (ind, 0);
12485 /* vldm/vstm allows POST_INC (ia) and PRE_DEC (db). */
12486 if (GET_CODE (ind) == POST_INC
12487 || GET_CODE (ind) == PRE_DEC)
12488 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
12490 return FALSE;
12493 /* Return true if X is a register that will be eliminated later on. */
12495 arm_eliminable_register (rtx x)
12497 return REG_P (x) && (REGNO (x) == FRAME_POINTER_REGNUM
12498 || REGNO (x) == ARG_POINTER_REGNUM
12499 || (REGNO (x) >= FIRST_VIRTUAL_REGISTER
12500 && REGNO (x) <= LAST_VIRTUAL_REGISTER));
12503 /* Return GENERAL_REGS if a scratch register is required to reload x to/from
12504 coprocessor registers. Otherwise return NO_REGS. */
12506 enum reg_class
12507 coproc_secondary_reload_class (machine_mode mode, rtx x, bool wb)
12509 if (mode == HFmode)
12511 if (!TARGET_NEON_FP16 && !TARGET_VFP_FP16INST)
12512 return GENERAL_REGS;
12513 if (s_register_operand (x, mode) || neon_vector_mem_operand (x, 2, true))
12514 return NO_REGS;
12515 return GENERAL_REGS;
12518 /* The neon move patterns handle all legitimate vector and struct
12519 addresses. */
12520 if (TARGET_NEON
12521 && (MEM_P (x) || GET_CODE (x) == CONST_VECTOR)
12522 && (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
12523 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT
12524 || VALID_NEON_STRUCT_MODE (mode)))
12525 return NO_REGS;
12527 if (arm_coproc_mem_operand (x, wb) || s_register_operand (x, mode))
12528 return NO_REGS;
12530 return GENERAL_REGS;
12533 /* Values which must be returned in the most-significant end of the return
12534 register. */
12536 static bool
12537 arm_return_in_msb (const_tree valtype)
12539 return (TARGET_AAPCS_BASED
12540 && BYTES_BIG_ENDIAN
12541 && (AGGREGATE_TYPE_P (valtype)
12542 || TREE_CODE (valtype) == COMPLEX_TYPE
12543 || FIXED_POINT_TYPE_P (valtype)));
12546 /* Return TRUE if X references a SYMBOL_REF. */
12548 symbol_mentioned_p (rtx x)
12550 const char * fmt;
12551 int i;
12553 if (GET_CODE (x) == SYMBOL_REF)
12554 return 1;
12556 /* UNSPEC_TLS entries for a symbol include the SYMBOL_REF, but they
12557 are constant offsets, not symbols. */
12558 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
12559 return 0;
12561 fmt = GET_RTX_FORMAT (GET_CODE (x));
12563 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
12565 if (fmt[i] == 'E')
12567 int j;
12569 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
12570 if (symbol_mentioned_p (XVECEXP (x, i, j)))
12571 return 1;
12573 else if (fmt[i] == 'e' && symbol_mentioned_p (XEXP (x, i)))
12574 return 1;
12577 return 0;
12580 /* Return TRUE if X references a LABEL_REF. */
12582 label_mentioned_p (rtx x)
12584 const char * fmt;
12585 int i;
12587 if (GET_CODE (x) == LABEL_REF)
12588 return 1;
12590 /* UNSPEC_TLS entries for a symbol include a LABEL_REF for the referencing
12591 instruction, but they are constant offsets, not symbols. */
12592 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
12593 return 0;
12595 fmt = GET_RTX_FORMAT (GET_CODE (x));
12596 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
12598 if (fmt[i] == 'E')
12600 int j;
12602 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
12603 if (label_mentioned_p (XVECEXP (x, i, j)))
12604 return 1;
12606 else if (fmt[i] == 'e' && label_mentioned_p (XEXP (x, i)))
12607 return 1;
12610 return 0;
12614 tls_mentioned_p (rtx x)
12616 switch (GET_CODE (x))
12618 case CONST:
12619 return tls_mentioned_p (XEXP (x, 0));
12621 case UNSPEC:
12622 if (XINT (x, 1) == UNSPEC_TLS)
12623 return 1;
12625 /* Fall through. */
12626 default:
12627 return 0;
12631 /* Must not copy any rtx that uses a pc-relative address.
12632 Also, disallow copying of load-exclusive instructions that
12633 may appear after splitting of compare-and-swap-style operations
12634 so as to prevent those loops from being transformed away from their
12635 canonical forms (see PR 69904). */
12637 static bool
12638 arm_cannot_copy_insn_p (rtx_insn *insn)
12640 /* The tls call insn cannot be copied, as it is paired with a data
12641 word. */
12642 if (recog_memoized (insn) == CODE_FOR_tlscall)
12643 return true;
12645 subrtx_iterator::array_type array;
12646 FOR_EACH_SUBRTX (iter, array, PATTERN (insn), ALL)
12648 const_rtx x = *iter;
12649 if (GET_CODE (x) == UNSPEC
12650 && (XINT (x, 1) == UNSPEC_PIC_BASE
12651 || XINT (x, 1) == UNSPEC_PIC_UNIFIED))
12652 return true;
12655 rtx set = single_set (insn);
12656 if (set)
12658 rtx src = SET_SRC (set);
12659 if (GET_CODE (src) == ZERO_EXTEND)
12660 src = XEXP (src, 0);
12662 /* Catch the load-exclusive and load-acquire operations. */
12663 if (GET_CODE (src) == UNSPEC_VOLATILE
12664 && (XINT (src, 1) == VUNSPEC_LL
12665 || XINT (src, 1) == VUNSPEC_LAX))
12666 return true;
12668 return false;
12671 enum rtx_code
12672 minmax_code (rtx x)
12674 enum rtx_code code = GET_CODE (x);
12676 switch (code)
12678 case SMAX:
12679 return GE;
12680 case SMIN:
12681 return LE;
12682 case UMIN:
12683 return LEU;
12684 case UMAX:
12685 return GEU;
12686 default:
12687 gcc_unreachable ();
12691 /* Match a pair of min/max operators that can be implemented via usat/ssat. */
12693 bool
12694 arm_sat_operator_match (rtx lo_bound, rtx hi_bound,
12695 int *mask, bool *signed_sat)
12697 /* The high bound must be a power of two minus one. */
12698 int log = exact_log2 (INTVAL (hi_bound) + 1);
12699 if (log == -1)
12700 return false;
12702 /* The low bound is either zero (for usat) or one less than the
12703 negation of the high bound (for ssat). */
12704 if (INTVAL (lo_bound) == 0)
12706 if (mask)
12707 *mask = log;
12708 if (signed_sat)
12709 *signed_sat = false;
12711 return true;
12714 if (INTVAL (lo_bound) == -INTVAL (hi_bound) - 1)
12716 if (mask)
12717 *mask = log + 1;
12718 if (signed_sat)
12719 *signed_sat = true;
12721 return true;
12724 return false;
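/* Illustrative examples (assumptions about typical uses, not taken from
   elsewhere in this file): the bounds [0, 255] match the usat form with
   *MASK == 8, since 255 == 2^8 - 1 and the low bound is zero; the bounds
   [-128, 127] match the ssat form with *MASK == 8, since 127 == 2^7 - 1
   and -128 == -127 - 1, so LOG == 7 and *MASK == LOG + 1.  */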
12727 /* Return 1 if memory locations are adjacent. */
12729 adjacent_mem_locations (rtx a, rtx b)
12731 /* We don't guarantee to preserve the order of these memory refs. */
12732 if (volatile_refs_p (a) || volatile_refs_p (b))
12733 return 0;
12735 if ((REG_P (XEXP (a, 0))
12736 || (GET_CODE (XEXP (a, 0)) == PLUS
12737 && CONST_INT_P (XEXP (XEXP (a, 0), 1))))
12738 && (REG_P (XEXP (b, 0))
12739 || (GET_CODE (XEXP (b, 0)) == PLUS
12740 && CONST_INT_P (XEXP (XEXP (b, 0), 1)))))
12742 HOST_WIDE_INT val0 = 0, val1 = 0;
12743 rtx reg0, reg1;
12744 int val_diff;
12746 if (GET_CODE (XEXP (a, 0)) == PLUS)
12748 reg0 = XEXP (XEXP (a, 0), 0);
12749 val0 = INTVAL (XEXP (XEXP (a, 0), 1));
12751 else
12752 reg0 = XEXP (a, 0);
12754 if (GET_CODE (XEXP (b, 0)) == PLUS)
12756 reg1 = XEXP (XEXP (b, 0), 0);
12757 val1 = INTVAL (XEXP (XEXP (b, 0), 1));
12759 else
12760 reg1 = XEXP (b, 0);
12762 /* Don't accept any offset that will require multiple
12763 instructions to handle, since this would cause the
12764 arith_adjacentmem pattern to output an overlong sequence. */
12765 if (!const_ok_for_op (val0, PLUS) || !const_ok_for_op (val1, PLUS))
12766 return 0;
12768 /* Don't allow an eliminable register: register elimination can make
12769 the offset too large. */
12770 if (arm_eliminable_register (reg0))
12771 return 0;
12773 val_diff = val1 - val0;
12775 if (arm_ld_sched)
12777 /* If the target has load delay slots, then there's no benefit
12778 to using an ldm instruction unless the offset is zero and
12779 we are optimizing for size. */
12780 return (optimize_size && (REGNO (reg0) == REGNO (reg1))
12781 && (val0 == 0 || val1 == 0 || val0 == 4 || val1 == 4)
12782 && (val_diff == 4 || val_diff == -4));
12785 return ((REGNO (reg0) == REGNO (reg1))
12786 && (val_diff == 4 || val_diff == -4));
12789 return 0;
12792 /* Return true if OP is a valid load or store multiple operation. LOAD is true
12793 for load operations, false for store operations. CONSECUTIVE is true
12794 if the register numbers in the operation must be consecutive in the register
12795 bank. RETURN_PC is true if the value is to be loaded into PC.
12796 The pattern we are trying to match for load is:
12797 [(SET (R_d0) (MEM (PLUS (addr) (offset))))
12798 (SET (R_d1) (MEM (PLUS (addr) (offset + <reg_increment>))))
12801 (SET (R_dn) (MEM (PLUS (addr) (offset + n * <reg_increment>))))
12803 where
12804 1. If offset is 0, first insn should be (SET (R_d0) (MEM (src_addr))).
12805 2. REGNO (R_d0) < REGNO (R_d1) < ... < REGNO (R_dn).
12806 3. If consecutive is TRUE, then for kth register being loaded,
12807 REGNO (R_dk) = REGNO (R_d0) + k.
12808 The pattern for store is similar. */
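/* For instance (illustrative only), a two-register LDMIA from the address in
   r3 would reach this predicate roughly as

     (parallel [(set (reg:SI r0) (mem:SI (reg:SI r3)))
                (set (reg:SI r1) (mem:SI (plus:SI (reg:SI r3)
                                                  (const_int 4))))])

   with LOAD true, MODE == SImode, CONSECUTIVE false and RETURN_PC false.  */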
12809 bool
12810 ldm_stm_operation_p (rtx op, bool load, machine_mode mode,
12811 bool consecutive, bool return_pc)
12813 HOST_WIDE_INT count = XVECLEN (op, 0);
12814 rtx reg, mem, addr;
12815 unsigned regno;
12816 unsigned first_regno;
12817 HOST_WIDE_INT i = 1, base = 0, offset = 0;
12818 rtx elt;
12819 bool addr_reg_in_reglist = false;
12820 bool update = false;
12821 int reg_increment;
12822 int offset_adj;
12823 int regs_per_val;
12825 /* If not in SImode, then registers must be consecutive
12826 (e.g., VLDM instructions for DFmode). */
12827 gcc_assert ((mode == SImode) || consecutive);
12828 /* Setting return_pc for stores is illegal. */
12829 gcc_assert (!return_pc || load);
12831 /* Set up the increments and the regs per val based on the mode. */
12832 reg_increment = GET_MODE_SIZE (mode);
12833 regs_per_val = reg_increment / 4;
12834 offset_adj = return_pc ? 1 : 0;
12836 if (count <= 1
12837 || GET_CODE (XVECEXP (op, 0, offset_adj)) != SET
12838 || (load && !REG_P (SET_DEST (XVECEXP (op, 0, offset_adj)))))
12839 return false;
12841 /* Check if this is a write-back. */
12842 elt = XVECEXP (op, 0, offset_adj);
12843 if (GET_CODE (SET_SRC (elt)) == PLUS)
12845 i++;
12846 base = 1;
12847 update = true;
12849 /* The offset adjustment must be the number of registers being
12850 popped times the size of a single register. */
12851 if (!REG_P (SET_DEST (elt))
12852 || !REG_P (XEXP (SET_SRC (elt), 0))
12853 || (REGNO (SET_DEST (elt)) != REGNO (XEXP (SET_SRC (elt), 0)))
12854 || !CONST_INT_P (XEXP (SET_SRC (elt), 1))
12855 || INTVAL (XEXP (SET_SRC (elt), 1)) !=
12856 ((count - 1 - offset_adj) * reg_increment))
12857 return false;
12860 i = i + offset_adj;
12861 base = base + offset_adj;
12862 /* Perform a quick check so we don't blow up below. If only one reg is loaded,
12863 success depends on the type: VLDM can do just one reg,
12864 LDM must do at least two. */
12865 if ((count <= i) && (mode == SImode))
12866 return false;
12868 elt = XVECEXP (op, 0, i - 1);
12869 if (GET_CODE (elt) != SET)
12870 return false;
12872 if (load)
12874 reg = SET_DEST (elt);
12875 mem = SET_SRC (elt);
12877 else
12879 reg = SET_SRC (elt);
12880 mem = SET_DEST (elt);
12883 if (!REG_P (reg) || !MEM_P (mem))
12884 return false;
12886 regno = REGNO (reg);
12887 first_regno = regno;
12888 addr = XEXP (mem, 0);
12889 if (GET_CODE (addr) == PLUS)
12891 if (!CONST_INT_P (XEXP (addr, 1)))
12892 return false;
12894 offset = INTVAL (XEXP (addr, 1));
12895 addr = XEXP (addr, 0);
12898 if (!REG_P (addr))
12899 return false;
12901 /* Don't allow SP to be loaded unless it is also the base register. It
12902 guarantees that SP is reset correctly when an LDM instruction
12903 is interrupted. Otherwise, we might end up with a corrupt stack. */
12904 if (load && (REGNO (reg) == SP_REGNUM) && (REGNO (addr) != SP_REGNUM))
12905 return false;
12907 for (; i < count; i++)
12909 elt = XVECEXP (op, 0, i);
12910 if (GET_CODE (elt) != SET)
12911 return false;
12913 if (load)
12915 reg = SET_DEST (elt);
12916 mem = SET_SRC (elt);
12918 else
12920 reg = SET_SRC (elt);
12921 mem = SET_DEST (elt);
12924 if (!REG_P (reg)
12925 || GET_MODE (reg) != mode
12926 || REGNO (reg) <= regno
12927 || (consecutive
12928 && (REGNO (reg) !=
12929 (unsigned int) (first_regno + regs_per_val * (i - base))))
12930 /* Don't allow SP to be loaded unless it is also the base register. It
12931 guarantees that SP is reset correctly when an LDM instruction
12932 is interrupted. Otherwise, we might end up with a corrupt stack. */
12933 || (load && (REGNO (reg) == SP_REGNUM) && (REGNO (addr) != SP_REGNUM))
12934 || !MEM_P (mem)
12935 || GET_MODE (mem) != mode
12936 || ((GET_CODE (XEXP (mem, 0)) != PLUS
12937 || !rtx_equal_p (XEXP (XEXP (mem, 0), 0), addr)
12938 || !CONST_INT_P (XEXP (XEXP (mem, 0), 1))
12939 || (INTVAL (XEXP (XEXP (mem, 0), 1)) !=
12940 offset + (i - base) * reg_increment))
12941 && (!REG_P (XEXP (mem, 0))
12942 || offset + (i - base) * reg_increment != 0)))
12943 return false;
12945 regno = REGNO (reg);
12946 if (regno == REGNO (addr))
12947 addr_reg_in_reglist = true;
12950 if (load)
12952 if (update && addr_reg_in_reglist)
12953 return false;
12955 /* For Thumb-1, the address register is always modified, either by write-back
12956 or by explicit load. If the pattern does not describe an update,
12957 then the address register must be in the list of loaded registers. */
12958 if (TARGET_THUMB1)
12959 return update || addr_reg_in_reglist;
12962 return true;
12965 /* Return true iff it would be profitable to turn a sequence of NOPS loads
12966 or stores (depending on IS_STORE) into a load-multiple or store-multiple
12967 instruction. ADD_OFFSET is nonzero if the base address register needs
12968 to be modified with an add instruction before we can use it. */
12970 static bool
12971 multiple_operation_profitable_p (bool is_store ATTRIBUTE_UNUSED,
12972 int nops, HOST_WIDE_INT add_offset)
12974 /* For ARM8,9 & StrongARM, 2 ldr instructions are faster than an ldm
12975 if the offset isn't small enough. The reason 2 ldrs are faster
12976 is that these ARMs are able to do more than one cache access
12977 in a single cycle. The ARM9 and StrongARM have Harvard caches,
12978 whilst the ARM8 has a double bandwidth cache. This means that
12979 these cores can do both an instruction fetch and a data fetch in
12980 a single cycle, so the trick of calculating the address into a
12981 scratch register (one of the result regs) and then doing a load
12982 multiple actually becomes slower (and no smaller in code size).
12983 That is the transformation
12985 ldr rd1, [rbase + offset]
12986 ldr rd2, [rbase + offset + 4]
12990 add rd1, rbase, offset
12991 ldmia rd1, {rd1, rd2}
12993 produces worse code -- '3 cycles + any stalls on rd2' instead of
12994 '2 cycles + any stalls on rd2'. On ARMs with only one cache
12995 access per cycle, the first sequence could never complete in less
12996 than 6 cycles, whereas the ldm sequence would only take 5 and
12997 would make better use of sequential accesses if not hitting the
12998 cache.
13000 We cheat here and test 'arm_ld_sched' which we currently know to
13001 only be true for the ARM8, ARM9 and StrongARM. If this ever
13002 changes, then the test below needs to be reworked. */
13003 if (nops == 2 && arm_ld_sched && add_offset != 0)
13004 return false;
13006 /* XScale has load-store double instructions, but they have stricter
13007 alignment requirements than load-store multiple, so we cannot
13008 use them.
13010 For XScale ldm requires 2 + NREGS cycles to complete and blocks
13011 the pipeline until completion.
13013 NREGS CYCLES
13014 1 3
13015 2 4
13016 3 5
13017 4 6
13019 An ldr instruction takes 1-3 cycles, but does not block the
13020 pipeline.
13022 NREGS CYCLES
13023 1 1-3
13024 2 2-6
13025 3 3-9
13026 4 4-12
13028 Best case ldr will always win. However, the more ldr instructions
13029 we issue, the less likely we are to be able to schedule them well.
13030 Using ldr instructions also increases code size.
13032 As a compromise, we use ldr for counts of 1 or 2 regs, and ldm
13033 for counts of 3 or 4 regs. */
13034 if (nops <= 2 && arm_tune_xscale && !optimize_size)
13035 return false;
13036 return true;
13039 /* Subroutine of load_multiple_sequence and store_multiple_sequence.
13040 Given an array of UNSORTED_OFFSETS, of which there are NOPS, compute
13041 an array ORDER which describes the sequence to use when accessing the
13042 offsets that produces an ascending order. In this sequence, each
13043 offset must be larger by exactly 4 than the previous one. ORDER[0]
13044 must have been filled in with the lowest offset by the caller.
13045 If UNSORTED_REGS is nonnull, it is an array of register numbers that
13046 we use to verify that ORDER produces an ascending order of registers.
13047 Return true if it was possible to construct such an order, false if
13048 not. */
13050 static bool
13051 compute_offset_order (int nops, HOST_WIDE_INT *unsorted_offsets, int *order,
13052 int *unsorted_regs)
13054 int i;
13055 for (i = 1; i < nops; i++)
13057 int j;
13059 order[i] = order[i - 1];
13060 for (j = 0; j < nops; j++)
13061 if (unsorted_offsets[j] == unsorted_offsets[order[i - 1]] + 4)
13063 /* We must find exactly one offset that is higher than the
13064 previous one by 4. */
13065 if (order[i] != order[i - 1])
13066 return false;
13067 order[i] = j;
13069 if (order[i] == order[i - 1])
13070 return false;
13071 /* The register numbers must be ascending. */
13072 if (unsorted_regs != NULL
13073 && unsorted_regs[order[i]] <= unsorted_regs[order[i - 1]])
13074 return false;
13076 return true;
13079 /* Used to determine in a peephole whether a sequence of load
13080 instructions can be changed into a load-multiple instruction.
13081 NOPS is the number of separate load instructions we are examining. The
13082 first NOPS entries in OPERANDS are the destination registers, the
13083 next NOPS entries are memory operands. If this function is
13084 successful, *BASE is set to the common base register of the memory
13085 accesses; *LOAD_OFFSET is set to the first memory location's offset
13086 from that base register.
13087 REGS is an array filled in with the destination register numbers.
13088 SAVED_ORDER (if nonnull) is an array filled in with an order that maps
13089 insn numbers to an ascending order of loads. If CHECK_REGS is true,
13090 the sequence of registers in REGS matches the loads from ascending memory
13091 locations, and the function verifies that the register numbers are
13092 themselves ascending. If CHECK_REGS is false, the register numbers
13093 are stored in the order they are found in the operands. */
13094 static int
13095 load_multiple_sequence (rtx *operands, int nops, int *regs, int *saved_order,
13096 int *base, HOST_WIDE_INT *load_offset, bool check_regs)
13098 int unsorted_regs[MAX_LDM_STM_OPS];
13099 HOST_WIDE_INT unsorted_offsets[MAX_LDM_STM_OPS];
13100 int order[MAX_LDM_STM_OPS];
13101 rtx base_reg_rtx = NULL;
13102 int base_reg = -1;
13103 int i, ldm_case;
13105 /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
13106 easily extended if required. */
13107 gcc_assert (nops >= 2 && nops <= MAX_LDM_STM_OPS);
13109 memset (order, 0, MAX_LDM_STM_OPS * sizeof (int));
13111 /* Loop over the operands and check that the memory references are
13112 suitable (i.e. immediate offsets from the same base register). At
13113 the same time, extract the target register, and the memory
13114 offsets. */
13115 for (i = 0; i < nops; i++)
13117 rtx reg;
13118 rtx offset;
13120 /* Convert a subreg of a mem into the mem itself. */
13121 if (GET_CODE (operands[nops + i]) == SUBREG)
13122 operands[nops + i] = alter_subreg (operands + (nops + i), true);
13124 gcc_assert (MEM_P (operands[nops + i]));
13126 /* Don't reorder volatile memory references; it doesn't seem worth
13127 looking for the case where the order is ok anyway. */
13128 if (MEM_VOLATILE_P (operands[nops + i]))
13129 return 0;
13131 offset = const0_rtx;
13133 if ((REG_P (reg = XEXP (operands[nops + i], 0))
13134 || (GET_CODE (reg) == SUBREG
13135 && REG_P (reg = SUBREG_REG (reg))))
13136 || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
13137 && ((REG_P (reg = XEXP (XEXP (operands[nops + i], 0), 0)))
13138 || (GET_CODE (reg) == SUBREG
13139 && REG_P (reg = SUBREG_REG (reg))))
13140 && (CONST_INT_P (offset
13141 = XEXP (XEXP (operands[nops + i], 0), 1)))))
13143 if (i == 0)
13145 base_reg = REGNO (reg);
13146 base_reg_rtx = reg;
13147 if (TARGET_THUMB1 && base_reg > LAST_LO_REGNUM)
13148 return 0;
13150 else if (base_reg != (int) REGNO (reg))
13151 /* Not addressed from the same base register. */
13152 return 0;
13154 unsorted_regs[i] = (REG_P (operands[i])
13155 ? REGNO (operands[i])
13156 : REGNO (SUBREG_REG (operands[i])));
13158 /* If it isn't an integer register, or if it overwrites the
13159 base register but isn't the last insn in the list, then
13160 we can't do this. */
13161 if (unsorted_regs[i] < 0
13162 || (TARGET_THUMB1 && unsorted_regs[i] > LAST_LO_REGNUM)
13163 || unsorted_regs[i] > 14
13164 || (i != nops - 1 && unsorted_regs[i] == base_reg))
13165 return 0;
13167 /* Don't allow SP to be loaded unless it is also the base
13168 register. It guarantees that SP is reset correctly when
13169 an LDM instruction is interrupted. Otherwise, we might
13170 end up with a corrupt stack. */
13171 if (unsorted_regs[i] == SP_REGNUM && base_reg != SP_REGNUM)
13172 return 0;
13174 unsorted_offsets[i] = INTVAL (offset);
13175 if (i == 0 || unsorted_offsets[i] < unsorted_offsets[order[0]])
13176 order[0] = i;
13178 else
13179 /* Not a suitable memory address. */
13180 return 0;
13183 /* All the useful information has now been extracted from the
13184 operands into unsorted_regs and unsorted_offsets; additionally,
13185 order[0] has been set to the lowest offset in the list. Sort
13186 the offsets into order, verifying that they are adjacent, and
13187 check that the register numbers are ascending. */
13188 if (!compute_offset_order (nops, unsorted_offsets, order,
13189 check_regs ? unsorted_regs : NULL))
13190 return 0;
13192 if (saved_order)
13193 memcpy (saved_order, order, sizeof order);
13195 if (base)
13197 *base = base_reg;
13199 for (i = 0; i < nops; i++)
13200 regs[i] = unsorted_regs[check_regs ? order[i] : i];
13202 *load_offset = unsorted_offsets[order[0]];
13205 if (TARGET_THUMB1
13206 && !peep2_reg_dead_p (nops, base_reg_rtx))
13207 return 0;
13209 if (unsorted_offsets[order[0]] == 0)
13210 ldm_case = 1; /* ldmia */
13211 else if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
13212 ldm_case = 2; /* ldmib */
13213 else if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
13214 ldm_case = 3; /* ldmda */
13215 else if (TARGET_32BIT && unsorted_offsets[order[nops - 1]] == -4)
13216 ldm_case = 4; /* ldmdb */
13217 else if (const_ok_for_arm (unsorted_offsets[order[0]])
13218 || const_ok_for_arm (-unsorted_offsets[order[0]]))
13219 ldm_case = 5;
13220 else
13221 return 0;
13223 if (!multiple_operation_profitable_p (false, nops,
13224 ldm_case == 5
13225 ? unsorted_offsets[order[0]] : 0))
13226 return 0;
13228 return ldm_case;
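/* For illustration, the cases selected above correspond to the classic ARM
   load-multiple addressing modes.  Assuming the peephole matched four loads
   (register numbers hypothetical):

       ldr r4, [r0]         ldr r5, [r0, #4]
       ldr r6, [r0, #8]     ldr r7, [r0, #12]

   the lowest offset is 0, so the function returns 1 and the sequence can be
   rewritten as "ldmia r0, {r4-r7}".  Offsets 4..16 would give case 2 (ldmib),
   -12..0 case 3 (ldmda), and -16..-4 case 4 (ldmdb); case 5 covers any other
   starting offset that is encodable as an immediate and so can be folded into
   a preliminary add.  */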
13231 /* Used to determine in a peephole whether a sequence of store instructions can
13232 be changed into a store-multiple instruction.
13233 NOPS is the number of separate store instructions we are examining.
13234 NOPS_TOTAL is the total number of instructions recognized by the peephole
13235 pattern.
13236 The first NOPS entries in OPERANDS are the source registers, the next
13237 NOPS entries are memory operands. If this function is successful, *BASE is
13238 set to the common base register of the memory accesses; *LOAD_OFFSET is set
13239 to the first memory location's offset from that base register. REGS is an
13240 array filled in with the source register numbers, REG_RTXS (if nonnull) is
13241 likewise filled with the corresponding rtx's.
13242 SAVED_ORDER (if nonnull) is an array filled in with an order that maps insn
13243 numbers to an ascending order of stores.
13244 If CHECK_REGS is true, the sequence of registers in *REGS matches the stores
13245 from ascending memory locations, and the function verifies that the register
13246 numbers are themselves ascending. If CHECK_REGS is false, the register
13247 numbers are stored in the order they are found in the operands. */
13248 static int
13249 store_multiple_sequence (rtx *operands, int nops, int nops_total,
13250 int *regs, rtx *reg_rtxs, int *saved_order, int *base,
13251 HOST_WIDE_INT *load_offset, bool check_regs)
13253 int unsorted_regs[MAX_LDM_STM_OPS];
13254 rtx unsorted_reg_rtxs[MAX_LDM_STM_OPS];
13255 HOST_WIDE_INT unsorted_offsets[MAX_LDM_STM_OPS];
13256 int order[MAX_LDM_STM_OPS];
13257 int base_reg = -1;
13258 rtx base_reg_rtx = NULL;
13259 int i, stm_case;
13261 /* Write back of base register is currently only supported for Thumb 1. */
13262 int base_writeback = TARGET_THUMB1;
13264 /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
13265 easily extended if required. */
13266 gcc_assert (nops >= 2 && nops <= MAX_LDM_STM_OPS);
13268 memset (order, 0, MAX_LDM_STM_OPS * sizeof (int));
13270 /* Loop over the operands and check that the memory references are
13271 suitable (i.e. immediate offsets from the same base register). At
13272 the same time, extract the target register, and the memory
13273 offsets. */
13274 for (i = 0; i < nops; i++)
13276 rtx reg;
13277 rtx offset;
13279 /* Convert a subreg of a mem into the mem itself. */
13280 if (GET_CODE (operands[nops + i]) == SUBREG)
13281 operands[nops + i] = alter_subreg (operands + (nops + i), true);
13283 gcc_assert (MEM_P (operands[nops + i]));
13285 /* Don't reorder volatile memory references; it doesn't seem worth
13286 looking for the case where the order is ok anyway. */
13287 if (MEM_VOLATILE_P (operands[nops + i]))
13288 return 0;
13290 offset = const0_rtx;
13292 if ((REG_P (reg = XEXP (operands[nops + i], 0))
13293 || (GET_CODE (reg) == SUBREG
13294 && REG_P (reg = SUBREG_REG (reg))))
13295 || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
13296 && ((REG_P (reg = XEXP (XEXP (operands[nops + i], 0), 0)))
13297 || (GET_CODE (reg) == SUBREG
13298 && REG_P (reg = SUBREG_REG (reg))))
13299 && (CONST_INT_P (offset
13300 = XEXP (XEXP (operands[nops + i], 0), 1)))))
13302 unsorted_reg_rtxs[i] = (REG_P (operands[i])
13303 ? operands[i] : SUBREG_REG (operands[i]));
13304 unsorted_regs[i] = REGNO (unsorted_reg_rtxs[i]);
13306 if (i == 0)
13308 base_reg = REGNO (reg);
13309 base_reg_rtx = reg;
13310 if (TARGET_THUMB1 && base_reg > LAST_LO_REGNUM)
13311 return 0;
13313 else if (base_reg != (int) REGNO (reg))
13314 /* Not addressed from the same base register. */
13315 return 0;
13317 /* If it isn't an integer register, then we can't do this. */
13318 if (unsorted_regs[i] < 0
13319 || (TARGET_THUMB1 && unsorted_regs[i] > LAST_LO_REGNUM)
13320 /* The effects are unpredictable if the base register is
13321 both updated and stored. */
13322 || (base_writeback && unsorted_regs[i] == base_reg)
13323 || (TARGET_THUMB2 && unsorted_regs[i] == SP_REGNUM)
13324 || unsorted_regs[i] > 14)
13325 return 0;
13327 unsorted_offsets[i] = INTVAL (offset);
13328 if (i == 0 || unsorted_offsets[i] < unsorted_offsets[order[0]])
13329 order[0] = i;
13331 else
13332 /* Not a suitable memory address. */
13333 return 0;
13336 /* All the useful information has now been extracted from the
13337 operands into unsorted_regs and unsorted_offsets; additionally,
13338 order[0] has been set to the lowest offset in the list. Sort
13339 the offsets into order, verifying that they are adjacent, and
13340 check that the register numbers are ascending. */
13341 if (!compute_offset_order (nops, unsorted_offsets, order,
13342 check_regs ? unsorted_regs : NULL))
13343 return 0;
13345 if (saved_order)
13346 memcpy (saved_order, order, sizeof order);
13348 if (base)
13350 *base = base_reg;
13352 for (i = 0; i < nops; i++)
13354 regs[i] = unsorted_regs[check_regs ? order[i] : i];
13355 if (reg_rtxs)
13356 reg_rtxs[i] = unsorted_reg_rtxs[check_regs ? order[i] : i];
13359 *load_offset = unsorted_offsets[order[0]];
13362 if (TARGET_THUMB1
13363 && !peep2_reg_dead_p (nops_total, base_reg_rtx))
13364 return 0;
13366 if (unsorted_offsets[order[0]] == 0)
13367 stm_case = 1; /* stmia */
13368 else if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
13369 stm_case = 2; /* stmib */
13370 else if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
13371 stm_case = 3; /* stmda */
13372 else if (TARGET_32BIT && unsorted_offsets[order[nops - 1]] == -4)
13373 stm_case = 4; /* stmdb */
13374 else
13375 return 0;
13377 if (!multiple_operation_profitable_p (false, nops, 0))
13378 return 0;
13380 return stm_case;
13383 /* Routines for use in generating RTL. */
13385 /* Generate a load-multiple instruction. COUNT is the number of loads in
13386 the instruction; REGS and MEMS are arrays containing the operands.
13387 BASEREG is the base register to be used in addressing the memory operands.
13388 WBACK_OFFSET, if nonzero, is the amount by which the base register
13389 should be updated. */
13391 static rtx
13392 arm_gen_load_multiple_1 (int count, int *regs, rtx *mems, rtx basereg,
13393 HOST_WIDE_INT wback_offset)
13395 int i = 0, j;
13396 rtx result;
13398 if (!multiple_operation_profitable_p (false, count, 0))
13400 rtx seq;
13402 start_sequence ();
13404 for (i = 0; i < count; i++)
13405 emit_move_insn (gen_rtx_REG (SImode, regs[i]), mems[i]);
13407 if (wback_offset != 0)
13408 emit_move_insn (basereg, plus_constant (Pmode, basereg, wback_offset));
13410 seq = get_insns ();
13411 end_sequence ();
13413 return seq;
13416 result = gen_rtx_PARALLEL (VOIDmode,
13417 rtvec_alloc (count + (wback_offset != 0 ? 1 : 0)));
13418 if (wback_offset != 0)
13420 XVECEXP (result, 0, 0)
13421 = gen_rtx_SET (basereg, plus_constant (Pmode, basereg, wback_offset));
13422 i = 1;
13423 count++;
13426 for (j = 0; i < count; i++, j++)
13427 XVECEXP (result, 0, i)
13428 = gen_rtx_SET (gen_rtx_REG (SImode, regs[j]), mems[j]);
13430 return result;
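/* Sketch of the RTL built above: for COUNT == 2, REGS == {4, 5}, base
   register r0 and WBACK_OFFSET == 8, the PARALLEL is roughly

       (parallel [(set (reg:SI 0) (plus:SI (reg:SI 0) (const_int 8)))
                  (set (reg:SI 4) (mem:SI ...))
                  (set (reg:SI 5) (mem:SI ...))])

   i.e. the optional writeback SET occupies element 0 and the loads follow,
   using the MEMs supplied by the caller.  If the operation is judged
   unprofitable, a plain sequence of single moves is returned instead.  */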
13433 /* Generate a store-multiple instruction. COUNT is the number of stores in
13434 the instruction; REGS and MEMS are arrays containing the operands.
13435 BASEREG is the base register to be used in addressing the memory operands.
13436 WBACK_OFFSET, if nonzero, is the amount by which the base register
13437 should be updated. */
13439 static rtx
13440 arm_gen_store_multiple_1 (int count, int *regs, rtx *mems, rtx basereg,
13441 HOST_WIDE_INT wback_offset)
13443 int i = 0, j;
13444 rtx result;
13446 if (GET_CODE (basereg) == PLUS)
13447 basereg = XEXP (basereg, 0);
13449 if (!multiple_operation_profitable_p (false, count, 0))
13451 rtx seq;
13453 start_sequence ();
13455 for (i = 0; i < count; i++)
13456 emit_move_insn (mems[i], gen_rtx_REG (SImode, regs[i]));
13458 if (wback_offset != 0)
13459 emit_move_insn (basereg, plus_constant (Pmode, basereg, wback_offset));
13461 seq = get_insns ();
13462 end_sequence ();
13464 return seq;
13467 result = gen_rtx_PARALLEL (VOIDmode,
13468 rtvec_alloc (count + (wback_offset != 0 ? 1 : 0)));
13469 if (wback_offset != 0)
13471 XVECEXP (result, 0, 0)
13472 = gen_rtx_SET (basereg, plus_constant (Pmode, basereg, wback_offset));
13473 i = 1;
13474 count++;
13477 for (j = 0; i < count; i++, j++)
13478 XVECEXP (result, 0, i)
13479 = gen_rtx_SET (mems[j], gen_rtx_REG (SImode, regs[j]));
13481 return result;
13484 /* Generate either a load-multiple or a store-multiple instruction. This
13485 function can be used in situations where we can start with a single MEM
13486 rtx and adjust its address upwards.
13487 COUNT is the number of operations in the instruction, not counting a
13488 possible update of the base register. REGS is an array containing the
13489 register operands.
13490 BASEREG is the base register to be used in addressing the memory operands,
13491 which are constructed from BASEMEM.
13492 WRITE_BACK specifies whether the generated instruction should include an
13493 update of the base register.
13494 OFFSETP is used to pass an offset to and from this function; this offset
13495 is not used when constructing the address (instead BASEMEM should have an
13496 appropriate offset in its address); it is used only for setting
13497 MEM_OFFSET. It is updated only if WRITE_BACK is true. */
13499 static rtx
13500 arm_gen_multiple_op (bool is_load, int *regs, int count, rtx basereg,
13501 bool write_back, rtx basemem, HOST_WIDE_INT *offsetp)
13503 rtx mems[MAX_LDM_STM_OPS];
13504 HOST_WIDE_INT offset = *offsetp;
13505 int i;
13507 gcc_assert (count <= MAX_LDM_STM_OPS);
13509 if (GET_CODE (basereg) == PLUS)
13510 basereg = XEXP (basereg, 0);
13512 for (i = 0; i < count; i++)
13514 rtx addr = plus_constant (Pmode, basereg, i * 4);
13515 mems[i] = adjust_automodify_address_nv (basemem, SImode, addr, offset);
13516 offset += 4;
13519 if (write_back)
13520 *offsetp = offset;
13522 if (is_load)
13523 return arm_gen_load_multiple_1 (count, regs, mems, basereg,
13524 write_back ? 4 * count : 0);
13525 else
13526 return arm_gen_store_multiple_1 (count, regs, mems, basereg,
13527 write_back ? 4 * count : 0);
13531 rtx arm_gen_load_multiple (int *regs, int count, rtx basereg, int write_back,
13532 rtx basemem, HOST_WIDE_INT *offsetp)
13534 return arm_gen_multiple_op (TRUE, regs, count, basereg, write_back, basemem,
13535 offsetp);
13539 rtx arm_gen_store_multiple (int *regs, int count, rtx basereg, int write_back,
13540 rtx basemem, HOST_WIDE_INT *offsetp)
13542 return arm_gen_multiple_op (FALSE, regs, count, basereg, write_back, basemem,
13543 offsetp);
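/* Minimal usage sketch for these wrappers, mirroring the calls made later in
   arm_gen_movmemqi: load four words through the first registers in
   arm_regs_in_sequence, advancing the running MEM offset as we go:

       emit_insn (arm_gen_load_multiple (arm_regs_in_sequence, 4, src,
                                         TRUE, srcbase, &srcoffset));

   With WRITE_BACK true the base register is advanced by 4 * COUNT and
   *OFFSETP is updated, so subsequent accesses keep accurate MEM_OFFSETs.  */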
13546 /* Called from a peephole2 expander to turn a sequence of loads into an
13547 LDM instruction. OPERANDS are the operands found by the peephole matcher;
13548 NOPS indicates how many separate loads we are trying to combine. SORT_REGS
13549 is true if we can reorder the registers because they are used commutatively
13550 subsequently.
13551 Returns true iff we could generate a new instruction. */
13553 bool
13554 gen_ldm_seq (rtx *operands, int nops, bool sort_regs)
13556 int regs[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
13557 rtx mems[MAX_LDM_STM_OPS];
13558 int i, j, base_reg;
13559 rtx base_reg_rtx;
13560 HOST_WIDE_INT offset;
13561 int write_back = FALSE;
13562 int ldm_case;
13563 rtx addr;
13565 ldm_case = load_multiple_sequence (operands, nops, regs, mem_order,
13566 &base_reg, &offset, !sort_regs);
13568 if (ldm_case == 0)
13569 return false;
13571 if (sort_regs)
13572 for (i = 0; i < nops - 1; i++)
13573 for (j = i + 1; j < nops; j++)
13574 if (regs[i] > regs[j])
13576 int t = regs[i];
13577 regs[i] = regs[j];
13578 regs[j] = t;
13580 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
13582 if (TARGET_THUMB1)
13584 gcc_assert (peep2_reg_dead_p (nops, base_reg_rtx));
13585 gcc_assert (ldm_case == 1 || ldm_case == 5);
13586 write_back = TRUE;
13589 if (ldm_case == 5)
13591 rtx newbase = TARGET_THUMB1 ? base_reg_rtx : gen_rtx_REG (SImode, regs[0]);
13592 emit_insn (gen_addsi3 (newbase, base_reg_rtx, GEN_INT (offset)));
13593 offset = 0;
13594 if (!TARGET_THUMB1)
13596 base_reg = regs[0];
13597 base_reg_rtx = newbase;
13601 for (i = 0; i < nops; i++)
13603 addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
13604 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
13605 SImode, addr, 0);
13607 emit_insn (arm_gen_load_multiple_1 (nops, regs, mems, base_reg_rtx,
13608 write_back ? offset + i * 4 : 0));
13609 return true;
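/* Rough picture of the transformation above, for a matched pair of loads
   (register numbers hypothetical):

       ldr r2, [r5, #4]
       ldr r1, [r5]

   MEM_ORDER describes the accesses in ascending address order; with SORT_REGS
   the destination registers are also sorted numerically, so the pair is
   re-emitted as the single insn "ldmia r5, {r1, r2}".  On Thumb-1 the base
   register must be dead afterwards, because only the writeback form of the
   instruction is generated there.  */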
13612 /* Called from a peephole2 expander to turn a sequence of stores into an
13613 STM instruction. OPERANDS are the operands found by the peephole matcher;
13614 NOPS indicates how many separate stores we are trying to combine.
13615 Returns true iff we could generate a new instruction. */
13617 bool
13618 gen_stm_seq (rtx *operands, int nops)
13620 int i;
13621 int regs[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
13622 rtx mems[MAX_LDM_STM_OPS];
13623 int base_reg;
13624 rtx base_reg_rtx;
13625 HOST_WIDE_INT offset;
13626 int write_back = FALSE;
13627 int stm_case;
13628 rtx addr;
13629 bool base_reg_dies;
13631 stm_case = store_multiple_sequence (operands, nops, nops, regs, NULL,
13632 mem_order, &base_reg, &offset, true);
13634 if (stm_case == 0)
13635 return false;
13637 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
13639 base_reg_dies = peep2_reg_dead_p (nops, base_reg_rtx);
13640 if (TARGET_THUMB1)
13642 gcc_assert (base_reg_dies);
13643 write_back = TRUE;
13646 if (stm_case == 5)
13648 gcc_assert (base_reg_dies);
13649 emit_insn (gen_addsi3 (base_reg_rtx, base_reg_rtx, GEN_INT (offset)));
13650 offset = 0;
13653 addr = plus_constant (Pmode, base_reg_rtx, offset);
13655 for (i = 0; i < nops; i++)
13657 addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
13658 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
13659 SImode, addr, 0);
13661 emit_insn (arm_gen_store_multiple_1 (nops, regs, mems, base_reg_rtx,
13662 write_back ? offset + i * 4 : 0));
13663 return true;
13666 /* Called from a peephole2 expander to turn a sequence of stores that are
13667 preceded by constant loads into an STM instruction. OPERANDS are the
13668 operands found by the peephole matcher; NOPS indicates how many
13669 separate stores we are trying to combine; there are 2 * NOPS
13670 instructions in the peephole.
13671 Returns true iff we could generate a new instruction. */
13673 bool
13674 gen_const_stm_seq (rtx *operands, int nops)
13676 int regs[MAX_LDM_STM_OPS], sorted_regs[MAX_LDM_STM_OPS];
13677 int reg_order[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
13678 rtx reg_rtxs[MAX_LDM_STM_OPS], orig_reg_rtxs[MAX_LDM_STM_OPS];
13679 rtx mems[MAX_LDM_STM_OPS];
13680 int base_reg;
13681 rtx base_reg_rtx;
13682 HOST_WIDE_INT offset;
13683 int write_back = FALSE;
13684 int stm_case;
13685 rtx addr;
13686 bool base_reg_dies;
13687 int i, j;
13688 HARD_REG_SET allocated;
13690 stm_case = store_multiple_sequence (operands, nops, 2 * nops, regs, reg_rtxs,
13691 mem_order, &base_reg, &offset, false);
13693 if (stm_case == 0)
13694 return false;
13696 memcpy (orig_reg_rtxs, reg_rtxs, sizeof orig_reg_rtxs);
13698 /* If the same register is used more than once, try to find a free
13699 register. */
13700 CLEAR_HARD_REG_SET (allocated);
13701 for (i = 0; i < nops; i++)
13703 for (j = i + 1; j < nops; j++)
13704 if (regs[i] == regs[j])
13706 rtx t = peep2_find_free_register (0, nops * 2,
13707 TARGET_THUMB1 ? "l" : "r",
13708 SImode, &allocated);
13709 if (t == NULL_RTX)
13710 return false;
13711 reg_rtxs[i] = t;
13712 regs[i] = REGNO (t);
13716 /* Compute an ordering that maps the register numbers to an ascending
13717 sequence. */
13718 reg_order[0] = 0;
13719 for (i = 0; i < nops; i++)
13720 if (regs[i] < regs[reg_order[0]])
13721 reg_order[0] = i;
13723 for (i = 1; i < nops; i++)
13725 int this_order = reg_order[i - 1];
13726 for (j = 0; j < nops; j++)
13727 if (regs[j] > regs[reg_order[i - 1]]
13728 && (this_order == reg_order[i - 1]
13729 || regs[j] < regs[this_order]))
13730 this_order = j;
13731 reg_order[i] = this_order;
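/* Worked example of the selection above: for regs[] == {3, 1, 2} the loop
   yields reg_order[] == {1, 2, 0}, i.e. the operand indices visited in
   ascending register-number order (register numbers hypothetical).  */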
13734 /* Ensure that registers that must be live after the instruction end
13735 up with the correct value. */
13736 for (i = 0; i < nops; i++)
13738 int this_order = reg_order[i];
13739 if ((this_order != mem_order[i]
13740 || orig_reg_rtxs[this_order] != reg_rtxs[this_order])
13741 && !peep2_reg_dead_p (nops * 2, orig_reg_rtxs[this_order]))
13742 return false;
13745 /* Load the constants. */
13746 for (i = 0; i < nops; i++)
13748 rtx op = operands[2 * nops + mem_order[i]];
13749 sorted_regs[i] = regs[reg_order[i]];
13750 emit_move_insn (reg_rtxs[reg_order[i]], op);
13753 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
13755 base_reg_dies = peep2_reg_dead_p (nops * 2, base_reg_rtx);
13756 if (TARGET_THUMB1)
13758 gcc_assert (base_reg_dies);
13759 write_back = TRUE;
13762 if (stm_case == 5)
13764 gcc_assert (base_reg_dies);
13765 emit_insn (gen_addsi3 (base_reg_rtx, base_reg_rtx, GEN_INT (offset)));
13766 offset = 0;
13769 addr = plus_constant (Pmode, base_reg_rtx, offset);
13771 for (i = 0; i < nops; i++)
13773 addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
13774 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
13775 SImode, addr, 0);
13777 emit_insn (arm_gen_store_multiple_1 (nops, sorted_regs, mems, base_reg_rtx,
13778 write_back ? offset + i * 4 : 0));
13779 return true;
13782 /* Copy a block of memory using plain ldr/str/ldrh/strh instructions, to permit
13783 unaligned copies on processors which support unaligned semantics for those
13784 instructions. INTERLEAVE_FACTOR can be used to attempt to hide load latency
13785 (using more registers) by doing e.g. load/load/store/store for a factor of 2.
13786 An interleave factor of 1 (the minimum) will perform no interleaving.
13787 Load/store multiple are used for aligned addresses where possible. */
13789 static void
13790 arm_block_move_unaligned_straight (rtx dstbase, rtx srcbase,
13791 HOST_WIDE_INT length,
13792 unsigned int interleave_factor)
13794 rtx *regs = XALLOCAVEC (rtx, interleave_factor);
13795 int *regnos = XALLOCAVEC (int, interleave_factor);
13796 HOST_WIDE_INT block_size_bytes = interleave_factor * UNITS_PER_WORD;
13797 HOST_WIDE_INT i, j;
13798 HOST_WIDE_INT remaining = length, words;
13799 rtx halfword_tmp = NULL, byte_tmp = NULL;
13800 rtx dst, src;
13801 bool src_aligned = MEM_ALIGN (srcbase) >= BITS_PER_WORD;
13802 bool dst_aligned = MEM_ALIGN (dstbase) >= BITS_PER_WORD;
13803 HOST_WIDE_INT srcoffset, dstoffset;
13804 HOST_WIDE_INT src_autoinc, dst_autoinc;
13805 rtx mem, addr;
13807 gcc_assert (1 <= interleave_factor && interleave_factor <= 4);
13809 /* Use hard registers if we have aligned source or destination so we can use
13810 load/store multiple with contiguous registers. */
13811 if (dst_aligned || src_aligned)
13812 for (i = 0; i < interleave_factor; i++)
13813 regs[i] = gen_rtx_REG (SImode, i);
13814 else
13815 for (i = 0; i < interleave_factor; i++)
13816 regs[i] = gen_reg_rtx (SImode);
13818 dst = copy_addr_to_reg (XEXP (dstbase, 0));
13819 src = copy_addr_to_reg (XEXP (srcbase, 0));
13821 srcoffset = dstoffset = 0;
13823 /* Calls to arm_gen_load_multiple and arm_gen_store_multiple update SRC/DST.
13824 For copying the last bytes we want to subtract this offset again. */
13825 src_autoinc = dst_autoinc = 0;
13827 for (i = 0; i < interleave_factor; i++)
13828 regnos[i] = i;
13830 /* Copy BLOCK_SIZE_BYTES chunks. */
13832 for (i = 0; i + block_size_bytes <= length; i += block_size_bytes)
13834 /* Load words. */
13835 if (src_aligned && interleave_factor > 1)
13837 emit_insn (arm_gen_load_multiple (regnos, interleave_factor, src,
13838 TRUE, srcbase, &srcoffset));
13839 src_autoinc += UNITS_PER_WORD * interleave_factor;
13841 else
13843 for (j = 0; j < interleave_factor; j++)
13845 addr = plus_constant (Pmode, src, (srcoffset + j * UNITS_PER_WORD
13846 - src_autoinc));
13847 mem = adjust_automodify_address (srcbase, SImode, addr,
13848 srcoffset + j * UNITS_PER_WORD);
13849 emit_insn (gen_unaligned_loadsi (regs[j], mem));
13851 srcoffset += block_size_bytes;
13854 /* Store words. */
13855 if (dst_aligned && interleave_factor > 1)
13857 emit_insn (arm_gen_store_multiple (regnos, interleave_factor, dst,
13858 TRUE, dstbase, &dstoffset));
13859 dst_autoinc += UNITS_PER_WORD * interleave_factor;
13861 else
13863 for (j = 0; j < interleave_factor; j++)
13865 addr = plus_constant (Pmode, dst, (dstoffset + j * UNITS_PER_WORD
13866 - dst_autoinc));
13867 mem = adjust_automodify_address (dstbase, SImode, addr,
13868 dstoffset + j * UNITS_PER_WORD);
13869 emit_insn (gen_unaligned_storesi (mem, regs[j]));
13871 dstoffset += block_size_bytes;
13874 remaining -= block_size_bytes;
13877 /* Copy any whole words left (note these aren't interleaved with any
13878 subsequent halfword/byte load/stores in the interests of simplicity). */
13880 words = remaining / UNITS_PER_WORD;
13882 gcc_assert (words < interleave_factor);
13884 if (src_aligned && words > 1)
13886 emit_insn (arm_gen_load_multiple (regnos, words, src, TRUE, srcbase,
13887 &srcoffset));
13888 src_autoinc += UNITS_PER_WORD * words;
13890 else
13892 for (j = 0; j < words; j++)
13894 addr = plus_constant (Pmode, src,
13895 srcoffset + j * UNITS_PER_WORD - src_autoinc);
13896 mem = adjust_automodify_address (srcbase, SImode, addr,
13897 srcoffset + j * UNITS_PER_WORD);
13898 if (src_aligned)
13899 emit_move_insn (regs[j], mem);
13900 else
13901 emit_insn (gen_unaligned_loadsi (regs[j], mem));
13903 srcoffset += words * UNITS_PER_WORD;
13906 if (dst_aligned && words > 1)
13908 emit_insn (arm_gen_store_multiple (regnos, words, dst, TRUE, dstbase,
13909 &dstoffset));
13910 dst_autoinc += words * UNITS_PER_WORD;
13912 else
13914 for (j = 0; j < words; j++)
13916 addr = plus_constant (Pmode, dst,
13917 dstoffset + j * UNITS_PER_WORD - dst_autoinc);
13918 mem = adjust_automodify_address (dstbase, SImode, addr,
13919 dstoffset + j * UNITS_PER_WORD);
13920 if (dst_aligned)
13921 emit_move_insn (mem, regs[j]);
13922 else
13923 emit_insn (gen_unaligned_storesi (mem, regs[j]));
13925 dstoffset += words * UNITS_PER_WORD;
13928 remaining -= words * UNITS_PER_WORD;
13930 gcc_assert (remaining < 4);
13932 /* Copy a halfword if necessary. */
13934 if (remaining >= 2)
13936 halfword_tmp = gen_reg_rtx (SImode);
13938 addr = plus_constant (Pmode, src, srcoffset - src_autoinc);
13939 mem = adjust_automodify_address (srcbase, HImode, addr, srcoffset);
13940 emit_insn (gen_unaligned_loadhiu (halfword_tmp, mem));
13942 /* Either write out immediately, or delay until we've loaded the last
13943 byte, depending on interleave factor. */
13944 if (interleave_factor == 1)
13946 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
13947 mem = adjust_automodify_address (dstbase, HImode, addr, dstoffset);
13948 emit_insn (gen_unaligned_storehi (mem,
13949 gen_lowpart (HImode, halfword_tmp)));
13950 halfword_tmp = NULL;
13951 dstoffset += 2;
13954 remaining -= 2;
13955 srcoffset += 2;
13958 gcc_assert (remaining < 2);
13960 /* Copy last byte. */
13962 if ((remaining & 1) != 0)
13964 byte_tmp = gen_reg_rtx (SImode);
13966 addr = plus_constant (Pmode, src, srcoffset - src_autoinc);
13967 mem = adjust_automodify_address (srcbase, QImode, addr, srcoffset);
13968 emit_move_insn (gen_lowpart (QImode, byte_tmp), mem);
13970 if (interleave_factor == 1)
13972 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
13973 mem = adjust_automodify_address (dstbase, QImode, addr, dstoffset);
13974 emit_move_insn (mem, gen_lowpart (QImode, byte_tmp));
13975 byte_tmp = NULL;
13976 dstoffset++;
13979 remaining--;
13980 srcoffset++;
13983 /* Store last halfword if we haven't done so already. */
13985 if (halfword_tmp)
13987 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
13988 mem = adjust_automodify_address (dstbase, HImode, addr, dstoffset);
13989 emit_insn (gen_unaligned_storehi (mem,
13990 gen_lowpart (HImode, halfword_tmp)));
13991 dstoffset += 2;
13994 /* Likewise for last byte. */
13996 if (byte_tmp)
13998 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
13999 mem = adjust_automodify_address (dstbase, QImode, addr, dstoffset);
14000 emit_move_insn (mem, gen_lowpart (QImode, byte_tmp));
14001 dstoffset++;
14004 gcc_assert (remaining == 0 && srcoffset == dstoffset);
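/* Worked example of the copy schedule above: LENGTH == 23 with
   INTERLEAVE_FACTOR == 2 gives block_size_bytes == 8, so the main loop copies
   two 8-byte blocks, the "whole words" code copies one leftover word, and a
   halfword plus a final byte finish the job: 8 + 8 + 4 + 2 + 1 == 23, leaving
   REMAINING == 0 as the closing assert requires.  */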
14007 /* From mips_adjust_block_mem:
14009 Helper function for doing a loop-based block operation on memory
14010 reference MEM. Each iteration of the loop will operate on LENGTH
14011 bytes of MEM.
14013 Create a new base register for use within the loop and point it to
14014 the start of MEM. Create a new memory reference that uses this
14015 register. Store them in *LOOP_REG and *LOOP_MEM respectively. */
14017 static void
14018 arm_adjust_block_mem (rtx mem, HOST_WIDE_INT length, rtx *loop_reg,
14019 rtx *loop_mem)
14021 *loop_reg = copy_addr_to_reg (XEXP (mem, 0));
14023 /* Although the new mem does not refer to a known location,
14024 it does keep up to LENGTH bytes of alignment. */
14025 *loop_mem = change_address (mem, BLKmode, *loop_reg);
14026 set_mem_align (*loop_mem, MIN (MEM_ALIGN (mem), length * BITS_PER_UNIT));
14029 /* From mips_block_move_loop:
14031 Move LENGTH bytes from SRC to DEST using a loop that moves BYTES_PER_ITER
14032 bytes at a time. LENGTH must be at least BYTES_PER_ITER. Assume that
14033 the memory regions do not overlap. */
14035 static void
14036 arm_block_move_unaligned_loop (rtx dest, rtx src, HOST_WIDE_INT length,
14037 unsigned int interleave_factor,
14038 HOST_WIDE_INT bytes_per_iter)
14040 rtx src_reg, dest_reg, final_src, test;
14041 HOST_WIDE_INT leftover;
14043 leftover = length % bytes_per_iter;
14044 length -= leftover;
14046 /* Create registers and memory references for use within the loop. */
14047 arm_adjust_block_mem (src, bytes_per_iter, &src_reg, &src);
14048 arm_adjust_block_mem (dest, bytes_per_iter, &dest_reg, &dest);
14050 /* Calculate the value that SRC_REG should have after the last iteration of
14051 the loop. */
14052 final_src = expand_simple_binop (Pmode, PLUS, src_reg, GEN_INT (length),
14053 0, 0, OPTAB_WIDEN);
14055 /* Emit the start of the loop. */
14056 rtx_code_label *label = gen_label_rtx ();
14057 emit_label (label);
14059 /* Emit the loop body. */
14060 arm_block_move_unaligned_straight (dest, src, bytes_per_iter,
14061 interleave_factor);
14063 /* Move on to the next block. */
14064 emit_move_insn (src_reg, plus_constant (Pmode, src_reg, bytes_per_iter));
14065 emit_move_insn (dest_reg, plus_constant (Pmode, dest_reg, bytes_per_iter));
14067 /* Emit the loop condition. */
14068 test = gen_rtx_NE (VOIDmode, src_reg, final_src);
14069 emit_jump_insn (gen_cbranchsi4 (test, src_reg, final_src, label));
14071 /* Mop up any left-over bytes. */
14072 if (leftover)
14073 arm_block_move_unaligned_straight (dest, src, leftover, interleave_factor);
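/* Illustrative numbers for the loop above: LENGTH == 100 with
   BYTES_PER_ITER == 16 leaves LEFTOVER == 4; the loop body then runs six
   times (stopping once SRC_REG reaches FINAL_SRC == src + 96) and the
   remaining 4 bytes are handled by the straight-line copy at the end.  */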
14076 /* Emit a block move when either the source or destination is unaligned (not
14077 aligned to a four-byte boundary). This may need further tuning depending on
14078 core type, optimize_size setting, etc. */
14080 static int
14081 arm_movmemqi_unaligned (rtx *operands)
14083 HOST_WIDE_INT length = INTVAL (operands[2]);
14085 if (optimize_size)
14087 bool src_aligned = MEM_ALIGN (operands[1]) >= BITS_PER_WORD;
14088 bool dst_aligned = MEM_ALIGN (operands[0]) >= BITS_PER_WORD;
14089 /* Inlined memcpy using ldr/str/ldrh/strh can be quite big: try to limit
14090 size of code if optimizing for size. We'll use ldm/stm if src_aligned
14091 or dst_aligned though: allow more interleaving in those cases since the
14092 resulting code can be smaller. */
14093 unsigned int interleave_factor = (src_aligned || dst_aligned) ? 2 : 1;
14094 HOST_WIDE_INT bytes_per_iter = (src_aligned || dst_aligned) ? 8 : 4;
14096 if (length > 12)
14097 arm_block_move_unaligned_loop (operands[0], operands[1], length,
14098 interleave_factor, bytes_per_iter);
14099 else
14100 arm_block_move_unaligned_straight (operands[0], operands[1], length,
14101 interleave_factor);
14103 else
14105 /* Note that the loop created by arm_block_move_unaligned_loop may be
14106 subject to loop unrolling, which makes tuning this condition a little
14107 redundant. */
14108 if (length > 32)
14109 arm_block_move_unaligned_loop (operands[0], operands[1], length, 4, 16);
14110 else
14111 arm_block_move_unaligned_straight (operands[0], operands[1], length, 4);
14114 return 1;
14118 int arm_gen_movmemqi (rtx *operands)
14120 HOST_WIDE_INT in_words_to_go, out_words_to_go, last_bytes;
14121 HOST_WIDE_INT srcoffset, dstoffset;
14122 int i;
14123 rtx src, dst, srcbase, dstbase;
14124 rtx part_bytes_reg = NULL;
14125 rtx mem;
14127 if (!CONST_INT_P (operands[2])
14128 || !CONST_INT_P (operands[3])
14129 || INTVAL (operands[2]) > 64)
14130 return 0;
14132 if (unaligned_access && (INTVAL (operands[3]) & 3) != 0)
14133 return arm_movmemqi_unaligned (operands);
14135 if (INTVAL (operands[3]) & 3)
14136 return 0;
14138 dstbase = operands[0];
14139 srcbase = operands[1];
14141 dst = copy_to_mode_reg (SImode, XEXP (dstbase, 0));
14142 src = copy_to_mode_reg (SImode, XEXP (srcbase, 0));
14144 in_words_to_go = ARM_NUM_INTS (INTVAL (operands[2]));
14145 out_words_to_go = INTVAL (operands[2]) / 4;
14146 last_bytes = INTVAL (operands[2]) & 3;
14147 dstoffset = srcoffset = 0;
14149 if (out_words_to_go != in_words_to_go && ((in_words_to_go - 1) & 3) != 0)
14150 part_bytes_reg = gen_rtx_REG (SImode, (in_words_to_go - 1) & 3);
14152 for (i = 0; in_words_to_go >= 2; i+=4)
14154 if (in_words_to_go > 4)
14155 emit_insn (arm_gen_load_multiple (arm_regs_in_sequence, 4, src,
14156 TRUE, srcbase, &srcoffset));
14157 else
14158 emit_insn (arm_gen_load_multiple (arm_regs_in_sequence, in_words_to_go,
14159 src, FALSE, srcbase,
14160 &srcoffset));
14162 if (out_words_to_go)
14164 if (out_words_to_go > 4)
14165 emit_insn (arm_gen_store_multiple (arm_regs_in_sequence, 4, dst,
14166 TRUE, dstbase, &dstoffset));
14167 else if (out_words_to_go != 1)
14168 emit_insn (arm_gen_store_multiple (arm_regs_in_sequence,
14169 out_words_to_go, dst,
14170 (last_bytes == 0
14171 ? FALSE : TRUE),
14172 dstbase, &dstoffset));
14173 else
14175 mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
14176 emit_move_insn (mem, gen_rtx_REG (SImode, R0_REGNUM));
14177 if (last_bytes != 0)
14179 emit_insn (gen_addsi3 (dst, dst, GEN_INT (4)));
14180 dstoffset += 4;
14185 in_words_to_go -= in_words_to_go < 4 ? in_words_to_go : 4;
14186 out_words_to_go -= out_words_to_go < 4 ? out_words_to_go : 4;
14189 /* OUT_WORDS_TO_GO will be zero here if there are byte stores to do. */
14190 if (out_words_to_go)
14192 rtx sreg;
14194 mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
14195 sreg = copy_to_reg (mem);
14197 mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
14198 emit_move_insn (mem, sreg);
14199 in_words_to_go--;
14201 gcc_assert (!in_words_to_go); /* Sanity check */
14204 if (in_words_to_go)
14206 gcc_assert (in_words_to_go > 0);
14208 mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
14209 part_bytes_reg = copy_to_mode_reg (SImode, mem);
14212 gcc_assert (!last_bytes || part_bytes_reg);
14214 if (BYTES_BIG_ENDIAN && last_bytes)
14216 rtx tmp = gen_reg_rtx (SImode);
14218 /* The bytes we want are in the top end of the word. */
14219 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg,
14220 GEN_INT (8 * (4 - last_bytes))));
14221 part_bytes_reg = tmp;
14223 while (last_bytes)
14225 mem = adjust_automodify_address (dstbase, QImode,
14226 plus_constant (Pmode, dst,
14227 last_bytes - 1),
14228 dstoffset + last_bytes - 1);
14229 emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
14231 if (--last_bytes)
14233 tmp = gen_reg_rtx (SImode);
14234 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (8)));
14235 part_bytes_reg = tmp;
14240 else
14242 if (last_bytes > 1)
14244 mem = adjust_automodify_address (dstbase, HImode, dst, dstoffset);
14245 emit_move_insn (mem, gen_lowpart (HImode, part_bytes_reg));
14246 last_bytes -= 2;
14247 if (last_bytes)
14249 rtx tmp = gen_reg_rtx (SImode);
14250 emit_insn (gen_addsi3 (dst, dst, const2_rtx));
14251 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (16)));
14252 part_bytes_reg = tmp;
14253 dstoffset += 2;
14257 if (last_bytes)
14259 mem = adjust_automodify_address (dstbase, QImode, dst, dstoffset);
14260 emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
14264 return 1;
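/* Small worked example for the expansion above, assuming a 16-byte copy of
   word-aligned operands: in_words_to_go == out_words_to_go == 4 and
   last_bytes == 0, so a single pass of the loop emits one four-word ldm and
   one four-word stm (neither with writeback, as nothing follows) and all of
   the tail-handling code is skipped.  */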
14267 /* Helper for gen_movmem_ldrd_strd. Increase the address of memory rtx
14268 by mode size. */
14269 inline static rtx
14270 next_consecutive_mem (rtx mem)
14272 machine_mode mode = GET_MODE (mem);
14273 HOST_WIDE_INT offset = GET_MODE_SIZE (mode);
14274 rtx addr = plus_constant (Pmode, XEXP (mem, 0), offset);
14276 return adjust_automodify_address (mem, mode, addr, offset);
14279 /* Copy using LDRD/STRD instructions whenever possible.
14280 Returns true upon success. */
14281 bool
14282 gen_movmem_ldrd_strd (rtx *operands)
14284 unsigned HOST_WIDE_INT len;
14285 HOST_WIDE_INT align;
14286 rtx src, dst, base;
14287 rtx reg0;
14288 bool src_aligned, dst_aligned;
14289 bool src_volatile, dst_volatile;
14291 gcc_assert (CONST_INT_P (operands[2]));
14292 gcc_assert (CONST_INT_P (operands[3]));
14294 len = UINTVAL (operands[2]);
14295 if (len > 64)
14296 return false;
14298 /* Maximum alignment we can assume for both src and dst buffers. */
14299 align = INTVAL (operands[3]);
14301 if ((!unaligned_access) && (len >= 4) && ((align & 3) != 0))
14302 return false;
14304 /* Place src and dst addresses in registers
14305 and update the corresponding mem rtx. */
14306 dst = operands[0];
14307 dst_volatile = MEM_VOLATILE_P (dst);
14308 dst_aligned = MEM_ALIGN (dst) >= BITS_PER_WORD;
14309 base = copy_to_mode_reg (SImode, XEXP (dst, 0));
14310 dst = adjust_automodify_address (dst, VOIDmode, base, 0);
14312 src = operands[1];
14313 src_volatile = MEM_VOLATILE_P (src);
14314 src_aligned = MEM_ALIGN (src) >= BITS_PER_WORD;
14315 base = copy_to_mode_reg (SImode, XEXP (src, 0));
14316 src = adjust_automodify_address (src, VOIDmode, base, 0);
14318 if (!unaligned_access && !(src_aligned && dst_aligned))
14319 return false;
14321 if (src_volatile || dst_volatile)
14322 return false;
14324 /* If we cannot generate any LDRD/STRD, try to generate LDM/STM. */
14325 if (!(dst_aligned || src_aligned))
14326 return arm_gen_movmemqi (operands);
14328 /* If either src or dst is unaligned we'll be accessing it as pairs
14329 of unaligned SImode accesses. Otherwise we can generate DImode
14330 ldrd/strd instructions. */
14331 src = adjust_address (src, src_aligned ? DImode : SImode, 0);
14332 dst = adjust_address (dst, dst_aligned ? DImode : SImode, 0);
14334 while (len >= 8)
14336 len -= 8;
14337 reg0 = gen_reg_rtx (DImode);
14338 rtx low_reg = NULL_RTX;
14339 rtx hi_reg = NULL_RTX;
14341 if (!src_aligned || !dst_aligned)
14343 low_reg = gen_lowpart (SImode, reg0);
14344 hi_reg = gen_highpart_mode (SImode, DImode, reg0);
14346 if (src_aligned)
14347 emit_move_insn (reg0, src);
14348 else
14350 emit_insn (gen_unaligned_loadsi (low_reg, src));
14351 src = next_consecutive_mem (src);
14352 emit_insn (gen_unaligned_loadsi (hi_reg, src));
14355 if (dst_aligned)
14356 emit_move_insn (dst, reg0);
14357 else
14359 emit_insn (gen_unaligned_storesi (dst, low_reg));
14360 dst = next_consecutive_mem (dst);
14361 emit_insn (gen_unaligned_storesi (dst, hi_reg));
14364 src = next_consecutive_mem (src);
14365 dst = next_consecutive_mem (dst);
14368 gcc_assert (len < 8);
14369 if (len >= 4)
14371 /* More than a word but less than a double-word to copy. Copy a word. */
14372 reg0 = gen_reg_rtx (SImode);
14373 src = adjust_address (src, SImode, 0);
14374 dst = adjust_address (dst, SImode, 0);
14375 if (src_aligned)
14376 emit_move_insn (reg0, src);
14377 else
14378 emit_insn (gen_unaligned_loadsi (reg0, src));
14380 if (dst_aligned)
14381 emit_move_insn (dst, reg0);
14382 else
14383 emit_insn (gen_unaligned_storesi (dst, reg0));
14385 src = next_consecutive_mem (src);
14386 dst = next_consecutive_mem (dst);
14387 len -= 4;
14390 if (len == 0)
14391 return true;
14393 /* Copy the remaining bytes. */
14394 if (len >= 2)
14396 dst = adjust_address (dst, HImode, 0);
14397 src = adjust_address (src, HImode, 0);
14398 reg0 = gen_reg_rtx (SImode);
14399 if (src_aligned)
14400 emit_insn (gen_zero_extendhisi2 (reg0, src));
14401 else
14402 emit_insn (gen_unaligned_loadhiu (reg0, src));
14404 if (dst_aligned)
14405 emit_insn (gen_movhi (dst, gen_lowpart(HImode, reg0)));
14406 else
14407 emit_insn (gen_unaligned_storehi (dst, gen_lowpart (HImode, reg0)));
14409 src = next_consecutive_mem (src);
14410 dst = next_consecutive_mem (dst);
14411 if (len == 2)
14412 return true;
14415 dst = adjust_address (dst, QImode, 0);
14416 src = adjust_address (src, QImode, 0);
14417 reg0 = gen_reg_rtx (QImode);
14418 emit_move_insn (reg0, src);
14419 emit_move_insn (dst, reg0);
14420 return true;
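/* Sketch of how the code above splits an odd-sized copy: for LEN == 15 with
   both operands word-aligned, one DImode ldrd/strd pair moves 8 bytes, then a
   word, a halfword and a final byte are copied (8 + 4 + 2 + 1 == 15), each
   through a freshly allocated pseudo register.  */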
14423 /* Select a dominance comparison mode if possible for a test of the general
14424 form (OP (COND_OR (X) (Y)) (const_int 0)). We support three forms.
14425 COND_OR == DOM_CC_X_AND_Y => (X && Y)
14426 COND_OR == DOM_CC_NX_OR_Y => ((! X) || Y)
14427 COND_OR == DOM_CC_X_OR_Y => (X || Y)
14428 In all cases OP will be either EQ or NE, but we don't need to know which
14429 here. If we are unable to support a dominance comparison we return
14430 CC mode. This will then fail to match for the RTL expressions that
14431 generate this call. */
14432 machine_mode
14433 arm_select_dominance_cc_mode (rtx x, rtx y, HOST_WIDE_INT cond_or)
14435 enum rtx_code cond1, cond2;
14436 int swapped = 0;
14438 /* Currently we will probably get the wrong result if the individual
14439 comparisons are not simple. This also ensures that it is safe to
14440 reverse a comparison if necessary. */
14441 if ((arm_select_cc_mode (cond1 = GET_CODE (x), XEXP (x, 0), XEXP (x, 1))
14442 != CCmode)
14443 || (arm_select_cc_mode (cond2 = GET_CODE (y), XEXP (y, 0), XEXP (y, 1))
14444 != CCmode))
14445 return CCmode;
14447 /* The if_then_else variant of this tests the second condition if the
14448 first passes, but is true if the first fails. Reverse the first
14449 condition to get a true "inclusive-or" expression. */
14450 if (cond_or == DOM_CC_NX_OR_Y)
14451 cond1 = reverse_condition (cond1);
14453 /* If the comparisons are not equal, and one doesn't dominate the other,
14454 then we can't do this. */
14455 if (cond1 != cond2
14456 && !comparison_dominates_p (cond1, cond2)
14457 && (swapped = 1, !comparison_dominates_p (cond2, cond1)))
14458 return CCmode;
14460 if (swapped)
14461 std::swap (cond1, cond2);
14463 switch (cond1)
14465 case EQ:
14466 if (cond_or == DOM_CC_X_AND_Y)
14467 return CC_DEQmode;
14469 switch (cond2)
14471 case EQ: return CC_DEQmode;
14472 case LE: return CC_DLEmode;
14473 case LEU: return CC_DLEUmode;
14474 case GE: return CC_DGEmode;
14475 case GEU: return CC_DGEUmode;
14476 default: gcc_unreachable ();
14479 case LT:
14480 if (cond_or == DOM_CC_X_AND_Y)
14481 return CC_DLTmode;
14483 switch (cond2)
14485 case LT:
14486 return CC_DLTmode;
14487 case LE:
14488 return CC_DLEmode;
14489 case NE:
14490 return CC_DNEmode;
14491 default:
14492 gcc_unreachable ();
14495 case GT:
14496 if (cond_or == DOM_CC_X_AND_Y)
14497 return CC_DGTmode;
14499 switch (cond2)
14501 case GT:
14502 return CC_DGTmode;
14503 case GE:
14504 return CC_DGEmode;
14505 case NE:
14506 return CC_DNEmode;
14507 default:
14508 gcc_unreachable ();
14511 case LTU:
14512 if (cond_or == DOM_CC_X_AND_Y)
14513 return CC_DLTUmode;
14515 switch (cond2)
14517 case LTU:
14518 return CC_DLTUmode;
14519 case LEU:
14520 return CC_DLEUmode;
14521 case NE:
14522 return CC_DNEmode;
14523 default:
14524 gcc_unreachable ();
14527 case GTU:
14528 if (cond_or == DOM_CC_X_AND_Y)
14529 return CC_DGTUmode;
14531 switch (cond2)
14533 case GTU:
14534 return CC_DGTUmode;
14535 case GEU:
14536 return CC_DGEUmode;
14537 case NE:
14538 return CC_DNEmode;
14539 default:
14540 gcc_unreachable ();
14543 /* The remaining cases only occur when both comparisons are the
14544 same. */
14545 case NE:
14546 gcc_assert (cond1 == cond2);
14547 return CC_DNEmode;
14549 case LE:
14550 gcc_assert (cond1 == cond2);
14551 return CC_DLEmode;
14553 case GE:
14554 gcc_assert (cond1 == cond2);
14555 return CC_DGEmode;
14557 case LEU:
14558 gcc_assert (cond1 == cond2);
14559 return CC_DLEUmode;
14561 case GEU:
14562 gcc_assert (cond1 == cond2);
14563 return CC_DGEUmode;
14565 default:
14566 gcc_unreachable ();
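/* Two concrete mappings from the dominance machinery above (illustrative
   only): with COND_OR == DOM_CC_X_OR_Y, (lt || le) collapses to CC_DLEmode,
   since LT implies LE; with COND_OR == DOM_CC_X_AND_Y, (eq && ge) is simply
   CC_DEQmode, since a value satisfying EQ already satisfies GE.  */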
14570 machine_mode
14571 arm_select_cc_mode (enum rtx_code op, rtx x, rtx y)
14573 /* All floating point compares return CCFP if it is an equality
14574 comparison, and CCFPE otherwise. */
14575 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
14577 switch (op)
14579 case EQ:
14580 case NE:
14581 case UNORDERED:
14582 case ORDERED:
14583 case UNLT:
14584 case UNLE:
14585 case UNGT:
14586 case UNGE:
14587 case UNEQ:
14588 case LTGT:
14589 return CCFPmode;
14591 case LT:
14592 case LE:
14593 case GT:
14594 case GE:
14595 return CCFPEmode;
14597 default:
14598 gcc_unreachable ();
14602 /* A compare with a shifted operand. Because of canonicalization, the
14603 comparison will have to be swapped when we emit the assembler. */
14604 if (GET_MODE (y) == SImode
14605 && (REG_P (y) || (GET_CODE (y) == SUBREG))
14606 && (GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
14607 || GET_CODE (x) == LSHIFTRT || GET_CODE (x) == ROTATE
14608 || GET_CODE (x) == ROTATERT))
14609 return CC_SWPmode;
14611 /* This operation is performed swapped, but since we only rely on the Z
14612 flag we don't need an additional mode. */
14613 if (GET_MODE (y) == SImode
14614 && (REG_P (y) || (GET_CODE (y) == SUBREG))
14615 && GET_CODE (x) == NEG
14616 && (op == EQ || op == NE))
14617 return CC_Zmode;
14619 /* This is a special case that is used by combine to allow a
14620 comparison of a shifted byte load to be split into a zero-extend
14621 followed by a comparison of the shifted integer (only valid for
14622 equalities and unsigned inequalities). */
14623 if (GET_MODE (x) == SImode
14624 && GET_CODE (x) == ASHIFT
14625 && CONST_INT_P (XEXP (x, 1)) && INTVAL (XEXP (x, 1)) == 24
14626 && GET_CODE (XEXP (x, 0)) == SUBREG
14627 && MEM_P (SUBREG_REG (XEXP (x, 0)))
14628 && GET_MODE (SUBREG_REG (XEXP (x, 0))) == QImode
14629 && (op == EQ || op == NE
14630 || op == GEU || op == GTU || op == LTU || op == LEU)
14631 && CONST_INT_P (y))
14632 return CC_Zmode;
14634 /* A construct for a conditional compare, if the false arm contains
14635 0, then both conditions must be true, otherwise either condition
14636 must be true. Not all conditions are possible, so CCmode is
14637 returned if it can't be done. */
14638 if (GET_CODE (x) == IF_THEN_ELSE
14639 && (XEXP (x, 2) == const0_rtx
14640 || XEXP (x, 2) == const1_rtx)
14641 && COMPARISON_P (XEXP (x, 0))
14642 && COMPARISON_P (XEXP (x, 1)))
14643 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
14644 INTVAL (XEXP (x, 2)));
14646 /* Alternate canonicalizations of the above. These are somewhat cleaner. */
14647 if (GET_CODE (x) == AND
14648 && (op == EQ || op == NE)
14649 && COMPARISON_P (XEXP (x, 0))
14650 && COMPARISON_P (XEXP (x, 1)))
14651 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
14652 DOM_CC_X_AND_Y);
14654 if (GET_CODE (x) == IOR
14655 && (op == EQ || op == NE)
14656 && COMPARISON_P (XEXP (x, 0))
14657 && COMPARISON_P (XEXP (x, 1)))
14658 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
14659 DOM_CC_X_OR_Y);
14661 /* An operation (on Thumb) where we want to test for a single bit.
14662 This is done by shifting that bit up into the top bit of a
14663 scratch register; we can then branch on the sign bit. */
14664 if (TARGET_THUMB1
14665 && GET_MODE (x) == SImode
14666 && (op == EQ || op == NE)
14667 && GET_CODE (x) == ZERO_EXTRACT
14668 && XEXP (x, 1) == const1_rtx)
14669 return CC_Nmode;
14671 /* An operation that sets the condition codes as a side-effect, the
14672 V flag is not set correctly, so we can only use comparisons where
14673 this doesn't matter. (For LT and GE we can use "mi" and "pl"
14674 instead.) */
14675 /* ??? Does the ZERO_EXTRACT case really apply to thumb2? */
14676 if (GET_MODE (x) == SImode
14677 && y == const0_rtx
14678 && (op == EQ || op == NE || op == LT || op == GE)
14679 && (GET_CODE (x) == PLUS || GET_CODE (x) == MINUS
14680 || GET_CODE (x) == AND || GET_CODE (x) == IOR
14681 || GET_CODE (x) == XOR || GET_CODE (x) == MULT
14682 || GET_CODE (x) == NOT || GET_CODE (x) == NEG
14683 || GET_CODE (x) == LSHIFTRT
14684 || GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
14685 || GET_CODE (x) == ROTATERT
14686 || (TARGET_32BIT && GET_CODE (x) == ZERO_EXTRACT)))
14687 return CC_NOOVmode;
14689 if (GET_MODE (x) == QImode && (op == EQ || op == NE))
14690 return CC_Zmode;
14692 if (GET_MODE (x) == SImode && (op == LTU || op == GEU)
14693 && GET_CODE (x) == PLUS
14694 && (rtx_equal_p (XEXP (x, 0), y) || rtx_equal_p (XEXP (x, 1), y)))
14695 return CC_Cmode;
14697 if (GET_MODE (x) == DImode || GET_MODE (y) == DImode)
14699 switch (op)
14701 case EQ:
14702 case NE:
14703 /* A DImode comparison against zero can be implemented by
14704 or'ing the two halves together. */
14705 if (y == const0_rtx)
14706 return CC_Zmode;
14708 /* We can do an equality test in three Thumb instructions. */
14709 if (!TARGET_32BIT)
14710 return CC_Zmode;
14712 /* FALLTHROUGH */
14714 case LTU:
14715 case LEU:
14716 case GTU:
14717 case GEU:
14718 /* DImode unsigned comparisons can be implemented by cmp +
14719 cmpeq without a scratch register. Not worth doing in
14720 Thumb-2. */
14721 if (TARGET_32BIT)
14722 return CC_CZmode;
14724 /* FALLTHROUGH */
14726 case LT:
14727 case LE:
14728 case GT:
14729 case GE:
14730 /* DImode signed and unsigned comparisons can be implemented
14731 by cmp + sbcs with a scratch register, but that does not
14732 set the Z flag - we must reverse GT/LE/GTU/LEU. */
14733 gcc_assert (op != EQ && op != NE);
14734 return CC_NCVmode;
14736 default:
14737 gcc_unreachable ();
14741 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_CC)
14742 return GET_MODE (x);
14744 return CCmode;
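/* For reference, two common selections made above: comparing (plus:SI a b)
   against zero for EQ, NE, LT or GE yields CC_NOOVmode, because the
   flag-setting form of the arithmetic insn does not set V correctly; and a
   comparison whose first operand is a shift, e.g. (ashift:SI a (const_int 2))
   against a register, yields CC_SWPmode, since the operands will have to be
   swapped when the assembler output is emitted.  */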
14747 /* X and Y are two things to compare using CODE. Emit the compare insn and
14748 return the rtx for the CC register in the proper mode. SCRATCH is a
14749 scratch register needed for certain DImode comparisons after reload. */
14751 rtx arm_gen_compare_reg (enum rtx_code code, rtx x, rtx y, rtx scratch)
14753 machine_mode mode;
14754 rtx cc_reg;
14755 int dimode_comparison = GET_MODE (x) == DImode || GET_MODE (y) == DImode;
14757 /* We might have X as a constant, Y as a register because of the predicates
14758 used for cmpdi. If so, force X to a register here. */
14759 if (dimode_comparison && !REG_P (x))
14760 x = force_reg (DImode, x);
14762 mode = SELECT_CC_MODE (code, x, y);
14763 cc_reg = gen_rtx_REG (mode, CC_REGNUM);
14765 if (dimode_comparison
14766 && mode != CC_CZmode)
14768 rtx clobber, set;
14770 /* To compare two non-zero values for equality, XOR them and
14771 then compare against zero. Not used for ARM mode; there
14772 CC_CZmode is cheaper. */
14773 if (mode == CC_Zmode && y != const0_rtx)
14775 gcc_assert (!reload_completed);
14776 x = expand_binop (DImode, xor_optab, x, y, NULL_RTX, 0, OPTAB_WIDEN);
14777 y = const0_rtx;
14780 /* A scratch register is required. */
14781 if (reload_completed)
14782 gcc_assert (scratch != NULL && GET_MODE (scratch) == SImode);
14783 else
14784 scratch = gen_rtx_SCRATCH (SImode);
14786 clobber = gen_rtx_CLOBBER (VOIDmode, scratch);
14787 set = gen_rtx_SET (cc_reg, gen_rtx_COMPARE (mode, x, y));
14788 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, set, clobber)));
14790 else
14791 emit_set_insn (cc_reg, gen_rtx_COMPARE (mode, x, y));
14793 return cc_reg;
14796 /* Generate a sequence of insns that will generate the correct return
14797 address mask depending on the physical architecture that the program
14798 is running on. */
14800 rtx arm_gen_return_addr_mask (void)
14802 rtx reg = gen_reg_rtx (Pmode);
14804 emit_insn (gen_return_addr_mask (reg));
14805 return reg;
14808 void
14809 arm_reload_in_hi (rtx *operands)
14811 rtx ref = operands[1];
14812 rtx base, scratch;
14813 HOST_WIDE_INT offset = 0;
14815 if (GET_CODE (ref) == SUBREG)
14817 offset = SUBREG_BYTE (ref);
14818 ref = SUBREG_REG (ref);
14821 if (REG_P (ref))
14823 /* We have a pseudo which has been spilt onto the stack; there
14824 are two cases here: the first where there is a simple
14825 stack-slot replacement and a second where the stack-slot is
14826 out of range, or is used as a subreg. */
14827 if (reg_equiv_mem (REGNO (ref)))
14829 ref = reg_equiv_mem (REGNO (ref));
14830 base = find_replacement (&XEXP (ref, 0));
14832 else
14833 /* The slot is out of range, or was dressed up in a SUBREG. */
14834 base = reg_equiv_address (REGNO (ref));
14836 /* PR 62554: If there is no equivalent memory location then just move
14837 the value as an SImode register move. This happens when the target
14838 architecture variant does not have an HImode register move. */
14839 if (base == NULL)
14841 gcc_assert (REG_P (operands[0]));
14842 emit_insn (gen_movsi (gen_rtx_SUBREG (SImode, operands[0], 0),
14843 gen_rtx_SUBREG (SImode, ref, 0)));
14844 return;
14847 else
14848 base = find_replacement (&XEXP (ref, 0));
14850 /* Handle the case where the address is too complex to be offset by 1. */
14851 if (GET_CODE (base) == MINUS
14852 || (GET_CODE (base) == PLUS && !CONST_INT_P (XEXP (base, 1))))
14854 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
14856 emit_set_insn (base_plus, base);
14857 base = base_plus;
14859 else if (GET_CODE (base) == PLUS)
14861 /* The addend must be CONST_INT, or we would have dealt with it above. */
14862 HOST_WIDE_INT hi, lo;
14864 offset += INTVAL (XEXP (base, 1));
14865 base = XEXP (base, 0);
14867 /* Rework the address into a legal sequence of insns. */
14868 /* Valid range for lo is -4095 -> 4095 */
14869 lo = (offset >= 0
14870 ? (offset & 0xfff)
14871 : -((-offset) & 0xfff));
14873 /* Corner case, if lo is the max offset then we would be out of range
14874 once we have added the additional 1 below, so bump the msb into the
14875 pre-loading insn(s). */
14876 if (lo == 4095)
14877 lo &= 0x7ff;
14879 hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
14880 ^ (HOST_WIDE_INT) 0x80000000)
14881 - (HOST_WIDE_INT) 0x80000000);
14883 gcc_assert (hi + lo == offset);
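/* Worked numbers for the split above: OFFSET == 0x1234 gives lo == 0x234 and
   hi == 0x1000; the corner case OFFSET == 4095 first trims lo to 0x7ff so
   that hi == 0x800, keeping the later "offset + 1" access within the
   -4095..4095 range.  */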
14885 if (hi != 0)
14887 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
14889 /* Get the base address; addsi3 knows how to handle constants
14890 that require more than one insn. */
14891 emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
14892 base = base_plus;
14893 offset = lo;
14897 /* Operands[2] may overlap operands[0] (though it won't overlap
14898 operands[1]); that's why we asked for a DImode reg -- so we can
14899 use the half that does not overlap. */
14900 if (REGNO (operands[2]) == REGNO (operands[0]))
14901 scratch = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
14902 else
14903 scratch = gen_rtx_REG (SImode, REGNO (operands[2]));
14905 emit_insn (gen_zero_extendqisi2 (scratch,
14906 gen_rtx_MEM (QImode,
14907 plus_constant (Pmode, base,
14908 offset))));
14909 emit_insn (gen_zero_extendqisi2 (gen_rtx_SUBREG (SImode, operands[0], 0),
14910 gen_rtx_MEM (QImode,
14911 plus_constant (Pmode, base,
14912 offset + 1))));
14913 if (!BYTES_BIG_ENDIAN)
14914 emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
14915 gen_rtx_IOR (SImode,
14916 gen_rtx_ASHIFT
14917 (SImode,
14918 gen_rtx_SUBREG (SImode, operands[0], 0),
14919 GEN_INT (8)),
14920 scratch));
14921 else
14922 emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
14923 gen_rtx_IOR (SImode,
14924 gen_rtx_ASHIFT (SImode, scratch,
14925 GEN_INT (8)),
14926 gen_rtx_SUBREG (SImode, operands[0], 0)));
14929 /* Handle storing a half-word to memory during reload by synthesizing as two
14930 byte stores. Take care not to clobber the input values until after we
14931 have moved them somewhere safe. This code assumes that if the DImode
14932 scratch in operands[2] overlaps either the input value or output address
14933 in some way, then that value must die in this insn (we absolutely need
14934 two scratch registers for some corner cases). */
14935 void
14936 arm_reload_out_hi (rtx *operands)
14938 rtx ref = operands[0];
14939 rtx outval = operands[1];
14940 rtx base, scratch;
14941 HOST_WIDE_INT offset = 0;
14943 if (GET_CODE (ref) == SUBREG)
14945 offset = SUBREG_BYTE (ref);
14946 ref = SUBREG_REG (ref);
14949 if (REG_P (ref))
14951 /* We have a pseudo which has been spilt onto the stack; there
14952 are two cases here: the first where there is a simple
14953 stack-slot replacement and a second where the stack-slot is
14954 out of range, or is used as a subreg. */
14955 if (reg_equiv_mem (REGNO (ref)))
14957 ref = reg_equiv_mem (REGNO (ref));
14958 base = find_replacement (&XEXP (ref, 0));
14960 else
14961 /* The slot is out of range, or was dressed up in a SUBREG. */
14962 base = reg_equiv_address (REGNO (ref));
14964 /* PR 62254: If there is no equivalent memory location then just move
14965 the value as an SImode register move. This happens when the target
14966 architecture variant does not have an HImode register move. */
14967 if (base == NULL)
14969 gcc_assert (REG_P (outval) || SUBREG_P (outval));
14971 if (REG_P (outval))
14973 emit_insn (gen_movsi (gen_rtx_SUBREG (SImode, ref, 0),
14974 gen_rtx_SUBREG (SImode, outval, 0)));
14976 else /* SUBREG_P (outval) */
14978 if (GET_MODE (SUBREG_REG (outval)) == SImode)
14979 emit_insn (gen_movsi (gen_rtx_SUBREG (SImode, ref, 0),
14980 SUBREG_REG (outval)));
14981 else
14982 /* FIXME: Handle other cases ? */
14983 gcc_unreachable ();
14985 return;
14988 else
14989 base = find_replacement (&XEXP (ref, 0));
14991 scratch = gen_rtx_REG (SImode, REGNO (operands[2]));
14993 /* Handle the case where the address is too complex to be offset by 1. */
14994 if (GET_CODE (base) == MINUS
14995 || (GET_CODE (base) == PLUS && !CONST_INT_P (XEXP (base, 1))))
14997 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
14999 /* Be careful not to destroy OUTVAL. */
15000 if (reg_overlap_mentioned_p (base_plus, outval))
15002 /* Updating base_plus might destroy outval, see if we can
15003 swap the scratch and base_plus. */
15004 if (!reg_overlap_mentioned_p (scratch, outval))
15005 std::swap (scratch, base_plus);
15006 else
15008 rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));
15010 /* Be conservative and copy OUTVAL into the scratch now,
15011 this should only be necessary if outval is a subreg
15012 of something larger than a word. */
15013 /* XXX Might this clobber base? I can't see how it can,
15014 since scratch is known to overlap with OUTVAL, and
15015 must be wider than a word. */
15016 emit_insn (gen_movhi (scratch_hi, outval));
15017 outval = scratch_hi;
15021 emit_set_insn (base_plus, base);
15022 base = base_plus;
15024 else if (GET_CODE (base) == PLUS)
15026 /* The addend must be CONST_INT, or we would have dealt with it above. */
15027 HOST_WIDE_INT hi, lo;
15029 offset += INTVAL (XEXP (base, 1));
15030 base = XEXP (base, 0);
15032 /* Rework the address into a legal sequence of insns. */
15033 /* Valid range for lo is -4095 -> 4095 */
15034 lo = (offset >= 0
15035 ? (offset & 0xfff)
15036 : -((-offset) & 0xfff));
15038 /* Corner case, if lo is the max offset then we would be out of range
15039 once we have added the additional 1 below, so bump the msb into the
15040 pre-loading insn(s). */
15041 if (lo == 4095)
15042 lo &= 0x7ff;
15044 hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
15045 ^ (HOST_WIDE_INT) 0x80000000)
15046 - (HOST_WIDE_INT) 0x80000000);
15048 gcc_assert (hi + lo == offset);
15050 if (hi != 0)
15052 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
15054 /* Be careful not to destroy OUTVAL. */
15055 if (reg_overlap_mentioned_p (base_plus, outval))
15057 /* Updating base_plus might destroy outval, see if we
15058 can swap the scratch and base_plus. */
15059 if (!reg_overlap_mentioned_p (scratch, outval))
15060 std::swap (scratch, base_plus);
15061 else
15063 rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));
15065 /* Be conservative and copy outval into scratch now,
15066 this should only be necessary if outval is a
15067 subreg of something larger than a word. */
15068 /* XXX Might this clobber base? I can't see how it
15069 can, since scratch is known to overlap with
15070 outval. */
15071 emit_insn (gen_movhi (scratch_hi, outval));
15072 outval = scratch_hi;
15076 /* Get the base address; addsi3 knows how to handle constants
15077 that require more than one insn. */
15078 emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
15079 base = base_plus;
15080 offset = lo;
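  /* Emit the halfword store as two byte stores: SCRATCH receives the top
     byte of OUTVAL via a logical shift right by 8, and the order of the two
     byte stores below depends on the target endianness.  */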
15084 if (BYTES_BIG_ENDIAN)
15086 emit_insn (gen_movqi (gen_rtx_MEM (QImode,
15087 plus_constant (Pmode, base,
15088 offset + 1)),
15089 gen_lowpart (QImode, outval)));
15090 emit_insn (gen_lshrsi3 (scratch,
15091 gen_rtx_SUBREG (SImode, outval, 0),
15092 GEN_INT (8)));
15093 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, base,
15094 offset)),
15095 gen_lowpart (QImode, scratch)));
15097 else
15099 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, base,
15100 offset)),
15101 gen_lowpart (QImode, outval)));
15102 emit_insn (gen_lshrsi3 (scratch,
15103 gen_rtx_SUBREG (SImode, outval, 0),
15104 GEN_INT (8)));
15105 emit_insn (gen_movqi (gen_rtx_MEM (QImode,
15106 plus_constant (Pmode, base,
15107 offset + 1)),
15108 gen_lowpart (QImode, scratch)));
15112 /* Return true if a type must be passed in memory. For AAPCS, small aggregates
15113 (padded to the size of a word) should be passed in a register. */
15115 static bool
15116 arm_must_pass_in_stack (machine_mode mode, const_tree type)
15118 if (TARGET_AAPCS_BASED)
15119 return must_pass_in_stack_var_size (mode, type);
15120 else
15121 return must_pass_in_stack_var_size_or_pad (mode, type);
15125 /* For use by FUNCTION_ARG_PADDING (MODE, TYPE).
15126 Return true if an argument passed on the stack should be padded upwards,
15127 i.e. if the least-significant byte has useful data.
15128 For legacy APCS ABIs we use the default. For AAPCS-based ABIs small
15129 aggregate types are placed at the lowest memory address. */
15131 bool
15132 arm_pad_arg_upward (machine_mode mode ATTRIBUTE_UNUSED, const_tree type)
15134 if (!TARGET_AAPCS_BASED)
15135 return DEFAULT_FUNCTION_ARG_PADDING(mode, type) == upward;
15137 if (type && BYTES_BIG_ENDIAN && INTEGRAL_TYPE_P (type))
15138 return false;
15140 return true;
15144 /* Similarly, for use by BLOCK_REG_PADDING (MODE, TYPE, FIRST).
15145 Return !BYTES_BIG_ENDIAN if the least significant byte of the
15146 register has useful data, and return the opposite if the most
15147 significant byte does. */
15149 bool
15150 arm_pad_reg_upward (machine_mode mode,
15151 tree type, int first ATTRIBUTE_UNUSED)
15153 if (TARGET_AAPCS_BASED && BYTES_BIG_ENDIAN)
15155 /* For AAPCS, small aggregates, small fixed-point types,
15156 and small complex types are always padded upwards. */
15157 if (type)
15159 if ((AGGREGATE_TYPE_P (type)
15160 || TREE_CODE (type) == COMPLEX_TYPE
15161 || FIXED_POINT_TYPE_P (type))
15162 && int_size_in_bytes (type) <= 4)
15163 return true;
15165 else
15167 if ((COMPLEX_MODE_P (mode) || ALL_FIXED_POINT_MODE_P (mode))
15168 && GET_MODE_SIZE (mode) <= 4)
15169 return true;
15173 /* Otherwise, use default padding. */
15174 return !BYTES_BIG_ENDIAN;
15177 /* Returns true iff OFFSET is valid for use in an LDRD/STRD instruction,
15178 assuming that the address in the base register is word aligned. */
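/* For example, in Thumb-2 state offsets in the range -1020..1020 that are
   multiples of 4 are accepted, while in ARM state any offset in the range
   -255..255 is accepted.  */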
15179 bool
15180 offset_ok_for_ldrd_strd (HOST_WIDE_INT offset)
15182 HOST_WIDE_INT max_offset;
15184 /* Offset must be a multiple of 4 in Thumb mode. */
15185 if (TARGET_THUMB2 && ((offset & 3) != 0))
15186 return false;
15188 if (TARGET_THUMB2)
15189 max_offset = 1020;
15190 else if (TARGET_ARM)
15191 max_offset = 255;
15192 else
15193 return false;
15195 return ((offset <= max_offset) && (offset >= -max_offset));
15198 /* Checks whether the operands are valid for use in an LDRD/STRD instruction.
15199 Assumes that RT, RT2, and RN are REG. This is guaranteed by the patterns.
15200 Assumes that the address in the base register RN is word aligned. Pattern
15201 guarantees that both memory accesses use the same base register,
15202 the offsets are constants within the range, and the gap between the offsets is 4.
15203 If reload is complete then check that registers are legal. WBACK indicates whether
15204 address is updated. LOAD indicates whether memory access is load or store. */
15205 bool
15206 operands_ok_ldrd_strd (rtx rt, rtx rt2, rtx rn, HOST_WIDE_INT offset,
15207 bool wback, bool load)
15209 unsigned int t, t2, n;
15211 if (!reload_completed)
15212 return true;
15214 if (!offset_ok_for_ldrd_strd (offset))
15215 return false;
15217 t = REGNO (rt);
15218 t2 = REGNO (rt2);
15219 n = REGNO (rn);
15221 if ((TARGET_THUMB2)
15222 && ((wback && (n == t || n == t2))
15223 || (t == SP_REGNUM)
15224 || (t == PC_REGNUM)
15225 || (t2 == SP_REGNUM)
15226 || (t2 == PC_REGNUM)
15227 || (!load && (n == PC_REGNUM))
15228 || (load && (t == t2))
15229 /* Triggers Cortex-M3 LDRD errata. */
15230 || (!wback && load && fix_cm3_ldrd && (n == t))))
15231 return false;
15233 if ((TARGET_ARM)
15234 && ((wback && (n == t || n == t2))
15235 || (t2 == PC_REGNUM)
15236 || (t % 2 != 0) /* First destination register is not even. */
15237 || (t2 != t + 1)
15238 /* PC can be used as base register (for offset addressing only),
15239 but it is deprecated. */
15240 || (n == PC_REGNUM)))
15241 return false;
15243 return true;
15246 /* Helper for gen_operands_ldrd_strd. Returns true iff the memory
15247 operand MEM's address contains an immediate offset from the base
15248 register and has no side effects, in which case it sets BASE and
15249 OFFSET accordingly. */
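/* For example, an address of the form (plus (reg rN) (const_int 8)) yields
   *BASE = rN and *OFFSET = (const_int 8), while a plain register address
   yields that register as *BASE and leaves *OFFSET as const0_rtx.  */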
15250 static bool
15251 mem_ok_for_ldrd_strd (rtx mem, rtx *base, rtx *offset)
15253 rtx addr;
15255 gcc_assert (base != NULL && offset != NULL);
15257 /* TODO: Handle more general memory operand patterns, such as
15258 PRE_DEC and PRE_INC. */
15260 if (side_effects_p (mem))
15261 return false;
15263 /* Can't deal with subregs. */
15264 if (GET_CODE (mem) == SUBREG)
15265 return false;
15267 gcc_assert (MEM_P (mem));
15269 *offset = const0_rtx;
15271 addr = XEXP (mem, 0);
15273 /* If addr isn't valid for DImode, then we can't handle it. */
15274 if (!arm_legitimate_address_p (DImode, addr,
15275 reload_in_progress || reload_completed))
15276 return false;
15278 if (REG_P (addr))
15280 *base = addr;
15281 return true;
15283 else if (GET_CODE (addr) == PLUS || GET_CODE (addr) == MINUS)
15285 *base = XEXP (addr, 0);
15286 *offset = XEXP (addr, 1);
15287 return (REG_P (*base) && CONST_INT_P (*offset));
15290 return false;
15293 /* Called from a peephole2 to replace two word-size accesses with a
15294 single LDRD/STRD instruction. Returns true iff we can generate a
15295 new instruction sequence. That is, both accesses use the same base
15296 register and the gap between constant offsets is 4. This function
15297 may reorder its operands to match ldrd/strd RTL templates.
15298 OPERANDS are the operands found by the peephole matcher;
15299 OPERANDS[0,1] are register operands, and OPERANDS[2,3] are the
15300 corresponding memory operands. LOAD indicates whether the access
15301 is load or store. CONST_STORE indicates a store of constant
15302 integer values held in OPERANDS[4,5] and assumes that the pattern
15303 is 4 insns long, for the purpose of checking dead registers.
15304 COMMUTE indicates that register operands may be reordered. */
15305 bool
15306 gen_operands_ldrd_strd (rtx *operands, bool load,
15307 bool const_store, bool commute)
15309 int nops = 2;
15310 HOST_WIDE_INT offsets[2], offset;
15311 rtx base = NULL_RTX;
15312 rtx cur_base, cur_offset, tmp;
15313 int i, gap;
15314 HARD_REG_SET regset;
15316 gcc_assert (!const_store || !load);
15317 /* Check that the memory references are immediate offsets from the
15318 same base register. Extract the base register, the destination
15319 registers, and the corresponding memory offsets. */
15320 for (i = 0; i < nops; i++)
15322 if (!mem_ok_for_ldrd_strd (operands[nops+i], &cur_base, &cur_offset))
15323 return false;
15325 if (i == 0)
15326 base = cur_base;
15327 else if (REGNO (base) != REGNO (cur_base))
15328 return false;
15330 offsets[i] = INTVAL (cur_offset);
15331 if (GET_CODE (operands[i]) == SUBREG)
15333 tmp = SUBREG_REG (operands[i]);
15334 gcc_assert (GET_MODE (operands[i]) == GET_MODE (tmp));
15335 operands[i] = tmp;
15339 /* Make sure there is no dependency between the individual loads. */
15340 if (load && REGNO (operands[0]) == REGNO (base))
15341 return false; /* RAW */
15343 if (load && REGNO (operands[0]) == REGNO (operands[1]))
15344 return false; /* WAW */
15346 /* If the same input register is used in both stores
15347 when storing different constants, try to find a free register.
15348 For example, the code
15349 mov r0, 0
15350 str r0, [r2]
15351 mov r0, 1
15352 str r0, [r2, #4]
15353 can be transformed into
15354 mov r1, 0
15355 mov r0, 1
15356 strd r1, r0, [r2]
15357 in Thumb mode assuming that r1 is free.
15358 For ARM mode do the same but only if the starting register
15359 can be made to be even. */
15360 if (const_store
15361 && REGNO (operands[0]) == REGNO (operands[1])
15362 && INTVAL (operands[4]) != INTVAL (operands[5]))
15364 if (TARGET_THUMB2)
15366 CLEAR_HARD_REG_SET (regset);
15367 tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
15368 if (tmp == NULL_RTX)
15369 return false;
15371 /* Use the new register in the first load to ensure that
15372 if the original input register is not dead after peephole,
15373 then it will have the correct constant value. */
15374 operands[0] = tmp;
15376 else if (TARGET_ARM)
15378 int regno = REGNO (operands[0]);
15379 if (!peep2_reg_dead_p (4, operands[0]))
15381 /* When the input register is even and is not dead after the
15382 pattern, it has to hold the second constant but we cannot
15383 form a legal STRD in ARM mode with this register as the second
15384 register. */
15385 if (regno % 2 == 0)
15386 return false;
15388 /* Is regno-1 free? */
15389 SET_HARD_REG_SET (regset);
15390 CLEAR_HARD_REG_BIT(regset, regno - 1);
15391 tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
15392 if (tmp == NULL_RTX)
15393 return false;
15395 operands[0] = tmp;
15397 else
15399 /* Find a DImode register. */
15400 CLEAR_HARD_REG_SET (regset);
15401 tmp = peep2_find_free_register (0, 4, "r", DImode, &regset);
15402 if (tmp != NULL_RTX)
15404 operands[0] = simplify_gen_subreg (SImode, tmp, DImode, 0);
15405 operands[1] = simplify_gen_subreg (SImode, tmp, DImode, 4);
15407 else
15409 /* Can we use the input register to form a DI register? */
15410 SET_HARD_REG_SET (regset);
15411 CLEAR_HARD_REG_BIT(regset,
15412 regno % 2 == 0 ? regno + 1 : regno - 1);
15413 tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
15414 if (tmp == NULL_RTX)
15415 return false;
15416 operands[regno % 2 == 1 ? 0 : 1] = tmp;
15420 gcc_assert (operands[0] != NULL_RTX);
15421 gcc_assert (operands[1] != NULL_RTX);
15422 gcc_assert (REGNO (operands[0]) % 2 == 0);
15423 gcc_assert (REGNO (operands[1]) == REGNO (operands[0]) + 1);
15427 /* Make sure the instructions are ordered with lower memory access first. */
15428 if (offsets[0] > offsets[1])
15430 gap = offsets[0] - offsets[1];
15431 offset = offsets[1];
15433 /* Swap the instructions such that lower memory is accessed first. */
15434 std::swap (operands[0], operands[1]);
15435 std::swap (operands[2], operands[3]);
15436 if (const_store)
15437 std::swap (operands[4], operands[5]);
15439 else
15441 gap = offsets[1] - offsets[0];
15442 offset = offsets[0];
15445 /* Make sure accesses are to consecutive memory locations. */
15446 if (gap != 4)
15447 return false;
15449 /* Make sure we generate legal instructions. */
15450 if (operands_ok_ldrd_strd (operands[0], operands[1], base, offset,
15451 false, load))
15452 return true;
15454 /* In Thumb state, where registers are almost unconstrained, there
15455 is little hope to fix it. */
15456 if (TARGET_THUMB2)
15457 return false;
15459 if (load && commute)
15461 /* Try reordering registers. */
15462 std::swap (operands[0], operands[1]);
15463 if (operands_ok_ldrd_strd (operands[0], operands[1], base, offset,
15464 false, load))
15465 return true;
15468 if (const_store)
15470 /* If input registers are dead after this pattern, they can be
15471 reordered or replaced by other registers that are free in the
15472 current pattern. */
15473 if (!peep2_reg_dead_p (4, operands[0])
15474 || !peep2_reg_dead_p (4, operands[1]))
15475 return false;
15477 /* Try to reorder the input registers. */
15478 /* For example, the code
15479 mov r0, 0
15480 mov r1, 1
15481 str r1, [r2]
15482 str r0, [r2, #4]
15483 can be transformed into
15484 mov r1, 0
15485 mov r0, 1
15486 strd r0, [r2]
15488 if (operands_ok_ldrd_strd (operands[1], operands[0], base, offset,
15489 false, false))
15491 std::swap (operands[0], operands[1]);
15492 return true;
15495 /* Try to find a free DI register. */
15496 CLEAR_HARD_REG_SET (regset);
15497 add_to_hard_reg_set (&regset, SImode, REGNO (operands[0]));
15498 add_to_hard_reg_set (&regset, SImode, REGNO (operands[1]));
15499 while (true)
15501 tmp = peep2_find_free_register (0, 4, "r", DImode, &regset);
15502 if (tmp == NULL_RTX)
15503 return false;
15505 /* DREG must be an even-numbered register in DImode.
15506 Split it into SI registers. */
15507 operands[0] = simplify_gen_subreg (SImode, tmp, DImode, 0);
15508 operands[1] = simplify_gen_subreg (SImode, tmp, DImode, 4);
15509 gcc_assert (operands[0] != NULL_RTX);
15510 gcc_assert (operands[1] != NULL_RTX);
15511 gcc_assert (REGNO (operands[0]) % 2 == 0);
15512 gcc_assert (REGNO (operands[0]) + 1 == REGNO (operands[1]));
15514 return (operands_ok_ldrd_strd (operands[0], operands[1],
15515 base, offset,
15516 false, load));
15520 return false;
15526 /* Print a symbolic form of X to the debug file, F. */
15527 static void
15528 arm_print_value (FILE *f, rtx x)
15530 switch (GET_CODE (x))
15532 case CONST_INT:
15533 fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (x));
15534 return;
15536 case CONST_DOUBLE:
15537 fprintf (f, "<0x%lx,0x%lx>", (long)XWINT (x, 2), (long)XWINT (x, 3));
15538 return;
15540 case CONST_VECTOR:
15542 int i;
15544 fprintf (f, "<");
15545 for (i = 0; i < CONST_VECTOR_NUNITS (x); i++)
15547 fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (CONST_VECTOR_ELT (x, i)));
15548 if (i < (CONST_VECTOR_NUNITS (x) - 1))
15549 fputc (',', f);
15551 fprintf (f, ">");
15553 return;
15555 case CONST_STRING:
15556 fprintf (f, "\"%s\"", XSTR (x, 0));
15557 return;
15559 case SYMBOL_REF:
15560 fprintf (f, "`%s'", XSTR (x, 0));
15561 return;
15563 case LABEL_REF:
15564 fprintf (f, "L%d", INSN_UID (XEXP (x, 0)));
15565 return;
15567 case CONST:
15568 arm_print_value (f, XEXP (x, 0));
15569 return;
15571 case PLUS:
15572 arm_print_value (f, XEXP (x, 0));
15573 fprintf (f, "+");
15574 arm_print_value (f, XEXP (x, 1));
15575 return;
15577 case PC:
15578 fprintf (f, "pc");
15579 return;
15581 default:
15582 fprintf (f, "????");
15583 return;
15587 /* Routines for manipulation of the constant pool. */
15589 /* Arm instructions cannot load a large constant directly into a
15590 register; they have to come from a pc relative load. The constant
15591 must therefore be placed in the addressable range of the pc
15592 relative load. Depending on the precise pc relative load
15593 instruction the range is somewhere between 256 bytes and 4k. This
15594 means that we often have to dump a constant inside a function, and
15595 generate code to branch around it.
15597 It is important to minimize this, since the branches will slow
15598 things down and make the code larger.
15600 Normally we can hide the table after an existing unconditional
15601 branch so that there is no interruption of the flow, but in the
15602 worst case the code looks like this:
15604 ldr rn, L1
15606 b L2
15607 align
15608 L1: .long value
15612 ldr rn, L3
15614 b L4
15615 align
15616 L3: .long value
15620 We fix this by performing a scan after scheduling, which notices
15621 which instructions need to have their operands fetched from the
15622 constant table and builds the table.
15624 The algorithm starts by building a table of all the constants that
15625 need fixing up and all the natural barriers in the function (places
15626 where a constant table can be dropped without breaking the flow).
15627 For each fixup we note how far the pc-relative replacement will be
15628 able to reach and the offset of the instruction into the function.
15630 Having built the table we then group the fixes together to form
15631 tables that are as large as possible (subject to addressing
15632 constraints) and emit each table of constants after the last
15633 barrier that is within range of all the instructions in the group.
15634 If a group does not contain a barrier, then we forcibly create one
15635 by inserting a jump instruction into the flow. Once the table has
15636 been inserted, the insns are then modified to reference the
15637 relevant entry in the pool.
15639 Possible enhancements to the algorithm (not implemented) are:
15641 1) For some processors and object formats, there may be benefit in
15642 aligning the pools to the start of cache lines; this alignment
15643 would need to be taken into account when calculating addressability
15644 of a pool. */
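/* As a rough overview of the routines that follow: push_minipool_fix and
   push_minipool_barrier record the fixups and the natural barriers during
   the scan; add_minipool_forward_ref and add_minipool_backward_ref place
   each fix's constant into the pending pool; create_fix_barrier inserts a
   branch-around barrier when no natural barrier is in range;
   assign_minipool_offsets assigns the final offsets once the pool's barrier
   is known; and dump_minipool emits the table itself.  */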
15646 /* These typedefs are located at the start of this file, so that
15647 they can be used in the prototypes there. This comment is to
15648 remind readers of that fact so that the following structures
15649 can be understood more easily.
15651 typedef struct minipool_node Mnode;
15652 typedef struct minipool_fixup Mfix; */
15654 struct minipool_node
15656 /* Doubly linked chain of entries. */
15657 Mnode * next;
15658 Mnode * prev;
15659 /* The maximum offset into the code at which this entry can be placed. While
15660 pushing fixes for forward references, all entries are sorted in order
15661 of increasing max_address. */
15662 HOST_WIDE_INT max_address;
15663 /* Similarly for an entry inserted for a backwards ref. */
15664 HOST_WIDE_INT min_address;
15665 /* The number of fixes referencing this entry. This can become zero
15666 if we "unpush" an entry. In this case we ignore the entry when we
15667 come to emit the code. */
15668 int refcount;
15669 /* The offset from the start of the minipool. */
15670 HOST_WIDE_INT offset;
15671 /* The value in the table. */
15672 rtx value;
15673 /* The mode of value. */
15674 machine_mode mode;
15675 /* The size of the value. With iWMMXt enabled
15676 sizes > 4 also imply an alignment of 8 bytes. */
15677 int fix_size;
15680 struct minipool_fixup
15682 Mfix * next;
15683 rtx_insn * insn;
15684 HOST_WIDE_INT address;
15685 rtx * loc;
15686 machine_mode mode;
15687 int fix_size;
15688 rtx value;
15689 Mnode * minipool;
15690 HOST_WIDE_INT forwards;
15691 HOST_WIDE_INT backwards;
15694 /* Fixes less than a word need padding out to a word boundary. */
15695 #define MINIPOOL_FIX_SIZE(mode) \
15696 (GET_MODE_SIZE ((mode)) >= 4 ? GET_MODE_SIZE ((mode)) : 4)
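/* For example, an HImode fix still occupies 4 bytes in the pool, while a
   DImode fix occupies 8.  */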
15698 static Mnode * minipool_vector_head;
15699 static Mnode * minipool_vector_tail;
15700 static rtx_code_label *minipool_vector_label;
15701 static int minipool_pad;
15703 /* The linked list of all minipool fixes required for this function. */
15704 Mfix * minipool_fix_head;
15705 Mfix * minipool_fix_tail;
15706 /* The fix entry for the current minipool, once it has been placed. */
15707 Mfix * minipool_barrier;
15709 #ifndef JUMP_TABLES_IN_TEXT_SECTION
15710 #define JUMP_TABLES_IN_TEXT_SECTION 0
15711 #endif
15713 static HOST_WIDE_INT
15714 get_jump_table_size (rtx_jump_table_data *insn)
15716 /* ADDR_VECs only take room if read-only data goes into the text
15717 section. */
15718 if (JUMP_TABLES_IN_TEXT_SECTION || readonly_data_section == text_section)
15720 rtx body = PATTERN (insn);
15721 int elt = GET_CODE (body) == ADDR_DIFF_VEC ? 1 : 0;
15722 HOST_WIDE_INT size;
15723 HOST_WIDE_INT modesize;
15725 modesize = GET_MODE_SIZE (GET_MODE (body));
15726 size = modesize * XVECLEN (body, elt);
15727 switch (modesize)
15729 case 1:
15730 /* Round up size of TBB table to a halfword boundary. */
15731 size = (size + 1) & ~HOST_WIDE_INT_1;
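 /* E.g. a five-entry TBB table occupies 5 bytes and is rounded up to 6.  */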
15732 break;
15733 case 2:
15734 /* No padding necessary for TBH. */
15735 break;
15736 case 4:
15737 /* Add two bytes for alignment on Thumb. */
15738 if (TARGET_THUMB)
15739 size += 2;
15740 break;
15741 default:
15742 gcc_unreachable ();
15744 return size;
15747 return 0;
15750 /* Return the maximum amount of padding that will be inserted before
15751 label LABEL. */
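/* For example, with a Thumb minimum insn size of 2 bytes, a label aligned
   to 8 bytes can be preceded by up to 6 bytes of padding.  */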
15753 static HOST_WIDE_INT
15754 get_label_padding (rtx label)
15756 HOST_WIDE_INT align, min_insn_size;
15758 align = 1 << label_to_alignment (label);
15759 min_insn_size = TARGET_THUMB ? 2 : 4;
15760 return align > min_insn_size ? align - min_insn_size : 0;
15763 /* Move a minipool fix MP from its current location to before MAX_MP.
15764 If MAX_MP is NULL, then MP doesn't need moving, but the addressing
15765 constraints may need updating. */
15766 static Mnode *
15767 move_minipool_fix_forward_ref (Mnode *mp, Mnode *max_mp,
15768 HOST_WIDE_INT max_address)
15770 /* The code below assumes these are different. */
15771 gcc_assert (mp != max_mp);
15773 if (max_mp == NULL)
15775 if (max_address < mp->max_address)
15776 mp->max_address = max_address;
15778 else
15780 if (max_address > max_mp->max_address - mp->fix_size)
15781 mp->max_address = max_mp->max_address - mp->fix_size;
15782 else
15783 mp->max_address = max_address;
15785 /* Unlink MP from its current position. Since max_mp is non-null,
15786 mp->prev must be non-null. */
15787 mp->prev->next = mp->next;
15788 if (mp->next != NULL)
15789 mp->next->prev = mp->prev;
15790 else
15791 minipool_vector_tail = mp->prev;
15793 /* Re-insert it before MAX_MP. */
15794 mp->next = max_mp;
15795 mp->prev = max_mp->prev;
15796 max_mp->prev = mp;
15798 if (mp->prev != NULL)
15799 mp->prev->next = mp;
15800 else
15801 minipool_vector_head = mp;
15804 /* Save the new entry. */
15805 max_mp = mp;
15807 /* Scan over the preceding entries and adjust their addresses as
15808 required. */
15809 while (mp->prev != NULL
15810 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
15812 mp->prev->max_address = mp->max_address - mp->prev->fix_size;
15813 mp = mp->prev;
15816 return max_mp;
15819 /* Add a constant to the minipool for a forward reference. Returns the
15820 node added or NULL if the constant will not fit in this pool. */
15821 static Mnode *
15822 add_minipool_forward_ref (Mfix *fix)
15824 /* If set, max_mp is the first pool_entry that has a lower
15825 constraint than the one we are trying to add. */
15826 Mnode * max_mp = NULL;
15827 HOST_WIDE_INT max_address = fix->address + fix->forwards - minipool_pad;
15828 Mnode * mp;
15830 /* If the minipool starts before the end of FIX->INSN then this FIX
15831 cannot be placed into the current pool. Furthermore, adding the
15832 new constant pool entry may cause the pool to start FIX_SIZE bytes
15833 earlier. */
15834 if (minipool_vector_head &&
15835 (fix->address + get_attr_length (fix->insn)
15836 >= minipool_vector_head->max_address - fix->fix_size))
15837 return NULL;
15839 /* Scan the pool to see if a constant with the same value has
15840 already been added. While we are doing this, also note the
15841 location where we must insert the constant if it doesn't already
15842 exist. */
15843 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
15845 if (GET_CODE (fix->value) == GET_CODE (mp->value)
15846 && fix->mode == mp->mode
15847 && (!LABEL_P (fix->value)
15848 || (CODE_LABEL_NUMBER (fix->value)
15849 == CODE_LABEL_NUMBER (mp->value)))
15850 && rtx_equal_p (fix->value, mp->value))
15852 /* More than one fix references this entry. */
15853 mp->refcount++;
15854 return move_minipool_fix_forward_ref (mp, max_mp, max_address);
15857 /* Note the insertion point if necessary. */
15858 if (max_mp == NULL
15859 && mp->max_address > max_address)
15860 max_mp = mp;
15862 /* If we are inserting an 8-byte aligned quantity and
15863 we have not already found an insertion point, then
15864 make sure that all such 8-byte aligned quantities are
15865 placed at the start of the pool. */
15866 if (ARM_DOUBLEWORD_ALIGN
15867 && max_mp == NULL
15868 && fix->fix_size >= 8
15869 && mp->fix_size < 8)
15871 max_mp = mp;
15872 max_address = mp->max_address;
15876 /* The value is not currently in the minipool, so we need to create
15877 a new entry for it. If MAX_MP is NULL, the entry will be put on
15878 the end of the list since the placement is less constrained than
15879 any existing entry. Otherwise, we insert the new fix before
15880 MAX_MP and, if necessary, adjust the constraints on the other
15881 entries. */
15882 mp = XNEW (Mnode);
15883 mp->fix_size = fix->fix_size;
15884 mp->mode = fix->mode;
15885 mp->value = fix->value;
15886 mp->refcount = 1;
15887 /* Not yet required for a backwards ref. */
15888 mp->min_address = -65536;
15890 if (max_mp == NULL)
15892 mp->max_address = max_address;
15893 mp->next = NULL;
15894 mp->prev = minipool_vector_tail;
15896 if (mp->prev == NULL)
15898 minipool_vector_head = mp;
15899 minipool_vector_label = gen_label_rtx ();
15901 else
15902 mp->prev->next = mp;
15904 minipool_vector_tail = mp;
15906 else
15908 if (max_address > max_mp->max_address - mp->fix_size)
15909 mp->max_address = max_mp->max_address - mp->fix_size;
15910 else
15911 mp->max_address = max_address;
15913 mp->next = max_mp;
15914 mp->prev = max_mp->prev;
15915 max_mp->prev = mp;
15916 if (mp->prev != NULL)
15917 mp->prev->next = mp;
15918 else
15919 minipool_vector_head = mp;
15922 /* Save the new entry. */
15923 max_mp = mp;
15925 /* Scan over the preceding entries and adjust their addresses as
15926 required. */
15927 while (mp->prev != NULL
15928 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
15930 mp->prev->max_address = mp->max_address - mp->prev->fix_size;
15931 mp = mp->prev;
15934 return max_mp;
15937 static Mnode *
15938 move_minipool_fix_backward_ref (Mnode *mp, Mnode *min_mp,
15939 HOST_WIDE_INT min_address)
15941 HOST_WIDE_INT offset;
15943 /* The code below assumes these are different. */
15944 gcc_assert (mp != min_mp);
15946 if (min_mp == NULL)
15948 if (min_address > mp->min_address)
15949 mp->min_address = min_address;
15951 else
15953 /* We will adjust this below if it is too loose. */
15954 mp->min_address = min_address;
15956 /* Unlink MP from its current position. Since min_mp is non-null,
15957 mp->next must be non-null. */
15958 mp->next->prev = mp->prev;
15959 if (mp->prev != NULL)
15960 mp->prev->next = mp->next;
15961 else
15962 minipool_vector_head = mp->next;
15964 /* Reinsert it after MIN_MP. */
15965 mp->prev = min_mp;
15966 mp->next = min_mp->next;
15967 min_mp->next = mp;
15968 if (mp->next != NULL)
15969 mp->next->prev = mp;
15970 else
15971 minipool_vector_tail = mp;
15974 min_mp = mp;
15976 offset = 0;
15977 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
15979 mp->offset = offset;
15980 if (mp->refcount > 0)
15981 offset += mp->fix_size;
15983 if (mp->next && mp->next->min_address < mp->min_address + mp->fix_size)
15984 mp->next->min_address = mp->min_address + mp->fix_size;
15987 return min_mp;
15990 /* Add a constant to the minipool for a backward reference. Returns the
15991 node added or NULL if the constant will not fit in this pool.
15993 Note that the code for insertion for a backwards reference can be
15994 somewhat confusing because the calculated offsets for each fix do
15995 not take into account the size of the pool (which is still under
15996 construction). */
15997 static Mnode *
15998 add_minipool_backward_ref (Mfix *fix)
16000 /* If set, min_mp is the last pool_entry that has a lower constraint
16001 than the one we are trying to add. */
16002 Mnode *min_mp = NULL;
16003 /* This can be negative, since it is only a constraint. */
16004 HOST_WIDE_INT min_address = fix->address - fix->backwards;
16005 Mnode *mp;
16007 /* If we can't reach the current pool from this insn, or if we can't
16008 insert this entry at the end of the pool without pushing other
16009 fixes out of range, then we don't try. This ensures that we
16010 can't fail later on. */
16011 if (min_address >= minipool_barrier->address
16012 || (minipool_vector_tail->min_address + fix->fix_size
16013 >= minipool_barrier->address))
16014 return NULL;
16016 /* Scan the pool to see if a constant with the same value has
16017 already been added. While we are doing this, also note the
16018 location where we must insert the constant if it doesn't already
16019 exist. */
16020 for (mp = minipool_vector_tail; mp != NULL; mp = mp->prev)
16022 if (GET_CODE (fix->value) == GET_CODE (mp->value)
16023 && fix->mode == mp->mode
16024 && (!LABEL_P (fix->value)
16025 || (CODE_LABEL_NUMBER (fix->value)
16026 == CODE_LABEL_NUMBER (mp->value)))
16027 && rtx_equal_p (fix->value, mp->value)
16028 /* Check that there is enough slack to move this entry to the
16029 end of the table (this is conservative). */
16030 && (mp->max_address
16031 > (minipool_barrier->address
16032 + minipool_vector_tail->offset
16033 + minipool_vector_tail->fix_size)))
16035 mp->refcount++;
16036 return move_minipool_fix_backward_ref (mp, min_mp, min_address);
16039 if (min_mp != NULL)
16040 mp->min_address += fix->fix_size;
16041 else
16043 /* Note the insertion point if necessary. */
16044 if (mp->min_address < min_address)
16046 /* For now, we do not allow the insertion of nodes that require 8-byte
16047 alignment anywhere but at the start of the pool. */
16048 if (ARM_DOUBLEWORD_ALIGN
16049 && fix->fix_size >= 8 && mp->fix_size < 8)
16050 return NULL;
16051 else
16052 min_mp = mp;
16054 else if (mp->max_address
16055 < minipool_barrier->address + mp->offset + fix->fix_size)
16057 /* Inserting before this entry would push the fix beyond
16058 its maximum address (which can happen if we have
16059 re-located a forwards fix); force the new fix to come
16060 after it. */
16061 if (ARM_DOUBLEWORD_ALIGN
16062 && fix->fix_size >= 8 && mp->fix_size < 8)
16063 return NULL;
16064 else
16066 min_mp = mp;
16067 min_address = mp->min_address + fix->fix_size;
16070 /* Do not insert a non-8-byte aligned quantity before 8-byte
16071 aligned quantities. */
16072 else if (ARM_DOUBLEWORD_ALIGN
16073 && fix->fix_size < 8
16074 && mp->fix_size >= 8)
16076 min_mp = mp;
16077 min_address = mp->min_address + fix->fix_size;
16082 /* We need to create a new entry. */
16083 mp = XNEW (Mnode);
16084 mp->fix_size = fix->fix_size;
16085 mp->mode = fix->mode;
16086 mp->value = fix->value;
16087 mp->refcount = 1;
16088 mp->max_address = minipool_barrier->address + 65536;
16090 mp->min_address = min_address;
16092 if (min_mp == NULL)
16094 mp->prev = NULL;
16095 mp->next = minipool_vector_head;
16097 if (mp->next == NULL)
16099 minipool_vector_tail = mp;
16100 minipool_vector_label = gen_label_rtx ();
16102 else
16103 mp->next->prev = mp;
16105 minipool_vector_head = mp;
16107 else
16109 mp->next = min_mp->next;
16110 mp->prev = min_mp;
16111 min_mp->next = mp;
16113 if (mp->next != NULL)
16114 mp->next->prev = mp;
16115 else
16116 minipool_vector_tail = mp;
16119 /* Save the new entry. */
16120 min_mp = mp;
16122 if (mp->prev)
16123 mp = mp->prev;
16124 else
16125 mp->offset = 0;
16127 /* Scan over the following entries and adjust their offsets. */
16128 while (mp->next != NULL)
16130 if (mp->next->min_address < mp->min_address + mp->fix_size)
16131 mp->next->min_address = mp->min_address + mp->fix_size;
16133 if (mp->refcount)
16134 mp->next->offset = mp->offset + mp->fix_size;
16135 else
16136 mp->next->offset = mp->offset;
16138 mp = mp->next;
16141 return min_mp;
16144 static void
16145 assign_minipool_offsets (Mfix *barrier)
16147 HOST_WIDE_INT offset = 0;
16148 Mnode *mp;
16150 minipool_barrier = barrier;
16152 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
16154 mp->offset = offset;
16156 if (mp->refcount > 0)
16157 offset += mp->fix_size;
16161 /* Output the literal table. */
16162 static void
16163 dump_minipool (rtx_insn *scan)
16165 Mnode * mp;
16166 Mnode * nmp;
16167 int align64 = 0;
16169 if (ARM_DOUBLEWORD_ALIGN)
16170 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
16171 if (mp->refcount > 0 && mp->fix_size >= 8)
16173 align64 = 1;
16174 break;
16177 if (dump_file)
16178 fprintf (dump_file,
16179 ";; Emitting minipool after insn %u; address %ld; align %d (bytes)\n",
16180 INSN_UID (scan), (unsigned long) minipool_barrier->address, align64 ? 8 : 4);
16182 scan = emit_label_after (gen_label_rtx (), scan);
16183 scan = emit_insn_after (align64 ? gen_align_8 () : gen_align_4 (), scan);
16184 scan = emit_label_after (minipool_vector_label, scan);
16186 for (mp = minipool_vector_head; mp != NULL; mp = nmp)
16188 if (mp->refcount > 0)
16190 if (dump_file)
16192 fprintf (dump_file,
16193 ";; Offset %u, min %ld, max %ld ",
16194 (unsigned) mp->offset, (unsigned long) mp->min_address,
16195 (unsigned long) mp->max_address);
16196 arm_print_value (dump_file, mp->value);
16197 fputc ('\n', dump_file);
16200 rtx val = copy_rtx (mp->value);
16202 switch (GET_MODE_SIZE (mp->mode))
16204 #ifdef HAVE_consttable_1
16205 case 1:
16206 scan = emit_insn_after (gen_consttable_1 (val), scan);
16207 break;
16209 #endif
16210 #ifdef HAVE_consttable_2
16211 case 2:
16212 scan = emit_insn_after (gen_consttable_2 (val), scan);
16213 break;
16215 #endif
16216 #ifdef HAVE_consttable_4
16217 case 4:
16218 scan = emit_insn_after (gen_consttable_4 (val), scan);
16219 break;
16221 #endif
16222 #ifdef HAVE_consttable_8
16223 case 8:
16224 scan = emit_insn_after (gen_consttable_8 (val), scan);
16225 break;
16227 #endif
16228 #ifdef HAVE_consttable_16
16229 case 16:
16230 scan = emit_insn_after (gen_consttable_16 (val), scan);
16231 break;
16233 #endif
16234 default:
16235 gcc_unreachable ();
16239 nmp = mp->next;
16240 free (mp);
16243 minipool_vector_head = minipool_vector_tail = NULL;
16244 scan = emit_insn_after (gen_consttable_end (), scan);
16245 scan = emit_barrier_after (scan);
16248 /* Return the cost of forcibly inserting a barrier after INSN. */
16249 static int
16250 arm_barrier_cost (rtx_insn *insn)
16252 /* Basing the location of the pool on the loop depth is preferable,
16253 but at the moment, the basic block information seems to be
16254 corrupted by this stage of the compilation. */
16255 int base_cost = 50;
16256 rtx_insn *next = next_nonnote_insn (insn);
16258 if (next != NULL && LABEL_P (next))
16259 base_cost -= 20;
16261 switch (GET_CODE (insn))
16263 case CODE_LABEL:
16264 /* It will always be better to place the table before the label, rather
16265 than after it. */
16266 return 50;
16268 case INSN:
16269 case CALL_INSN:
16270 return base_cost;
16272 case JUMP_INSN:
16273 return base_cost - 10;
16275 default:
16276 return base_cost + 10;
16280 /* Find the best place in the insn stream in the range
16281 (FIX->address,MAX_ADDRESS) to forcibly insert a minipool barrier.
16282 Create the barrier by inserting a jump and add a new fix entry for
16283 it. */
16284 static Mfix *
16285 create_fix_barrier (Mfix *fix, HOST_WIDE_INT max_address)
16287 HOST_WIDE_INT count = 0;
16288 rtx_barrier *barrier;
16289 rtx_insn *from = fix->insn;
16290 /* The instruction after which we will insert the jump. */
16291 rtx_insn *selected = NULL;
16292 int selected_cost;
16293 /* The address at which the jump instruction will be placed. */
16294 HOST_WIDE_INT selected_address;
16295 Mfix * new_fix;
16296 HOST_WIDE_INT max_count = max_address - fix->address;
16297 rtx_code_label *label = gen_label_rtx ();
16299 selected_cost = arm_barrier_cost (from);
16300 selected_address = fix->address;
16302 while (from && count < max_count)
16304 rtx_jump_table_data *tmp;
16305 int new_cost;
16307 /* This code shouldn't have been called if there was a natural barrier
16308 within range. */
16309 gcc_assert (!BARRIER_P (from));
16311 /* Count the length of this insn. This must stay in sync with the
16312 code that pushes minipool fixes. */
16313 if (LABEL_P (from))
16314 count += get_label_padding (from);
16315 else
16316 count += get_attr_length (from);
16318 /* If there is a jump table, add its length. */
16319 if (tablejump_p (from, NULL, &tmp))
16321 count += get_jump_table_size (tmp);
16323 /* Jump tables aren't in a basic block, so base the cost on
16324 the dispatch insn. If we select this location, we will
16325 still put the pool after the table. */
16326 new_cost = arm_barrier_cost (from);
16328 if (count < max_count
16329 && (!selected || new_cost <= selected_cost))
16331 selected = tmp;
16332 selected_cost = new_cost;
16333 selected_address = fix->address + count;
16336 /* Continue after the dispatch table. */
16337 from = NEXT_INSN (tmp);
16338 continue;
16341 new_cost = arm_barrier_cost (from);
16343 if (count < max_count
16344 && (!selected || new_cost <= selected_cost))
16346 selected = from;
16347 selected_cost = new_cost;
16348 selected_address = fix->address + count;
16351 from = NEXT_INSN (from);
16354 /* Make sure that we found a place to insert the jump. */
16355 gcc_assert (selected);
16357 /* Make sure we do not split a call and its corresponding
16358 CALL_ARG_LOCATION note. */
16359 if (CALL_P (selected))
16361 rtx_insn *next = NEXT_INSN (selected);
16362 if (next && NOTE_P (next)
16363 && NOTE_KIND (next) == NOTE_INSN_CALL_ARG_LOCATION)
16364 selected = next;
16367 /* Create a new JUMP_INSN that branches around a barrier. */
16368 from = emit_jump_insn_after (gen_jump (label), selected);
16369 JUMP_LABEL (from) = label;
16370 barrier = emit_barrier_after (from);
16371 emit_label_after (label, barrier);
16373 /* Create a minipool barrier entry for the new barrier. */
16374 new_fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* new_fix));
16375 new_fix->insn = barrier;
16376 new_fix->address = selected_address;
16377 new_fix->next = fix->next;
16378 fix->next = new_fix;
16380 return new_fix;
16383 /* Record that there is a natural barrier in the insn stream at
16384 ADDRESS. */
16385 static void
16386 push_minipool_barrier (rtx_insn *insn, HOST_WIDE_INT address)
16388 Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));
16390 fix->insn = insn;
16391 fix->address = address;
16393 fix->next = NULL;
16394 if (minipool_fix_head != NULL)
16395 minipool_fix_tail->next = fix;
16396 else
16397 minipool_fix_head = fix;
16399 minipool_fix_tail = fix;
16402 /* Record INSN, which will need fixing up to load a value from the
16403 minipool. ADDRESS is the offset of the insn from the start of the
16404 function; LOC is a pointer to the part of the insn which requires
16405 fixing; VALUE is the constant that must be loaded, which is of type
16406 MODE. */
16407 static void
16408 push_minipool_fix (rtx_insn *insn, HOST_WIDE_INT address, rtx *loc,
16409 machine_mode mode, rtx value)
16411 gcc_assert (!arm_disable_literal_pool);
16412 Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));
16414 fix->insn = insn;
16415 fix->address = address;
16416 fix->loc = loc;
16417 fix->mode = mode;
16418 fix->fix_size = MINIPOOL_FIX_SIZE (mode);
16419 fix->value = value;
16420 fix->forwards = get_attr_pool_range (insn);
16421 fix->backwards = get_attr_neg_pool_range (insn);
16422 fix->minipool = NULL;
16424 /* If an insn doesn't have a range defined for it, then it isn't
16425 expecting to be reworked by this code. Better to stop now than
16426 to generate duff assembly code. */
16427 gcc_assert (fix->forwards || fix->backwards);
16429 /* If an entry requires 8-byte alignment then assume all constant pools
16430 require 4 bytes of padding. Trying to do this later on a per-pool
16431 basis is awkward because existing pool entries have to be modified. */
16432 if (ARM_DOUBLEWORD_ALIGN && fix->fix_size >= 8)
16433 minipool_pad = 4;
16435 if (dump_file)
16437 fprintf (dump_file,
16438 ";; %smode fixup for i%d; addr %lu, range (%ld,%ld): ",
16439 GET_MODE_NAME (mode),
16440 INSN_UID (insn), (unsigned long) address,
16441 -1 * (long)fix->backwards, (long)fix->forwards);
16442 arm_print_value (dump_file, fix->value);
16443 fprintf (dump_file, "\n");
16446 /* Add it to the chain of fixes. */
16447 fix->next = NULL;
16449 if (minipool_fix_head != NULL)
16450 minipool_fix_tail->next = fix;
16451 else
16452 minipool_fix_head = fix;
16454 minipool_fix_tail = fix;
16457 /* Return maximum allowed cost of synthesizing a 64-bit constant VAL inline.
16458 Returns the number of insns needed, or 99 if we always want to synthesize
16459 the value. */
16461 arm_max_const_double_inline_cost ()
16463 return ((optimize_size || arm_ld_sched) ? 3 : 4);
16466 /* Return the cost of synthesizing a 64-bit constant VAL inline.
16467 Returns the number of insns needed, or 99 if we don't know how to
16468 do it. */
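/* For example, 0x0000000100000001 splits into two SImode constants of 1;
   each half is a valid ARM immediate and so should cost a single insn,
   giving a total of 2.  */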
16470 arm_const_double_inline_cost (rtx val)
16472 rtx lowpart, highpart;
16473 machine_mode mode;
16475 mode = GET_MODE (val);
16477 if (mode == VOIDmode)
16478 mode = DImode;
16480 gcc_assert (GET_MODE_SIZE (mode) == 8);
16482 lowpart = gen_lowpart (SImode, val);
16483 highpart = gen_highpart_mode (SImode, mode, val);
16485 gcc_assert (CONST_INT_P (lowpart));
16486 gcc_assert (CONST_INT_P (highpart));
16488 return (arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (lowpart),
16489 NULL_RTX, NULL_RTX, 0, 0)
16490 + arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (highpart),
16491 NULL_RTX, NULL_RTX, 0, 0));
16494 /* Cost of loading a SImode constant. */
16495 static inline int
16496 arm_const_inline_cost (enum rtx_code code, rtx val)
16498 return arm_gen_constant (code, SImode, NULL_RTX, INTVAL (val),
16499 NULL_RTX, NULL_RTX, 1, 0);
16502 /* Return true if it is worthwhile to split a 64-bit constant into two
16503 32-bit operations. This is the case if optimizing for size, or
16504 if we have load delay slots, or if one 32-bit part can be done with
16505 a single data operation. */
16506 bool
16507 arm_const_double_by_parts (rtx val)
16509 machine_mode mode = GET_MODE (val);
16510 rtx part;
16512 if (optimize_size || arm_ld_sched)
16513 return true;
16515 if (mode == VOIDmode)
16516 mode = DImode;
16518 part = gen_highpart_mode (SImode, mode, val);
16520 gcc_assert (CONST_INT_P (part));
16522 if (const_ok_for_arm (INTVAL (part))
16523 || const_ok_for_arm (~INTVAL (part)))
16524 return true;
16526 part = gen_lowpart (SImode, val);
16528 gcc_assert (CONST_INT_P (part));
16530 if (const_ok_for_arm (INTVAL (part))
16531 || const_ok_for_arm (~INTVAL (part)))
16532 return true;
16534 return false;
16537 /* Return true if it is possible to inline both the high and low parts
16538 of a 64-bit constant into 32-bit data processing instructions. */
16539 bool
16540 arm_const_double_by_immediates (rtx val)
16542 machine_mode mode = GET_MODE (val);
16543 rtx part;
16545 if (mode == VOIDmode)
16546 mode = DImode;
16548 part = gen_highpart_mode (SImode, mode, val);
16550 gcc_assert (CONST_INT_P (part));
16552 if (!const_ok_for_arm (INTVAL (part)))
16553 return false;
16555 part = gen_lowpart (SImode, val);
16557 gcc_assert (CONST_INT_P (part));
16559 if (!const_ok_for_arm (INTVAL (part)))
16560 return false;
16562 return true;
16565 /* Scan INSN and note any of its operands that need fixing.
16566 If DO_PUSHES is false we do not actually push any of the fixups
16567 needed. */
16568 static void
16569 note_invalid_constants (rtx_insn *insn, HOST_WIDE_INT address, int do_pushes)
16571 int opno;
16573 extract_constrain_insn (insn);
16575 if (recog_data.n_alternatives == 0)
16576 return;
16578 /* Fill in recog_op_alt with information about the constraints of
16579 this insn. */
16580 preprocess_constraints (insn);
16582 const operand_alternative *op_alt = which_op_alt ();
16583 for (opno = 0; opno < recog_data.n_operands; opno++)
16585 /* Things we need to fix can only occur in inputs. */
16586 if (recog_data.operand_type[opno] != OP_IN)
16587 continue;
16589 /* If this alternative is a memory reference, then any mention
16590 of constants in this alternative is really to fool reload
16591 into allowing us to accept one there. We need to fix them up
16592 now so that we output the right code. */
16593 if (op_alt[opno].memory_ok)
16595 rtx op = recog_data.operand[opno];
16597 if (CONSTANT_P (op))
16599 if (do_pushes)
16600 push_minipool_fix (insn, address, recog_data.operand_loc[opno],
16601 recog_data.operand_mode[opno], op);
16603 else if (MEM_P (op)
16604 && GET_CODE (XEXP (op, 0)) == SYMBOL_REF
16605 && CONSTANT_POOL_ADDRESS_P (XEXP (op, 0)))
16607 if (do_pushes)
16609 rtx cop = avoid_constant_pool_reference (op);
16611 /* Casting the address of something to a mode narrower
16612 than a word can cause avoid_constant_pool_reference()
16613 to return the pool reference itself. That's no good to
16614 us here. Let's just hope that we can use the
16615 constant pool value directly. */
16616 if (op == cop)
16617 cop = get_pool_constant (XEXP (op, 0));
16619 push_minipool_fix (insn, address,
16620 recog_data.operand_loc[opno],
16621 recog_data.operand_mode[opno], cop);
16628 return;
16631 /* This function computes the clear mask and PADDING_BITS_TO_CLEAR for structs
16632 and unions in the context of ARMv8-M Security Extensions. It is used as a
16633 helper function for both 'cmse_nonsecure_call' and 'cmse_nonsecure_entry'
16634 functions. The PADDING_BITS_TO_CLEAR pointer can be the base to either one
16635 or four masks, depending on whether it is being computed for a
16636 'cmse_nonsecure_entry' return value or a 'cmse_nonsecure_call' argument
16637 respectively. The tree for the type of the argument or a field within an
16638 argument is passed in ARG_TYPE, the current register this argument or field
16639 starts in is kept in the pointer REGNO and updated accordingly, the bit this
16640 argument or field starts at is passed in STARTING_BIT and the last used bit
16641 is kept in LAST_USED_BIT which is also updated accordingly. */
16643 static unsigned HOST_WIDE_INT
16644 comp_not_to_clear_mask_str_un (tree arg_type, int * regno,
16645 uint32_t * padding_bits_to_clear,
16646 unsigned starting_bit, int * last_used_bit)
16649 unsigned HOST_WIDE_INT not_to_clear_reg_mask = 0;
16651 if (TREE_CODE (arg_type) == RECORD_TYPE)
16653 unsigned current_bit = starting_bit;
16654 tree field;
16655 long int offset, size;
16658 field = TYPE_FIELDS (arg_type);
16659 while (field)
16661 /* The offset within a structure is always an offset from
16662 the start of that structure. Make sure we take that into account in
16663 the calculation of the register-based offset that we use here. */
16664 offset = starting_bit;
16665 offset += TREE_INT_CST_ELT (DECL_FIELD_BIT_OFFSET (field), 0);
16666 offset %= 32;
16668 /* This is the actual size of the field; for bitfields this is the
16669 bitfield width and not the container size. */
16670 size = TREE_INT_CST_ELT (DECL_SIZE (field), 0);
16672 if (*last_used_bit != offset)
16674 if (offset < *last_used_bit)
16676 /* This field's offset is before the 'last_used_bit', which
16677 means this field goes in the next register. So we need to
16678 pad the rest of the current register and increase the
16679 register number. */
16680 uint32_t mask;
16681 mask = ((uint32_t)-1) - ((uint32_t) 1 << *last_used_bit);
16682 mask++;
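  /* For example, if *last_used_bit is 8 this mask is 0xffffff00, i.e.
     bits 8..31 of the current register are recorded as padding.  */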
16684 padding_bits_to_clear[*regno] |= mask;
16685 not_to_clear_reg_mask |= HOST_WIDE_INT_1U << *regno;
16686 (*regno)++;
16688 else
16690 /* Otherwise we pad the bits between the last field's end and
16691 the start of the new field. */
16692 uint32_t mask;
16694 mask = ((uint32_t)-1) >> (32 - offset);
16695 mask -= ((uint32_t) 1 << *last_used_bit) - 1;
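  /* For example, with offset == 16 and *last_used_bit == 8 the mask is
     0x0000ff00, i.e. only bits 8..15 between the two fields are treated
     as padding.  */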
16696 padding_bits_to_clear[*regno] |= mask;
16698 current_bit = offset;
16701 /* Calculate further padding bits for inner structs/unions too. */
16702 if (RECORD_OR_UNION_TYPE_P (TREE_TYPE (field)))
16704 *last_used_bit = current_bit;
16705 not_to_clear_reg_mask
16706 |= comp_not_to_clear_mask_str_un (TREE_TYPE (field), regno,
16707 padding_bits_to_clear, offset,
16708 last_used_bit);
16710 else
16712 /* Update 'current_bit' with this field's size. If the
16713 'current_bit' lies in a subsequent register, update 'regno' and
16714 reset 'current_bit' to point to the current bit in that new
16715 register. */
16716 current_bit += size;
16717 while (current_bit >= 32)
16719 current_bit-=32;
16720 not_to_clear_reg_mask |= HOST_WIDE_INT_1U << *regno;
16721 (*regno)++;
16723 *last_used_bit = current_bit;
16726 field = TREE_CHAIN (field);
16728 not_to_clear_reg_mask |= HOST_WIDE_INT_1U << *regno;
16730 else if (TREE_CODE (arg_type) == UNION_TYPE)
16732 tree field, field_t;
16733 int i, regno_t, field_size;
16734 int max_reg = -1;
16735 int max_bit = -1;
16736 uint32_t mask;
16737 uint32_t padding_bits_to_clear_res[NUM_ARG_REGS]
16738 = {-1, -1, -1, -1};
16740 /* To compute the padding bits in a union we only consider bits as
16741 padding bits if, for every field in the union, they are either padding
16742 bits or fall outside that field's size. */
16743 field = TYPE_FIELDS (arg_type);
16744 while (field)
16746 uint32_t padding_bits_to_clear_t[NUM_ARG_REGS]
16747 = {0U, 0U, 0U, 0U};
16748 int last_used_bit_t = *last_used_bit;
16749 regno_t = *regno;
16750 field_t = TREE_TYPE (field);
16752 /* If the field's type is either a record or a union make sure to
16753 compute their padding bits too. */
16754 if (RECORD_OR_UNION_TYPE_P (field_t))
16755 not_to_clear_reg_mask
16756 |= comp_not_to_clear_mask_str_un (field_t, &regno_t,
16757 &padding_bits_to_clear_t[0],
16758 starting_bit, &last_used_bit_t);
16759 else
16761 field_size = TREE_INT_CST_ELT (DECL_SIZE (field), 0);
16762 regno_t = (field_size / 32) + *regno;
16763 last_used_bit_t = (starting_bit + field_size) % 32;
16766 for (i = *regno; i < regno_t; i++)
16768 /* For all but the last register used by this field only keep the
16769 padding bits that were padding bits in this field. */
16770 padding_bits_to_clear_res[i] &= padding_bits_to_clear_t[i];
16773 /* For the last register, keep all padding bits that were padding
16774 bits in this field and any padding bits that are still valid
16775 as padding bits but fall outside of this field's size. */
16776 mask = (((uint32_t) -1) - ((uint32_t) 1 << last_used_bit_t)) + 1;
16777 padding_bits_to_clear_res[regno_t]
16778 &= padding_bits_to_clear_t[regno_t] | mask;
16780 /* Update the maximum size of the fields in terms of registers used
16781 ('max_reg') and the 'last_used_bit' in said register. */
16782 if (max_reg < regno_t)
16784 max_reg = regno_t;
16785 max_bit = last_used_bit_t;
16787 else if (max_reg == regno_t && max_bit < last_used_bit_t)
16788 max_bit = last_used_bit_t;
16790 field = TREE_CHAIN (field);
16793 /* Update the current padding_bits_to_clear using the intersection of the
16794 padding bits of all the fields. */
16795 for (i=*regno; i < max_reg; i++)
16796 padding_bits_to_clear[i] |= padding_bits_to_clear_res[i];
16798 /* Do not keep trailing padding bits; we do not know yet whether this
16799 is the end of the argument. */
16800 mask = ((uint32_t) 1 << max_bit) - 1;
16801 padding_bits_to_clear[max_reg]
16802 |= padding_bits_to_clear_res[max_reg] & mask;
16804 *regno = max_reg;
16805 *last_used_bit = max_bit;
16807 else
16808 /* This function should only be used for structs and unions. */
16809 gcc_unreachable ();
16811 return not_to_clear_reg_mask;
16814 /* In the context of ARMv8-M Security Extensions, this function is used for both
16815 'cmse_nonsecure_call' and 'cmse_nonsecure_entry' functions to compute which
16816 registers are used when returning or passing arguments, which is then
16817 returned as a mask. It also computes a mask to indicate padding/unused
16818 bits for each of these registers, and passes this back through the
16819 PADDING_BITS_TO_CLEAR pointer. The tree of the argument type is passed in
16820 ARG_TYPE, the rtl representation of the argument is passed in ARG_RTX and
16821 the starting register used to pass this argument or return value is passed
16822 in REGNO. It makes use of 'comp_not_to_clear_mask_str_un' to compute these
16823 for struct and union types. */
16825 static unsigned HOST_WIDE_INT
16826 compute_not_to_clear_mask (tree arg_type, rtx arg_rtx, int regno,
16827 uint32_t * padding_bits_to_clear)
16830 int last_used_bit = 0;
16831 unsigned HOST_WIDE_INT not_to_clear_mask;
16833 if (RECORD_OR_UNION_TYPE_P (arg_type))
16835 not_to_clear_mask
16836 = comp_not_to_clear_mask_str_un (arg_type, &regno,
16837 padding_bits_to_clear, 0,
16838 &last_used_bit);
16841 /* If the 'last_used_bit' is not zero, that means we are still using a
16842 part of the last 'regno'. In such cases we must clear the trailing
16843 bits. Otherwise we are not using regno and we should mark it as to be
16844 cleared. */
16845 if (last_used_bit != 0)
16846 padding_bits_to_clear[regno]
16847 |= ((uint32_t)-1) - ((uint32_t) 1 << last_used_bit) + 1;
16848 else
16849 not_to_clear_mask &= ~(HOST_WIDE_INT_1U << regno);
16851 else
16853 not_to_clear_mask = 0;
16854 /* We are not dealing with structs or unions. So these arguments may be
16855 passed in floating point registers too. In some cases a BLKmode is
16856 used when returning or passing arguments in multiple VFP registers. */
16857 if (GET_MODE (arg_rtx) == BLKmode)
16859 int i, arg_regs;
16860 rtx reg;
16862 /* This should really only occur when dealing with the hard-float
16863 ABI. */
16864 gcc_assert (TARGET_HARD_FLOAT_ABI);
16866 for (i = 0; i < XVECLEN (arg_rtx, 0); i++)
16868 reg = XEXP (XVECEXP (arg_rtx, 0, i), 0);
16869 gcc_assert (REG_P (reg));
16871 not_to_clear_mask |= HOST_WIDE_INT_1U << REGNO (reg);
16873 /* If we are dealing with DF mode, make sure we don't
16874 clear either of the registers it addresses. */
16875 arg_regs = ARM_NUM_REGS (GET_MODE (reg));
16876 if (arg_regs > 1)
16878 unsigned HOST_WIDE_INT mask;
16879 mask = HOST_WIDE_INT_1U << (REGNO (reg) + arg_regs);
16880 mask -= HOST_WIDE_INT_1U << REGNO (reg);
16881 not_to_clear_mask |= mask;
16885 else
16887 /* Otherwise we can rely on the MODE to determine how many registers
16888 are being used by this argument. */
16889 int arg_regs = ARM_NUM_REGS (GET_MODE (arg_rtx));
16890 not_to_clear_mask |= HOST_WIDE_INT_1U << REGNO (arg_rtx);
16891 if (arg_regs > 1)
16893 unsigned HOST_WIDE_INT
16894 mask = HOST_WIDE_INT_1U << (REGNO (arg_rtx) + arg_regs);
16895 mask -= HOST_WIDE_INT_1U << REGNO (arg_rtx);
16896 not_to_clear_mask |= mask;
16901 return not_to_clear_mask;
16904 /* Saves the callee-saved registers, clears the callee-saved registers and the
16905 caller-saved registers not used to pass arguments before a cmse_nonsecure_call,
16906 and restores the callee-saved registers afterwards. */
16908 static void
16909 cmse_nonsecure_call_clear_caller_saved (void)
16911 basic_block bb;
16913 FOR_EACH_BB_FN (bb, cfun)
16915 rtx_insn *insn;
16917 FOR_BB_INSNS (bb, insn)
16919 uint64_t to_clear_mask, float_mask;
16920 rtx_insn *seq;
16921 rtx pat, call, unspec, reg, cleared_reg, tmp;
16922 unsigned int regno, maxregno;
16923 rtx address;
16924 CUMULATIVE_ARGS args_so_far_v;
16925 cumulative_args_t args_so_far;
16926 tree arg_type, fntype;
16927 bool using_r4, first_param = true;
16928 function_args_iterator args_iter;
16929 uint32_t padding_bits_to_clear[4] = {0U, 0U, 0U, 0U};
16930 uint32_t * padding_bits_to_clear_ptr = &padding_bits_to_clear[0];
16932 if (!NONDEBUG_INSN_P (insn))
16933 continue;
16935 if (!CALL_P (insn))
16936 continue;
16938 pat = PATTERN (insn);
16939 gcc_assert (GET_CODE (pat) == PARALLEL && XVECLEN (pat, 0) > 0);
16940 call = XVECEXP (pat, 0, 0);
16942 /* Get the real call RTX if the insn sets a value, i.e. returns. */
16943 if (GET_CODE (call) == SET)
16944 call = SET_SRC (call);
16946 /* Check if it is a cmse_nonsecure_call. */
16947 unspec = XEXP (call, 0);
16948 if (GET_CODE (unspec) != UNSPEC
16949 || XINT (unspec, 1) != UNSPEC_NONSECURE_MEM)
16950 continue;
16952 /* Determine the caller-saved registers we need to clear. */
16953 to_clear_mask = (1LL << (NUM_ARG_REGS)) - 1;
16954 maxregno = NUM_ARG_REGS - 1;
16955 /* Only look at the caller-saved floating point registers in case of
16956 -mfloat-abi=hard. For -mfloat-abi=softfp we will be using the
16957 lazy store and loads which clear both caller- and callee-saved
16958 registers. */
16959 if (TARGET_HARD_FLOAT_ABI)
16961 float_mask = (1LL << (D7_VFP_REGNUM + 1)) - 1;
16962 float_mask &= ~((1LL << FIRST_VFP_REGNUM) - 1);
16963 to_clear_mask |= float_mask;
16964 maxregno = D7_VFP_REGNUM;
16967 /* Make sure the register used to hold the function address is not
16968 cleared. */
16969 address = RTVEC_ELT (XVEC (unspec, 0), 0);
16970 gcc_assert (MEM_P (address));
16971 gcc_assert (REG_P (XEXP (address, 0)));
16972 to_clear_mask &= ~(1LL << REGNO (XEXP (address, 0)));
16974 /* Set basic block of call insn so that df rescan is performed on
16975 insns inserted here. */
16976 set_block_for_insn (insn, bb);
16977 df_set_flags (DF_DEFER_INSN_RESCAN);
16978 start_sequence ();
16980 /* Make sure the scheduler doesn't schedule other insns beyond
16981 here. */
16982 emit_insn (gen_blockage ());
16984 /* Walk through all arguments and clear registers appropriately. */
16986 fntype = TREE_TYPE (MEM_EXPR (address));
16987 arm_init_cumulative_args (&args_so_far_v, fntype, NULL_RTX,
16988 NULL_TREE);
16989 args_so_far = pack_cumulative_args (&args_so_far_v);
16990 FOREACH_FUNCTION_ARGS (fntype, arg_type, args_iter)
16992 rtx arg_rtx;
16993 machine_mode arg_mode = TYPE_MODE (arg_type);
16995 if (VOID_TYPE_P (arg_type))
16996 continue;
16998 if (!first_param)
16999 arm_function_arg_advance (args_so_far, arg_mode, arg_type,
17000 true);
17002 arg_rtx = arm_function_arg (args_so_far, arg_mode, arg_type,
17003 true);
17004 gcc_assert (REG_P (arg_rtx));
17005 to_clear_mask
17006 &= ~compute_not_to_clear_mask (arg_type, arg_rtx,
17007 REGNO (arg_rtx),
17008 padding_bits_to_clear_ptr);
17010 first_param = false;
17013 /* Clear padding bits where needed. */
17014 cleared_reg = XEXP (address, 0);
17015 reg = gen_rtx_REG (SImode, IP_REGNUM);
17016 using_r4 = false;
17017 for (regno = R0_REGNUM; regno < NUM_ARG_REGS; regno++)
17019 if (padding_bits_to_clear[regno] == 0)
17020 continue;
17022 /* If this is a Thumb-1 target, copy the address of the function
17023 we are calling from 'r4' into 'ip' so that we can use r4 to
17024 clear the unused bits in the arguments. */
17025 if (TARGET_THUMB1 && !using_r4)
17027 using_r4 = true;
17028 reg = cleared_reg;
17029 emit_move_insn (gen_rtx_REG (SImode, IP_REGNUM),
17030 reg);
17033 tmp = GEN_INT ((((~padding_bits_to_clear[regno]) << 16u) >> 16u));
17034 emit_move_insn (reg, tmp);
17035 /* Also fill the top half of the negated
17036 padding_bits_to_clear. */
17037 if (((~padding_bits_to_clear[regno]) >> 16) > 0)
17039 tmp = GEN_INT ((~padding_bits_to_clear[regno]) >> 16);
17040 emit_insn (gen_rtx_SET (gen_rtx_ZERO_EXTRACT (SImode, reg,
17041 GEN_INT (16),
17042 GEN_INT (16)),
17043 tmp));
17046 emit_insn (gen_andsi3 (gen_rtx_REG (SImode, regno),
17047 gen_rtx_REG (SImode, regno),
17048 reg));
17051 if (using_r4)
17052 emit_move_insn (cleared_reg,
17053 gen_rtx_REG (SImode, IP_REGNUM));
17055 /* We use right shift and left shift to clear the LSB of the address
17056 we jump to instead of using bic, to avoid having to use an extra
17057 register on Thumb-1. */
17058 tmp = gen_rtx_LSHIFTRT (SImode, cleared_reg, const1_rtx);
17059 emit_insn (gen_rtx_SET (cleared_reg, tmp));
17060 tmp = gen_rtx_ASHIFT (SImode, cleared_reg, const1_rtx);
17061 emit_insn (gen_rtx_SET (cleared_reg, tmp));
17063 /* Clear all registers that might leak information before doing
17064 the non-secure call. */
17065 for (regno = R0_REGNUM; regno <= maxregno; regno++)
17067 if (!(to_clear_mask & (1LL << regno)))
17068 continue;
17070 /* If regno is an even vfp register and its successor is also to
17071 be cleared, use vmov. */
17072 if (IS_VFP_REGNUM (regno))
17074 if (TARGET_VFP_DOUBLE
17075 && VFP_REGNO_OK_FOR_DOUBLE (regno)
17076 && to_clear_mask & (1LL << (regno + 1)))
17077 emit_move_insn (gen_rtx_REG (DFmode, regno++),
17078 CONST0_RTX (DFmode));
17079 else
17080 emit_move_insn (gen_rtx_REG (SFmode, regno),
17081 CONST0_RTX (SFmode));
17083 else
17084 emit_move_insn (gen_rtx_REG (SImode, regno), cleared_reg);
17087 seq = get_insns ();
17088 end_sequence ();
17089 emit_insn_before (seq, insn);
17095 /* Rewrite a move insn into a subtract of 0 if the condition codes
17096 will be useful in the next conditional jump insn. */
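/* For instance (illustrative only), in a sequence such as

	movs	r2, r3
	...
	cmp	r2, #0
	beq	.L1

   the move is rewritten as "subs r2, r3, #0".  Since SUBS sets the
   condition codes on Thumb-1, the compare against zero then becomes
   redundant and can be dropped when the branch is output.  */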
17098 static void
17099 thumb1_reorg (void)
17101 basic_block bb;
17103 FOR_EACH_BB_FN (bb, cfun)
17105 rtx dest, src;
17106 rtx cmp, op0, op1, set = NULL;
17107 rtx_insn *prev, *insn = BB_END (bb);
17108 bool insn_clobbered = false;
17110 while (insn != BB_HEAD (bb) && !NONDEBUG_INSN_P (insn))
17111 insn = PREV_INSN (insn);
17113 /* Find the last cbranchsi4_insn in basic block BB. */
17114 if (insn == BB_HEAD (bb)
17115 || INSN_CODE (insn) != CODE_FOR_cbranchsi4_insn)
17116 continue;
17118 /* Get the register with which we are comparing. */
17119 cmp = XEXP (SET_SRC (PATTERN (insn)), 0);
17120 op0 = XEXP (cmp, 0);
17121 op1 = XEXP (cmp, 1);
17123 /* Check that comparison is against ZERO. */
17124 if (!CONST_INT_P (op1) || INTVAL (op1) != 0)
17125 continue;
17127 /* Find the first flag setting insn before INSN in basic block BB. */
17128 gcc_assert (insn != BB_HEAD (bb));
17129 for (prev = PREV_INSN (insn);
17130 (!insn_clobbered
17131 && prev != BB_HEAD (bb)
17132 && (NOTE_P (prev)
17133 || DEBUG_INSN_P (prev)
17134 || ((set = single_set (prev)) != NULL
17135 && get_attr_conds (prev) == CONDS_NOCOND)));
17136 prev = PREV_INSN (prev))
17138 if (reg_set_p (op0, prev))
17139 insn_clobbered = true;
17142 /* Skip if op0 is clobbered by an insn other than prev. */
17143 if (insn_clobbered)
17144 continue;
17146 if (!set)
17147 continue;
17149 dest = SET_DEST (set);
17150 src = SET_SRC (set);
17151 if (!low_register_operand (dest, SImode)
17152 || !low_register_operand (src, SImode))
17153 continue;
17155 /* Rewrite move into subtract of 0 if its operand is compared with ZERO
17156 in INSN. Both src and dest of the move insn are checked. */
17157 if (REGNO (op0) == REGNO (src) || REGNO (op0) == REGNO (dest))
17159 dest = copy_rtx (dest);
17160 src = copy_rtx (src);
17161 src = gen_rtx_MINUS (SImode, src, const0_rtx);
17162 PATTERN (prev) = gen_rtx_SET (dest, src);
17163 INSN_CODE (prev) = -1;
17164 /* Set test register in INSN to dest. */
17165 XEXP (cmp, 0) = copy_rtx (dest);
17166 INSN_CODE (insn) = -1;
17171 /* Convert instructions to their cc-clobbering variant if possible, since
17172 that allows us to use smaller encodings. */
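/* Illustrative example: when the condition-code register is dead after the
   insn, a three-register add such as

	add	r0, r1, r2	@ 32-bit Thumb-2 encoding

   is rewritten as a PARALLEL with a CC clobber so that it can be emitted as

	adds	r0, r1, r2	@ 16-bit encoding

   The SKIP / CONV / SWAP_CONV decisions below determine when such a
   conversion is valid and worthwhile for the current tuning.  */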
17174 static void
17175 thumb2_reorg (void)
17177 basic_block bb;
17178 regset_head live;
17180 INIT_REG_SET (&live);
17182 /* We are freeing block_for_insn in the toplev to keep compatibility
17183 with old MDEP_REORGS that are not CFG based. Recompute it now. */
17184 compute_bb_for_insn ();
17185 df_analyze ();
17187 enum Convert_Action {SKIP, CONV, SWAP_CONV};
17189 FOR_EACH_BB_FN (bb, cfun)
17191 if ((current_tune->disparage_flag_setting_t16_encodings
17192 == tune_params::DISPARAGE_FLAGS_ALL)
17193 && optimize_bb_for_speed_p (bb))
17194 continue;
17196 rtx_insn *insn;
17197 Convert_Action action = SKIP;
17198 Convert_Action action_for_partial_flag_setting
17199 = ((current_tune->disparage_flag_setting_t16_encodings
17200 != tune_params::DISPARAGE_FLAGS_NEITHER)
17201 && optimize_bb_for_speed_p (bb))
17202 ? SKIP : CONV;
17204 COPY_REG_SET (&live, DF_LR_OUT (bb));
17205 df_simulate_initialize_backwards (bb, &live);
17206 FOR_BB_INSNS_REVERSE (bb, insn)
17208 if (NONJUMP_INSN_P (insn)
17209 && !REGNO_REG_SET_P (&live, CC_REGNUM)
17210 && GET_CODE (PATTERN (insn)) == SET)
17212 action = SKIP;
17213 rtx pat = PATTERN (insn);
17214 rtx dst = XEXP (pat, 0);
17215 rtx src = XEXP (pat, 1);
17216 rtx op0 = NULL_RTX, op1 = NULL_RTX;
17218 if (UNARY_P (src) || BINARY_P (src))
17219 op0 = XEXP (src, 0);
17221 if (BINARY_P (src))
17222 op1 = XEXP (src, 1);
17224 if (low_register_operand (dst, SImode))
17226 switch (GET_CODE (src))
17228 case PLUS:
17229 /* Adding two registers and storing the result
17230 in the first source is already a 16-bit
17231 operation. */
17232 if (rtx_equal_p (dst, op0)
17233 && register_operand (op1, SImode))
17234 break;
17236 if (low_register_operand (op0, SImode))
17238 /* ADDS <Rd>,<Rn>,<Rm> */
17239 if (low_register_operand (op1, SImode))
17240 action = CONV;
17241 /* ADDS <Rdn>,#<imm8> */
17242 /* SUBS <Rdn>,#<imm8> */
17243 else if (rtx_equal_p (dst, op0)
17244 && CONST_INT_P (op1)
17245 && IN_RANGE (INTVAL (op1), -255, 255))
17246 action = CONV;
17247 /* ADDS <Rd>,<Rn>,#<imm3> */
17248 /* SUBS <Rd>,<Rn>,#<imm3> */
17249 else if (CONST_INT_P (op1)
17250 && IN_RANGE (INTVAL (op1), -7, 7))
17251 action = CONV;
17253 /* ADCS <Rd>, <Rn> */
17254 else if (GET_CODE (XEXP (src, 0)) == PLUS
17255 && rtx_equal_p (XEXP (XEXP (src, 0), 0), dst)
17256 && low_register_operand (XEXP (XEXP (src, 0), 1),
17257 SImode)
17258 && COMPARISON_P (op1)
17259 && cc_register (XEXP (op1, 0), VOIDmode)
17260 && maybe_get_arm_condition_code (op1) == ARM_CS
17261 && XEXP (op1, 1) == const0_rtx)
17262 action = CONV;
17263 break;
17265 case MINUS:
17266 /* RSBS <Rd>,<Rn>,#0
17267 Not handled here: see NEG below. */
17268 /* SUBS <Rd>,<Rn>,#<imm3>
17269 SUBS <Rdn>,#<imm8>
17270 Not handled here: see PLUS above. */
17271 /* SUBS <Rd>,<Rn>,<Rm> */
17272 if (low_register_operand (op0, SImode)
17273 && low_register_operand (op1, SImode))
17274 action = CONV;
17275 break;
17277 case MULT:
17278 /* MULS <Rdm>,<Rn>,<Rdm>
17279 As an exception to the rule, this is only used
17280 when optimizing for size since MULS is slow on all
17281 known implementations. We do not even want to use
17282 MULS in cold code, if optimizing for speed, so we
17283 test the global flag here. */
17284 if (!optimize_size)
17285 break;
17286 /* Fall through. */
17287 case AND:
17288 case IOR:
17289 case XOR:
17290 /* ANDS <Rdn>,<Rm> */
17291 if (rtx_equal_p (dst, op0)
17292 && low_register_operand (op1, SImode))
17293 action = action_for_partial_flag_setting;
17294 else if (rtx_equal_p (dst, op1)
17295 && low_register_operand (op0, SImode))
17296 action = action_for_partial_flag_setting == SKIP
17297 ? SKIP : SWAP_CONV;
17298 break;
17300 case ASHIFTRT:
17301 case ASHIFT:
17302 case LSHIFTRT:
17303 /* ASRS <Rdn>,<Rm> */
17304 /* LSRS <Rdn>,<Rm> */
17305 /* LSLS <Rdn>,<Rm> */
17306 if (rtx_equal_p (dst, op0)
17307 && low_register_operand (op1, SImode))
17308 action = action_for_partial_flag_setting;
17309 /* ASRS <Rd>,<Rm>,#<imm5> */
17310 /* LSRS <Rd>,<Rm>,#<imm5> */
17311 /* LSLS <Rd>,<Rm>,#<imm5> */
17312 else if (low_register_operand (op0, SImode)
17313 && CONST_INT_P (op1)
17314 && IN_RANGE (INTVAL (op1), 0, 31))
17315 action = action_for_partial_flag_setting;
17316 break;
17318 case ROTATERT:
17319 /* RORS <Rdn>,<Rm> */
17320 if (rtx_equal_p (dst, op0)
17321 && low_register_operand (op1, SImode))
17322 action = action_for_partial_flag_setting;
17323 break;
17325 case NOT:
17326 /* MVNS <Rd>,<Rm> */
17327 if (low_register_operand (op0, SImode))
17328 action = action_for_partial_flag_setting;
17329 break;
17331 case NEG:
17332 /* NEGS <Rd>,<Rm> (a.k.a RSBS) */
17333 if (low_register_operand (op0, SImode))
17334 action = CONV;
17335 break;
17337 case CONST_INT:
17338 /* MOVS <Rd>,#<imm8> */
17339 if (CONST_INT_P (src)
17340 && IN_RANGE (INTVAL (src), 0, 255))
17341 action = action_for_partial_flag_setting;
17342 break;
17344 case REG:
17345 /* MOVS and MOV<c> with registers have different
17346 encodings, so are not relevant here. */
17347 break;
17349 default:
17350 break;
17354 if (action != SKIP)
17356 rtx ccreg = gen_rtx_REG (CCmode, CC_REGNUM);
17357 rtx clobber = gen_rtx_CLOBBER (VOIDmode, ccreg);
17358 rtvec vec;
17360 if (action == SWAP_CONV)
17362 src = copy_rtx (src);
17363 XEXP (src, 0) = op1;
17364 XEXP (src, 1) = op0;
17365 pat = gen_rtx_SET (dst, src);
17366 vec = gen_rtvec (2, pat, clobber);
17368 else /* action == CONV */
17369 vec = gen_rtvec (2, pat, clobber);
17371 PATTERN (insn) = gen_rtx_PARALLEL (VOIDmode, vec);
17372 INSN_CODE (insn) = -1;
17376 if (NONDEBUG_INSN_P (insn))
17377 df_simulate_one_insn_backwards (bb, insn, &live);
17381 CLEAR_REG_SET (&live);
17384 /* GCC puts the pool in the wrong place for ARM, since we can only
17385 load addresses within a limited distance of the pc. We do some
17386 special munging to move the constant pool values to the correct
17387 point in the code. */
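/* Rough sketch of the scheme (label names are illustrative only): a constant
   that cannot be built from immediate operands is loaded with a pc-relative
   load and the constant itself is placed in a nearby "minipool", e.g.

	ldr	r0, .LPOOL
	...
	b	.LSKIP		@ barrier: jump around the pool
   .LPOOL:
	.word	0x12345678
   .LSKIP:

   The code below records each such load as a "fix", finds or creates a
   barrier within the limited pc-relative range of the load, and dumps the
   accumulated constants there.  */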
17388 static void
17389 arm_reorg (void)
17391 rtx_insn *insn;
17392 HOST_WIDE_INT address = 0;
17393 Mfix * fix;
17395 if (use_cmse)
17396 cmse_nonsecure_call_clear_caller_saved ();
17397 if (TARGET_THUMB1)
17398 thumb1_reorg ();
17399 else if (TARGET_THUMB2)
17400 thumb2_reorg ();
17402 /* Ensure all insns that must be split have been split at this point.
17403 Otherwise, the pool placement code below may compute incorrect
17404 insn lengths. Note that when optimizing, all insns have already
17405 been split at this point. */
17406 if (!optimize)
17407 split_all_insns_noflow ();
17409 /* When literal pools are disabled there should no longer be any need to
17410 create one, so make sure we do not attempt to. */
17411 if (arm_disable_literal_pool)
17412 return;
17414 minipool_fix_head = minipool_fix_tail = NULL;
17416 /* The first insn must always be a note, or the code below won't
17417 scan it properly. */
17418 insn = get_insns ();
17419 gcc_assert (NOTE_P (insn));
17420 minipool_pad = 0;
17422 /* Scan all the insns and record the operands that will need fixing. */
17423 for (insn = next_nonnote_insn (insn); insn; insn = next_nonnote_insn (insn))
17425 if (BARRIER_P (insn))
17426 push_minipool_barrier (insn, address);
17427 else if (INSN_P (insn))
17429 rtx_jump_table_data *table;
17431 note_invalid_constants (insn, address, true);
17432 address += get_attr_length (insn);
17434 /* If the insn is a vector jump, add the size of the table
17435 and skip the table. */
17436 if (tablejump_p (insn, NULL, &table))
17438 address += get_jump_table_size (table);
17439 insn = table;
17442 else if (LABEL_P (insn))
17443 /* Add the worst-case padding due to alignment. We don't add
17444 the _current_ padding because the minipool insertions
17445 themselves might change it. */
17446 address += get_label_padding (insn);
17449 fix = minipool_fix_head;
17451 /* Now scan the fixups and perform the required changes. */
17452 while (fix)
17454 Mfix * ftmp;
17455 Mfix * fdel;
17456 Mfix * last_added_fix;
17457 Mfix * last_barrier = NULL;
17458 Mfix * this_fix;
17460 /* Skip any further barriers before the next fix. */
17461 while (fix && BARRIER_P (fix->insn))
17462 fix = fix->next;
17464 /* No more fixes. */
17465 if (fix == NULL)
17466 break;
17468 last_added_fix = NULL;
17470 for (ftmp = fix; ftmp; ftmp = ftmp->next)
17472 if (BARRIER_P (ftmp->insn))
17474 if (ftmp->address >= minipool_vector_head->max_address)
17475 break;
17477 last_barrier = ftmp;
17479 else if ((ftmp->minipool = add_minipool_forward_ref (ftmp)) == NULL)
17480 break;
17482 last_added_fix = ftmp; /* Keep track of the last fix added. */
17485 /* If we found a barrier, drop back to that; any fixes that we
17486 could have reached but come after the barrier will now go in
17487 the next mini-pool. */
17488 if (last_barrier != NULL)
17490 /* Reduce the refcount for those fixes that won't go into this
17491 pool after all. */
17492 for (fdel = last_barrier->next;
17493 fdel && fdel != ftmp;
17494 fdel = fdel->next)
17496 fdel->minipool->refcount--;
17497 fdel->minipool = NULL;
17500 ftmp = last_barrier;
17502 else
17504 /* ftmp is the first fix that we can't fit into this pool and
17505 there are no natural barriers that we could use. Insert a
17506 new barrier in the code somewhere between the previous
17507 fix and this one, and arrange to jump around it. */
17508 HOST_WIDE_INT max_address;
17510 /* The last item on the list of fixes must be a barrier, so
17511 we can never run off the end of the list of fixes without
17512 last_barrier being set. */
17513 gcc_assert (ftmp);
17515 max_address = minipool_vector_head->max_address;
17516 /* Check that there isn't another fix that is in range that
17517 we couldn't fit into this pool because the pool was
17518 already too large: we need to put the pool before such an
17519 instruction. The pool itself may come just after the
17520 fix because create_fix_barrier also allows space for a
17521 jump instruction. */
17522 if (ftmp->address < max_address)
17523 max_address = ftmp->address + 1;
17525 last_barrier = create_fix_barrier (last_added_fix, max_address);
17528 assign_minipool_offsets (last_barrier);
17530 while (ftmp)
17532 if (!BARRIER_P (ftmp->insn)
17533 && ((ftmp->minipool = add_minipool_backward_ref (ftmp))
17534 == NULL))
17535 break;
17537 ftmp = ftmp->next;
17540 /* Scan over the fixes we have identified for this pool, fixing them
17541 up and adding the constants to the pool itself. */
17542 for (this_fix = fix; this_fix && ftmp != this_fix;
17543 this_fix = this_fix->next)
17544 if (!BARRIER_P (this_fix->insn))
17546 rtx addr
17547 = plus_constant (Pmode,
17548 gen_rtx_LABEL_REF (VOIDmode,
17549 minipool_vector_label),
17550 this_fix->minipool->offset);
17551 *this_fix->loc = gen_rtx_MEM (this_fix->mode, addr);
17554 dump_minipool (last_barrier->insn);
17555 fix = ftmp;
17558 /* From now on we must synthesize any constants that we can't handle
17559 directly. This can happen if the RTL gets split during final
17560 instruction generation. */
17561 cfun->machine->after_arm_reorg = 1;
17563 /* Free the minipool memory. */
17564 obstack_free (&minipool_obstack, minipool_startobj);
17567 /* Routines to output assembly language. */
17569 /* Return a string representation of the passed-in real value. */
17570 static const char *
17571 fp_const_from_val (REAL_VALUE_TYPE *r)
17573 if (!fp_consts_inited)
17574 init_fp_table ();
17576 gcc_assert (real_equal (r, &value_fp0));
17577 return "0";
17580 /* OPERANDS[0] is the entire list of insns that constitute the pop,
17581 OPERANDS[1] is the base register, RETURN_PC is true iff the return
17582 insn is in the list, and UPDATE is true iff the list contains an
17583 explicit update of the base register. */
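/* For example (illustrative only): popping {r4, r5, pc} with SP as the base
   register and an explicit update is emitted as "pop {r4, r5, pc}", whereas
   the same list with a non-SP base register and no update becomes
   "ldm r7, {r4, r5, pc}".  When returning from an interrupt, POP is not
   used and the "^" suffix is appended to restore the SPSR.  */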
17584 void
17585 arm_output_multireg_pop (rtx *operands, bool return_pc, rtx cond, bool reverse,
17586 bool update)
17588 int i;
17589 char pattern[100];
17590 int offset;
17591 const char *conditional;
17592 int num_saves = XVECLEN (operands[0], 0);
17593 unsigned int regno;
17594 unsigned int regno_base = REGNO (operands[1]);
17595 bool interrupt_p = IS_INTERRUPT (arm_current_func_type ());
17597 offset = 0;
17598 offset += update ? 1 : 0;
17599 offset += return_pc ? 1 : 0;
17601 /* Is the base register in the list? */
17602 for (i = offset; i < num_saves; i++)
17604 regno = REGNO (XEXP (XVECEXP (operands[0], 0, i), 0));
17605 /* If SP is in the list, then the base register must be SP. */
17606 gcc_assert ((regno != SP_REGNUM) || (regno_base == SP_REGNUM));
17607 /* If base register is in the list, there must be no explicit update. */
17608 if (regno == regno_base)
17609 gcc_assert (!update);
17612 conditional = reverse ? "%?%D0" : "%?%d0";
17613 /* Can't use POP if returning from an interrupt. */
17614 if ((regno_base == SP_REGNUM) && update && !(interrupt_p && return_pc))
17615 sprintf (pattern, "pop%s\t{", conditional);
17616 else
17618 /* Output ldmfd when the base register is SP, otherwise output ldmia.
17619 It's just a convention; their semantics are identical. */
17620 if (regno_base == SP_REGNUM)
17621 sprintf (pattern, "ldmfd%s\t", conditional);
17622 else if (update)
17623 sprintf (pattern, "ldmia%s\t", conditional);
17624 else
17625 sprintf (pattern, "ldm%s\t", conditional);
17627 strcat (pattern, reg_names[regno_base]);
17628 if (update)
17629 strcat (pattern, "!, {");
17630 else
17631 strcat (pattern, ", {");
17634 /* Output the first destination register. */
17635 strcat (pattern,
17636 reg_names[REGNO (XEXP (XVECEXP (operands[0], 0, offset), 0))]);
17638 /* Output the rest of the destination registers. */
17639 for (i = offset + 1; i < num_saves; i++)
17641 strcat (pattern, ", ");
17642 strcat (pattern,
17643 reg_names[REGNO (XEXP (XVECEXP (operands[0], 0, i), 0))]);
17646 strcat (pattern, "}");
17648 if (interrupt_p && return_pc)
17649 strcat (pattern, "^");
17651 output_asm_insn (pattern, &cond);
17655 /* Output the assembly for a store multiple. */
17657 const char *
17658 vfp_output_vstmd (rtx * operands)
17660 char pattern[100];
17661 int p;
17662 int base;
17663 int i;
17664 rtx addr_reg = REG_P (XEXP (operands[0], 0))
17665 ? XEXP (operands[0], 0)
17666 : XEXP (XEXP (operands[0], 0), 0);
17667 bool push_p = REGNO (addr_reg) == SP_REGNUM;
17669 if (push_p)
17670 strcpy (pattern, "vpush%?.64\t{%P1");
17671 else
17672 strcpy (pattern, "vstmdb%?.64\t%m0!, {%P1");
17674 p = strlen (pattern);
17676 gcc_assert (REG_P (operands[1]));
17678 base = (REGNO (operands[1]) - FIRST_VFP_REGNUM) / 2;
17679 for (i = 1; i < XVECLEN (operands[2], 0); i++)
17681 p += sprintf (&pattern[p], ", d%d", base + i);
17683 strcpy (&pattern[p], "}");
17685 output_asm_insn (pattern, operands);
17686 return "";
17690 /* Emit RTL to save block of VFP register pairs to the stack. Returns the
17691 number of bytes pushed. */
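/* For instance (illustrative only), a call with COUNT == 2 typically results
   in a single store-multiple such as

	vpush.64	{d8, d9}

   together with REG_FRAME_RELATED_EXPR notes describing the 16-byte stack
   adjustment, and the return value is 16.  Note the ARM10 VFPr1 workaround
   below, which may push one extra pair.  */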
17693 static int
17694 vfp_emit_fstmd (int base_reg, int count)
17696 rtx par;
17697 rtx dwarf;
17698 rtx tmp, reg;
17699 int i;
17701 /* Work around the ARM10 VFPr1 bug. Data corruption can occur when exactly
17702 two register pairs are stored by a store multiple insn. We avoid this
17703 by pushing an extra pair. */
17704 if (count == 2 && !arm_arch6)
17706 if (base_reg == LAST_VFP_REGNUM - 3)
17707 base_reg -= 2;
17708 count++;
17711 /* FSTMD may not store more than 16 doubleword registers at once. Split
17712 larger stores into multiple parts (up to a maximum of two, in
17713 practice). */
17714 if (count > 16)
17716 int saved;
17717 /* NOTE: base_reg is an internal register number, so each D register
17718 counts as 2. */
17719 saved = vfp_emit_fstmd (base_reg + 32, count - 16);
17720 saved += vfp_emit_fstmd (base_reg, 16);
17721 return saved;
17724 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (count));
17725 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (count + 1));
17727 reg = gen_rtx_REG (DFmode, base_reg);
17728 base_reg += 2;
17730 XVECEXP (par, 0, 0)
17731 = gen_rtx_SET (gen_frame_mem
17732 (BLKmode,
17733 gen_rtx_PRE_MODIFY (Pmode,
17734 stack_pointer_rtx,
17735 plus_constant
17736 (Pmode, stack_pointer_rtx,
17737 - (count * 8)))
17739 gen_rtx_UNSPEC (BLKmode,
17740 gen_rtvec (1, reg),
17741 UNSPEC_PUSH_MULT));
17743 tmp = gen_rtx_SET (stack_pointer_rtx,
17744 plus_constant (Pmode, stack_pointer_rtx, -(count * 8)));
17745 RTX_FRAME_RELATED_P (tmp) = 1;
17746 XVECEXP (dwarf, 0, 0) = tmp;
17748 tmp = gen_rtx_SET (gen_frame_mem (DFmode, stack_pointer_rtx), reg);
17749 RTX_FRAME_RELATED_P (tmp) = 1;
17750 XVECEXP (dwarf, 0, 1) = tmp;
17752 for (i = 1; i < count; i++)
17754 reg = gen_rtx_REG (DFmode, base_reg);
17755 base_reg += 2;
17756 XVECEXP (par, 0, i) = gen_rtx_USE (VOIDmode, reg);
17758 tmp = gen_rtx_SET (gen_frame_mem (DFmode,
17759 plus_constant (Pmode,
17760 stack_pointer_rtx,
17761 i * 8)),
17762 reg);
17763 RTX_FRAME_RELATED_P (tmp) = 1;
17764 XVECEXP (dwarf, 0, i + 1) = tmp;
17767 par = emit_insn (par);
17768 add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
17769 RTX_FRAME_RELATED_P (par) = 1;
17771 return count * 8;
17774 /* Return true if -mcmse has been passed and the function pointed to by 'addr'
17775 has the cmse_nonsecure_call attribute; return false otherwise. */
17777 bool
17778 detect_cmse_nonsecure_call (tree addr)
17780 if (!addr)
17781 return FALSE;
17783 tree fntype = TREE_TYPE (addr);
17784 if (use_cmse && lookup_attribute ("cmse_nonsecure_call",
17785 TYPE_ATTRIBUTES (fntype)))
17786 return TRUE;
17787 return FALSE;
17791 /* Emit a call instruction with pattern PAT. ADDR is the address of
17792 the call target. */
17794 void
17795 arm_emit_call_insn (rtx pat, rtx addr, bool sibcall)
17797 rtx insn;
17799 insn = emit_call_insn (pat);
17801 /* The PIC register is live on entry to VxWorks PIC PLT entries.
17802 If the call might use such an entry, add a use of the PIC register
17803 to the instruction's CALL_INSN_FUNCTION_USAGE. */
17804 if (TARGET_VXWORKS_RTP
17805 && flag_pic
17806 && !sibcall
17807 && GET_CODE (addr) == SYMBOL_REF
17808 && (SYMBOL_REF_DECL (addr)
17809 ? !targetm.binds_local_p (SYMBOL_REF_DECL (addr))
17810 : !SYMBOL_REF_LOCAL_P (addr)))
17812 require_pic_register ();
17813 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), cfun->machine->pic_reg);
17816 if (TARGET_AAPCS_BASED)
17818 /* For AAPCS, IP and CC can be clobbered by veneers inserted by the
17819 linker. We need to add an IP clobber to allow setting
17820 TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS to true. A CC clobber
17821 is not needed since it's a fixed register. */
17822 rtx *fusage = &CALL_INSN_FUNCTION_USAGE (insn);
17823 clobber_reg (fusage, gen_rtx_REG (word_mode, IP_REGNUM));
17827 /* Output a 'call' insn. */
17828 const char *
17829 output_call (rtx *operands)
17831 gcc_assert (!arm_arch5); /* Patterns should call blx <reg> directly. */
17833 /* Handle calls to lr using ip (which may be clobbered in the subroutine anyway). */
17834 if (REGNO (operands[0]) == LR_REGNUM)
17836 operands[0] = gen_rtx_REG (SImode, IP_REGNUM);
17837 output_asm_insn ("mov%?\t%0, %|lr", operands);
17840 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
17842 if (TARGET_INTERWORK || arm_arch4t)
17843 output_asm_insn ("bx%?\t%0", operands);
17844 else
17845 output_asm_insn ("mov%?\t%|pc, %0", operands);
17847 return "";
17850 /* Output a move of a long double from ARM registers to ARM registers.
17851 OPERANDS[0] is the destination.
17852 OPERANDS[1] is the source. */
17853 const char *
17854 output_mov_long_double_arm_from_arm (rtx *operands)
17856 /* We have to be careful here because the two might overlap. */
17857 int dest_start = REGNO (operands[0]);
17858 int src_start = REGNO (operands[1]);
17859 rtx ops[2];
17860 int i;
17862 if (dest_start < src_start)
17864 for (i = 0; i < 3; i++)
17866 ops[0] = gen_rtx_REG (SImode, dest_start + i);
17867 ops[1] = gen_rtx_REG (SImode, src_start + i);
17868 output_asm_insn ("mov%?\t%0, %1", ops);
17871 else
17873 for (i = 2; i >= 0; i--)
17875 ops[0] = gen_rtx_REG (SImode, dest_start + i);
17876 ops[1] = gen_rtx_REG (SImode, src_start + i);
17877 output_asm_insn ("mov%?\t%0, %1", ops);
17881 return "";
17884 void
17885 arm_emit_movpair (rtx dest, rtx src)
17887 /* If the src is an immediate, simplify it. */
17888 if (CONST_INT_P (src))
17890 HOST_WIDE_INT val = INTVAL (src);
17891 emit_set_insn (dest, GEN_INT (val & 0x0000ffff));
17892 if ((val >> 16) & 0x0000ffff)
17894 emit_set_insn (gen_rtx_ZERO_EXTRACT (SImode, dest, GEN_INT (16),
17895 GEN_INT (16)),
17896 GEN_INT ((val >> 16) & 0x0000ffff));
17897 rtx_insn *insn = get_last_insn ();
17898 set_unique_reg_note (insn, REG_EQUAL, copy_rtx (src));
17900 return;
17902 emit_set_insn (dest, gen_rtx_HIGH (SImode, src));
17903 emit_set_insn (dest, gen_rtx_LO_SUM (SImode, dest, src));
17904 rtx_insn *insn = get_last_insn ();
17905 set_unique_reg_note (insn, REG_EQUAL, copy_rtx (src));
17908 /* Output a move between double words. It must be REG<-MEM
17909 or MEM<-REG. */
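/* For example (illustrative only), a DImode load whose address is a plain
   register is emitted as

	ldrd	r0, [r2]	@ when TARGET_LDRD
   or
	ldmia	r2, {r0, r1}	@ otherwise

   and the store direction similarly uses strd / stm.  When COUNT is non-null
   it is set to the number of instructions the chosen form needs, which the
   instruction length computation relies on.  */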
17910 const char *
17911 output_move_double (rtx *operands, bool emit, int *count)
17913 enum rtx_code code0 = GET_CODE (operands[0]);
17914 enum rtx_code code1 = GET_CODE (operands[1]);
17915 rtx otherops[3];
17916 if (count)
17917 *count = 1;
17919 /* The only case when this might happen is when
17920 you are looking at the length of a DImode instruction
17921 that has an invalid constant in it. */
17922 if (code0 == REG && code1 != MEM)
17924 gcc_assert (!emit);
17925 *count = 2;
17926 return "";
17929 if (code0 == REG)
17931 unsigned int reg0 = REGNO (operands[0]);
17933 otherops[0] = gen_rtx_REG (SImode, 1 + reg0);
17935 gcc_assert (code1 == MEM); /* Constraints should ensure this. */
17937 switch (GET_CODE (XEXP (operands[1], 0)))
17939 case REG:
17941 if (emit)
17943 if (TARGET_LDRD
17944 && !(fix_cm3_ldrd && reg0 == REGNO(XEXP (operands[1], 0))))
17945 output_asm_insn ("ldrd%?\t%0, [%m1]", operands);
17946 else
17947 output_asm_insn ("ldmia%?\t%m1, %M0", operands);
17949 break;
17951 case PRE_INC:
17952 gcc_assert (TARGET_LDRD);
17953 if (emit)
17954 output_asm_insn ("ldrd%?\t%0, [%m1, #8]!", operands);
17955 break;
17957 case PRE_DEC:
17958 if (emit)
17960 if (TARGET_LDRD)
17961 output_asm_insn ("ldrd%?\t%0, [%m1, #-8]!", operands);
17962 else
17963 output_asm_insn ("ldmdb%?\t%m1!, %M0", operands);
17965 break;
17967 case POST_INC:
17968 if (emit)
17970 if (TARGET_LDRD)
17971 output_asm_insn ("ldrd%?\t%0, [%m1], #8", operands);
17972 else
17973 output_asm_insn ("ldmia%?\t%m1!, %M0", operands);
17975 break;
17977 case POST_DEC:
17978 gcc_assert (TARGET_LDRD);
17979 if (emit)
17980 output_asm_insn ("ldrd%?\t%0, [%m1], #-8", operands);
17981 break;
17983 case PRE_MODIFY:
17984 case POST_MODIFY:
17985 /* Autoincrement addressing modes should never have overlapping
17986 base and destination registers, and overlapping index registers
17987 are already prohibited, so this doesn't need to worry about
17988 fix_cm3_ldrd. */
17989 otherops[0] = operands[0];
17990 otherops[1] = XEXP (XEXP (XEXP (operands[1], 0), 1), 0);
17991 otherops[2] = XEXP (XEXP (XEXP (operands[1], 0), 1), 1);
17993 if (GET_CODE (XEXP (operands[1], 0)) == PRE_MODIFY)
17995 if (reg_overlap_mentioned_p (otherops[0], otherops[2]))
17997 /* Registers overlap so split out the increment. */
17998 if (emit)
18000 output_asm_insn ("add%?\t%1, %1, %2", otherops);
18001 output_asm_insn ("ldrd%?\t%0, [%1] @split", otherops);
18003 if (count)
18004 *count = 2;
18006 else
18008 /* Use a single insn if we can.
18009 FIXME: IWMMXT allows offsets larger than ldrd can
18010 handle, fix these up with a pair of ldr. */
18011 if (TARGET_THUMB2
18012 || !CONST_INT_P (otherops[2])
18013 || (INTVAL (otherops[2]) > -256
18014 && INTVAL (otherops[2]) < 256))
18016 if (emit)
18017 output_asm_insn ("ldrd%?\t%0, [%1, %2]!", otherops);
18019 else
18021 if (emit)
18023 output_asm_insn ("ldr%?\t%0, [%1, %2]!", otherops);
18024 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
18026 if (count)
18027 *count = 2;
18032 else
18034 /* Use a single insn if we can.
18035 FIXME: IWMMXT allows offsets larger than ldrd can handle,
18036 fix these up with a pair of ldr. */
18037 if (TARGET_THUMB2
18038 || !CONST_INT_P (otherops[2])
18039 || (INTVAL (otherops[2]) > -256
18040 && INTVAL (otherops[2]) < 256))
18042 if (emit)
18043 output_asm_insn ("ldrd%?\t%0, [%1], %2", otherops);
18045 else
18047 if (emit)
18049 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
18050 output_asm_insn ("ldr%?\t%0, [%1], %2", otherops);
18052 if (count)
18053 *count = 2;
18056 break;
18058 case LABEL_REF:
18059 case CONST:
18060 /* We might be able to use ldrd %0, %1 here. However the range is
18061 different to ldr/adr, and it is broken on some ARMv7-M
18062 implementations. */
18063 /* Use the second register of the pair to avoid problematic
18064 overlap. */
18065 otherops[1] = operands[1];
18066 if (emit)
18067 output_asm_insn ("adr%?\t%0, %1", otherops);
18068 operands[1] = otherops[0];
18069 if (emit)
18071 if (TARGET_LDRD)
18072 output_asm_insn ("ldrd%?\t%0, [%1]", operands);
18073 else
18074 output_asm_insn ("ldmia%?\t%1, %M0", operands);
18077 if (count)
18078 *count = 2;
18079 break;
18081 /* ??? This needs checking for thumb2. */
18082 default:
18083 if (arm_add_operand (XEXP (XEXP (operands[1], 0), 1),
18084 GET_MODE (XEXP (XEXP (operands[1], 0), 1))))
18086 otherops[0] = operands[0];
18087 otherops[1] = XEXP (XEXP (operands[1], 0), 0);
18088 otherops[2] = XEXP (XEXP (operands[1], 0), 1);
18090 if (GET_CODE (XEXP (operands[1], 0)) == PLUS)
18092 if (CONST_INT_P (otherops[2]) && !TARGET_LDRD)
18094 switch ((int) INTVAL (otherops[2]))
18096 case -8:
18097 if (emit)
18098 output_asm_insn ("ldmdb%?\t%1, %M0", otherops);
18099 return "";
18100 case -4:
18101 if (TARGET_THUMB2)
18102 break;
18103 if (emit)
18104 output_asm_insn ("ldmda%?\t%1, %M0", otherops);
18105 return "";
18106 case 4:
18107 if (TARGET_THUMB2)
18108 break;
18109 if (emit)
18110 output_asm_insn ("ldmib%?\t%1, %M0", otherops);
18111 return "";
18114 otherops[0] = gen_rtx_REG(SImode, REGNO(operands[0]) + 1);
18115 operands[1] = otherops[0];
18116 if (TARGET_LDRD
18117 && (REG_P (otherops[2])
18118 || TARGET_THUMB2
18119 || (CONST_INT_P (otherops[2])
18120 && INTVAL (otherops[2]) > -256
18121 && INTVAL (otherops[2]) < 256)))
18123 if (reg_overlap_mentioned_p (operands[0],
18124 otherops[2]))
18126 /* Swap base and index registers over to
18127 avoid a conflict. */
18128 std::swap (otherops[1], otherops[2]);
18130 /* If both registers conflict, it will usually
18131 have been fixed by a splitter. */
18132 if (reg_overlap_mentioned_p (operands[0], otherops[2])
18133 || (fix_cm3_ldrd && reg0 == REGNO (otherops[1])))
18135 if (emit)
18137 output_asm_insn ("add%?\t%0, %1, %2", otherops);
18138 output_asm_insn ("ldrd%?\t%0, [%1]", operands);
18140 if (count)
18141 *count = 2;
18143 else
18145 otherops[0] = operands[0];
18146 if (emit)
18147 output_asm_insn ("ldrd%?\t%0, [%1, %2]", otherops);
18149 return "";
18152 if (CONST_INT_P (otherops[2]))
18154 if (emit)
18156 if (!(const_ok_for_arm (INTVAL (otherops[2]))))
18157 output_asm_insn ("sub%?\t%0, %1, #%n2", otherops);
18158 else
18159 output_asm_insn ("add%?\t%0, %1, %2", otherops);
18162 else
18164 if (emit)
18165 output_asm_insn ("add%?\t%0, %1, %2", otherops);
18168 else
18170 if (emit)
18171 output_asm_insn ("sub%?\t%0, %1, %2", otherops);
18174 if (count)
18175 *count = 2;
18177 if (TARGET_LDRD)
18178 return "ldrd%?\t%0, [%1]";
18180 return "ldmia%?\t%1, %M0";
18182 else
18184 otherops[1] = adjust_address (operands[1], SImode, 4);
18185 /* Take care of overlapping base/data reg. */
18186 if (reg_mentioned_p (operands[0], operands[1]))
18188 if (emit)
18190 output_asm_insn ("ldr%?\t%0, %1", otherops);
18191 output_asm_insn ("ldr%?\t%0, %1", operands);
18193 if (count)
18194 *count = 2;
18197 else
18199 if (emit)
18201 output_asm_insn ("ldr%?\t%0, %1", operands);
18202 output_asm_insn ("ldr%?\t%0, %1", otherops);
18204 if (count)
18205 *count = 2;
18210 else
18212 /* Constraints should ensure this. */
18213 gcc_assert (code0 == MEM && code1 == REG);
18214 gcc_assert ((REGNO (operands[1]) != IP_REGNUM)
18215 || (TARGET_ARM && TARGET_LDRD));
18217 switch (GET_CODE (XEXP (operands[0], 0)))
18219 case REG:
18220 if (emit)
18222 if (TARGET_LDRD)
18223 output_asm_insn ("strd%?\t%1, [%m0]", operands);
18224 else
18225 output_asm_insn ("stm%?\t%m0, %M1", operands);
18227 break;
18229 case PRE_INC:
18230 gcc_assert (TARGET_LDRD);
18231 if (emit)
18232 output_asm_insn ("strd%?\t%1, [%m0, #8]!", operands);
18233 break;
18235 case PRE_DEC:
18236 if (emit)
18238 if (TARGET_LDRD)
18239 output_asm_insn ("strd%?\t%1, [%m0, #-8]!", operands);
18240 else
18241 output_asm_insn ("stmdb%?\t%m0!, %M1", operands);
18243 break;
18245 case POST_INC:
18246 if (emit)
18248 if (TARGET_LDRD)
18249 output_asm_insn ("strd%?\t%1, [%m0], #8", operands);
18250 else
18251 output_asm_insn ("stm%?\t%m0!, %M1", operands);
18253 break;
18255 case POST_DEC:
18256 gcc_assert (TARGET_LDRD);
18257 if (emit)
18258 output_asm_insn ("strd%?\t%1, [%m0], #-8", operands);
18259 break;
18261 case PRE_MODIFY:
18262 case POST_MODIFY:
18263 otherops[0] = operands[1];
18264 otherops[1] = XEXP (XEXP (XEXP (operands[0], 0), 1), 0);
18265 otherops[2] = XEXP (XEXP (XEXP (operands[0], 0), 1), 1);
18267 /* IWMMXT allows offsets larger than strd can handle;
18268 fix these up with a pair of str. */
18269 if (!TARGET_THUMB2
18270 && CONST_INT_P (otherops[2])
18271 && (INTVAL(otherops[2]) <= -256
18272 || INTVAL(otherops[2]) >= 256))
18274 if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
18276 if (emit)
18278 output_asm_insn ("str%?\t%0, [%1, %2]!", otherops);
18279 output_asm_insn ("str%?\t%H0, [%1, #4]", otherops);
18281 if (count)
18282 *count = 2;
18284 else
18286 if (emit)
18288 output_asm_insn ("str%?\t%H0, [%1, #4]", otherops);
18289 output_asm_insn ("str%?\t%0, [%1], %2", otherops);
18291 if (count)
18292 *count = 2;
18295 else if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
18297 if (emit)
18298 output_asm_insn ("strd%?\t%0, [%1, %2]!", otherops);
18300 else
18302 if (emit)
18303 output_asm_insn ("strd%?\t%0, [%1], %2", otherops);
18305 break;
18307 case PLUS:
18308 otherops[2] = XEXP (XEXP (operands[0], 0), 1);
18309 if (CONST_INT_P (otherops[2]) && !TARGET_LDRD)
18311 switch ((int) INTVAL (XEXP (XEXP (operands[0], 0), 1)))
18313 case -8:
18314 if (emit)
18315 output_asm_insn ("stmdb%?\t%m0, %M1", operands);
18316 return "";
18318 case -4:
18319 if (TARGET_THUMB2)
18320 break;
18321 if (emit)
18322 output_asm_insn ("stmda%?\t%m0, %M1", operands);
18323 return "";
18325 case 4:
18326 if (TARGET_THUMB2)
18327 break;
18328 if (emit)
18329 output_asm_insn ("stmib%?\t%m0, %M1", operands);
18330 return "";
18333 if (TARGET_LDRD
18334 && (REG_P (otherops[2])
18335 || TARGET_THUMB2
18336 || (CONST_INT_P (otherops[2])
18337 && INTVAL (otherops[2]) > -256
18338 && INTVAL (otherops[2]) < 256)))
18340 otherops[0] = operands[1];
18341 otherops[1] = XEXP (XEXP (operands[0], 0), 0);
18342 if (emit)
18343 output_asm_insn ("strd%?\t%0, [%1, %2]", otherops);
18344 return "";
18346 /* Fall through */
18348 default:
18349 otherops[0] = adjust_address (operands[0], SImode, 4);
18350 otherops[1] = operands[1];
18351 if (emit)
18353 output_asm_insn ("str%?\t%1, %0", operands);
18354 output_asm_insn ("str%?\t%H1, %0", otherops);
18356 if (count)
18357 *count = 2;
18361 return "";
18364 /* Output a move, load or store for quad-word vectors in ARM registers. Only
18365 handles MEMs accepted by neon_vector_mem_operand with TYPE=1. */
18367 const char *
18368 output_move_quad (rtx *operands)
18370 if (REG_P (operands[0]))
18372 /* Load, or reg->reg move. */
18374 if (MEM_P (operands[1]))
18376 switch (GET_CODE (XEXP (operands[1], 0)))
18378 case REG:
18379 output_asm_insn ("ldmia%?\t%m1, %M0", operands);
18380 break;
18382 case LABEL_REF:
18383 case CONST:
18384 output_asm_insn ("adr%?\t%0, %1", operands);
18385 output_asm_insn ("ldmia%?\t%0, %M0", operands);
18386 break;
18388 default:
18389 gcc_unreachable ();
18392 else
18394 rtx ops[2];
18395 int dest, src, i;
18397 gcc_assert (REG_P (operands[1]));
18399 dest = REGNO (operands[0]);
18400 src = REGNO (operands[1]);
18402 /* This seems pretty dumb, but hopefully GCC won't try to do it
18403 very often. */
18404 if (dest < src)
18405 for (i = 0; i < 4; i++)
18407 ops[0] = gen_rtx_REG (SImode, dest + i);
18408 ops[1] = gen_rtx_REG (SImode, src + i);
18409 output_asm_insn ("mov%?\t%0, %1", ops);
18411 else
18412 for (i = 3; i >= 0; i--)
18414 ops[0] = gen_rtx_REG (SImode, dest + i);
18415 ops[1] = gen_rtx_REG (SImode, src + i);
18416 output_asm_insn ("mov%?\t%0, %1", ops);
18420 else
18422 gcc_assert (MEM_P (operands[0]));
18423 gcc_assert (REG_P (operands[1]));
18424 gcc_assert (!reg_overlap_mentioned_p (operands[1], operands[0]));
18426 switch (GET_CODE (XEXP (operands[0], 0)))
18428 case REG:
18429 output_asm_insn ("stm%?\t%m0, %M1", operands);
18430 break;
18432 default:
18433 gcc_unreachable ();
18437 return "";
18440 /* Output a VFP load or store instruction. */
18442 const char *
18443 output_move_vfp (rtx *operands)
18445 rtx reg, mem, addr, ops[2];
18446 int load = REG_P (operands[0]);
18447 int dp = GET_MODE_SIZE (GET_MODE (operands[0])) == 8;
18448 int sp = (!TARGET_VFP_FP16INST
18449 || GET_MODE_SIZE (GET_MODE (operands[0])) == 4);
18450 int integer_p = GET_MODE_CLASS (GET_MODE (operands[0])) == MODE_INT;
18451 const char *templ;
18452 char buff[50];
18453 machine_mode mode;
18455 reg = operands[!load];
18456 mem = operands[load];
18458 mode = GET_MODE (reg);
18460 gcc_assert (REG_P (reg));
18461 gcc_assert (IS_VFP_REGNUM (REGNO (reg)));
18462 gcc_assert ((mode == HFmode && TARGET_HARD_FLOAT)
18463 || mode == SFmode
18464 || mode == DFmode
18465 || mode == HImode
18466 || mode == SImode
18467 || mode == DImode
18468 || (TARGET_NEON && VALID_NEON_DREG_MODE (mode)));
18469 gcc_assert (MEM_P (mem));
18471 addr = XEXP (mem, 0);
18473 switch (GET_CODE (addr))
18475 case PRE_DEC:
18476 templ = "v%smdb%%?.%s\t%%0!, {%%%s1}%s";
18477 ops[0] = XEXP (addr, 0);
18478 ops[1] = reg;
18479 break;
18481 case POST_INC:
18482 templ = "v%smia%%?.%s\t%%0!, {%%%s1}%s";
18483 ops[0] = XEXP (addr, 0);
18484 ops[1] = reg;
18485 break;
18487 default:
18488 templ = "v%sr%%?.%s\t%%%s0, %%1%s";
18489 ops[0] = reg;
18490 ops[1] = mem;
18491 break;
18494 sprintf (buff, templ,
18495 load ? "ld" : "st",
18496 dp ? "64" : sp ? "32" : "16",
18497 dp ? "P" : "",
18498 integer_p ? "\t%@ int" : "");
18499 output_asm_insn (buff, ops);
18501 return "";
18504 /* Output a Neon double-word or quad-word load or store, or a load
18505 or store for larger structure modes.
18507 WARNING: The ordering of elements is weird in big-endian mode,
18508 because the EABI requires that vectors stored in memory appear
18509 as though they were stored by a VSTM instruction.
18510 GCC RTL defines element ordering based on in-memory order.
18511 This can be different from the architectural ordering of elements
18512 within a NEON register. The intrinsics defined in arm_neon.h use the
18513 NEON register element ordering, not the GCC RTL element ordering.
18515 For example, the in-memory ordering of a big-endian quadword
18516 vector with 16-bit elements when stored from register pair {d0,d1}
18517 will be (lowest address first, d0[N] is NEON register element N):
18519 [d0[3], d0[2], d0[1], d0[0], d1[7], d1[6], d1[5], d1[4]]
18521 When necessary, quadword registers (dN, dN+1) are moved to ARM
18522 registers from rN in the order:
18524 dN -> (rN+1, rN), dN+1 -> (rN+3, rN+2)
18526 So that STM/LDM can be used on vectors in ARM registers, and the
18527 same memory layout will result as if VSTM/VLDM were used.
18529 Instead of VSTM/VLDM we prefer to use VST1.64/VLD1.64 where
18530 possible, which allows use of appropriate alignment tags.
18531 Note that the choice of "64" is independent of the actual vector
18532 element size; this size simply ensures that the behavior is
18533 equivalent to VSTM/VLDM in both little-endian and big-endian mode.
18535 Due to limitations of those instructions, use of VST1.64/VLD1.64
18536 is not possible if:
18537 - the address contains PRE_DEC, or
18538 - the mode refers to more than 4 double-word registers
18540 In those cases, it would be possible to replace VSTM/VLDM by a
18541 sequence of instructions; this is not currently implemented since
18542 this is not certain to actually improve performance. */
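/* As a rough example, loading a quad-word vector whose address is a plain
   register typically gives something like

	vld1.64	{d0, d1}, [r0]

   (possibly with an alignment hint on the address), while modes needing more
   than four D registers, or PRE_DEC addresses, fall back to vldmia / vldmdb
   as described above.  */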
18544 const char *
18545 output_move_neon (rtx *operands)
18547 rtx reg, mem, addr, ops[2];
18548 int regno, nregs, load = REG_P (operands[0]);
18549 const char *templ;
18550 char buff[50];
18551 machine_mode mode;
18553 reg = operands[!load];
18554 mem = operands[load];
18556 mode = GET_MODE (reg);
18558 gcc_assert (REG_P (reg));
18559 regno = REGNO (reg);
18560 nregs = HARD_REGNO_NREGS (regno, mode) / 2;
18561 gcc_assert (VFP_REGNO_OK_FOR_DOUBLE (regno)
18562 || NEON_REGNO_OK_FOR_QUAD (regno));
18563 gcc_assert (VALID_NEON_DREG_MODE (mode)
18564 || VALID_NEON_QREG_MODE (mode)
18565 || VALID_NEON_STRUCT_MODE (mode));
18566 gcc_assert (MEM_P (mem));
18568 addr = XEXP (mem, 0);
18570 /* Strip off const from addresses like (const (plus (...))). */
18571 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS)
18572 addr = XEXP (addr, 0);
18574 switch (GET_CODE (addr))
18576 case POST_INC:
18577 /* We have to use vldm / vstm for too-large modes. */
18578 if (nregs > 4)
18580 templ = "v%smia%%?\t%%0!, %%h1";
18581 ops[0] = XEXP (addr, 0);
18583 else
18585 templ = "v%s1.64\t%%h1, %%A0";
18586 ops[0] = mem;
18588 ops[1] = reg;
18589 break;
18591 case PRE_DEC:
18592 /* We have to use vldm / vstm in this case, since there is no
18593 pre-decrement form of the vld1 / vst1 instructions. */
18594 templ = "v%smdb%%?\t%%0!, %%h1";
18595 ops[0] = XEXP (addr, 0);
18596 ops[1] = reg;
18597 break;
18599 case POST_MODIFY:
18600 /* FIXME: Not currently enabled in neon_vector_mem_operand. */
18601 gcc_unreachable ();
18603 case REG:
18604 /* We have to use vldm / vstm for too-large modes. */
18605 if (nregs > 1)
18607 if (nregs > 4)
18608 templ = "v%smia%%?\t%%m0, %%h1";
18609 else
18610 templ = "v%s1.64\t%%h1, %%A0";
18612 ops[0] = mem;
18613 ops[1] = reg;
18614 break;
18616 /* Fall through. */
18617 case LABEL_REF:
18618 case PLUS:
18620 int i;
18621 int overlap = -1;
18622 for (i = 0; i < nregs; i++)
18624 /* We're only using DImode here because it's a convenient size. */
18625 ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * i);
18626 ops[1] = adjust_address (mem, DImode, 8 * i);
18627 if (reg_overlap_mentioned_p (ops[0], mem))
18629 gcc_assert (overlap == -1);
18630 overlap = i;
18632 else
18634 sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
18635 output_asm_insn (buff, ops);
18638 if (overlap != -1)
18640 ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * overlap);
18641 ops[1] = adjust_address (mem, SImode, 8 * overlap);
18642 sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
18643 output_asm_insn (buff, ops);
18646 return "";
18649 default:
18650 gcc_unreachable ();
18653 sprintf (buff, templ, load ? "ld" : "st");
18654 output_asm_insn (buff, ops);
18656 return "";
18659 /* Compute and return the length of neon_mov<mode>, where <mode> is
18660 one of VSTRUCT modes: EI, OI, CI or XI. */
18662 arm_attr_length_move_neon (rtx_insn *insn)
18664 rtx reg, mem, addr;
18665 int load;
18666 machine_mode mode;
18668 extract_insn_cached (insn);
18670 if (REG_P (recog_data.operand[0]) && REG_P (recog_data.operand[1]))
18672 mode = GET_MODE (recog_data.operand[0]);
18673 switch (mode)
18675 case EImode:
18676 case OImode:
18677 return 8;
18678 case CImode:
18679 return 12;
18680 case XImode:
18681 return 16;
18682 default:
18683 gcc_unreachable ();
18687 load = REG_P (recog_data.operand[0]);
18688 reg = recog_data.operand[!load];
18689 mem = recog_data.operand[load];
18691 gcc_assert (MEM_P (mem));
18693 mode = GET_MODE (reg);
18694 addr = XEXP (mem, 0);
18696 /* Strip off const from addresses like (const (plus (...))). */
18697 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS)
18698 addr = XEXP (addr, 0);
18700 if (GET_CODE (addr) == LABEL_REF || GET_CODE (addr) == PLUS)
18702 int insns = HARD_REGNO_NREGS (REGNO (reg), mode) / 2;
18703 return insns * 4;
18705 else
18706 return 4;
18709 /* Return nonzero if the offset in the address is an immediate. Otherwise,
18710 return zero. */
18713 arm_address_offset_is_imm (rtx_insn *insn)
18715 rtx mem, addr;
18717 extract_insn_cached (insn);
18719 if (REG_P (recog_data.operand[0]))
18720 return 0;
18722 mem = recog_data.operand[0];
18724 gcc_assert (MEM_P (mem));
18726 addr = XEXP (mem, 0);
18728 if (REG_P (addr)
18729 || (GET_CODE (addr) == PLUS
18730 && REG_P (XEXP (addr, 0))
18731 && CONST_INT_P (XEXP (addr, 1))))
18732 return 1;
18733 else
18734 return 0;
18737 /* Output an ADD r, s, #n where n may be too big for one instruction.
18738 If adding zero to one register, output nothing. */
18739 const char *
18740 output_add_immediate (rtx *operands)
18742 HOST_WIDE_INT n = INTVAL (operands[2]);
18744 if (n != 0 || REGNO (operands[0]) != REGNO (operands[1]))
18746 if (n < 0)
18747 output_multi_immediate (operands,
18748 "sub%?\t%0, %1, %2", "sub%?\t%0, %0, %2", 2,
18749 -n);
18750 else
18751 output_multi_immediate (operands,
18752 "add%?\t%0, %1, %2", "add%?\t%0, %0, %2", 2,
18756 return "";
18759 /* Output a multiple immediate operation.
18760 OPERANDS is the vector of operands referred to in the output patterns.
18761 INSTR1 is the output pattern to use for the first constant.
18762 INSTR2 is the output pattern to use for subsequent constants.
18763 IMMED_OP is the index of the constant slot in OPERANDS.
18764 N is the constant value. */
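/* As an illustrative example (with %0 = r0, %1 = r1), adding the constant
   0x10F, which is not a valid ARM immediate, through output_add_immediate
   is split into two instructions, each adding a chunk that fits the ARM
   immediate encoding:

	add	r0, r1, #15
	add	r0, r0, #256
   */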
18765 static const char *
18766 output_multi_immediate (rtx *operands, const char *instr1, const char *instr2,
18767 int immed_op, HOST_WIDE_INT n)
18769 #if HOST_BITS_PER_WIDE_INT > 32
18770 n &= 0xffffffff;
18771 #endif
18773 if (n == 0)
18775 /* Quick and easy output. */
18776 operands[immed_op] = const0_rtx;
18777 output_asm_insn (instr1, operands);
18779 else
18781 int i;
18782 const char * instr = instr1;
18784 /* Note that n is never zero here (which would give no output). */
18785 for (i = 0; i < 32; i += 2)
18787 if (n & (3 << i))
18789 operands[immed_op] = GEN_INT (n & (255 << i));
18790 output_asm_insn (instr, operands);
18791 instr = instr2;
18792 i += 6;
18797 return "";
18800 /* Return the name of a shifter operation. */
18801 static const char *
18802 arm_shift_nmem(enum rtx_code code)
18804 switch (code)
18806 case ASHIFT:
18807 return ARM_LSL_NAME;
18809 case ASHIFTRT:
18810 return "asr";
18812 case LSHIFTRT:
18813 return "lsr";
18815 case ROTATERT:
18816 return "ror";
18818 default:
18819 abort();
18823 /* Return the appropriate ARM instruction for the operation code.
18824 The returned result should not be overwritten. OP is the rtx of the
18825 operation. SHIFT_FIRST_ARG is TRUE if the first argument of the operator
18826 was shifted. */
18827 const char *
18828 arithmetic_instr (rtx op, int shift_first_arg)
18830 switch (GET_CODE (op))
18832 case PLUS:
18833 return "add";
18835 case MINUS:
18836 return shift_first_arg ? "rsb" : "sub";
18838 case IOR:
18839 return "orr";
18841 case XOR:
18842 return "eor";
18844 case AND:
18845 return "and";
18847 case ASHIFT:
18848 case ASHIFTRT:
18849 case LSHIFTRT:
18850 case ROTATERT:
18851 return arm_shift_nmem(GET_CODE(op));
18853 default:
18854 gcc_unreachable ();
18858 /* Ensure valid constant shifts and return the appropriate shift mnemonic
18859 for the operation code. The returned result should not be overwritten.
18860 OP is the rtx code of the shift.
18861 On exit, *AMOUNTP will be -1 if the shift is by a register, or the
18862 constant shift amount otherwise. */
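/* For example (illustrative only), a MULT by 8 is returned as ARM_LSL_NAME
   ("lsl") with *AMOUNTP set to 3, while an ASHIFTRT by a register operand
   returns "asr" with *AMOUNTP set to -1 so that the caller prints the
   register.  */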
18863 static const char *
18864 shift_op (rtx op, HOST_WIDE_INT *amountp)
18866 const char * mnem;
18867 enum rtx_code code = GET_CODE (op);
18869 switch (code)
18871 case ROTATE:
18872 if (!CONST_INT_P (XEXP (op, 1)))
18874 output_operand_lossage ("invalid shift operand");
18875 return NULL;
18878 code = ROTATERT;
18879 *amountp = 32 - INTVAL (XEXP (op, 1));
18880 mnem = "ror";
18881 break;
18883 case ASHIFT:
18884 case ASHIFTRT:
18885 case LSHIFTRT:
18886 case ROTATERT:
18887 mnem = arm_shift_nmem(code);
18888 if (CONST_INT_P (XEXP (op, 1)))
18890 *amountp = INTVAL (XEXP (op, 1));
18892 else if (REG_P (XEXP (op, 1)))
18894 *amountp = -1;
18895 return mnem;
18897 else
18899 output_operand_lossage ("invalid shift operand");
18900 return NULL;
18902 break;
18904 case MULT:
18905 /* We never have to worry about the amount being other than a
18906 power of 2, since this case can never be reloaded from a reg. */
18907 if (!CONST_INT_P (XEXP (op, 1)))
18909 output_operand_lossage ("invalid shift operand");
18910 return NULL;
18913 *amountp = INTVAL (XEXP (op, 1)) & 0xFFFFFFFF;
18915 /* Amount must be a power of two. */
18916 if (*amountp & (*amountp - 1))
18918 output_operand_lossage ("invalid shift operand");
18919 return NULL;
18922 *amountp = exact_log2 (*amountp);
18923 gcc_assert (IN_RANGE (*amountp, 0, 31));
18924 return ARM_LSL_NAME;
18926 default:
18927 output_operand_lossage ("invalid shift operand");
18928 return NULL;
18931 /* This is not 100% correct, but follows from the desire to merge
18932 multiplication by a power of 2 with the recognizer for a
18933 shift. >=32 is not a valid shift for "lsl", so we must try and
18934 output a shift that produces the correct arithmetical result.
18935 Using lsr #32 is identical except for the fact that the carry bit
18936 is not set correctly if we set the flags; but we never use the
18937 carry bit from such an operation, so we can ignore that. */
18938 if (code == ROTATERT)
18939 /* Rotate is just modulo 32. */
18940 *amountp &= 31;
18941 else if (*amountp != (*amountp & 31))
18943 if (code == ASHIFT)
18944 mnem = "lsr";
18945 *amountp = 32;
18948 /* Shifts of 0 are no-ops. */
18949 if (*amountp == 0)
18950 return NULL;
18952 return mnem;
18955 /* Output a .ascii pseudo-op, keeping track of lengths. This is
18956 because /bin/as is horribly restrictive. The judgement about
18957 whether or not each character is 'printable' (and can be output as
18958 is) or not (and must be printed with an octal escape) must be made
18959 with reference to the *host* character set -- the situation is
18960 similar to that discussed in the comments above pp_c_char in
18961 c-pretty-print.c. */
18963 #define MAX_ASCII_LEN 51
18965 void
18966 output_ascii_pseudo_op (FILE *stream, const unsigned char *p, int len)
18968 int i;
18969 int len_so_far = 0;
18971 fputs ("\t.ascii\t\"", stream);
18973 for (i = 0; i < len; i++)
18975 int c = p[i];
18977 if (len_so_far >= MAX_ASCII_LEN)
18979 fputs ("\"\n\t.ascii\t\"", stream);
18980 len_so_far = 0;
18983 if (ISPRINT (c))
18985 if (c == '\\' || c == '\"')
18987 putc ('\\', stream);
18988 len_so_far++;
18990 putc (c, stream);
18991 len_so_far++;
18993 else
18995 fprintf (stream, "\\%03o", c);
18996 len_so_far += 4;
19000 fputs ("\"\n", stream);
19003 /* Whether a register is callee saved or not. This is necessary because high
19004 registers are marked as caller saved when optimizing for size on Thumb-1
19005 targets, despite being callee saved, in order to avoid using them. */
19006 #define callee_saved_reg_p(reg) \
19007 (!call_used_regs[reg] \
19008 || (TARGET_THUMB1 && optimize_size \
19009 && reg >= FIRST_HI_REGNUM && reg <= LAST_HI_REGNUM))
19011 /* Compute the register save mask for registers 0 through 12
19012 inclusive. This code is used by arm_compute_save_reg_mask. */
19014 static unsigned long
19015 arm_compute_save_reg0_reg12_mask (void)
19017 unsigned long func_type = arm_current_func_type ();
19018 unsigned long save_reg_mask = 0;
19019 unsigned int reg;
19021 if (IS_INTERRUPT (func_type))
19023 unsigned int max_reg;
19024 /* Interrupt functions must not corrupt any registers,
19025 even call clobbered ones. If this is a leaf function
19026 we can just examine the registers used by the RTL, but
19027 otherwise we have to assume that whatever function is
19028 called might clobber anything, and so we have to save
19029 all the call-clobbered registers as well. */
19030 if (ARM_FUNC_TYPE (func_type) == ARM_FT_FIQ)
19031 /* FIQ handlers have registers r8 - r12 banked, so
19032 we only need to check r0 - r7; normal ISRs only
19033 bank r14 and r15, so we must check up to r12.
19034 r13 is the stack pointer which is always preserved,
19035 so we do not need to consider it here. */
19036 max_reg = 7;
19037 else
19038 max_reg = 12;
19040 for (reg = 0; reg <= max_reg; reg++)
19041 if (df_regs_ever_live_p (reg)
19042 || (! crtl->is_leaf && call_used_regs[reg]))
19043 save_reg_mask |= (1 << reg);
19045 /* Also save the pic base register if necessary. */
19046 if (flag_pic
19047 && !TARGET_SINGLE_PIC_BASE
19048 && arm_pic_register != INVALID_REGNUM
19049 && crtl->uses_pic_offset_table)
19050 save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
19052 else if (IS_VOLATILE(func_type))
19054 /* For noreturn functions we historically omitted register saves
19055 altogether. However, this really messes up debugging. As a
19056 compromise, save just the frame pointers. Combined with the link
19057 register saved elsewhere this should be sufficient to get
19058 a backtrace. */
19059 if (frame_pointer_needed)
19060 save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;
19061 if (df_regs_ever_live_p (ARM_HARD_FRAME_POINTER_REGNUM))
19062 save_reg_mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;
19063 if (df_regs_ever_live_p (THUMB_HARD_FRAME_POINTER_REGNUM))
19064 save_reg_mask |= 1 << THUMB_HARD_FRAME_POINTER_REGNUM;
19066 else
19068 /* In the normal case we only need to save those registers
19069 which are call saved and which are used by this function. */
19070 for (reg = 0; reg <= 11; reg++)
19071 if (df_regs_ever_live_p (reg) && callee_saved_reg_p (reg))
19072 save_reg_mask |= (1 << reg);
19074 /* Handle the frame pointer as a special case. */
19075 if (frame_pointer_needed)
19076 save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;
19078 /* If we aren't loading the PIC register,
19079 don't stack it even though it may be live. */
19080 if (flag_pic
19081 && !TARGET_SINGLE_PIC_BASE
19082 && arm_pic_register != INVALID_REGNUM
19083 && (df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM)
19084 || crtl->uses_pic_offset_table))
19085 save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
19087 /* The prologue will copy SP into R0, so save it. */
19088 if (IS_STACKALIGN (func_type))
19089 save_reg_mask |= 1;
19092 /* Save registers so the exception handler can modify them. */
19093 if (crtl->calls_eh_return)
19095 unsigned int i;
19097 for (i = 0; ; i++)
19099 reg = EH_RETURN_DATA_REGNO (i);
19100 if (reg == INVALID_REGNUM)
19101 break;
19102 save_reg_mask |= 1 << reg;
19106 return save_reg_mask;
19109 /* Return true if r3 is live at the start of the function. */
19111 static bool
19112 arm_r3_live_at_start_p (void)
19114 /* Just look at cfg info, which is still close enough to correct at this
19115 point. This gives false positives for broken functions that might use
19116 uninitialized data that happens to be allocated in r3, but who cares? */
19117 return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun)), 3);
19120 /* Compute the number of bytes used to store the static chain register on the
19121 stack, above the stack frame. We need to know this accurately to get the
19122 alignment of the rest of the stack frame correct. */
19124 static int
19125 arm_compute_static_chain_stack_bytes (void)
19127 /* See the defining assertion in arm_expand_prologue. */
19128 if (IS_NESTED (arm_current_func_type ())
19129 && ((TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
19130 || (flag_stack_check == STATIC_BUILTIN_STACK_CHECK
19131 && !df_regs_ever_live_p (LR_REGNUM)))
19132 && arm_r3_live_at_start_p ()
19133 && crtl->args.pretend_args_size == 0)
19134 return 4;
19136 return 0;
19139 /* Compute a bit mask of which registers need to be
19140 saved on the stack for the current function.
19141 This is used by arm_get_frame_offsets, which may add extra registers. */
19143 static unsigned long
19144 arm_compute_save_reg_mask (void)
19146 unsigned int save_reg_mask = 0;
19147 unsigned long func_type = arm_current_func_type ();
19148 unsigned int reg;
19150 if (IS_NAKED (func_type))
19151 /* This should never really happen. */
19152 return 0;
19154 /* If we are creating a stack frame, then we must save the frame pointer,
19155 IP (which will hold the old stack pointer), LR and the PC. */
19156 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
19157 save_reg_mask |=
19158 (1 << ARM_HARD_FRAME_POINTER_REGNUM)
19159 | (1 << IP_REGNUM)
19160 | (1 << LR_REGNUM)
19161 | (1 << PC_REGNUM);
19163 save_reg_mask |= arm_compute_save_reg0_reg12_mask ();
19165 /* Decide if we need to save the link register.
19166 Interrupt routines have their own banked link register,
19167 so they never need to save it.
19168 Otherwise if we do not use the link register we do not need to save
19169 it. If we are pushing other registers onto the stack however, we
19170 can save an instruction in the epilogue by pushing the link register
19171 now and then popping it back into the PC. This incurs extra memory
19172 accesses though, so we only do it when optimizing for size, and only
19173 if we know that we will not need a fancy return sequence. */
19174 if (df_regs_ever_live_p (LR_REGNUM)
19175 || (save_reg_mask
19176 && optimize_size
19177 && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
19178 && !crtl->tail_call_emit
19179 && !crtl->calls_eh_return))
19180 save_reg_mask |= 1 << LR_REGNUM;
19182 if (cfun->machine->lr_save_eliminated)
19183 save_reg_mask &= ~ (1 << LR_REGNUM);
19185 if (TARGET_REALLY_IWMMXT
19186 && ((bit_count (save_reg_mask)
19187 + ARM_NUM_INTS (crtl->args.pretend_args_size +
19188 arm_compute_static_chain_stack_bytes())
19189 ) % 2) != 0)
19191 /* The total number of registers that are going to be pushed
19192 onto the stack is odd. We need to ensure that the stack
19193 is 64-bit aligned before we start to save iWMMXt registers,
19194 and also before we start to create locals. (A local variable
19195 might be a double or long long which we will load/store using
19196 an iWMMXt instruction). Therefore we need to push another
19197 ARM register, so that the stack will be 64-bit aligned. We
19198 try to avoid using the arg registers (r0 - r3) as they might be
19199 used to pass values in a tail call. */
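/* As an illustration (the exact registers depend on the mask): with no
   pretend args and no static chain, a mask of {r4, lr} is an even count
   and needs no padding, whereas {r4, r5, lr} is odd, so the first free
   register in the r4-r12 range (r6 in that case) is added to keep the
   iWMMXt save area 64-bit aligned.  */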
19200 for (reg = 4; reg <= 12; reg++)
19201 if ((save_reg_mask & (1 << reg)) == 0)
19202 break;
19204 if (reg <= 12)
19205 save_reg_mask |= (1 << reg);
19206 else
19208 cfun->machine->sibcall_blocked = 1;
19209 save_reg_mask |= (1 << 3);
19213 /* We may need to push an additional register for use initializing the
19214 PIC base register. */
19215 if (TARGET_THUMB2 && IS_NESTED (func_type) && flag_pic
19216 && (save_reg_mask & THUMB2_WORK_REGS) == 0)
19218 reg = thumb_find_work_register (1 << 4);
19219 if (!call_used_regs[reg])
19220 save_reg_mask |= (1 << reg);
19223 return save_reg_mask;
19226 /* Compute a bit mask of which registers need to be
19227 saved on the stack for the current function. */
19228 static unsigned long
19229 thumb1_compute_save_reg_mask (void)
19231 unsigned long mask;
19232 unsigned reg;
19234 mask = 0;
19235 for (reg = 0; reg < 12; reg ++)
19236 if (df_regs_ever_live_p (reg) && callee_saved_reg_p (reg))
19237 mask |= 1 << reg;
19239 /* Handle the frame pointer as a special case. */
19240 if (frame_pointer_needed)
19241 mask |= 1 << HARD_FRAME_POINTER_REGNUM;
19243 if (flag_pic
19244 && !TARGET_SINGLE_PIC_BASE
19245 && arm_pic_register != INVALID_REGNUM
19246 && crtl->uses_pic_offset_table)
19247 mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
19249 /* See if we might need r11 for calls to _interwork_r11_call_via_rN(). */
19250 if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
19251 mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;
19253 /* LR will also be pushed if any lo regs are pushed. */
19254 if (mask & 0xff || thumb_force_lr_save ())
19255 mask |= (1 << LR_REGNUM);
19257 /* Make sure we have a low work register if we need one.
19258 We will need one if we are going to push a high register,
19259 but we are not currently intending to push a low register. */
19260 if ((mask & 0xff) == 0
19261 && ((mask & 0x0f00) || TARGET_BACKTRACE))
19263 /* Use thumb_find_work_register to choose which register
19264 we will use. If the register is live then we will
19265 have to push it. Use LAST_LO_REGNUM as our fallback
19266 choice for the register to select. */
19267 reg = thumb_find_work_register (1 << LAST_LO_REGNUM);
19268 /* Make sure the register returned by thumb_find_work_register is
19269 not part of the return value. */
19270 if (reg * UNITS_PER_WORD <= (unsigned) arm_size_return_regs ())
19271 reg = LAST_LO_REGNUM;
19273 if (callee_saved_reg_p (reg))
19274 mask |= 1 << reg;
19277 /* The 504 below is 8 bytes less than 512 because there are two possible
19278 alignment words. We can't tell here if they will be present or not so we
19279 have to play it safe and assume that they are. */
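/* For instance (illustrative figures): with a 600-byte frame and none of
   r4-r7 already in MASK, the test below succeeds and r7 (LAST_LO_REGNUM)
   is added so the prologue has a low register free to hold the stack
   decrement.  */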
19280 if ((CALLER_INTERWORKING_SLOT_SIZE +
19281 ROUND_UP_WORD (get_frame_size ()) +
19282 crtl->outgoing_args_size) >= 504)
19284 /* This is the same as the code in thumb1_expand_prologue() which
19285 determines which register to use for stack decrement. */
19286 for (reg = LAST_ARG_REGNUM + 1; reg <= LAST_LO_REGNUM; reg++)
19287 if (mask & (1 << reg))
19288 break;
19290 if (reg > LAST_LO_REGNUM)
19292 /* Make sure we have a register available for stack decrement. */
19293 mask |= 1 << LAST_LO_REGNUM;
19297 return mask;
19301 /* Return the number of bytes required to save VFP registers. */
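/* A sketch of the computation below (the register choice is only an
   example): if d8-d10 are the only live call-saved VFP registers, COUNT
   reaches 3 and 24 bytes are reserved; a run of exactly two D registers
   is padded to three on pre-ARMv6 cores (the ARM10 VFPr1 workaround),
   costing 8 extra bytes.  */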
19302 static int
19303 arm_get_vfp_saved_size (void)
19305 unsigned int regno;
19306 int count;
19307 int saved;
19309 saved = 0;
19310 /* Space for saved VFP registers. */
19311 if (TARGET_HARD_FLOAT)
19313 count = 0;
19314 for (regno = FIRST_VFP_REGNUM;
19315 regno < LAST_VFP_REGNUM;
19316 regno += 2)
19318 if ((!df_regs_ever_live_p (regno) || call_used_regs[regno])
19319 && (!df_regs_ever_live_p (regno + 1) || call_used_regs[regno + 1]))
19321 if (count > 0)
19323 /* Workaround ARM10 VFPr1 bug. */
19324 if (count == 2 && !arm_arch6)
19325 count++;
19326 saved += count * 8;
19328 count = 0;
19330 else
19331 count++;
19333 if (count > 0)
19335 if (count == 2 && !arm_arch6)
19336 count++;
19337 saved += count * 8;
19340 return saved;
19344 /* Generate a function exit sequence. If REALLY_RETURN is false, then do
19345 everything bar the final return instruction. If SIMPLE_RETURN is true,
19346 then do not output the epilogue, because it has already been emitted in RTL. */
19347 const char *
19348 output_return_instruction (rtx operand, bool really_return, bool reverse,
19349 bool simple_return)
19351 char conditional[10];
19352 char instr[100];
19353 unsigned reg;
19354 unsigned long live_regs_mask;
19355 unsigned long func_type;
19356 arm_stack_offsets *offsets;
19358 func_type = arm_current_func_type ();
19360 if (IS_NAKED (func_type))
19361 return "";
19363 if (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN)
19365 /* If this function was declared non-returning, and we have
19366 found a tail call, then we have to trust that the called
19367 function won't return. */
19368 if (really_return)
19370 rtx ops[2];
19372 /* Otherwise, trap an attempted return by aborting. */
19373 ops[0] = operand;
19374 ops[1] = gen_rtx_SYMBOL_REF (Pmode, NEED_PLT_RELOC ? "abort(PLT)"
19375 : "abort");
19376 assemble_external_libcall (ops[1]);
19377 output_asm_insn (reverse ? "bl%D0\t%a1" : "bl%d0\t%a1", ops);
19380 return "";
19383 gcc_assert (!cfun->calls_alloca || really_return);
19385 sprintf (conditional, "%%?%%%c0", reverse ? 'D' : 'd');
19387 cfun->machine->return_used_this_function = 1;
19389 offsets = arm_get_frame_offsets ();
19390 live_regs_mask = offsets->saved_regs_mask;
19392 if (!simple_return && live_regs_mask)
19394 const char * return_reg;
19396 /* If we do not have any special requirements for function exit
19397 (e.g. interworking) then we can load the return address
19398 directly into the PC. Otherwise we must load it into LR. */
19399 if (really_return
19400 && !IS_CMSE_ENTRY (func_type)
19401 && (IS_INTERRUPT (func_type) || !TARGET_INTERWORK))
19402 return_reg = reg_names[PC_REGNUM];
19403 else
19404 return_reg = reg_names[LR_REGNUM];
19406 if ((live_regs_mask & (1 << IP_REGNUM)) == (1 << IP_REGNUM))
19408 /* There are three possible reasons for the IP register
19409 being saved. 1) a stack frame was created, in which case
19410 IP contains the old stack pointer, or 2) an ISR routine
19411 corrupted it, or 3) it was saved to align the stack on
19412 iWMMXt. In case 1, restore IP into SP, otherwise just
19413 restore IP. */
19414 if (frame_pointer_needed)
19416 live_regs_mask &= ~ (1 << IP_REGNUM);
19417 live_regs_mask |= (1 << SP_REGNUM);
19419 else
19420 gcc_assert (IS_INTERRUPT (func_type) || TARGET_REALLY_IWMMXT);
19423 /* On some ARM architectures it is faster to use LDR rather than
19424 LDM to load a single register. On other architectures, the
19425 cost is the same. In 26 bit mode, or for exception handlers,
19426 we have to use LDM to load the PC so that the CPSR is also
19427 restored. */
19428 for (reg = 0; reg <= LAST_ARM_REGNUM; reg++)
19429 if (live_regs_mask == (1U << reg))
19430 break;
19432 if (reg <= LAST_ARM_REGNUM
19433 && (reg != LR_REGNUM
19434 || ! really_return
19435 || ! IS_INTERRUPT (func_type)))
19437 sprintf (instr, "ldr%s\t%%|%s, [%%|sp], #4", conditional,
19438 (reg == LR_REGNUM) ? return_reg : reg_names[reg]);
19440 else
19442 char *p;
19443 int first = 1;
19445 /* Generate the load multiple instruction to restore the
19446 registers. Note we can get here, even if
19447 frame_pointer_needed is true, but only if sp already
19448 points to the base of the saved core registers. */
19449 if (live_regs_mask & (1 << SP_REGNUM))
19451 unsigned HOST_WIDE_INT stack_adjust;
19453 stack_adjust = offsets->outgoing_args - offsets->saved_regs;
19454 gcc_assert (stack_adjust == 0 || stack_adjust == 4);
19456 if (stack_adjust && arm_arch5 && TARGET_ARM)
19457 sprintf (instr, "ldmib%s\t%%|sp, {", conditional);
19458 else
19460 /* If we can't use ldmib (SA110 bug),
19461 then try to pop r3 instead. */
19462 if (stack_adjust)
19463 live_regs_mask |= 1 << 3;
19465 sprintf (instr, "ldmfd%s\t%%|sp, {", conditional);
19468 /* For interrupt returns we have to use an LDM rather than
19469 a POP so that we can use the exception return variant. */
19470 else if (IS_INTERRUPT (func_type))
19471 sprintf (instr, "ldmfd%s\t%%|sp!, {", conditional);
19472 else
19473 sprintf (instr, "pop%s\t{", conditional);
19475 p = instr + strlen (instr);
19477 for (reg = 0; reg <= SP_REGNUM; reg++)
19478 if (live_regs_mask & (1 << reg))
19480 int l = strlen (reg_names[reg]);
19482 if (first)
19483 first = 0;
19484 else
19486 memcpy (p, ", ", 2);
19487 p += 2;
19490 memcpy (p, "%|", 2);
19491 memcpy (p + 2, reg_names[reg], l);
19492 p += l + 2;
19495 if (live_regs_mask & (1 << LR_REGNUM))
19497 sprintf (p, "%s%%|%s}", first ? "" : ", ", return_reg);
19498 /* If returning from an interrupt, restore the CPSR. */
19499 if (IS_INTERRUPT (func_type))
19500 strcat (p, "^");
19502 else
19503 strcpy (p, "}");
19506 output_asm_insn (instr, & operand);
19508 /* See if we need to generate an extra instruction to
19509 perform the actual function return. */
19510 if (really_return
19511 && func_type != ARM_FT_INTERWORKED
19512 && (live_regs_mask & (1 << LR_REGNUM)) != 0)
19514 /* The return has already been handled
19515 by loading the LR into the PC. */
19516 return "";
19520 if (really_return)
19522 switch ((int) ARM_FUNC_TYPE (func_type))
19524 case ARM_FT_ISR:
19525 case ARM_FT_FIQ:
19526 /* ??? This is wrong for unified assembly syntax. */
19527 sprintf (instr, "sub%ss\t%%|pc, %%|lr, #4", conditional);
19528 break;
19530 case ARM_FT_INTERWORKED:
19531 gcc_assert (arm_arch5 || arm_arch4t);
19532 sprintf (instr, "bx%s\t%%|lr", conditional);
19533 break;
19535 case ARM_FT_EXCEPTION:
19536 /* ??? This is wrong for unified assembly syntax. */
19537 sprintf (instr, "mov%ss\t%%|pc, %%|lr", conditional);
19538 break;
19540 default:
19541 if (IS_CMSE_ENTRY (func_type))
19543 /* Check if we have to clear the 'GE bits' which are only used if
19544 parallel add and subtraction instructions are available. */
19545 if (TARGET_INT_SIMD)
19546 snprintf (instr, sizeof (instr),
19547 "msr%s\tAPSR_nzcvqg, %%|lr", conditional);
19548 else
19549 snprintf (instr, sizeof (instr),
19550 "msr%s\tAPSR_nzcvq, %%|lr", conditional);
19552 output_asm_insn (instr, & operand);
19553 if (TARGET_HARD_FLOAT && !TARGET_THUMB1)
19555 /* Clear the cumulative exception-status bits (0-4,7) and the
19556 condition code bits (28-31) of the FPSCR. We need to
19557 remember to clear the first scratch register used (IP) and
19558 save and restore the second (r4). */
19559 snprintf (instr, sizeof (instr), "push\t{%%|r4}");
19560 output_asm_insn (instr, & operand);
19561 snprintf (instr, sizeof (instr), "vmrs\t%%|ip, fpscr");
19562 output_asm_insn (instr, & operand);
19563 snprintf (instr, sizeof (instr), "movw\t%%|r4, #65376");
19564 output_asm_insn (instr, & operand);
19565 snprintf (instr, sizeof (instr), "movt\t%%|r4, #4095");
19566 output_asm_insn (instr, & operand);
19567 snprintf (instr, sizeof (instr), "and\t%%|ip, %%|r4");
19568 output_asm_insn (instr, & operand);
19569 snprintf (instr, sizeof (instr), "vmsr\tfpscr, %%|ip");
19570 output_asm_insn (instr, & operand);
19571 snprintf (instr, sizeof (instr), "pop\t{%%|r4}");
19572 output_asm_insn (instr, & operand);
19573 snprintf (instr, sizeof (instr), "mov\t%%|ip, %%|lr");
19574 output_asm_insn (instr, & operand);
19576 snprintf (instr, sizeof (instr), "bxns\t%%|lr");
19578 /* Use bx if it's available. */
19579 else if (arm_arch5 || arm_arch4t)
19580 sprintf (instr, "bx%s\t%%|lr", conditional);
19581 else
19582 sprintf (instr, "mov%s\t%%|pc, %%|lr", conditional);
19583 break;
19586 output_asm_insn (instr, & operand);
19589 return "";
19592 /* Output in FILE asm statements needed to declare the NAME of the function
19593 defined by its DECL node. */
19595 void
19596 arm_asm_declare_function_name (FILE *file, const char *name, tree decl)
19598 size_t cmse_name_len;
19599 char *cmse_name = 0;
19600 char cmse_prefix[] = "__acle_se_";
19602 /* When compiling with ARMv8-M Security Extensions enabled, we should print an
19603 extra function label for each function with the 'cmse_nonsecure_entry'
19604 attribute. This extra function label should be prepended with
19605 '__acle_se_', telling the linker that it needs to create secure gateway
19606 veneers for this function. */
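/* For example, a function foo carrying the cmse_nonsecure_entry attribute
   ends up with two labels at the same address, foo and __acle_se_foo, and
   the latter is globalized so the linker can create the secure gateway
   veneer (the name foo here is purely illustrative).  */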
19607 if (use_cmse && lookup_attribute ("cmse_nonsecure_entry",
19608 DECL_ATTRIBUTES (decl)))
19610 cmse_name_len = sizeof (cmse_prefix) + strlen (name);
19611 cmse_name = XALLOCAVEC (char, cmse_name_len);
19612 snprintf (cmse_name, cmse_name_len, "%s%s", cmse_prefix, name);
19613 targetm.asm_out.globalize_label (file, cmse_name);
19615 ARM_DECLARE_FUNCTION_NAME (file, cmse_name, decl);
19616 ASM_OUTPUT_TYPE_DIRECTIVE (file, cmse_name, "function");
19619 ARM_DECLARE_FUNCTION_NAME (file, name, decl);
19620 ASM_OUTPUT_TYPE_DIRECTIVE (file, name, "function");
19621 ASM_DECLARE_RESULT (file, DECL_RESULT (decl));
19622 ASM_OUTPUT_LABEL (file, name);
19624 if (cmse_name)
19625 ASM_OUTPUT_LABEL (file, cmse_name);
19627 ARM_OUTPUT_FN_UNWIND (file, TRUE);
19630 /* Write the function name into the code section, directly preceding
19631 the function prologue.
19633 Code will be output similar to this:
19635 .ascii "arm_poke_function_name", 0
19636 .align
19638 .word 0xff000000 + (t1 - t0)
19639 arm_poke_function_name
19640 mov ip, sp
19641 stmfd sp!, {fp, ip, lr, pc}
19642 sub fp, ip, #4
19644 When performing a stack backtrace, code can inspect the value
19645 of 'pc' stored at 'fp' + 0. If the trace function then looks
19646 at location pc - 12 and the top 8 bits are set, then we know
19647 that there is a function name embedded immediately preceding this
19648 location, and that its length is ((pc[-3]) & ~0xff000000).
19650 We assume that pc is declared as a pointer to an unsigned long.
19652 It is of no benefit to output the function name if we are assembling
19653 a leaf function. These function types will not contain a stack
19654 backtrace structure, therefore it is not possible to determine the
19655 function name. */
19656 void
19657 arm_poke_function_name (FILE *stream, const char *name)
19659 unsigned long alignlength;
19660 unsigned long length;
19661 rtx x;
19663 length = strlen (name) + 1;
19664 alignlength = ROUND_UP_WORD (length);
19666 ASM_OUTPUT_ASCII (stream, name, length);
19667 ASM_OUTPUT_ALIGN (stream, 2);
19668 x = GEN_INT ((unsigned HOST_WIDE_INT) 0xff000000 + alignlength);
19669 assemble_aligned_integer (UNITS_PER_WORD, x);
19672 /* Place some comments into the assembler stream
19673 describing the current function. */
19674 static void
19675 arm_output_function_prologue (FILE *f, HOST_WIDE_INT frame_size)
19677 unsigned long func_type;
19679 /* Sanity check. */
19680 gcc_assert (!arm_ccfsm_state && !arm_target_insn);
19682 func_type = arm_current_func_type ();
19684 switch ((int) ARM_FUNC_TYPE (func_type))
19686 default:
19687 case ARM_FT_NORMAL:
19688 break;
19689 case ARM_FT_INTERWORKED:
19690 asm_fprintf (f, "\t%@ Function supports interworking.\n");
19691 break;
19692 case ARM_FT_ISR:
19693 asm_fprintf (f, "\t%@ Interrupt Service Routine.\n");
19694 break;
19695 case ARM_FT_FIQ:
19696 asm_fprintf (f, "\t%@ Fast Interrupt Service Routine.\n");
19697 break;
19698 case ARM_FT_EXCEPTION:
19699 asm_fprintf (f, "\t%@ ARM Exception Handler.\n");
19700 break;
19703 if (IS_NAKED (func_type))
19704 asm_fprintf (f, "\t%@ Naked Function: prologue and epilogue provided by programmer.\n");
19706 if (IS_VOLATILE (func_type))
19707 asm_fprintf (f, "\t%@ Volatile: function does not return.\n");
19709 if (IS_NESTED (func_type))
19710 asm_fprintf (f, "\t%@ Nested: function declared inside another function.\n");
19711 if (IS_STACKALIGN (func_type))
19712 asm_fprintf (f, "\t%@ Stack Align: May be called with mis-aligned SP.\n");
19713 if (IS_CMSE_ENTRY (func_type))
19714 asm_fprintf (f, "\t%@ Non-secure entry function: called from non-secure code.\n");
19716 asm_fprintf (f, "\t%@ args = %d, pretend = %d, frame = %wd\n",
19717 crtl->args.size,
19718 crtl->args.pretend_args_size, frame_size);
19720 asm_fprintf (f, "\t%@ frame_needed = %d, uses_anonymous_args = %d\n",
19721 frame_pointer_needed,
19722 cfun->machine->uses_anonymous_args);
19724 if (cfun->machine->lr_save_eliminated)
19725 asm_fprintf (f, "\t%@ link register save eliminated.\n");
19727 if (crtl->calls_eh_return)
19728 asm_fprintf (f, "\t@ Calls __builtin_eh_return.\n");
19732 static void
19733 arm_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
19734 HOST_WIDE_INT frame_size ATTRIBUTE_UNUSED)
19736 arm_stack_offsets *offsets;
19738 if (TARGET_THUMB1)
19740 int regno;
19742 /* Emit any call-via-reg trampolines that are needed for v4t support
19743 of call_reg and call_value_reg type insns. */
19744 for (regno = 0; regno < LR_REGNUM; regno++)
19746 rtx label = cfun->machine->call_via[regno];
19748 if (label != NULL)
19750 switch_to_section (function_section (current_function_decl));
19751 targetm.asm_out.internal_label (asm_out_file, "L",
19752 CODE_LABEL_NUMBER (label));
19753 asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
19757 /* ??? Probably not safe to set this here, since it assumes that a
19758 function will be emitted as assembly immediately after we generate
19759 RTL for it. This does not happen for inline functions. */
19760 cfun->machine->return_used_this_function = 0;
19762 else /* TARGET_32BIT */
19764 /* We need to take into account any stack-frame rounding. */
19765 offsets = arm_get_frame_offsets ();
19767 gcc_assert (!use_return_insn (FALSE, NULL)
19768 || (cfun->machine->return_used_this_function != 0)
19769 || offsets->saved_regs == offsets->outgoing_args
19770 || frame_pointer_needed);
19774 /* Generate and emit a sequence of insns equivalent to PUSH, but using
19775 STR and STRD. If an even number of registers is being pushed, an
19776 STRD pattern is created for each register pair. If an odd number
19777 of registers is pushed, emit an initial STR followed by
19778 as many STRD instructions as are needed. This works best when the
19779 stack is initially 64-bit aligned (the normal case), since it
19780 ensures that each STRD is also 64-bit aligned. */
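/* As an illustration (the registers depend on the mask): pushing
   {r4, r5, r6} would come out roughly as

	str	r4, [sp, #-12]!
	strd	r5, r6, [sp, #4]

   where the single STR allocates the whole area so that the STRD stays
   64-bit aligned.  */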
19781 static void
19782 thumb2_emit_strd_push (unsigned long saved_regs_mask)
19784 int num_regs = 0;
19785 int i;
19786 int regno;
19787 rtx par = NULL_RTX;
19788 rtx dwarf = NULL_RTX;
19789 rtx tmp;
19790 bool first = true;
19792 num_regs = bit_count (saved_regs_mask);
19794 /* Must be at least one register to save, and can't save SP or PC. */
19795 gcc_assert (num_regs > 0 && num_regs <= 14);
19796 gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
19797 gcc_assert (!(saved_regs_mask & (1 << PC_REGNUM)));
19799 /* Create sequence for DWARF info. All the frame-related data for
19800 debugging is held in this wrapper. */
19801 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_regs + 1));
19803 /* Describe the stack adjustment. */
19804 tmp = gen_rtx_SET (stack_pointer_rtx,
19805 plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
19806 RTX_FRAME_RELATED_P (tmp) = 1;
19807 XVECEXP (dwarf, 0, 0) = tmp;
19809 /* Find the first register. */
19810 for (regno = 0; (saved_regs_mask & (1 << regno)) == 0; regno++)
19813 i = 0;
19815 /* If there's an odd number of registers to push, start off by
19816 pushing a single register. This ensures that subsequent strd
19817 operations are dword aligned (assuming that SP was originally
19818 64-bit aligned). */
19819 if ((num_regs & 1) != 0)
19821 rtx reg, mem, insn;
19823 reg = gen_rtx_REG (SImode, regno);
19824 if (num_regs == 1)
19825 mem = gen_frame_mem (Pmode, gen_rtx_PRE_DEC (Pmode,
19826 stack_pointer_rtx));
19827 else
19828 mem = gen_frame_mem (Pmode,
19829 gen_rtx_PRE_MODIFY
19830 (Pmode, stack_pointer_rtx,
19831 plus_constant (Pmode, stack_pointer_rtx,
19832 -4 * num_regs)));
19834 tmp = gen_rtx_SET (mem, reg);
19835 RTX_FRAME_RELATED_P (tmp) = 1;
19836 insn = emit_insn (tmp);
19837 RTX_FRAME_RELATED_P (insn) = 1;
19838 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
19839 tmp = gen_rtx_SET (gen_frame_mem (Pmode, stack_pointer_rtx), reg);
19840 RTX_FRAME_RELATED_P (tmp) = 1;
19841 i++;
19842 regno++;
19843 XVECEXP (dwarf, 0, i) = tmp;
19844 first = false;
19847 while (i < num_regs)
19848 if (saved_regs_mask & (1 << regno))
19850 rtx reg1, reg2, mem1, mem2;
19851 rtx tmp0, tmp1, tmp2;
19852 int regno2;
19854 /* Find the register to pair with this one. */
19855 for (regno2 = regno + 1; (saved_regs_mask & (1 << regno2)) == 0;
19856 regno2++)
19859 reg1 = gen_rtx_REG (SImode, regno);
19860 reg2 = gen_rtx_REG (SImode, regno2);
19862 if (first)
19864 rtx insn;
19866 first = false;
19867 mem1 = gen_frame_mem (Pmode, plus_constant (Pmode,
19868 stack_pointer_rtx,
19869 -4 * num_regs));
19870 mem2 = gen_frame_mem (Pmode, plus_constant (Pmode,
19871 stack_pointer_rtx,
19872 -4 * (num_regs - 1)));
19873 tmp0 = gen_rtx_SET (stack_pointer_rtx,
19874 plus_constant (Pmode, stack_pointer_rtx,
19875 -4 * (num_regs)));
19876 tmp1 = gen_rtx_SET (mem1, reg1);
19877 tmp2 = gen_rtx_SET (mem2, reg2);
19878 RTX_FRAME_RELATED_P (tmp0) = 1;
19879 RTX_FRAME_RELATED_P (tmp1) = 1;
19880 RTX_FRAME_RELATED_P (tmp2) = 1;
19881 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (3));
19882 XVECEXP (par, 0, 0) = tmp0;
19883 XVECEXP (par, 0, 1) = tmp1;
19884 XVECEXP (par, 0, 2) = tmp2;
19885 insn = emit_insn (par);
19886 RTX_FRAME_RELATED_P (insn) = 1;
19887 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
19889 else
19891 mem1 = gen_frame_mem (Pmode, plus_constant (Pmode,
19892 stack_pointer_rtx,
19893 4 * i));
19894 mem2 = gen_frame_mem (Pmode, plus_constant (Pmode,
19895 stack_pointer_rtx,
19896 4 * (i + 1)));
19897 tmp1 = gen_rtx_SET (mem1, reg1);
19898 tmp2 = gen_rtx_SET (mem2, reg2);
19899 RTX_FRAME_RELATED_P (tmp1) = 1;
19900 RTX_FRAME_RELATED_P (tmp2) = 1;
19901 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
19902 XVECEXP (par, 0, 0) = tmp1;
19903 XVECEXP (par, 0, 1) = tmp2;
19904 emit_insn (par);
19907 /* Create unwind information. This is an approximation. */
19908 tmp1 = gen_rtx_SET (gen_frame_mem (Pmode,
19909 plus_constant (Pmode,
19910 stack_pointer_rtx,
19911 4 * i)),
19912 reg1);
19913 tmp2 = gen_rtx_SET (gen_frame_mem (Pmode,
19914 plus_constant (Pmode,
19915 stack_pointer_rtx,
19916 4 * (i + 1))),
19917 reg2);
19919 RTX_FRAME_RELATED_P (tmp1) = 1;
19920 RTX_FRAME_RELATED_P (tmp2) = 1;
19921 XVECEXP (dwarf, 0, i + 1) = tmp1;
19922 XVECEXP (dwarf, 0, i + 2) = tmp2;
19923 i += 2;
19924 regno = regno2 + 1;
19926 else
19927 regno++;
19929 return;
19932 /* STRD in ARM mode requires consecutive registers. This function emits STRD
19933 whenever possible, otherwise it emits single-word stores. The first store
19934 also allocates stack space for all saved registers, using writeback with
19935 post-addressing mode. All other stores use offset addressing. If no STRD
19936 can be emitted, this function emits a sequence of single-word stores,
19937 and not an STM as before, because single-word stores give the scheduler more
19938 freedom and can be turned into an STM by peephole optimizations. */
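/* As an illustration (the registers depend on the mask): a mask of
   {r4, r5, r7} would be emitted roughly as

	strd	r4, r5, [sp, #-12]!
	str	r7, [sp, #8]

   with the first store performing the writeback that allocates space for
   all three registers.  */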
19939 static void
19940 arm_emit_strd_push (unsigned long saved_regs_mask)
19942 int num_regs = 0;
19943 int i, j, dwarf_index = 0;
19944 int offset = 0;
19945 rtx dwarf = NULL_RTX;
19946 rtx insn = NULL_RTX;
19947 rtx tmp, mem;
19949 /* TODO: More efficient code can be emitted by changing the
19950 layout, e.g., first push all pairs that can use STRD to keep the
19951 stack aligned, and then push all other registers. */
19952 for (i = 0; i <= LAST_ARM_REGNUM; i++)
19953 if (saved_regs_mask & (1 << i))
19954 num_regs++;
19956 gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
19957 gcc_assert (!(saved_regs_mask & (1 << PC_REGNUM)));
19958 gcc_assert (num_regs > 0);
19960 /* Create sequence for DWARF info. */
19961 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_regs + 1));
19963 /* For dwarf info, we generate explicit stack update. */
19964 tmp = gen_rtx_SET (stack_pointer_rtx,
19965 plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
19966 RTX_FRAME_RELATED_P (tmp) = 1;
19967 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
19969 /* Save registers. */
19970 offset = - 4 * num_regs;
19971 j = 0;
19972 while (j <= LAST_ARM_REGNUM)
19973 if (saved_regs_mask & (1 << j))
19975 if ((j % 2 == 0)
19976 && (saved_regs_mask & (1 << (j + 1))))
19978 /* The current register and the next register form a register pair
19979 for which STRD can be generated. */
19980 if (offset < 0)
19982 /* Allocate stack space for all saved registers. */
19983 tmp = plus_constant (Pmode, stack_pointer_rtx, offset);
19984 tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
19985 mem = gen_frame_mem (DImode, tmp);
19986 offset = 0;
19988 else if (offset > 0)
19989 mem = gen_frame_mem (DImode,
19990 plus_constant (Pmode,
19991 stack_pointer_rtx,
19992 offset));
19993 else
19994 mem = gen_frame_mem (DImode, stack_pointer_rtx);
19996 tmp = gen_rtx_SET (mem, gen_rtx_REG (DImode, j));
19997 RTX_FRAME_RELATED_P (tmp) = 1;
19998 tmp = emit_insn (tmp);
20000 /* Record the first store insn. */
20001 if (dwarf_index == 1)
20002 insn = tmp;
20004 /* Generate dwarf info. */
20005 mem = gen_frame_mem (SImode,
20006 plus_constant (Pmode,
20007 stack_pointer_rtx,
20008 offset));
20009 tmp = gen_rtx_SET (mem, gen_rtx_REG (SImode, j));
20010 RTX_FRAME_RELATED_P (tmp) = 1;
20011 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
20013 mem = gen_frame_mem (SImode,
20014 plus_constant (Pmode,
20015 stack_pointer_rtx,
20016 offset + 4));
20017 tmp = gen_rtx_SET (mem, gen_rtx_REG (SImode, j + 1));
20018 RTX_FRAME_RELATED_P (tmp) = 1;
20019 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
20021 offset += 8;
20022 j += 2;
20024 else
20026 /* Emit a single word store. */
20027 if (offset < 0)
20029 /* Allocate stack space for all saved registers. */
20030 tmp = plus_constant (Pmode, stack_pointer_rtx, offset);
20031 tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
20032 mem = gen_frame_mem (SImode, tmp);
20033 offset = 0;
20035 else if (offset > 0)
20036 mem = gen_frame_mem (SImode,
20037 plus_constant (Pmode,
20038 stack_pointer_rtx,
20039 offset));
20040 else
20041 mem = gen_frame_mem (SImode, stack_pointer_rtx);
20043 tmp = gen_rtx_SET (mem, gen_rtx_REG (SImode, j));
20044 RTX_FRAME_RELATED_P (tmp) = 1;
20045 tmp = emit_insn (tmp);
20047 /* Record the first store insn. */
20048 if (dwarf_index == 1)
20049 insn = tmp;
20051 /* Generate dwarf info. */
20052 mem = gen_frame_mem (SImode,
20053 plus_constant(Pmode,
20054 stack_pointer_rtx,
20055 offset));
20056 tmp = gen_rtx_SET (mem, gen_rtx_REG (SImode, j));
20057 RTX_FRAME_RELATED_P (tmp) = 1;
20058 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
20060 offset += 4;
20061 j += 1;
20064 else
20065 j++;
20067 /* Attach dwarf info to the first insn we generate. */
20068 gcc_assert (insn != NULL_RTX);
20069 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
20070 RTX_FRAME_RELATED_P (insn) = 1;
20073 /* Generate and emit an insn that we will recognize as a push_multi.
20074 Unfortunately, since this insn does not reflect very well the actual
20075 semantics of the operation, we need to annotate the insn for the benefit
20076 of DWARF2 frame unwind information. DWARF_REGS_MASK is a subset of
20077 MASK for registers that should be annotated for DWARF2 frame unwind
20078 information. */
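/* For example, MASK = {r4, r5, lr} generates the UNSPEC/USE parallel that
   the push_multi pattern prints as a single  push {r4, r5, lr},  while the
   attached note describes the 12-byte SP decrement and the three
   individual stores for the unwinder.  */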
20079 static rtx
20080 emit_multi_reg_push (unsigned long mask, unsigned long dwarf_regs_mask)
20082 int num_regs = 0;
20083 int num_dwarf_regs = 0;
20084 int i, j;
20085 rtx par;
20086 rtx dwarf;
20087 int dwarf_par_index;
20088 rtx tmp, reg;
20090 /* We don't record the PC in the dwarf frame information. */
20091 dwarf_regs_mask &= ~(1 << PC_REGNUM);
20093 for (i = 0; i <= LAST_ARM_REGNUM; i++)
20095 if (mask & (1 << i))
20096 num_regs++;
20097 if (dwarf_regs_mask & (1 << i))
20098 num_dwarf_regs++;
20101 gcc_assert (num_regs && num_regs <= 16);
20102 gcc_assert ((dwarf_regs_mask & ~mask) == 0);
20104 /* For the body of the insn we are going to generate an UNSPEC in
20105 parallel with several USEs. This allows the insn to be recognized
20106 by the push_multi pattern in the arm.md file.
20108 The body of the insn looks something like this:
20110 (parallel [
20111 (set (mem:BLK (pre_modify:SI (reg:SI sp)
20112 (const_int:SI <num>)))
20113 (unspec:BLK [(reg:SI r4)] UNSPEC_PUSH_MULT))
20114 (use (reg:SI XX))
20115 (use (reg:SI YY))
20119 For the frame note however, we try to be more explicit and actually
20120 show each register being stored into the stack frame, plus a (single)
20121 decrement of the stack pointer. We do it this way in order to be
20122 friendly to the stack unwinding code, which only wants to see a single
20123 stack decrement per instruction. The RTL we generate for the note looks
20124 something like this:
20126 (sequence [
20127 (set (reg:SI sp) (plus:SI (reg:SI sp) (const_int -20)))
20128 (set (mem:SI (reg:SI sp)) (reg:SI r4))
20129 (set (mem:SI (plus:SI (reg:SI sp) (const_int 4))) (reg:SI XX))
20130 (set (mem:SI (plus:SI (reg:SI sp) (const_int 8))) (reg:SI YY))
20134 FIXME: In an ideal world the PRE_MODIFY would not exist and
20135 instead we'd have a parallel expression detailing all
20136 the stores to the various memory addresses so that debug
20137 information is more up-to-date. Remember however while writing
20138 this to take care of the constraints with the push instruction.
20140 Note also that this has to be taken care of for the VFP registers.
20142 For more see PR43399. */
20144 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs));
20145 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_dwarf_regs + 1));
20146 dwarf_par_index = 1;
20148 for (i = 0; i <= LAST_ARM_REGNUM; i++)
20150 if (mask & (1 << i))
20152 reg = gen_rtx_REG (SImode, i);
20154 XVECEXP (par, 0, 0)
20155 = gen_rtx_SET (gen_frame_mem
20156 (BLKmode,
20157 gen_rtx_PRE_MODIFY (Pmode,
20158 stack_pointer_rtx,
20159 plus_constant
20160 (Pmode, stack_pointer_rtx,
20161 -4 * num_regs))
20163 gen_rtx_UNSPEC (BLKmode,
20164 gen_rtvec (1, reg),
20165 UNSPEC_PUSH_MULT));
20167 if (dwarf_regs_mask & (1 << i))
20169 tmp = gen_rtx_SET (gen_frame_mem (SImode, stack_pointer_rtx),
20170 reg);
20171 RTX_FRAME_RELATED_P (tmp) = 1;
20172 XVECEXP (dwarf, 0, dwarf_par_index++) = tmp;
20175 break;
20179 for (j = 1, i++; j < num_regs; i++)
20181 if (mask & (1 << i))
20183 reg = gen_rtx_REG (SImode, i);
20185 XVECEXP (par, 0, j) = gen_rtx_USE (VOIDmode, reg);
20187 if (dwarf_regs_mask & (1 << i))
20190 = gen_rtx_SET (gen_frame_mem
20191 (SImode,
20192 plus_constant (Pmode, stack_pointer_rtx,
20193 4 * j)),
20194 reg);
20195 RTX_FRAME_RELATED_P (tmp) = 1;
20196 XVECEXP (dwarf, 0, dwarf_par_index++) = tmp;
20199 j++;
20203 par = emit_insn (par);
20205 tmp = gen_rtx_SET (stack_pointer_rtx,
20206 plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
20207 RTX_FRAME_RELATED_P (tmp) = 1;
20208 XVECEXP (dwarf, 0, 0) = tmp;
20210 add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
20212 return par;
20215 /* Add a REG_CFA_ADJUST_CFA REG note to INSN.
20216 SIZE is the offset to be adjusted.
20217 DEST and SRC might be stack_pointer_rtx or hard_frame_pointer_rtx. */
20218 static void
20219 arm_add_cfa_adjust_cfa_note (rtx insn, int size, rtx dest, rtx src)
20221 rtx dwarf;
20223 RTX_FRAME_RELATED_P (insn) = 1;
20224 dwarf = gen_rtx_SET (dest, plus_constant (Pmode, src, size));
20225 add_reg_note (insn, REG_CFA_ADJUST_CFA, dwarf);
20228 /* Generate and emit an insn pattern that we will recognize as a pop_multi.
20229 SAVED_REGS_MASK shows which registers need to be restored.
20231 Unfortunately, since this insn does not reflect very well the actual
20232 semantics of the operation, we need to annotate the insn for the benefit
20233 of DWARF2 frame unwind information. */
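/* For example, SAVED_REGS_MASK = {r4, r5, pc} builds a parallel holding
   the return, the 12-byte SP adjustment and the three loads, which is
   recognized and printed as  pop {r4, r5, pc},  returning at the same
   time.  */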
20234 static void
20235 arm_emit_multi_reg_pop (unsigned long saved_regs_mask)
20237 int num_regs = 0;
20238 int i, j;
20239 rtx par;
20240 rtx dwarf = NULL_RTX;
20241 rtx tmp, reg;
20242 bool return_in_pc = saved_regs_mask & (1 << PC_REGNUM);
20243 int offset_adj;
20244 int emit_update;
20246 offset_adj = return_in_pc ? 1 : 0;
20247 for (i = 0; i <= LAST_ARM_REGNUM; i++)
20248 if (saved_regs_mask & (1 << i))
20249 num_regs++;
20251 gcc_assert (num_regs && num_regs <= 16);
20253 /* If SP is in reglist, then we don't emit SP update insn. */
20254 emit_update = (saved_regs_mask & (1 << SP_REGNUM)) ? 0 : 1;
20256 /* The parallel needs to hold num_regs SETs
20257 and one SET for the stack update. */
20258 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs + emit_update + offset_adj));
20260 if (return_in_pc)
20261 XVECEXP (par, 0, 0) = ret_rtx;
20263 if (emit_update)
20265 /* Increment the stack pointer, based on there being
20266 num_regs 4-byte registers to restore. */
20267 tmp = gen_rtx_SET (stack_pointer_rtx,
20268 plus_constant (Pmode,
20269 stack_pointer_rtx,
20270 4 * num_regs));
20271 RTX_FRAME_RELATED_P (tmp) = 1;
20272 XVECEXP (par, 0, offset_adj) = tmp;
20275 /* Now restore every reg, which may include PC. */
20276 for (j = 0, i = 0; j < num_regs; i++)
20277 if (saved_regs_mask & (1 << i))
20279 reg = gen_rtx_REG (SImode, i);
20280 if ((num_regs == 1) && emit_update && !return_in_pc)
20282 /* Emit single load with writeback. */
20283 tmp = gen_frame_mem (SImode,
20284 gen_rtx_POST_INC (Pmode,
20285 stack_pointer_rtx));
20286 tmp = emit_insn (gen_rtx_SET (reg, tmp));
20287 REG_NOTES (tmp) = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20288 return;
20291 tmp = gen_rtx_SET (reg,
20292 gen_frame_mem
20293 (SImode,
20294 plus_constant (Pmode, stack_pointer_rtx, 4 * j)));
20295 RTX_FRAME_RELATED_P (tmp) = 1;
20296 XVECEXP (par, 0, j + emit_update + offset_adj) = tmp;
20298 /* We need to maintain a sequence for DWARF info too. As dwarf info
20299 should not have PC, skip PC. */
20300 if (i != PC_REGNUM)
20301 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20303 j++;
20306 if (return_in_pc)
20307 par = emit_jump_insn (par);
20308 else
20309 par = emit_insn (par);
20311 REG_NOTES (par) = dwarf;
20312 if (!return_in_pc)
20313 arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD * num_regs,
20314 stack_pointer_rtx, stack_pointer_rtx);
20317 /* Generate and emit an insn pattern that we will recognize as a pop_multi
20318 of NUM_REGS consecutive VFP regs, starting at FIRST_REG.
20320 Unfortunately, since this insn does not reflect very well the actual
20321 semantics of the operation, we need to annotate the insn for the benefit
20322 of DWARF2 frame unwind information. */
20323 static void
20324 arm_emit_vfp_multi_reg_pop (int first_reg, int num_regs, rtx base_reg)
20326 int i, j;
20327 rtx par;
20328 rtx dwarf = NULL_RTX;
20329 rtx tmp, reg;
20331 gcc_assert (num_regs && num_regs <= 32);
20333 /* Workaround ARM10 VFPr1 bug. */
20334 if (num_regs == 2 && !arm_arch6)
20336 if (first_reg == 15)
20337 first_reg--;
20339 num_regs++;
20342 /* We can emit at most 16 D-registers in a single pop_multi instruction, and
20343 there could be up to 32 D-registers to restore.
20344 If there are more than 16 D-registers, make two recursive calls,
20345 each of which emits one pop_multi instruction. */
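/* E.g. NUM_REGS == 18 is emitted as one pop_multi of 16 D registers
   followed by a second one restoring the remaining 2.  */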
20346 if (num_regs > 16)
20348 arm_emit_vfp_multi_reg_pop (first_reg, 16, base_reg);
20349 arm_emit_vfp_multi_reg_pop (first_reg + 16, num_regs - 16, base_reg);
20350 return;
20353 /* The parallel needs to hold num_regs SETs
20354 and one SET for the stack update. */
20355 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs + 1));
20357 /* Increment the stack pointer, based on there being
20358 num_regs 8-byte registers to restore. */
20359 tmp = gen_rtx_SET (base_reg, plus_constant (Pmode, base_reg, 8 * num_regs));
20360 RTX_FRAME_RELATED_P (tmp) = 1;
20361 XVECEXP (par, 0, 0) = tmp;
20363 /* Now show every reg that will be restored, using a SET for each. */
20364 for (j = 0, i=first_reg; j < num_regs; i += 2)
20366 reg = gen_rtx_REG (DFmode, i);
20368 tmp = gen_rtx_SET (reg,
20369 gen_frame_mem
20370 (DFmode,
20371 plus_constant (Pmode, base_reg, 8 * j)));
20372 RTX_FRAME_RELATED_P (tmp) = 1;
20373 XVECEXP (par, 0, j + 1) = tmp;
20375 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20377 j++;
20380 par = emit_insn (par);
20381 REG_NOTES (par) = dwarf;
20383 /* Make sure the CFA is not left based on IP_REGNUM, to allow unwinding from FP. */
20384 if (REGNO (base_reg) == IP_REGNUM)
20386 RTX_FRAME_RELATED_P (par) = 1;
20387 add_reg_note (par, REG_CFA_DEF_CFA, hard_frame_pointer_rtx);
20389 else
20390 arm_add_cfa_adjust_cfa_note (par, 2 * UNITS_PER_WORD * num_regs,
20391 base_reg, base_reg);
20394 /* Generate and emit a pattern that will be recognized as an LDRD pattern.
20395 If an even number of registers is being popped, LDRD patterns are
20396 created for all register pairs. If an odd number of registers is
20397 popped, the last register is loaded using an LDR pattern. */
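/* As an illustration (the registers depend on the mask): popping
   {r4, r5, r6} would be emitted roughly as

	ldrd	r4, r5, [sp]
	add	sp, sp, #8
	ldr	r6, [sp], #4

   i.e. one LDRD for the pair, an explicit stack update, then the odd
   register with a post-increment load.  */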
20398 static void
20399 thumb2_emit_ldrd_pop (unsigned long saved_regs_mask)
20401 int num_regs = 0;
20402 int i, j;
20403 rtx par = NULL_RTX;
20404 rtx dwarf = NULL_RTX;
20405 rtx tmp, reg, tmp1;
20406 bool return_in_pc = saved_regs_mask & (1 << PC_REGNUM);
20408 for (i = 0; i <= LAST_ARM_REGNUM; i++)
20409 if (saved_regs_mask & (1 << i))
20410 num_regs++;
20412 gcc_assert (num_regs && num_regs <= 16);
20414 /* We cannot generate ldrd for PC. Hence, reduce the count if PC is
20415 to be popped. So, if num_regs is even, now it will become odd,
20416 and we can generate pop with PC. If num_regs is odd, it will be
20417 even now, and ldr with return can be generated for PC. */
20418 if (return_in_pc)
20419 num_regs--;
20421 gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
20423 /* Var j iterates over all the registers to gather all the registers in
20424 saved_regs_mask. Var i gives index of saved registers in stack frame.
20425 A PARALLEL RTX of register-pair is created here, so that pattern for
20426 LDRD can be matched. As PC is always last register to be popped, and
20427 we have already decremented num_regs if PC, we don't have to worry
20428 about PC in this loop. */
20429 for (i = 0, j = 0; i < (num_regs - (num_regs % 2)); j++)
20430 if (saved_regs_mask & (1 << j))
20432 /* Create RTX for memory load. */
20433 reg = gen_rtx_REG (SImode, j);
20434 tmp = gen_rtx_SET (reg,
20435 gen_frame_mem (SImode,
20436 plus_constant (Pmode,
20437 stack_pointer_rtx, 4 * i)));
20438 RTX_FRAME_RELATED_P (tmp) = 1;
20440 if (i % 2 == 0)
20442 /* When saved-register index (i) is even, the RTX to be emitted is
20443 yet to be created. Hence create it first. The LDRD pattern we
20444 are generating is :
20445 [ (SET (reg_t0) (MEM (PLUS (SP) (NUM))))
20446 (SET (reg_t1) (MEM (PLUS (SP) (NUM + 4)))) ]
20447 where target registers need not be consecutive. */
20448 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
20449 dwarf = NULL_RTX;
20452 /* ith register is added in PARALLEL RTX. If i is even, the reg_i is
20453 added as 0th element and if i is odd, reg_i is added as 1st element
20454 of LDRD pattern shown above. */
20455 XVECEXP (par, 0, (i % 2)) = tmp;
20456 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20458 if ((i % 2) == 1)
20460 /* When saved-register index (i) is odd, RTXs for both the registers
20461 to be loaded are generated in above given LDRD pattern, and the
20462 pattern can be emitted now. */
20463 par = emit_insn (par);
20464 REG_NOTES (par) = dwarf;
20465 RTX_FRAME_RELATED_P (par) = 1;
20468 i++;
20471 /* If the number of registers popped is odd and return_in_pc is false, or
20472 the number of registers is even and return_in_pc is true, the last
20473 register is popped using LDR. It can be PC as well. Hence, adjust the stack first and
20474 then LDR with post increment. */
20476 /* Increment the stack pointer, based on there being
20477 num_regs 4-byte registers to restore. */
20478 tmp = gen_rtx_SET (stack_pointer_rtx,
20479 plus_constant (Pmode, stack_pointer_rtx, 4 * i));
20480 RTX_FRAME_RELATED_P (tmp) = 1;
20481 tmp = emit_insn (tmp);
20482 if (!return_in_pc)
20484 arm_add_cfa_adjust_cfa_note (tmp, UNITS_PER_WORD * i,
20485 stack_pointer_rtx, stack_pointer_rtx);
20488 dwarf = NULL_RTX;
20490 if (((num_regs % 2) == 1 && !return_in_pc)
20491 || ((num_regs % 2) == 0 && return_in_pc))
20493 /* Scan for the single register to be popped. Skip until the saved
20494 register is found. */
20495 for (; (saved_regs_mask & (1 << j)) == 0; j++);
20497 /* Gen LDR with post increment here. */
20498 tmp1 = gen_rtx_MEM (SImode,
20499 gen_rtx_POST_INC (SImode,
20500 stack_pointer_rtx));
20501 set_mem_alias_set (tmp1, get_frame_alias_set ());
20503 reg = gen_rtx_REG (SImode, j);
20504 tmp = gen_rtx_SET (reg, tmp1);
20505 RTX_FRAME_RELATED_P (tmp) = 1;
20506 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20508 if (return_in_pc)
20510 /* If return_in_pc, j must be PC_REGNUM. */
20511 gcc_assert (j == PC_REGNUM);
20512 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
20513 XVECEXP (par, 0, 0) = ret_rtx;
20514 XVECEXP (par, 0, 1) = tmp;
20515 par = emit_jump_insn (par);
20517 else
20519 par = emit_insn (tmp);
20520 REG_NOTES (par) = dwarf;
20521 arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD,
20522 stack_pointer_rtx, stack_pointer_rtx);
20526 else if ((num_regs % 2) == 1 && return_in_pc)
20528 /* There are 2 registers to be popped. So, generate the pattern
20529 pop_multiple_with_stack_update_and_return to pop in PC. */
20530 arm_emit_multi_reg_pop (saved_regs_mask & (~((1 << j) - 1)));
20533 return;
20536 /* LDRD in ARM mode needs consecutive registers as operands. This function
20537 emits LDRD whenever possible, otherwise it emits single-word loads. It uses
20538 offset addressing and then generates one separate stack update. This provides
20539 more scheduling freedom, compared to writeback on every load. However,
20540 if the function returns using load into PC directly
20541 (i.e., if PC is in SAVED_REGS_MASK), the stack needs to be updated
20542 before the last load. TODO: Add a peephole optimization to recognize
20543 the new epilogue sequence as an LDM instruction whenever possible. TODO: Add
20544 peephole optimization to merge the load at stack-offset zero
20545 with the stack update instruction using load with writeback
20546 in post-index addressing mode. */
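/* As an illustration (the registers depend on the mask): popping
   {r4, r5, r6, pc} would be emitted roughly as

	ldrd	r4, r5, [sp]
	ldr	r6, [sp, #8]
	add	sp, sp, #12
	ldr	pc, [sp], #4

   where the stack is brought up to date before the final load that
   returns through PC.  */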
20547 static void
20548 arm_emit_ldrd_pop (unsigned long saved_regs_mask)
20550 int j = 0;
20551 int offset = 0;
20552 rtx par = NULL_RTX;
20553 rtx dwarf = NULL_RTX;
20554 rtx tmp, mem;
20556 /* Restore saved registers. */
20557 gcc_assert (!((saved_regs_mask & (1 << SP_REGNUM))));
20558 j = 0;
20559 while (j <= LAST_ARM_REGNUM)
20560 if (saved_regs_mask & (1 << j))
20562 if ((j % 2) == 0
20563 && (saved_regs_mask & (1 << (j + 1)))
20564 && (j + 1) != PC_REGNUM)
20566 /* The current register and the next register form a register pair for
20567 which LDRD can be generated. PC is always the last register popped, and
20568 we handle it separately. */
20569 if (offset > 0)
20570 mem = gen_frame_mem (DImode,
20571 plus_constant (Pmode,
20572 stack_pointer_rtx,
20573 offset));
20574 else
20575 mem = gen_frame_mem (DImode, stack_pointer_rtx);
20577 tmp = gen_rtx_SET (gen_rtx_REG (DImode, j), mem);
20578 tmp = emit_insn (tmp);
20579 RTX_FRAME_RELATED_P (tmp) = 1;
20581 /* Generate dwarf info. */
20583 dwarf = alloc_reg_note (REG_CFA_RESTORE,
20584 gen_rtx_REG (SImode, j),
20585 NULL_RTX);
20586 dwarf = alloc_reg_note (REG_CFA_RESTORE,
20587 gen_rtx_REG (SImode, j + 1),
20588 dwarf);
20590 REG_NOTES (tmp) = dwarf;
20592 offset += 8;
20593 j += 2;
20595 else if (j != PC_REGNUM)
20597 /* Emit a single word load. */
20598 if (offset > 0)
20599 mem = gen_frame_mem (SImode,
20600 plus_constant (Pmode,
20601 stack_pointer_rtx,
20602 offset));
20603 else
20604 mem = gen_frame_mem (SImode, stack_pointer_rtx);
20606 tmp = gen_rtx_SET (gen_rtx_REG (SImode, j), mem);
20607 tmp = emit_insn (tmp);
20608 RTX_FRAME_RELATED_P (tmp) = 1;
20610 /* Generate dwarf info. */
20611 REG_NOTES (tmp) = alloc_reg_note (REG_CFA_RESTORE,
20612 gen_rtx_REG (SImode, j),
20613 NULL_RTX);
20615 offset += 4;
20616 j += 1;
20618 else /* j == PC_REGNUM */
20619 j++;
20621 else
20622 j++;
20624 /* Update the stack. */
20625 if (offset > 0)
20627 tmp = gen_rtx_SET (stack_pointer_rtx,
20628 plus_constant (Pmode,
20629 stack_pointer_rtx,
20630 offset));
20631 tmp = emit_insn (tmp);
20632 arm_add_cfa_adjust_cfa_note (tmp, offset,
20633 stack_pointer_rtx, stack_pointer_rtx);
20634 offset = 0;
20637 if (saved_regs_mask & (1 << PC_REGNUM))
20639 /* Only PC is to be popped. */
20640 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
20641 XVECEXP (par, 0, 0) = ret_rtx;
20642 tmp = gen_rtx_SET (gen_rtx_REG (SImode, PC_REGNUM),
20643 gen_frame_mem (SImode,
20644 gen_rtx_POST_INC (SImode,
20645 stack_pointer_rtx)));
20646 RTX_FRAME_RELATED_P (tmp) = 1;
20647 XVECEXP (par, 0, 1) = tmp;
20648 par = emit_jump_insn (par);
20650 /* Generate dwarf info. */
20651 dwarf = alloc_reg_note (REG_CFA_RESTORE,
20652 gen_rtx_REG (SImode, PC_REGNUM),
20653 NULL_RTX);
20654 REG_NOTES (par) = dwarf;
20655 arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD,
20656 stack_pointer_rtx, stack_pointer_rtx);
20660 /* Calculate the size of the return value that is passed in registers. */
20661 static unsigned
20662 arm_size_return_regs (void)
20664 machine_mode mode;
20666 if (crtl->return_rtx != 0)
20667 mode = GET_MODE (crtl->return_rtx);
20668 else
20669 mode = DECL_MODE (DECL_RESULT (current_function_decl));
20671 return GET_MODE_SIZE (mode);
20674 /* Return true if the current function needs to save/restore LR. */
20675 static bool
20676 thumb_force_lr_save (void)
20678 return !cfun->machine->lr_save_eliminated
20679 && (!crtl->is_leaf
20680 || thumb_far_jump_used_p ()
20681 || df_regs_ever_live_p (LR_REGNUM));
20684 /* We do not know whether r3 will be available, because
20685 an indirect tailcall may be happening in this
20686 particular case. */
20687 static bool
20688 is_indirect_tailcall_p (rtx call)
20690 rtx pat = PATTERN (call);
20692 /* Indirect tail call. */
20693 pat = XVECEXP (pat, 0, 0);
20694 if (GET_CODE (pat) == SET)
20695 pat = SET_SRC (pat);
20697 pat = XEXP (XEXP (pat, 0), 0);
20698 return REG_P (pat);
20701 /* Return true if r3 is used by any of the tail call insns in the
20702 current function. */
20703 static bool
20704 any_sibcall_could_use_r3 (void)
20706 edge_iterator ei;
20707 edge e;
20709 if (!crtl->tail_call_emit)
20710 return false;
20711 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
20712 if (e->flags & EDGE_SIBCALL)
20714 rtx_insn *call = BB_END (e->src);
20715 if (!CALL_P (call))
20716 call = prev_nonnote_nondebug_insn (call);
20717 gcc_assert (CALL_P (call) && SIBLING_CALL_P (call));
20718 if (find_regno_fusage (call, USE, 3)
20719 || is_indirect_tailcall_p (call))
20720 return true;
20722 return false;
20726 /* Compute the distance from register FROM to register TO.
20727 These can be the arg pointer (26), the soft frame pointer (25),
20728 the stack pointer (13) or the hard frame pointer (11).
20729 In thumb mode r7 is used as the soft frame pointer, if needed.
20730 Typical stack layout looks like this:
20732 old stack pointer -> | |
20733 ----
20734 | | \
20735 | | saved arguments for
20736 | | vararg functions
20737 | | /
20739 hard FP & arg pointer -> | | \
20740 | | stack
20741 | | frame
20742 | | /
20744 | | \
20745 | | call saved
20746 | | registers
20747 soft frame pointer -> | | /
20749 | | \
20750 | | local
20751 | | variables
20752 locals base pointer -> | | /
20754 | | \
20755 | | outgoing
20756 | | arguments
20757 current stack pointer -> | | /
20760 For a given function some or all of these stack components
20761 may not be needed, giving rise to the possibility of
20762 eliminating some of the registers.
20764 The values returned by this function must reflect the behavior
20765 of arm_expand_prologue() and arm_compute_save_reg_mask().
20767 The sign of the number returned reflects the direction of stack
20768 growth, so the values are positive for all eliminations except
20769 from the soft frame pointer to the hard frame pointer.
20771 SFP may point just inside the local variables block to ensure correct
20772 alignment. */
20775 /* Calculate stack offsets. These are used to calculate register elimination
20776 offsets and in prologue/epilogue code. Also calculates which registers
20777 should be saved. */
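/* A rough worked example (figures assume a 32-bit target, no static chain,
   no pretend args, no interworking slot, no outgoing arguments and no
   frame pointer): a function saving only {r4, lr} with 16 bytes of locals
   gets saved_args = 0, saved_regs = 8, soft_frame = 8, locals_base = 24
   and outgoing_args = 24; the ARG_POINTER to STACK_POINTER elimination
   offset below is then 24 - (0 + 4) = 20.  */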
20779 static arm_stack_offsets *
20780 arm_get_frame_offsets (void)
20782 struct arm_stack_offsets *offsets;
20783 unsigned long func_type;
20784 int saved;
20785 int core_saved;
20786 HOST_WIDE_INT frame_size;
20787 int i;
20789 offsets = &cfun->machine->stack_offsets;
20791 if (reload_completed)
20792 return offsets;
20794 /* Initially this is the size of the local variables. It will be translated
20795 into an offset once we have determined the size of preceding data. */
20796 frame_size = ROUND_UP_WORD (get_frame_size ());
20798 /* Space for variadic functions. */
20799 offsets->saved_args = crtl->args.pretend_args_size;
20801 /* In Thumb mode this is incorrect, but never used. */
20802 offsets->frame
20803 = (offsets->saved_args
20804 + arm_compute_static_chain_stack_bytes ()
20805 + (frame_pointer_needed ? 4 : 0));
20807 if (TARGET_32BIT)
20809 unsigned int regno;
20811 offsets->saved_regs_mask = arm_compute_save_reg_mask ();
20812 core_saved = bit_count (offsets->saved_regs_mask) * 4;
20813 saved = core_saved;
20815 /* We know that SP will be doubleword aligned on entry, and we must
20816 preserve that condition at any subroutine call. We also require the
20817 soft frame pointer to be doubleword aligned. */
20819 if (TARGET_REALLY_IWMMXT)
20821 /* Check for the call-saved iWMMXt registers. */
20822 for (regno = FIRST_IWMMXT_REGNUM;
20823 regno <= LAST_IWMMXT_REGNUM;
20824 regno++)
20825 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
20826 saved += 8;
20829 func_type = arm_current_func_type ();
20830 /* Space for saved VFP registers. */
20831 if (! IS_VOLATILE (func_type)
20832 && TARGET_HARD_FLOAT)
20833 saved += arm_get_vfp_saved_size ();
20835 else /* TARGET_THUMB1 */
20837 offsets->saved_regs_mask = thumb1_compute_save_reg_mask ();
20838 core_saved = bit_count (offsets->saved_regs_mask) * 4;
20839 saved = core_saved;
20840 if (TARGET_BACKTRACE)
20841 saved += 16;
20844 /* Saved registers include the stack frame. */
20845 offsets->saved_regs
20846 = offsets->saved_args + arm_compute_static_chain_stack_bytes () + saved;
20847 offsets->soft_frame = offsets->saved_regs + CALLER_INTERWORKING_SLOT_SIZE;
20849 /* A leaf function does not need any stack alignment if it has nothing
20850 on the stack. */
20851 if (crtl->is_leaf && frame_size == 0
20852 /* However if it calls alloca(), we have a dynamically allocated
20853 block of BIGGEST_ALIGNMENT on stack, so still do stack alignment. */
20854 && ! cfun->calls_alloca)
20856 offsets->outgoing_args = offsets->soft_frame;
20857 offsets->locals_base = offsets->soft_frame;
20858 return offsets;
20861 /* Ensure SFP has the correct alignment. */
20862 if (ARM_DOUBLEWORD_ALIGN
20863 && (offsets->soft_frame & 7))
20865 offsets->soft_frame += 4;
20866 /* Try to align stack by pushing an extra reg. Don't bother doing this
20867 when there is a stack frame as the alignment will be rolled into
20868 the normal stack adjustment. */
20869 if (frame_size + crtl->outgoing_args_size == 0)
20871 int reg = -1;
20873 /* Register r3 is caller-saved. Normally it does not need to be
20874 saved on entry by the prologue. However if we choose to save
20875 it for padding then we may confuse the compiler into thinking
20876 a prologue sequence is required when in fact it is not. This
20877 will occur when shrink-wrapping if r3 is used as a scratch
20878 register and there are no other callee-saved writes.
20880 This situation can be avoided when other callee-saved registers
20881 are available: r3 is then not mandatory, and we can choose a
20882 callee-saved register for padding instead. */
20883 bool prefer_callee_reg_p = false;
20885 /* If it is safe to use r3, then do so. This sometimes
20886 generates better code on Thumb-2 by avoiding the need to
20887 use 32-bit push/pop instructions. */
20888 if (! any_sibcall_could_use_r3 ()
20889 && arm_size_return_regs () <= 12
20890 && (offsets->saved_regs_mask & (1 << 3)) == 0
20891 && (TARGET_THUMB2
20892 || !(TARGET_LDRD && current_tune->prefer_ldrd_strd)))
20894 reg = 3;
20895 if (!TARGET_THUMB2)
20896 prefer_callee_reg_p = true;
20898 if (reg == -1
20899 || prefer_callee_reg_p)
20901 for (i = 4; i <= (TARGET_THUMB1 ? LAST_LO_REGNUM : 11); i++)
20903 /* Avoid fixed registers; they may be changed at
20904 arbitrary times so it's unsafe to restore them
20905 during the epilogue. */
20906 if (!fixed_regs[i]
20907 && (offsets->saved_regs_mask & (1 << i)) == 0)
20909 reg = i;
20910 break;
20915 if (reg != -1)
20917 offsets->saved_regs += 4;
20918 offsets->saved_regs_mask |= (1 << reg);
20923 offsets->locals_base = offsets->soft_frame + frame_size;
20924 offsets->outgoing_args = (offsets->locals_base
20925 + crtl->outgoing_args_size);
20927 if (ARM_DOUBLEWORD_ALIGN)
20929 /* Ensure SP remains doubleword aligned. */
20930 if (offsets->outgoing_args & 7)
20931 offsets->outgoing_args += 4;
20932 gcc_assert (!(offsets->outgoing_args & 7));
20935 return offsets;
20939 /* Calculate the relative offsets for the different stack pointers. Positive
20940 offsets are in the direction of stack growth. */
20942 HOST_WIDE_INT
20943 arm_compute_initial_elimination_offset (unsigned int from, unsigned int to)
20945 arm_stack_offsets *offsets;
20947 offsets = arm_get_frame_offsets ();
20949 /* OK, now we have enough information to compute the distances.
20950 There must be an entry in these switch tables for each pair
20951 of registers in ELIMINABLE_REGS, even if some of the entries
20952 seem to be redundant or useless. */
20953 switch (from)
20955 case ARG_POINTER_REGNUM:
20956 switch (to)
20958 case THUMB_HARD_FRAME_POINTER_REGNUM:
20959 return 0;
20961 case FRAME_POINTER_REGNUM:
20962 /* This is the reverse of the soft frame pointer
20963 to hard frame pointer elimination below. */
20964 return offsets->soft_frame - offsets->saved_args;
20966 case ARM_HARD_FRAME_POINTER_REGNUM:
20967 /* This is only non-zero in the case where the static chain register
20968 is stored above the frame. */
20969 return offsets->frame - offsets->saved_args - 4;
20971 case STACK_POINTER_REGNUM:
20972 /* If nothing has been pushed on the stack at all
20973 then this will return -4. This *is* correct! */
20974 return offsets->outgoing_args - (offsets->saved_args + 4);
20976 default:
20977 gcc_unreachable ();
20979 gcc_unreachable ();
20981 case FRAME_POINTER_REGNUM:
20982 switch (to)
20984 case THUMB_HARD_FRAME_POINTER_REGNUM:
20985 return 0;
20987 case ARM_HARD_FRAME_POINTER_REGNUM:
20988 /* The hard frame pointer points to the top entry in the
20989 stack frame. The soft frame pointer points to the bottom entry
20990 in the stack frame. If there is no stack frame at all,
20991 then they are identical. */
20993 return offsets->frame - offsets->soft_frame;
20995 case STACK_POINTER_REGNUM:
20996 return offsets->outgoing_args - offsets->soft_frame;
20998 default:
20999 gcc_unreachable ();
21001 gcc_unreachable ();
21003 default:
21004 /* You cannot eliminate from the stack pointer.
21005 In theory you could eliminate from the hard frame
21006 pointer to the stack pointer, but this will never
21007 happen, since if a stack frame is not needed the
21008 hard frame pointer will never be used. */
21009 gcc_unreachable ();
21013 /* Given FROM and TO register numbers, say whether this elimination is
21014 allowed. Frame pointer elimination is automatically handled.
21016 All eliminations are permissible. Note that ARG_POINTER_REGNUM and
21017 HARD_FRAME_POINTER_REGNUM are in fact the same thing. If we need a frame
21018 pointer, we must eliminate FRAME_POINTER_REGNUM into
21019 HARD_FRAME_POINTER_REGNUM and not into STACK_POINTER_REGNUM or
21020 ARG_POINTER_REGNUM. */
21022 bool
21023 arm_can_eliminate (const int from, const int to)
21025 return ((to == FRAME_POINTER_REGNUM && from == ARG_POINTER_REGNUM) ? false :
21026 (to == STACK_POINTER_REGNUM && frame_pointer_needed) ? false :
21027 (to == ARM_HARD_FRAME_POINTER_REGNUM && TARGET_THUMB) ? false :
21028 (to == THUMB_HARD_FRAME_POINTER_REGNUM && TARGET_ARM) ? false :
21029 true);
21032 /* Emit RTL to save coprocessor registers on function entry. Returns the
21033 number of bytes pushed. */
21035 static int
21036 arm_save_coproc_regs(void)
21038 int saved_size = 0;
21039 unsigned reg;
21040 unsigned start_reg;
21041 rtx insn;
21043 for (reg = LAST_IWMMXT_REGNUM; reg >= FIRST_IWMMXT_REGNUM; reg--)
21044 if (df_regs_ever_live_p (reg) && ! call_used_regs[reg])
21046 insn = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
21047 insn = gen_rtx_MEM (V2SImode, insn);
21048 insn = emit_set_insn (insn, gen_rtx_REG (V2SImode, reg));
21049 RTX_FRAME_RELATED_P (insn) = 1;
21050 saved_size += 8;
21053 if (TARGET_HARD_FLOAT)
21055 start_reg = FIRST_VFP_REGNUM;
21057 for (reg = FIRST_VFP_REGNUM; reg < LAST_VFP_REGNUM; reg += 2)
21059 if ((!df_regs_ever_live_p (reg) || call_used_regs[reg])
21060 && (!df_regs_ever_live_p (reg + 1) || call_used_regs[reg + 1]))
21062 if (start_reg != reg)
21063 saved_size += vfp_emit_fstmd (start_reg,
21064 (reg - start_reg) / 2);
21065 start_reg = reg + 2;
21068 if (start_reg != reg)
21069 saved_size += vfp_emit_fstmd (start_reg,
21070 (reg - start_reg) / 2);
21072 return saved_size;
21076 /* Set the Thumb frame pointer from the stack pointer. */
21078 static void
21079 thumb_set_frame_pointer (arm_stack_offsets *offsets)
21081 HOST_WIDE_INT amount;
21082 rtx insn, dwarf;
21084 amount = offsets->outgoing_args - offsets->locals_base;
21085 if (amount < 1024)
21086 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
21087 stack_pointer_rtx, GEN_INT (amount)));
21088 else
21090 emit_insn (gen_movsi (hard_frame_pointer_rtx, GEN_INT (amount)));
21091 /* Thumb-2 RTL patterns expect sp as the first input. Thumb-1
21092 expects the first two operands to be the same. */
21093 if (TARGET_THUMB2)
21095 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
21096 stack_pointer_rtx,
21097 hard_frame_pointer_rtx));
21099 else
21101 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
21102 hard_frame_pointer_rtx,
21103 stack_pointer_rtx));
21105 dwarf = gen_rtx_SET (hard_frame_pointer_rtx,
21106 plus_constant (Pmode, stack_pointer_rtx, amount));
21107 RTX_FRAME_RELATED_P (dwarf) = 1;
21108 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
21111 RTX_FRAME_RELATED_P (insn) = 1;
21114 struct scratch_reg {
21115 rtx reg;
21116 bool saved;
21119 /* Return a short-lived scratch register for use as a 2nd scratch register on
21120 function entry after the registers are saved in the prologue. This register
21121 must be released by means of release_scratch_register_on_entry. IP is not
21122 considered since it is always used as the 1st scratch register if available.
21124 REGNO1 is the index number of the 1st scratch register and LIVE_REGS is the
21125 mask of live registers. */
21127 static void
21128 get_scratch_register_on_entry (struct scratch_reg *sr, unsigned int regno1,
21129 unsigned long live_regs)
21131 int regno = -1;
21133 sr->saved = false;
21135 if (regno1 != LR_REGNUM && (live_regs & (1 << LR_REGNUM)) != 0)
21136 regno = LR_REGNUM;
21137 else
21139 unsigned int i;
21141 for (i = 4; i < 11; i++)
21142 if (regno1 != i && (live_regs & (1 << i)) != 0)
21144 regno = i;
21145 break;
21148 if (regno < 0)
21150 /* If IP is used as the 1st scratch register for a nested function,
21151 then either r3 wasn't available or is used to preserve IP. */
21152 if (regno1 == IP_REGNUM && IS_NESTED (arm_current_func_type ()))
21153 regno1 = 3;
21154 regno = (regno1 == 3 ? 2 : 3);
21155 sr->saved
21156 = REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun)),
21157 regno);
21161 sr->reg = gen_rtx_REG (SImode, regno);
21162 if (sr->saved)
21164 rtx addr = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
21165 rtx insn = emit_set_insn (gen_frame_mem (SImode, addr), sr->reg);
21166 rtx x = gen_rtx_SET (stack_pointer_rtx,
21167 plus_constant (Pmode, stack_pointer_rtx, -4));
21168 RTX_FRAME_RELATED_P (insn) = 1;
21169 add_reg_note (insn, REG_FRAME_RELATED_EXPR, x);
21173 /* Release a scratch register obtained from the preceding function. */
21175 static void
21176 release_scratch_register_on_entry (struct scratch_reg *sr)
21178 if (sr->saved)
21180 rtx addr = gen_rtx_POST_INC (Pmode, stack_pointer_rtx);
21181 rtx insn = emit_set_insn (sr->reg, gen_frame_mem (SImode, addr));
21182 rtx x = gen_rtx_SET (stack_pointer_rtx,
21183 plus_constant (Pmode, stack_pointer_rtx, 4));
21184 RTX_FRAME_RELATED_P (insn) = 1;
21185 add_reg_note (insn, REG_FRAME_RELATED_EXPR, x);
21189 #define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)
21191 #if PROBE_INTERVAL > 4096
21192 #error Cannot use indexed addressing mode for stack probing
21193 #endif
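/* With the usual default of STACK_CHECK_PROBE_INTERVAL_EXP == 12 this is a
   probe every 4096 bytes; a target that overrides the exponent changes the
   interval accordingly.  */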
21195 /* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
21196 inclusive. These are offsets from the current stack pointer. REGNO1
21197 is the index number of the 1st scratch register and LIVE_REGS is the
21198 mask of live registers. */
21200 static void
21201 arm_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size,
21202 unsigned int regno1, unsigned long live_regs)
21204 rtx reg1 = gen_rtx_REG (Pmode, regno1);
21206 /* See if we have a constant small number of probes to generate. If so,
21207 that's the easy case. */
21208 if (size <= PROBE_INTERVAL)
21210 emit_move_insn (reg1, GEN_INT (first + PROBE_INTERVAL));
21211 emit_set_insn (reg1, gen_rtx_MINUS (Pmode, stack_pointer_rtx, reg1));
21212 emit_stack_probe (plus_constant (Pmode, reg1, PROBE_INTERVAL - size));
21215 /* The run-time loop is made up of 10 insns in the generic case while the
21216 compile-time loop is made up of 4+2*(n-2) insns, where n is the number of intervals. */
21217 else if (size <= 5 * PROBE_INTERVAL)
21219 HOST_WIDE_INT i, rem;
21221 emit_move_insn (reg1, GEN_INT (first + PROBE_INTERVAL));
21222 emit_set_insn (reg1, gen_rtx_MINUS (Pmode, stack_pointer_rtx, reg1));
21223 emit_stack_probe (reg1);
21225 /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 2 until
21226 it exceeds SIZE. If only two probes are needed, this will not
21227 generate any code. Then probe at FIRST + SIZE. */
21228 for (i = 2 * PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
21230 emit_set_insn (reg1, plus_constant (Pmode, reg1, -PROBE_INTERVAL));
21231 emit_stack_probe (reg1);
21234 rem = size - (i - PROBE_INTERVAL);
21235 if (rem > 4095 || (TARGET_THUMB2 && rem > 255))
21237 emit_set_insn (reg1, plus_constant (Pmode, reg1, -PROBE_INTERVAL));
21238 emit_stack_probe (plus_constant (Pmode, reg1, PROBE_INTERVAL - rem));
21240 else
21241 emit_stack_probe (plus_constant (Pmode, reg1, -rem));
21244 /* Otherwise, do the same as above, but in a loop. Note that we must be
21245 extra careful with variables wrapping around because we might be at
21246 the very top (or the very bottom) of the address space and we have
21247 to be able to handle this case properly; in particular, we use an
21248 equality test for the loop condition. */
21249 else
21251 HOST_WIDE_INT rounded_size;
21252 struct scratch_reg sr;
21254 get_scratch_register_on_entry (&sr, regno1, live_regs);
21256 emit_move_insn (reg1, GEN_INT (first));
21259 /* Step 1: round SIZE to the previous multiple of the interval. */
21261 rounded_size = size & -PROBE_INTERVAL;
21262 emit_move_insn (sr.reg, GEN_INT (rounded_size));
21265 /* Step 2: compute initial and final value of the loop counter. */
21267 /* TEST_ADDR = SP + FIRST. */
21268 emit_set_insn (reg1, gen_rtx_MINUS (Pmode, stack_pointer_rtx, reg1));
21270 /* LAST_ADDR = SP + FIRST + ROUNDED_SIZE. */
21271 emit_set_insn (sr.reg, gen_rtx_MINUS (Pmode, reg1, sr.reg));
21274 /* Step 3: the loop
21278 TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
21279 probe at TEST_ADDR
21281 while (TEST_ADDR != LAST_ADDR)
21283 probes at FIRST + N * PROBE_INTERVAL for values of N from 1
21284 until it is equal to ROUNDED_SIZE. */
21286 emit_insn (gen_probe_stack_range (reg1, reg1, sr.reg));
21289 /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
21290 that SIZE is equal to ROUNDED_SIZE. */
21292 if (size != rounded_size)
21294 HOST_WIDE_INT rem = size - rounded_size;
21296 if (rem > 4095 || (TARGET_THUMB2 && rem > 255))
21298 emit_set_insn (sr.reg,
21299 plus_constant (Pmode, sr.reg, -PROBE_INTERVAL));
21300 emit_stack_probe (plus_constant (Pmode, sr.reg,
21301 PROBE_INTERVAL - rem));
21303 else
21304 emit_stack_probe (plus_constant (Pmode, sr.reg, -rem));
21307 release_scratch_register_on_entry (&sr);
21310 /* Make sure nothing is scheduled before we are done. */
21311 emit_insn (gen_blockage ());
21314 /* Probe a range of stack addresses from REG1 to REG2 inclusive. These are
21315 absolute addresses. */
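/* As an illustrative sketch of the output (the register numbers and the
   4K interval below are examples, not fixed by this function):

	.LPSRL0:
		sub	r4, r4, #4096
		str	r0, [r4, #0]
		cmp	r4, r5
		bne	.LPSRL0  */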
21317 const char *
21318 output_probe_stack_range (rtx reg1, rtx reg2)
21320 static int labelno = 0;
21321 char loop_lab[32];
21322 rtx xops[2];
21324 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno++);
21326 /* Loop. */
21327 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
21329 /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL. */
21330 xops[0] = reg1;
21331 xops[1] = GEN_INT (PROBE_INTERVAL);
21332 output_asm_insn ("sub\t%0, %0, %1", xops);
21334 /* Probe at TEST_ADDR. */
21335 output_asm_insn ("str\tr0, [%0, #0]", xops);
21337 /* Test if TEST_ADDR == LAST_ADDR. */
21338 xops[1] = reg2;
21339 output_asm_insn ("cmp\t%0, %1", xops);
21341 /* Branch. */
21342 fputs ("\tbne\t", asm_out_file);
21343 assemble_name_raw (asm_out_file, loop_lab);
21344 fputc ('\n', asm_out_file);
21346 return "";
21349 /* Generate the prologue instructions for entry into an ARM or Thumb-2
21350 function. */
21351 void
21352 arm_expand_prologue (void)
21354 rtx amount;
21355 rtx insn;
21356 rtx ip_rtx;
21357 unsigned long live_regs_mask;
21358 unsigned long func_type;
21359 int fp_offset = 0;
21360 int saved_pretend_args = 0;
21361 int saved_regs = 0;
21362 unsigned HOST_WIDE_INT args_to_push;
21363 HOST_WIDE_INT size;
21364 arm_stack_offsets *offsets;
21365 bool clobber_ip;
21367 func_type = arm_current_func_type ();
21369 /* Naked functions don't have prologues. */
21370 if (IS_NAKED (func_type))
21372 if (flag_stack_usage_info)
21373 current_function_static_stack_size = 0;
21374 return;
21377 /* Make a copy of c_f_p_a_s as we may need to modify it locally. */
21378 args_to_push = crtl->args.pretend_args_size;
21380 /* Compute which register we will have to save onto the stack. */
21381 offsets = arm_get_frame_offsets ();
21382 live_regs_mask = offsets->saved_regs_mask;
21384 ip_rtx = gen_rtx_REG (SImode, IP_REGNUM);
21386 if (IS_STACKALIGN (func_type))
21388 rtx r0, r1;
21390 /* Handle a word-aligned stack pointer. We generate the following:
21392 mov r0, sp
21393 bic r1, r0, #7
21394 mov sp, r1
21395 <save and restore r0 in normal prologue/epilogue>
21396 mov sp, r0
21397 bx lr
21399 The unwinder doesn't need to know about the stack realignment.
21400 Just tell it we saved SP in r0. */
21401 gcc_assert (TARGET_THUMB2 && !arm_arch_notm && args_to_push == 0);
21403 r0 = gen_rtx_REG (SImode, R0_REGNUM);
21404 r1 = gen_rtx_REG (SImode, R1_REGNUM);
21406 insn = emit_insn (gen_movsi (r0, stack_pointer_rtx));
21407 RTX_FRAME_RELATED_P (insn) = 1;
21408 add_reg_note (insn, REG_CFA_REGISTER, NULL);
21410 emit_insn (gen_andsi3 (r1, r0, GEN_INT (~(HOST_WIDE_INT)7)));
21412 /* ??? The CFA changes here, which may cause GDB to conclude that it
21413 has entered a different function. That said, the unwind info is
21414 correct, individually, before and after this instruction because
21415 we've described the save of SP, which will override the default
21416 handling of SP as restoring from the CFA. */
21417 emit_insn (gen_movsi (stack_pointer_rtx, r1));
21420 /* The static chain register is the same as the IP register. If it is
21421 clobbered when creating the frame, we need to save and restore it. */
21422 clobber_ip = IS_NESTED (func_type)
21423 && ((TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
21424 || (flag_stack_check == STATIC_BUILTIN_STACK_CHECK
21425 && !df_regs_ever_live_p (LR_REGNUM)
21426 && arm_r3_live_at_start_p ()));
21428 /* Find somewhere to store IP whilst the frame is being created.
21429 We try the following places in order:
21431 1. The last argument register r3 if it is available.
21432 2. A slot on the stack above the frame if there are no
21433 arguments to push onto the stack.
21434 3. Register r3 again, after pushing the argument registers
21435 onto the stack, if this is a varargs function.
21436 4. The last slot on the stack created for the arguments to
21437 push, if this isn't a varargs function.
21439 Note - we only need to tell the dwarf2 backend about the SP
21440 adjustment in the second variant; the static chain register
21441 doesn't need to be unwound, as it doesn't contain a value
21442 inherited from the caller. */
21443 if (clobber_ip)
21445 if (!arm_r3_live_at_start_p ())
21446 insn = emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
21447 else if (args_to_push == 0)
21449 rtx addr, dwarf;
21451 gcc_assert (arm_compute_static_chain_stack_bytes () == 4);
21452 saved_regs += 4;
21454 addr = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
21455 insn = emit_set_insn (gen_frame_mem (SImode, addr), ip_rtx);
21456 fp_offset = 4;
21458 /* Just tell the dwarf backend that we adjusted SP. */
21459 dwarf = gen_rtx_SET (stack_pointer_rtx,
21460 plus_constant (Pmode, stack_pointer_rtx,
21461 -fp_offset));
21462 RTX_FRAME_RELATED_P (insn) = 1;
21463 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
21465 else
21467 /* Store the args on the stack. */
21468 if (cfun->machine->uses_anonymous_args)
21470 insn = emit_multi_reg_push ((0xf0 >> (args_to_push / 4)) & 0xf,
21471 (0xf0 >> (args_to_push / 4)) & 0xf);
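/* Worked example (hypothetical value): with args_to_push == 8 the mask
   expression above evaluates to 0xc, i.e. a push of {r2, r3}.  */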
21472 emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
21473 saved_pretend_args = 1;
21475 else
21477 rtx addr, dwarf;
21479 if (args_to_push == 4)
21480 addr = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
21481 else
21482 addr = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx,
21483 plus_constant (Pmode,
21484 stack_pointer_rtx,
21485 -args_to_push));
21487 insn = emit_set_insn (gen_frame_mem (SImode, addr), ip_rtx);
21489 /* Just tell the dwarf backend that we adjusted SP. */
21490 dwarf = gen_rtx_SET (stack_pointer_rtx,
21491 plus_constant (Pmode, stack_pointer_rtx,
21492 -args_to_push));
21493 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
21496 RTX_FRAME_RELATED_P (insn) = 1;
21497 fp_offset = args_to_push;
21498 args_to_push = 0;
21502 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
21504 if (IS_INTERRUPT (func_type))
21506 /* Interrupt functions must not corrupt any registers.
21507 Creating a frame pointer however, corrupts the IP
21508 register, so we must push it first. */
21509 emit_multi_reg_push (1 << IP_REGNUM, 1 << IP_REGNUM);
21511 /* Do not set RTX_FRAME_RELATED_P on this insn.
21512 The dwarf stack unwinding code only wants to see one
21513 stack decrement per function, and this is not it. If
21514 this instruction is labeled as being part of the frame
21515 creation sequence then dwarf2out_frame_debug_expr will
21516 die when it encounters the assignment of IP to FP
21517 later on, since the use of SP here establishes SP as
21518 the CFA register and not IP.
21520 Anyway this instruction is not really part of the stack
21521 frame creation although it is part of the prologue. */
21524 insn = emit_set_insn (ip_rtx,
21525 plus_constant (Pmode, stack_pointer_rtx,
21526 fp_offset));
21527 RTX_FRAME_RELATED_P (insn) = 1;
21530 if (args_to_push)
21532 /* Push the argument registers, or reserve space for them. */
21533 if (cfun->machine->uses_anonymous_args)
21534 insn = emit_multi_reg_push
21535 ((0xf0 >> (args_to_push / 4)) & 0xf,
21536 (0xf0 >> (args_to_push / 4)) & 0xf);
21537 else
21538 insn = emit_insn
21539 (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
21540 GEN_INT (- args_to_push)));
21541 RTX_FRAME_RELATED_P (insn) = 1;
21544 /* If this is an interrupt service routine, and the link register
21545 is going to be pushed, and we're not generating extra
21546 push of IP (needed when a frame is needed and the frame layout is APCS),
21547 subtracting four from LR now will mean that the function return
21548 can be done with a single instruction. */
21549 if ((func_type == ARM_FT_ISR || func_type == ARM_FT_FIQ)
21550 && (live_regs_mask & (1 << LR_REGNUM)) != 0
21551 && !(frame_pointer_needed && TARGET_APCS_FRAME)
21552 && TARGET_ARM)
21554 rtx lr = gen_rtx_REG (SImode, LR_REGNUM);
21556 emit_set_insn (lr, plus_constant (SImode, lr, -4));
21559 if (live_regs_mask)
21561 unsigned long dwarf_regs_mask = live_regs_mask;
21563 saved_regs += bit_count (live_regs_mask) * 4;
21564 if (optimize_size && !frame_pointer_needed
21565 && saved_regs == offsets->saved_regs - offsets->saved_args)
21567 /* If no coprocessor registers are being pushed and we don't have
21568 to worry about a frame pointer then push extra registers to
21569 create the stack frame. This is done in a way that does not
21570 alter the frame layout, so is independent of the epilogue. */
21571 int n;
21572 int frame;
21573 n = 0;
21574 while (n < 8 && (live_regs_mask & (1 << n)) == 0)
21575 n++;
21576 frame = offsets->outgoing_args - (offsets->saved_args + saved_regs);
21577 if (frame && n * 4 >= frame)
21579 n = frame / 4;
21580 live_regs_mask |= (1 << n) - 1;
21581 saved_regs += frame;
21585 if (TARGET_LDRD
21586 && current_tune->prefer_ldrd_strd
21587 && !optimize_function_for_size_p (cfun))
21589 gcc_checking_assert (live_regs_mask == dwarf_regs_mask);
21590 if (TARGET_THUMB2)
21591 thumb2_emit_strd_push (live_regs_mask);
21592 else if (TARGET_ARM
21593 && !TARGET_APCS_FRAME
21594 && !IS_INTERRUPT (func_type))
21595 arm_emit_strd_push (live_regs_mask);
21596 else
21598 insn = emit_multi_reg_push (live_regs_mask, live_regs_mask);
21599 RTX_FRAME_RELATED_P (insn) = 1;
21602 else
21604 insn = emit_multi_reg_push (live_regs_mask, dwarf_regs_mask);
21605 RTX_FRAME_RELATED_P (insn) = 1;
21609 if (! IS_VOLATILE (func_type))
21610 saved_regs += arm_save_coproc_regs ();
21612 if (frame_pointer_needed && TARGET_ARM)
21614 /* Create the new frame pointer. */
21615 if (TARGET_APCS_FRAME)
21617 insn = GEN_INT (-(4 + args_to_push + fp_offset));
21618 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx, ip_rtx, insn));
21619 RTX_FRAME_RELATED_P (insn) = 1;
21621 else
21623 insn = GEN_INT (saved_regs - (4 + fp_offset));
21624 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
21625 stack_pointer_rtx, insn));
21626 RTX_FRAME_RELATED_P (insn) = 1;
21630 size = offsets->outgoing_args - offsets->saved_args;
21631 if (flag_stack_usage_info)
21632 current_function_static_stack_size = size;
21634 /* If this isn't an interrupt service routine and we have a frame, then do
21635 stack checking. We use IP as the first scratch register, except for the
21636 non-APCS nested functions if LR or r3 are available (see clobber_ip). */
21637 if (!IS_INTERRUPT (func_type)
21638 && flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
21640 unsigned int regno;
21642 if (!IS_NESTED (func_type) || clobber_ip)
21643 regno = IP_REGNUM;
21644 else if (df_regs_ever_live_p (LR_REGNUM))
21645 regno = LR_REGNUM;
21646 else
21647 regno = 3;
21649 if (crtl->is_leaf && !cfun->calls_alloca)
21651 if (size > PROBE_INTERVAL && size > STACK_CHECK_PROTECT)
21652 arm_emit_probe_stack_range (STACK_CHECK_PROTECT,
21653 size - STACK_CHECK_PROTECT,
21654 regno, live_regs_mask);
21656 else if (size > 0)
21657 arm_emit_probe_stack_range (STACK_CHECK_PROTECT, size,
21658 regno, live_regs_mask);
21661 /* Recover the static chain register. */
21662 if (clobber_ip)
21664 if (!arm_r3_live_at_start_p () || saved_pretend_args)
21665 insn = gen_rtx_REG (SImode, 3);
21666 else
21668 insn = plus_constant (Pmode, hard_frame_pointer_rtx, 4);
21669 insn = gen_frame_mem (SImode, insn);
21671 emit_set_insn (ip_rtx, insn);
21672 emit_insn (gen_force_register_use (ip_rtx));
21675 if (offsets->outgoing_args != offsets->saved_args + saved_regs)
21677 /* This add can produce multiple insns for a large constant, so we
21678 need to get tricky. */
21679 rtx_insn *last = get_last_insn ();
21681 amount = GEN_INT (offsets->saved_args + saved_regs
21682 - offsets->outgoing_args);
21684 insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
21685 amount));
21688 last = last ? NEXT_INSN (last) : get_insns ();
21689 RTX_FRAME_RELATED_P (last) = 1;
21691 while (last != insn);
21693 /* If the frame pointer is needed, emit a special barrier that
21694 will prevent the scheduler from moving stores to the frame
21695 before the stack adjustment. */
21696 if (frame_pointer_needed)
21697 insn = emit_insn (gen_stack_tie (stack_pointer_rtx,
21698 hard_frame_pointer_rtx));
21702 if (frame_pointer_needed && TARGET_THUMB2)
21703 thumb_set_frame_pointer (offsets);
21705 if (flag_pic && arm_pic_register != INVALID_REGNUM)
21707 unsigned long mask;
21709 mask = live_regs_mask;
21710 mask &= THUMB2_WORK_REGS;
21711 if (!IS_NESTED (func_type))
21712 mask |= (1 << IP_REGNUM);
21713 arm_load_pic_register (mask);
21716 /* If we are profiling, make sure no instructions are scheduled before
21717 the call to mcount. Similarly if the user has requested no
21718 scheduling in the prolog. Similarly if we want non-call exceptions
21719 using the EABI unwinder, to prevent faulting instructions from being
21720 swapped with a stack adjustment. */
21721 if (crtl->profile || !TARGET_SCHED_PROLOG
21722 || (arm_except_unwind_info (&global_options) == UI_TARGET
21723 && cfun->can_throw_non_call_exceptions))
21724 emit_insn (gen_blockage ());
21726 /* If the link register is being kept alive, with the return address in it,
21727 then make sure that it does not get reused by the ce2 pass. */
21728 if ((live_regs_mask & (1 << LR_REGNUM)) == 0)
21729 cfun->machine->lr_save_eliminated = 1;
21732 /* Print condition code to STREAM. Helper function for arm_print_operand. */
21733 static void
21734 arm_print_condition (FILE *stream)
21736 if (arm_ccfsm_state == 3 || arm_ccfsm_state == 4)
21738 /* Branch conversion is not implemented for Thumb-2. */
21739 if (TARGET_THUMB)
21741 output_operand_lossage ("predicated Thumb instruction");
21742 return;
21744 if (current_insn_predicate != NULL)
21746 output_operand_lossage
21747 ("predicated instruction in conditional sequence");
21748 return;
21751 fputs (arm_condition_codes[arm_current_cc], stream);
21753 else if (current_insn_predicate)
21755 enum arm_cond_code code;
21757 if (TARGET_THUMB1)
21759 output_operand_lossage ("predicated Thumb instruction");
21760 return;
21763 code = get_arm_condition_code (current_insn_predicate);
21764 fputs (arm_condition_codes[code], stream);
21769 /* Globally reserved letters: acln
21770 Punctuation letters currently used: @_|?().!#
21771 Lower case letters currently used: bcdefhimpqtvwxyz
21772 Upper case letters currently used: ABCDFGHJKLMNOPQRSTU
21773 Letters previously used, but now deprecated/obsolete: sVWXYZ.
21775 Note that the global reservation for 'c' is only for CONSTANT_ADDRESS_P.
21777 If CODE is 'd', then the X is a condition operand and the instruction
21778 should only be executed if the condition is true.
21779 if CODE is 'D', then the X is a condition operand and the instruction
21780 should only be executed if the condition is false: however, if the mode
21781 of the comparison is CCFPEmode, then always execute the instruction -- we
21782 do this because in these circumstances !GE does not necessarily imply LT;
21783 in these cases the instruction pattern will take care to make sure that
21784 an instruction containing %d will follow, thereby undoing the effects of
21785 doing this instruction unconditionally.
21786 If CODE is 'N' then X is a floating point operand that must be negated
21787 before output.
21788 If CODE is 'B' then output a bitwise inverted value of X (a const int).
21789 If X is a REG and CODE is `M', output a ldm/stm style multi-reg. */
21790 static void
21791 arm_print_operand (FILE *stream, rtx x, int code)
21793 switch (code)
21795 case '@':
21796 fputs (ASM_COMMENT_START, stream);
21797 return;
21799 case '_':
21800 fputs (user_label_prefix, stream);
21801 return;
21803 case '|':
21804 fputs (REGISTER_PREFIX, stream);
21805 return;
21807 case '?':
21808 arm_print_condition (stream);
21809 return;
21811 case '.':
21812 /* The current condition code for a condition code setting instruction.
21813 Preceded by 's' in unified syntax, otherwise followed by 's'. */
21814 fputc ('s', stream);
21815 arm_print_condition (stream);
21816 return;
21818 case '!':
21819 /* If the instruction is conditionally executed then print
21820 the current condition code, otherwise print 's'. */
21821 gcc_assert (TARGET_THUMB2);
21822 if (current_insn_predicate)
21823 arm_print_condition (stream);
21824 else
21825 fputc ('s', stream);
21826 break;
21828 /* %# is a "break" sequence. It doesn't output anything, but is used to
21829 separate e.g. operand numbers from following text, if that text consists
21830 of further digits which we don't want to be part of the operand
21831 number. */
21832 case '#':
21833 return;
21835 case 'N':
21837 REAL_VALUE_TYPE r;
21838 r = real_value_negate (CONST_DOUBLE_REAL_VALUE (x));
21839 fprintf (stream, "%s", fp_const_from_val (&r));
21841 return;
21843 /* An integer or symbol address without a preceding # sign. */
21844 case 'c':
21845 switch (GET_CODE (x))
21847 case CONST_INT:
21848 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
21849 break;
21851 case SYMBOL_REF:
21852 output_addr_const (stream, x);
21853 break;
21855 case CONST:
21856 if (GET_CODE (XEXP (x, 0)) == PLUS
21857 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF)
21859 output_addr_const (stream, x);
21860 break;
21862 /* Fall through. */
21864 default:
21865 output_operand_lossage ("Unsupported operand for code '%c'", code);
21867 return;
21869 /* An integer that we want to print in HEX. */
21870 case 'x':
21871 switch (GET_CODE (x))
21873 case CONST_INT:
21874 fprintf (stream, "#" HOST_WIDE_INT_PRINT_HEX, INTVAL (x));
21875 break;
21877 default:
21878 output_operand_lossage ("Unsupported operand for code '%c'", code);
21880 return;
21882 case 'B':
21883 if (CONST_INT_P (x))
21885 HOST_WIDE_INT val;
21886 val = ARM_SIGN_EXTEND (~INTVAL (x));
21887 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, val);
21889 else
21891 putc ('~', stream);
21892 output_addr_const (stream, x);
21894 return;
21896 case 'b':
21897 /* Print the log2 of a CONST_INT. */
21899 HOST_WIDE_INT val;
21901 if (!CONST_INT_P (x)
21902 || (val = exact_log2 (INTVAL (x) & 0xffffffff)) < 0)
21903 output_operand_lossage ("Unsupported operand for code '%c'", code);
21904 else
21905 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, val);
21907 return;
21909 case 'L':
21910 /* The low 16 bits of an immediate constant. */
21911 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL(x) & 0xffff);
21912 return;
21914 case 'i':
21915 fprintf (stream, "%s", arithmetic_instr (x, 1));
21916 return;
21918 case 'I':
21919 fprintf (stream, "%s", arithmetic_instr (x, 0));
21920 return;
21922 case 'S':
21924 HOST_WIDE_INT val;
21925 const char *shift;
21927 shift = shift_op (x, &val);
21929 if (shift)
21931 fprintf (stream, ", %s ", shift);
21932 if (val == -1)
21933 arm_print_operand (stream, XEXP (x, 1), 0);
21934 else
21935 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, val);
21938 return;
21940 /* An explanation of the 'Q', 'R' and 'H' register operands:
21942 In a pair of registers containing a DI or DF value the 'Q'
21943 operand returns the register number of the register containing
21944 the least significant part of the value. The 'R' operand returns
21945 the register number of the register containing the most
21946 significant part of the value.
21948 The 'H' operand returns the higher of the two register numbers.
21949 On a run where WORDS_BIG_ENDIAN is true the 'H' operand is the
21950 same as the 'Q' operand, since the most significant part of the
21951 value is held in the lower number register. The reverse is true
21952 on systems where WORDS_BIG_ENDIAN is false.
21954 The purpose of these operands is to distinguish between cases
21955 where the endian-ness of the values is important (for example
21956 when they are added together), and cases where the endian-ness
21957 is irrelevant, but the order of register operations is important.
21958 For example when loading a value from memory into a register
21959 pair, the endian-ness does not matter. Provided that the value
21960 from the lower memory address is put into the lower numbered
21961 register, and the value from the higher address is put into the
21962 higher numbered register, the load will work regardless of whether
21963 the value being loaded is big-wordian or little-wordian. The
21964 order of the two register loads can matter however, if the address
21965 of the memory location is actually held in one of the registers
21966 being overwritten by the load.
21968 The 'Q' and 'R' constraints are also available for 64-bit
21969 constants. */
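/* Illustration (little-endian, DImode value in {r0, r1}): %Q prints r0,
   %R prints r1 and %H prints r1; with WORDS_BIG_ENDIAN the roles of r0
   and r1 swap for %Q and %R, and %H then matches %Q.  */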
21970 case 'Q':
21971 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
21973 rtx part = gen_lowpart (SImode, x);
21974 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, INTVAL (part));
21975 return;
21978 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
21980 output_operand_lossage ("invalid operand for code '%c'", code);
21981 return;
21984 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 1 : 0));
21985 return;
21987 case 'R':
21988 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
21990 machine_mode mode = GET_MODE (x);
21991 rtx part;
21993 if (mode == VOIDmode)
21994 mode = DImode;
21995 part = gen_highpart_mode (SImode, mode, x);
21996 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, INTVAL (part));
21997 return;
22000 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
22002 output_operand_lossage ("invalid operand for code '%c'", code);
22003 return;
22006 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 0 : 1));
22007 return;
22009 case 'H':
22010 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
22012 output_operand_lossage ("invalid operand for code '%c'", code);
22013 return;
22016 asm_fprintf (stream, "%r", REGNO (x) + 1);
22017 return;
22019 case 'J':
22020 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
22022 output_operand_lossage ("invalid operand for code '%c'", code);
22023 return;
22026 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 3 : 2));
22027 return;
22029 case 'K':
22030 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
22032 output_operand_lossage ("invalid operand for code '%c'", code);
22033 return;
22036 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 2 : 3));
22037 return;
22039 case 'm':
22040 asm_fprintf (stream, "%r",
22041 REG_P (XEXP (x, 0))
22042 ? REGNO (XEXP (x, 0)) : REGNO (XEXP (XEXP (x, 0), 0)));
22043 return;
22045 case 'M':
22046 asm_fprintf (stream, "{%r-%r}",
22047 REGNO (x),
22048 REGNO (x) + ARM_NUM_REGS (GET_MODE (x)) - 1);
22049 return;
22051 /* Like 'M', but writing doubleword vector registers, for use by Neon
22052 insns. */
22053 case 'h':
22055 int regno = (REGNO (x) - FIRST_VFP_REGNUM) / 2;
22056 int numregs = ARM_NUM_REGS (GET_MODE (x)) / 2;
22057 if (numregs == 1)
22058 asm_fprintf (stream, "{d%d}", regno);
22059 else
22060 asm_fprintf (stream, "{d%d-d%d}", regno, regno + numregs - 1);
22062 return;
22064 case 'd':
22065 /* CONST_TRUE_RTX means always -- that's the default. */
22066 if (x == const_true_rtx)
22067 return;
22069 if (!COMPARISON_P (x))
22071 output_operand_lossage ("invalid operand for code '%c'", code);
22072 return;
22075 fputs (arm_condition_codes[get_arm_condition_code (x)],
22076 stream);
22077 return;
22079 case 'D':
22080 /* CONST_TRUE_RTX means not always -- i.e. never. We shouldn't ever
22081 want to do that. */
22082 if (x == const_true_rtx)
22084 output_operand_lossage ("instruction never executed");
22085 return;
22087 if (!COMPARISON_P (x))
22089 output_operand_lossage ("invalid operand for code '%c'", code);
22090 return;
22093 fputs (arm_condition_codes[ARM_INVERSE_CONDITION_CODE
22094 (get_arm_condition_code (x))],
22095 stream);
22096 return;
22098 case 's':
22099 case 'V':
22100 case 'W':
22101 case 'X':
22102 case 'Y':
22103 case 'Z':
22104 /* Former Maverick support, removed after GCC-4.7. */
22105 output_operand_lossage ("obsolete Maverick format code '%c'", code);
22106 return;
22108 case 'U':
22109 if (!REG_P (x)
22110 || REGNO (x) < FIRST_IWMMXT_GR_REGNUM
22111 || REGNO (x) > LAST_IWMMXT_GR_REGNUM)
22112 /* Bad value for wCG register number. */
22114 output_operand_lossage ("invalid operand for code '%c'", code);
22115 return;
22118 else
22119 fprintf (stream, "%d", REGNO (x) - FIRST_IWMMXT_GR_REGNUM);
22120 return;
22122 /* Print an iWMMXt control register name. */
22123 case 'w':
22124 if (!CONST_INT_P (x)
22125 || INTVAL (x) < 0
22126 || INTVAL (x) >= 16)
22127 /* Bad value for wC register number. */
22129 output_operand_lossage ("invalid operand for code '%c'", code);
22130 return;
22133 else
22135 static const char * wc_reg_names [16] =
22137 "wCID", "wCon", "wCSSF", "wCASF",
22138 "wC4", "wC5", "wC6", "wC7",
22139 "wCGR0", "wCGR1", "wCGR2", "wCGR3",
22140 "wC12", "wC13", "wC14", "wC15"
22143 fputs (wc_reg_names [INTVAL (x)], stream);
22145 return;
22147 /* Print the high single-precision register of a VFP double-precision
22148 register. */
22149 case 'p':
22151 machine_mode mode = GET_MODE (x);
22152 int regno;
22154 if (GET_MODE_SIZE (mode) != 8 || !REG_P (x))
22156 output_operand_lossage ("invalid operand for code '%c'", code);
22157 return;
22160 regno = REGNO (x);
22161 if (!VFP_REGNO_OK_FOR_DOUBLE (regno))
22163 output_operand_lossage ("invalid operand for code '%c'", code);
22164 return;
22167 fprintf (stream, "s%d", regno - FIRST_VFP_REGNUM + 1);
22169 return;
22171 /* Print a VFP/Neon double precision or quad precision register name. */
22172 case 'P':
22173 case 'q':
22175 machine_mode mode = GET_MODE (x);
22176 int is_quad = (code == 'q');
22177 int regno;
22179 if (GET_MODE_SIZE (mode) != (is_quad ? 16 : 8))
22181 output_operand_lossage ("invalid operand for code '%c'", code);
22182 return;
22185 if (!REG_P (x)
22186 || !IS_VFP_REGNUM (REGNO (x)))
22188 output_operand_lossage ("invalid operand for code '%c'", code);
22189 return;
22192 regno = REGNO (x);
22193 if ((is_quad && !NEON_REGNO_OK_FOR_QUAD (regno))
22194 || (!is_quad && !VFP_REGNO_OK_FOR_DOUBLE (regno)))
22196 output_operand_lossage ("invalid operand for code '%c'", code);
22197 return;
22200 fprintf (stream, "%c%d", is_quad ? 'q' : 'd',
22201 (regno - FIRST_VFP_REGNUM) >> (is_quad ? 2 : 1));
22203 return;
22205 /* These two codes print the low/high doubleword register of a Neon quad
22206 register, respectively. For pair-structure types, can also print
22207 low/high quadword registers. */
22208 case 'e':
22209 case 'f':
22211 machine_mode mode = GET_MODE (x);
22212 int regno;
22214 if ((GET_MODE_SIZE (mode) != 16
22215 && GET_MODE_SIZE (mode) != 32) || !REG_P (x))
22217 output_operand_lossage ("invalid operand for code '%c'", code);
22218 return;
22221 regno = REGNO (x);
22222 if (!NEON_REGNO_OK_FOR_QUAD (regno))
22224 output_operand_lossage ("invalid operand for code '%c'", code);
22225 return;
22228 if (GET_MODE_SIZE (mode) == 16)
22229 fprintf (stream, "d%d", ((regno - FIRST_VFP_REGNUM) >> 1)
22230 + (code == 'f' ? 1 : 0));
22231 else
22232 fprintf (stream, "q%d", ((regno - FIRST_VFP_REGNUM) >> 2)
22233 + (code == 'f' ? 1 : 0));
22235 return;
22237 /* Print a VFPv3 floating-point constant, represented as an integer
22238 index. */
22239 case 'G':
22241 int index = vfp3_const_double_index (x);
22242 gcc_assert (index != -1);
22243 fprintf (stream, "%d", index);
22245 return;
22247 /* Print bits representing opcode features for Neon.
22249 Bit 0 is 1 for signed, 0 for unsigned. Floats count as signed
22250 and polynomials as unsigned.
22252 Bit 1 is 1 for floats and polynomials, 0 for ordinary integers.
22254 Bit 2 is 1 for rounding functions, 0 otherwise. */
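/* Worked example (value chosen for illustration): a bits value of 6
   (binary 110) prints 'p' for %T and %F, 'u' for %t, and "r" for %O.  */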
22256 /* Identify the type as 's', 'u', 'p' or 'f'. */
22257 case 'T':
22259 HOST_WIDE_INT bits = INTVAL (x);
22260 fputc ("uspf"[bits & 3], stream);
22262 return;
22264 /* Likewise, but signed and unsigned integers are both 'i'. */
22265 case 'F':
22267 HOST_WIDE_INT bits = INTVAL (x);
22268 fputc ("iipf"[bits & 3], stream);
22270 return;
22272 /* As for 'T', but emit 'u' instead of 'p'. */
22273 case 't':
22275 HOST_WIDE_INT bits = INTVAL (x);
22276 fputc ("usuf"[bits & 3], stream);
22278 return;
22280 /* Bit 2: rounding (vs none). */
22281 case 'O':
22283 HOST_WIDE_INT bits = INTVAL (x);
22284 fputs ((bits & 4) != 0 ? "r" : "", stream);
22286 return;
22288 /* Memory operand for vld1/vst1 instruction. */
22289 case 'A':
22291 rtx addr;
22292 bool postinc = FALSE;
22293 rtx postinc_reg = NULL;
22294 unsigned align, memsize, align_bits;
22296 gcc_assert (MEM_P (x));
22297 addr = XEXP (x, 0);
22298 if (GET_CODE (addr) == POST_INC)
22300 postinc = 1;
22301 addr = XEXP (addr, 0);
22303 if (GET_CODE (addr) == POST_MODIFY)
22305 postinc_reg = XEXP (XEXP (addr, 1), 1);
22306 addr = XEXP (addr, 0);
22308 asm_fprintf (stream, "[%r", REGNO (addr));
22310 /* We know the alignment of this access, so we can emit a hint in the
22311 instruction (for some alignments) as an aid to the memory subsystem
22312 of the target. */
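/* The printed operand then looks like, say, "[r0:128]" or "[r0:64]!"
   with writeback; the register and alignment values here are purely
   illustrative.  */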
22313 align = MEM_ALIGN (x) >> 3;
22314 memsize = MEM_SIZE (x);
22316 /* Only certain alignment specifiers are supported by the hardware. */
22317 if (memsize == 32 && (align % 32) == 0)
22318 align_bits = 256;
22319 else if ((memsize == 16 || memsize == 32) && (align % 16) == 0)
22320 align_bits = 128;
22321 else if (memsize >= 8 && (align % 8) == 0)
22322 align_bits = 64;
22323 else
22324 align_bits = 0;
22326 if (align_bits != 0)
22327 asm_fprintf (stream, ":%d", align_bits);
22329 asm_fprintf (stream, "]");
22331 if (postinc)
22332 fputs("!", stream);
22333 if (postinc_reg)
22334 asm_fprintf (stream, ", %r", REGNO (postinc_reg));
22336 return;
22338 case 'C':
22340 rtx addr;
22342 gcc_assert (MEM_P (x));
22343 addr = XEXP (x, 0);
22344 gcc_assert (REG_P (addr));
22345 asm_fprintf (stream, "[%r]", REGNO (addr));
22347 return;
22349 /* Translate an S register number into a D register number and element index. */
22350 case 'y':
22352 machine_mode mode = GET_MODE (x);
22353 int regno;
22355 if (GET_MODE_SIZE (mode) != 4 || !REG_P (x))
22357 output_operand_lossage ("invalid operand for code '%c'", code);
22358 return;
22361 regno = REGNO (x);
22362 if (!VFP_REGNO_OK_FOR_SINGLE (regno))
22364 output_operand_lossage ("invalid operand for code '%c'", code);
22365 return;
22368 regno = regno - FIRST_VFP_REGNUM;
22369 fprintf (stream, "d%d[%d]", regno / 2, regno % 2);
22371 return;
22373 case 'v':
22374 gcc_assert (CONST_DOUBLE_P (x));
22375 int result;
22376 result = vfp3_const_double_for_fract_bits (x);
22377 if (result == 0)
22378 result = vfp3_const_double_for_bits (x);
22379 fprintf (stream, "#%d", result);
22380 return;
22382 /* Register specifier for vld1.16/vst1.16. Translate the S register
22383 number into a D register number and element index. */
22384 case 'z':
22386 machine_mode mode = GET_MODE (x);
22387 int regno;
22389 if (GET_MODE_SIZE (mode) != 2 || !REG_P (x))
22391 output_operand_lossage ("invalid operand for code '%c'", code);
22392 return;
22395 regno = REGNO (x);
22396 if (!VFP_REGNO_OK_FOR_SINGLE (regno))
22398 output_operand_lossage ("invalid operand for code '%c'", code);
22399 return;
22402 regno = regno - FIRST_VFP_REGNUM;
22403 fprintf (stream, "d%d[%d]", regno/2, ((regno % 2) ? 2 : 0));
22405 return;
22407 default:
22408 if (x == 0)
22410 output_operand_lossage ("missing operand");
22411 return;
22414 switch (GET_CODE (x))
22416 case REG:
22417 asm_fprintf (stream, "%r", REGNO (x));
22418 break;
22420 case MEM:
22421 output_address (GET_MODE (x), XEXP (x, 0));
22422 break;
22424 case CONST_DOUBLE:
22426 char fpstr[20];
22427 real_to_decimal (fpstr, CONST_DOUBLE_REAL_VALUE (x),
22428 sizeof (fpstr), 0, 1);
22429 fprintf (stream, "#%s", fpstr);
22431 break;
22433 default:
22434 gcc_assert (GET_CODE (x) != NEG);
22435 fputc ('#', stream);
22436 if (GET_CODE (x) == HIGH)
22438 fputs (":lower16:", stream);
22439 x = XEXP (x, 0);
22442 output_addr_const (stream, x);
22443 break;
22448 /* Target hook for printing a memory address. */
22449 static void
22450 arm_print_operand_address (FILE *stream, machine_mode mode, rtx x)
22452 if (TARGET_32BIT)
22454 int is_minus = GET_CODE (x) == MINUS;
22456 if (REG_P (x))
22457 asm_fprintf (stream, "[%r]", REGNO (x));
22458 else if (GET_CODE (x) == PLUS || is_minus)
22460 rtx base = XEXP (x, 0);
22461 rtx index = XEXP (x, 1);
22462 HOST_WIDE_INT offset = 0;
22463 if (!REG_P (base)
22464 || (REG_P (index) && REGNO (index) == SP_REGNUM))
22466 /* Ensure that BASE is a register. */
22467 /* (one of them must be). */
22468 /* Also ensure the SP is not used as an index register. */
22469 std::swap (base, index);
22471 switch (GET_CODE (index))
22473 case CONST_INT:
22474 offset = INTVAL (index);
22475 if (is_minus)
22476 offset = -offset;
22477 asm_fprintf (stream, "[%r, #%wd]",
22478 REGNO (base), offset);
22479 break;
22481 case REG:
22482 asm_fprintf (stream, "[%r, %s%r]",
22483 REGNO (base), is_minus ? "-" : "",
22484 REGNO (index));
22485 break;
22487 case MULT:
22488 case ASHIFTRT:
22489 case LSHIFTRT:
22490 case ASHIFT:
22491 case ROTATERT:
22493 asm_fprintf (stream, "[%r, %s%r",
22494 REGNO (base), is_minus ? "-" : "",
22495 REGNO (XEXP (index, 0)));
22496 arm_print_operand (stream, index, 'S');
22497 fputs ("]", stream);
22498 break;
22501 default:
22502 gcc_unreachable ();
22505 else if (GET_CODE (x) == PRE_INC || GET_CODE (x) == POST_INC
22506 || GET_CODE (x) == PRE_DEC || GET_CODE (x) == POST_DEC)
22508 gcc_assert (REG_P (XEXP (x, 0)));
22510 if (GET_CODE (x) == PRE_DEC || GET_CODE (x) == PRE_INC)
22511 asm_fprintf (stream, "[%r, #%s%d]!",
22512 REGNO (XEXP (x, 0)),
22513 GET_CODE (x) == PRE_DEC ? "-" : "",
22514 GET_MODE_SIZE (mode));
22515 else
22516 asm_fprintf (stream, "[%r], #%s%d",
22517 REGNO (XEXP (x, 0)),
22518 GET_CODE (x) == POST_DEC ? "-" : "",
22519 GET_MODE_SIZE (mode));
22521 else if (GET_CODE (x) == PRE_MODIFY)
22523 asm_fprintf (stream, "[%r, ", REGNO (XEXP (x, 0)));
22524 if (CONST_INT_P (XEXP (XEXP (x, 1), 1)))
22525 asm_fprintf (stream, "#%wd]!",
22526 INTVAL (XEXP (XEXP (x, 1), 1)));
22527 else
22528 asm_fprintf (stream, "%r]!",
22529 REGNO (XEXP (XEXP (x, 1), 1)));
22531 else if (GET_CODE (x) == POST_MODIFY)
22533 asm_fprintf (stream, "[%r], ", REGNO (XEXP (x, 0)));
22534 if (CONST_INT_P (XEXP (XEXP (x, 1), 1)))
22535 asm_fprintf (stream, "#%wd",
22536 INTVAL (XEXP (XEXP (x, 1), 1)));
22537 else
22538 asm_fprintf (stream, "%r",
22539 REGNO (XEXP (XEXP (x, 1), 1)));
22541 else output_addr_const (stream, x);
22543 else
22545 if (REG_P (x))
22546 asm_fprintf (stream, "[%r]", REGNO (x));
22547 else if (GET_CODE (x) == POST_INC)
22548 asm_fprintf (stream, "%r!", REGNO (XEXP (x, 0)));
22549 else if (GET_CODE (x) == PLUS)
22551 gcc_assert (REG_P (XEXP (x, 0)));
22552 if (CONST_INT_P (XEXP (x, 1)))
22553 asm_fprintf (stream, "[%r, #%wd]",
22554 REGNO (XEXP (x, 0)),
22555 INTVAL (XEXP (x, 1)));
22556 else
22557 asm_fprintf (stream, "[%r, %r]",
22558 REGNO (XEXP (x, 0)),
22559 REGNO (XEXP (x, 1)));
22561 else
22562 output_addr_const (stream, x);
22566 /* Target hook for indicating whether a punctuation character for
22567 TARGET_PRINT_OPERAND is valid. */
22568 static bool
22569 arm_print_operand_punct_valid_p (unsigned char code)
22571 return (code == '@' || code == '|' || code == '.'
22572 || code == '(' || code == ')' || code == '#'
22573 || (TARGET_32BIT && (code == '?'))
22574 || (TARGET_THUMB2 && (code == '!'))
22575 || (TARGET_THUMB && (code == '_')));
22578 /* Target hook for assembling integer objects. The ARM version needs to
22579 handle word-sized values specially. */
22580 static bool
22581 arm_assemble_integer (rtx x, unsigned int size, int aligned_p)
22583 machine_mode mode;
22585 if (size == UNITS_PER_WORD && aligned_p)
22587 fputs ("\t.word\t", asm_out_file);
22588 output_addr_const (asm_out_file, x);
22590 /* Mark symbols as position independent. We only do this in the
22591 .text segment, not in the .data segment. */
22592 if (NEED_GOT_RELOC && flag_pic && making_const_table &&
22593 (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF))
22595 /* See legitimize_pic_address for an explanation of the
22596 TARGET_VXWORKS_RTP check. */
22597 /* References to weak symbols cannot be resolved locally:
22598 they may be overridden by a non-weak definition at link
22599 time. */
22600 if (!arm_pic_data_is_text_relative
22601 || (GET_CODE (x) == SYMBOL_REF
22602 && (!SYMBOL_REF_LOCAL_P (x)
22603 || (SYMBOL_REF_DECL (x)
22604 ? DECL_WEAK (SYMBOL_REF_DECL (x)) : 0))))
22605 fputs ("(GOT)", asm_out_file);
22606 else
22607 fputs ("(GOTOFF)", asm_out_file);
22609 fputc ('\n', asm_out_file);
22610 return true;
22613 mode = GET_MODE (x);
22615 if (arm_vector_mode_supported_p (mode))
22617 int i, units;
22619 gcc_assert (GET_CODE (x) == CONST_VECTOR);
22621 units = CONST_VECTOR_NUNITS (x);
22622 size = GET_MODE_UNIT_SIZE (mode);
22624 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
22625 for (i = 0; i < units; i++)
22627 rtx elt = CONST_VECTOR_ELT (x, i);
22628 assemble_integer
22629 (elt, size, i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT, 1);
22631 else
22632 for (i = 0; i < units; i++)
22634 rtx elt = CONST_VECTOR_ELT (x, i);
22635 assemble_real
22636 (*CONST_DOUBLE_REAL_VALUE (elt), GET_MODE_INNER (mode),
22637 i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT);
22640 return true;
22643 return default_assemble_integer (x, size, aligned_p);
22646 static void
22647 arm_elf_asm_cdtor (rtx symbol, int priority, bool is_ctor)
22649 section *s;
22651 if (!TARGET_AAPCS_BASED)
22653 (is_ctor ?
22654 default_named_section_asm_out_constructor
22655 : default_named_section_asm_out_destructor) (symbol, priority);
22656 return;
22659 /* Put these in the .init_array section, using a special relocation. */
22660 if (priority != DEFAULT_INIT_PRIORITY)
22662 char buf[18];
22663 sprintf (buf, "%s.%.5u",
22664 is_ctor ? ".init_array" : ".fini_array",
22665 priority);
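/* e.g. ".init_array.00065" for a constructor with priority 65
   (illustrative value).  */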
22666 s = get_section (buf, SECTION_WRITE | SECTION_NOTYPE, NULL_TREE);
22668 else if (is_ctor)
22669 s = ctors_section;
22670 else
22671 s = dtors_section;
22673 switch_to_section (s);
22674 assemble_align (POINTER_SIZE);
22675 fputs ("\t.word\t", asm_out_file);
22676 output_addr_const (asm_out_file, symbol);
22677 fputs ("(target1)\n", asm_out_file);
22680 /* Add a function to the list of static constructors. */
22682 static void
22683 arm_elf_asm_constructor (rtx symbol, int priority)
22685 arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/true);
22688 /* Add a function to the list of static destructors. */
22690 static void
22691 arm_elf_asm_destructor (rtx symbol, int priority)
22693 arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/false);
22696 /* A finite state machine takes care of noticing whether or not instructions
22697 can be conditionally executed, and thus decrease execution time and code
22698 size by deleting branch instructions. The fsm is controlled by
22699 final_prescan_insn, and controls the actions of ASM_OUTPUT_OPCODE. */
22701 /* The state of the fsm controlling condition codes are:
22702 0: normal, do nothing special
22703 1: make ASM_OUTPUT_OPCODE not output this instruction
22704 2: make ASM_OUTPUT_OPCODE not output this instruction
22705 3: make instructions conditional
22706 4: make instructions conditional
22708 State transitions (state->state by whom under condition):
22709 0 -> 1 final_prescan_insn if the `target' is a label
22710 0 -> 2 final_prescan_insn if the `target' is an unconditional branch
22711 1 -> 3 ASM_OUTPUT_OPCODE after not having output the conditional branch
22712 2 -> 4 ASM_OUTPUT_OPCODE after not having output the conditional branch
22713 3 -> 0 (*targetm.asm_out.internal_label) if the `target' label is reached
22714 (the target label has CODE_LABEL_NUMBER equal to arm_target_label).
22715 4 -> 0 final_prescan_insn if the `target' unconditional branch is reached
22716 (the target insn is arm_target_insn).
22718 If the jump clobbers the conditions then we use states 2 and 4.
22720 A similar thing can be done with conditional return insns.
22722 XXX In case the `target' is an unconditional branch, this conditionalising
22723 of the instructions always reduces code size, but not always execution
22724 time. But then, I want to reduce the code size to somewhere near what
22725 /bin/cc produces. */
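/* Purely as an illustration of the transformation (the actual
   opportunities depend on the insn stream), a sequence such as

	cmp	r0, #0
	bne	.L1
	add	r1, r1, #1
   .L1:

   can be emitted by this fsm as

	cmp	r0, #0
	addeq	r1, r1, #1  */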
22727 /* In addition to this, state is maintained for Thumb-2 COND_EXEC
22728 instructions. When a COND_EXEC instruction is seen the subsequent
22729 instructions are scanned so that multiple conditional instructions can be
22730 combined into a single IT block. arm_condexec_count and arm_condexec_mask
22731 specify the length and true/false mask for the IT block. These will be
22732 decremented/zeroed by arm_asm_output_opcode as the insns are output. */
22734 /* Returns the index of the ARM condition code string in
22735 `arm_condition_codes', or ARM_NV if the comparison is invalid.
22736 COMPARISON should be an rtx like `(eq (...) (...))'. */
22738 enum arm_cond_code
22739 maybe_get_arm_condition_code (rtx comparison)
22741 machine_mode mode = GET_MODE (XEXP (comparison, 0));
22742 enum arm_cond_code code;
22743 enum rtx_code comp_code = GET_CODE (comparison);
22745 if (GET_MODE_CLASS (mode) != MODE_CC)
22746 mode = SELECT_CC_MODE (comp_code, XEXP (comparison, 0),
22747 XEXP (comparison, 1));
22749 switch (mode)
22751 case CC_DNEmode: code = ARM_NE; goto dominance;
22752 case CC_DEQmode: code = ARM_EQ; goto dominance;
22753 case CC_DGEmode: code = ARM_GE; goto dominance;
22754 case CC_DGTmode: code = ARM_GT; goto dominance;
22755 case CC_DLEmode: code = ARM_LE; goto dominance;
22756 case CC_DLTmode: code = ARM_LT; goto dominance;
22757 case CC_DGEUmode: code = ARM_CS; goto dominance;
22758 case CC_DGTUmode: code = ARM_HI; goto dominance;
22759 case CC_DLEUmode: code = ARM_LS; goto dominance;
22760 case CC_DLTUmode: code = ARM_CC;
22762 dominance:
22763 if (comp_code == EQ)
22764 return ARM_INVERSE_CONDITION_CODE (code);
22765 if (comp_code == NE)
22766 return code;
22767 return ARM_NV;
22769 case CC_NOOVmode:
22770 switch (comp_code)
22772 case NE: return ARM_NE;
22773 case EQ: return ARM_EQ;
22774 case GE: return ARM_PL;
22775 case LT: return ARM_MI;
22776 default: return ARM_NV;
22779 case CC_Zmode:
22780 switch (comp_code)
22782 case NE: return ARM_NE;
22783 case EQ: return ARM_EQ;
22784 default: return ARM_NV;
22787 case CC_Nmode:
22788 switch (comp_code)
22790 case NE: return ARM_MI;
22791 case EQ: return ARM_PL;
22792 default: return ARM_NV;
22795 case CCFPEmode:
22796 case CCFPmode:
22797 /* We can handle all cases except UNEQ and LTGT. */
22798 switch (comp_code)
22800 case GE: return ARM_GE;
22801 case GT: return ARM_GT;
22802 case LE: return ARM_LS;
22803 case LT: return ARM_MI;
22804 case NE: return ARM_NE;
22805 case EQ: return ARM_EQ;
22806 case ORDERED: return ARM_VC;
22807 case UNORDERED: return ARM_VS;
22808 case UNLT: return ARM_LT;
22809 case UNLE: return ARM_LE;
22810 case UNGT: return ARM_HI;
22811 case UNGE: return ARM_PL;
22812 /* UNEQ and LTGT do not have a representation. */
22813 case UNEQ: /* Fall through. */
22814 case LTGT: /* Fall through. */
22815 default: return ARM_NV;
22818 case CC_SWPmode:
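/* CC_SWPmode indicates a comparison whose operands were swapped, so each
   ordering below maps to the condition of the reversed comparison
   (e.g. GE emits the LE condition).  */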
22819 switch (comp_code)
22821 case NE: return ARM_NE;
22822 case EQ: return ARM_EQ;
22823 case GE: return ARM_LE;
22824 case GT: return ARM_LT;
22825 case LE: return ARM_GE;
22826 case LT: return ARM_GT;
22827 case GEU: return ARM_LS;
22828 case GTU: return ARM_CC;
22829 case LEU: return ARM_CS;
22830 case LTU: return ARM_HI;
22831 default: return ARM_NV;
22834 case CC_Cmode:
22835 switch (comp_code)
22837 case LTU: return ARM_CS;
22838 case GEU: return ARM_CC;
22839 case NE: return ARM_CS;
22840 case EQ: return ARM_CC;
22841 default: return ARM_NV;
22844 case CC_CZmode:
22845 switch (comp_code)
22847 case NE: return ARM_NE;
22848 case EQ: return ARM_EQ;
22849 case GEU: return ARM_CS;
22850 case GTU: return ARM_HI;
22851 case LEU: return ARM_LS;
22852 case LTU: return ARM_CC;
22853 default: return ARM_NV;
22856 case CC_NCVmode:
22857 switch (comp_code)
22859 case GE: return ARM_GE;
22860 case LT: return ARM_LT;
22861 case GEU: return ARM_CS;
22862 case LTU: return ARM_CC;
22863 default: return ARM_NV;
22866 case CC_Vmode:
22867 switch (comp_code)
22869 case NE: return ARM_VS;
22870 case EQ: return ARM_VC;
22871 default: return ARM_NV;
22874 case CCmode:
22875 switch (comp_code)
22877 case NE: return ARM_NE;
22878 case EQ: return ARM_EQ;
22879 case GE: return ARM_GE;
22880 case GT: return ARM_GT;
22881 case LE: return ARM_LE;
22882 case LT: return ARM_LT;
22883 case GEU: return ARM_CS;
22884 case GTU: return ARM_HI;
22885 case LEU: return ARM_LS;
22886 case LTU: return ARM_CC;
22887 default: return ARM_NV;
22890 default: gcc_unreachable ();
22894 /* Like maybe_get_arm_condition_code, but never return ARM_NV. */
22895 static enum arm_cond_code
22896 get_arm_condition_code (rtx comparison)
22898 enum arm_cond_code code = maybe_get_arm_condition_code (comparison);
22899 gcc_assert (code != ARM_NV);
22900 return code;
22903 /* Tell arm_asm_output_opcode to output IT blocks for conditionally executed
22904 instructions. */
22905 void
22906 thumb2_final_prescan_insn (rtx_insn *insn)
22908 rtx_insn *first_insn = insn;
22909 rtx body = PATTERN (insn);
22910 rtx predicate;
22911 enum arm_cond_code code;
22912 int n;
22913 int mask;
22914 int max;
22916 /* max_insns_skipped in the tune was already taken into account in the
22917 cost model of ifcvt pass when generating COND_EXEC insns. At this stage
22918 just emit the IT blocks as large as we can. It does not make sense to split
22919 the IT blocks. */
22920 max = MAX_INSN_PER_IT_BLOCK;
22922 /* Remove the previous insn from the count of insns to be output. */
22923 if (arm_condexec_count)
22924 arm_condexec_count--;
22926 /* Nothing to do if we are already inside a conditional block. */
22927 if (arm_condexec_count)
22928 return;
22930 if (GET_CODE (body) != COND_EXEC)
22931 return;
22933 /* Conditional jumps are implemented directly. */
22934 if (JUMP_P (insn))
22935 return;
22937 predicate = COND_EXEC_TEST (body);
22938 arm_current_cc = get_arm_condition_code (predicate);
22940 n = get_attr_ce_count (insn);
22941 arm_condexec_count = 1;
22942 arm_condexec_mask = (1 << n) - 1;
22943 arm_condexec_masklen = n;
22944 /* See if subsequent instructions can be combined into the same block. */
22945 for (;;)
22947 insn = next_nonnote_insn (insn);
22949 /* Jumping into the middle of an IT block is illegal, so a label or
22950 barrier terminates the block. */
22951 if (!NONJUMP_INSN_P (insn) && !JUMP_P (insn))
22952 break;
22954 body = PATTERN (insn);
22955 /* USE and CLOBBER aren't really insns, so just skip them. */
22956 if (GET_CODE (body) == USE
22957 || GET_CODE (body) == CLOBBER)
22958 continue;
22960 /* ??? Recognize conditional jumps, and combine them with IT blocks. */
22961 if (GET_CODE (body) != COND_EXEC)
22962 break;
22963 /* Maximum number of conditionally executed instructions in a block. */
22964 n = get_attr_ce_count (insn);
22965 if (arm_condexec_masklen + n > max)
22966 break;
22968 predicate = COND_EXEC_TEST (body);
22969 code = get_arm_condition_code (predicate);
22970 mask = (1 << n) - 1;
22971 if (arm_current_cc == code)
22972 arm_condexec_mask |= (mask << arm_condexec_masklen);
22973 else if (arm_current_cc != ARM_INVERSE_CONDITION_CODE(code))
22974 break;
22976 arm_condexec_count++;
22977 arm_condexec_masklen += n;
22979 /* A jump must be the last instruction in a conditional block. */
22980 if (JUMP_P (insn))
22981 break;
22983 /* Restore recog_data (getting the attributes of other insns can
22984 destroy this array, but final.c assumes that it remains intact
22985 across this call). */
22986 extract_constrain_insn_cached (first_insn);
22989 void
22990 arm_final_prescan_insn (rtx_insn *insn)
22992 /* BODY will hold the body of INSN. */
22993 rtx body = PATTERN (insn);
22995 /* This will be 1 if trying to repeat the trick, and things need to be
22996 reversed if it appears to fail. */
22997 int reverse = 0;
22999 /* If we start with a return insn, we only succeed if we find another one. */
23000 int seeking_return = 0;
23001 enum rtx_code return_code = UNKNOWN;
23003 /* START_INSN will hold the insn from where we start looking. This is the
23004 first insn after the following code_label if REVERSE is true. */
23005 rtx_insn *start_insn = insn;
23007 /* If in state 4, check if the target branch is reached, in order to
23008 change back to state 0. */
23009 if (arm_ccfsm_state == 4)
23011 if (insn == arm_target_insn)
23013 arm_target_insn = NULL;
23014 arm_ccfsm_state = 0;
23016 return;
23019 /* If in state 3, it is possible to repeat the trick, if this insn is an
23020 unconditional branch to a label, and immediately following this branch
23021 is the previous target label which is only used once, and the label this
23022 branch jumps to is not too far off. */
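/* As an illustration (example only) of what this state machine achieves: a
   short forward branch such as
	cmp	r0, #0
	beq	.L1
	add	r1, r1, #1
   .L1:
   can be rewritten without the branch as
	cmp	r0, #0
	addne	r1, r1, #1
   provided every skipped instruction is safe to execute conditionally.  */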
23023 if (arm_ccfsm_state == 3)
23025 if (simplejump_p (insn))
23027 start_insn = next_nonnote_insn (start_insn);
23028 if (BARRIER_P (start_insn))
23030 /* XXX Isn't this always a barrier? */
23031 start_insn = next_nonnote_insn (start_insn);
23033 if (LABEL_P (start_insn)
23034 && CODE_LABEL_NUMBER (start_insn) == arm_target_label
23035 && LABEL_NUSES (start_insn) == 1)
23036 reverse = TRUE;
23037 else
23038 return;
23040 else if (ANY_RETURN_P (body))
23042 start_insn = next_nonnote_insn (start_insn);
23043 if (BARRIER_P (start_insn))
23044 start_insn = next_nonnote_insn (start_insn);
23045 if (LABEL_P (start_insn)
23046 && CODE_LABEL_NUMBER (start_insn) == arm_target_label
23047 && LABEL_NUSES (start_insn) == 1)
23049 reverse = TRUE;
23050 seeking_return = 1;
23051 return_code = GET_CODE (body);
23053 else
23054 return;
23056 else
23057 return;
23060 gcc_assert (!arm_ccfsm_state || reverse);
23061 if (!JUMP_P (insn))
23062 return;
23064 /* This jump might be paralleled with a clobber of the condition codes;
23065 the jump should always come first. */
23066 if (GET_CODE (body) == PARALLEL && XVECLEN (body, 0) > 0)
23067 body = XVECEXP (body, 0, 0);
23069 if (reverse
23070 || (GET_CODE (body) == SET && GET_CODE (SET_DEST (body)) == PC
23071 && GET_CODE (SET_SRC (body)) == IF_THEN_ELSE))
23073 int insns_skipped;
23074 int fail = FALSE, succeed = FALSE;
23075 /* Flag which part of the IF_THEN_ELSE is the LABEL_REF. */
23076 int then_not_else = TRUE;
23077 rtx_insn *this_insn = start_insn;
23078 rtx label = 0;
23080 /* Register the insn jumped to. */
23081 if (reverse)
23083 if (!seeking_return)
23084 label = XEXP (SET_SRC (body), 0);
23086 else if (GET_CODE (XEXP (SET_SRC (body), 1)) == LABEL_REF)
23087 label = XEXP (XEXP (SET_SRC (body), 1), 0);
23088 else if (GET_CODE (XEXP (SET_SRC (body), 2)) == LABEL_REF)
23090 label = XEXP (XEXP (SET_SRC (body), 2), 0);
23091 then_not_else = FALSE;
23093 else if (ANY_RETURN_P (XEXP (SET_SRC (body), 1)))
23095 seeking_return = 1;
23096 return_code = GET_CODE (XEXP (SET_SRC (body), 1));
23098 else if (ANY_RETURN_P (XEXP (SET_SRC (body), 2)))
23100 seeking_return = 1;
23101 return_code = GET_CODE (XEXP (SET_SRC (body), 2));
23102 then_not_else = FALSE;
23104 else
23105 gcc_unreachable ();
23107 /* See how many insns this branch skips, and what kind of insns. If all
23108 insns are okay, and the label or unconditional branch to the same
23109 label is not too far away, succeed. */
23110 for (insns_skipped = 0;
23111 !fail && !succeed && insns_skipped++ < max_insns_skipped;)
23113 rtx scanbody;
23115 this_insn = next_nonnote_insn (this_insn);
23116 if (!this_insn)
23117 break;
23119 switch (GET_CODE (this_insn))
23121 case CODE_LABEL:
23122 /* Succeed if it is the target label, otherwise fail since
23123 control falls in from somewhere else. */
23124 if (this_insn == label)
23126 arm_ccfsm_state = 1;
23127 succeed = TRUE;
23129 else
23130 fail = TRUE;
23131 break;
23133 case BARRIER:
23134 /* Succeed if the following insn is the target label.
23135 Otherwise fail.
23136 If return insns are used then the last insn in a function
23137 will be a barrier. */
23138 this_insn = next_nonnote_insn (this_insn);
23139 if (this_insn && this_insn == label)
23141 arm_ccfsm_state = 1;
23142 succeed = TRUE;
23144 else
23145 fail = TRUE;
23146 break;
23148 case CALL_INSN:
23149 /* The AAPCS says that conditional calls should not be
23150 used since they make interworking inefficient (the
23151 linker can't transform BL<cond> into BLX). That's
23152 only a problem if the machine has BLX. */
23153 if (arm_arch5)
23155 fail = TRUE;
23156 break;
23159 /* Succeed if the following insn is the target label, or
23160 if the following two insns are a barrier and the
23161 target label. */
23162 this_insn = next_nonnote_insn (this_insn);
23163 if (this_insn && BARRIER_P (this_insn))
23164 this_insn = next_nonnote_insn (this_insn);
23166 if (this_insn && this_insn == label
23167 && insns_skipped < max_insns_skipped)
23169 arm_ccfsm_state = 1;
23170 succeed = TRUE;
23172 else
23173 fail = TRUE;
23174 break;
23176 case JUMP_INSN:
23177 /* If this is an unconditional branch to the same label, succeed.
23178 If it is to another label, do nothing. If it is conditional,
23179 fail. */
23180 /* XXX Probably, the tests for SET and the PC are
23181 unnecessary. */
23183 scanbody = PATTERN (this_insn);
23184 if (GET_CODE (scanbody) == SET
23185 && GET_CODE (SET_DEST (scanbody)) == PC)
23187 if (GET_CODE (SET_SRC (scanbody)) == LABEL_REF
23188 && XEXP (SET_SRC (scanbody), 0) == label && !reverse)
23190 arm_ccfsm_state = 2;
23191 succeed = TRUE;
23193 else if (GET_CODE (SET_SRC (scanbody)) == IF_THEN_ELSE)
23194 fail = TRUE;
23196 /* Fail if a conditional return is undesirable (e.g. on a
23197 StrongARM), but still allow this if optimizing for size. */
23198 else if (GET_CODE (scanbody) == return_code
23199 && !use_return_insn (TRUE, NULL)
23200 && !optimize_size)
23201 fail = TRUE;
23202 else if (GET_CODE (scanbody) == return_code)
23204 arm_ccfsm_state = 2;
23205 succeed = TRUE;
23207 else if (GET_CODE (scanbody) == PARALLEL)
23209 switch (get_attr_conds (this_insn))
23211 case CONDS_NOCOND:
23212 break;
23213 default:
23214 fail = TRUE;
23215 break;
23218 else
23219 fail = TRUE; /* Unrecognized jump (e.g. epilogue). */
23221 break;
23223 case INSN:
23224 /* Instructions using or affecting the condition codes make it
23225 fail. */
23226 scanbody = PATTERN (this_insn);
23227 if (!(GET_CODE (scanbody) == SET
23228 || GET_CODE (scanbody) == PARALLEL)
23229 || get_attr_conds (this_insn) != CONDS_NOCOND)
23230 fail = TRUE;
23231 break;
23233 default:
23234 break;
23237 if (succeed)
23239 if ((!seeking_return) && (arm_ccfsm_state == 1 || reverse))
23240 arm_target_label = CODE_LABEL_NUMBER (label);
23241 else
23243 gcc_assert (seeking_return || arm_ccfsm_state == 2);
23245 while (this_insn && GET_CODE (PATTERN (this_insn)) == USE)
23247 this_insn = next_nonnote_insn (this_insn);
23248 gcc_assert (!this_insn
23249 || (!BARRIER_P (this_insn)
23250 && !LABEL_P (this_insn)));
23252 if (!this_insn)
23254 /* Oh dear!  We ran off the end; give up. */
23255 extract_constrain_insn_cached (insn);
23256 arm_ccfsm_state = 0;
23257 arm_target_insn = NULL;
23258 return;
23260 arm_target_insn = this_insn;
23263 /* If REVERSE is true, ARM_CURRENT_CC needs to be inverted from
23264 what it was. */
23265 if (!reverse)
23266 arm_current_cc = get_arm_condition_code (XEXP (SET_SRC (body), 0));
23268 if (reverse || then_not_else)
23269 arm_current_cc = ARM_INVERSE_CONDITION_CODE (arm_current_cc);
23272 /* Restore recog_data (getting the attributes of other insns can
23273 destroy this array, but final.c assumes that it remains intact
23274 across this call. */
23275 extract_constrain_insn_cached (insn);
23279 /* Output IT instructions. */
23280 void
23281 thumb2_asm_output_opcode (FILE * stream)
23283 char buff[5];
23284 int n;
23286 if (arm_condexec_mask)
23288 for (n = 0; n < arm_condexec_masklen; n++)
23289 buff[n] = (arm_condexec_mask & (1 << n)) ? 't' : 'e';
23290 buff[n] = 0;
23291 asm_fprintf(stream, "i%s\t%s\n\t", buff,
23292 arm_condition_codes[arm_current_cc]);
23293 arm_condexec_mask = 0;
23297 /* Returns true if REGNO is a valid register
23298 for holding a quantity of type MODE. */
23300 arm_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
23302 if (GET_MODE_CLASS (mode) == MODE_CC)
23303 return (regno == CC_REGNUM
23304 || (TARGET_HARD_FLOAT
23305 && regno == VFPCC_REGNUM));
23307 if (regno == CC_REGNUM && GET_MODE_CLASS (mode) != MODE_CC)
23308 return false;
23310 if (TARGET_THUMB1)
23311 /* For the Thumb we only allow values bigger than SImode in
23312 registers 0 - 6, so that there is always a second low
23313 register available to hold the upper part of the value.
23314 We probably ought to ensure that the register is the
23315 start of an even numbered register pair. */
23316 return (ARM_NUM_REGS (mode) < 2) || (regno < LAST_LO_REGNUM);
23318 if (TARGET_HARD_FLOAT && IS_VFP_REGNUM (regno))
23320 if (mode == SFmode || mode == SImode)
23321 return VFP_REGNO_OK_FOR_SINGLE (regno);
23323 if (mode == DFmode)
23324 return VFP_REGNO_OK_FOR_DOUBLE (regno);
23326 if (mode == HFmode)
23327 return VFP_REGNO_OK_FOR_SINGLE (regno);
23329 /* VFP registers can hold HImode values. */
23330 if (mode == HImode)
23331 return VFP_REGNO_OK_FOR_SINGLE (regno);
23333 if (TARGET_NEON)
23334 return (VALID_NEON_DREG_MODE (mode) && VFP_REGNO_OK_FOR_DOUBLE (regno))
23335 || (VALID_NEON_QREG_MODE (mode)
23336 && NEON_REGNO_OK_FOR_QUAD (regno))
23337 || (mode == TImode && NEON_REGNO_OK_FOR_NREGS (regno, 2))
23338 || (mode == EImode && NEON_REGNO_OK_FOR_NREGS (regno, 3))
23339 || (mode == OImode && NEON_REGNO_OK_FOR_NREGS (regno, 4))
23340 || (mode == CImode && NEON_REGNO_OK_FOR_NREGS (regno, 6))
23341 || (mode == XImode && NEON_REGNO_OK_FOR_NREGS (regno, 8));
23343 return FALSE;
23346 if (TARGET_REALLY_IWMMXT)
23348 if (IS_IWMMXT_GR_REGNUM (regno))
23349 return mode == SImode;
23351 if (IS_IWMMXT_REGNUM (regno))
23352 return VALID_IWMMXT_REG_MODE (mode);
23355 /* We allow almost any value to be stored in the general registers.
23356 Restrict doubleword quantities to even register pairs in ARM state
23357 so that we can use ldrd. Do not allow very large Neon structure
23358 opaque modes in general registers; they would use too many. */
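/* For instance (illustrative), when ldrd/strd are available a DImode value
   may live in the even/odd pair {r4, r5} but not in {r5, r6} in ARM state;
   Thumb-2 accepts any pair.  */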
23359 if (regno <= LAST_ARM_REGNUM)
23361 if (ARM_NUM_REGS (mode) > 4)
23362 return FALSE;
23364 if (TARGET_THUMB2)
23365 return TRUE;
23367 return !(TARGET_LDRD && GET_MODE_SIZE (mode) > 4 && (regno & 1) != 0);
23370 if (regno == FRAME_POINTER_REGNUM
23371 || regno == ARG_POINTER_REGNUM)
23372 /* We only allow integers in the fake hard registers. */
23373 return GET_MODE_CLASS (mode) == MODE_INT;
23375 return FALSE;
23378 /* Implement MODES_TIEABLE_P. */
23380 bool
23381 arm_modes_tieable_p (machine_mode mode1, machine_mode mode2)
23383 if (GET_MODE_CLASS (mode1) == GET_MODE_CLASS (mode2))
23384 return true;
23386 /* We specifically want to allow elements of "structure" modes to
23387 be tieable to the structure. This more general condition allows
23388 other rarer situations too. */
23389 if (TARGET_NEON
23390 && (VALID_NEON_DREG_MODE (mode1)
23391 || VALID_NEON_QREG_MODE (mode1)
23392 || VALID_NEON_STRUCT_MODE (mode1))
23393 && (VALID_NEON_DREG_MODE (mode2)
23394 || VALID_NEON_QREG_MODE (mode2)
23395 || VALID_NEON_STRUCT_MODE (mode2)))
23396 return true;
23398 return false;
23401 /* For efficiency and historical reasons LO_REGS, HI_REGS and CC_REGS are
23402 not used in arm mode. */
23404 enum reg_class
23405 arm_regno_class (int regno)
23407 if (regno == PC_REGNUM)
23408 return NO_REGS;
23410 if (TARGET_THUMB1)
23412 if (regno == STACK_POINTER_REGNUM)
23413 return STACK_REG;
23414 if (regno == CC_REGNUM)
23415 return CC_REG;
23416 if (regno < 8)
23417 return LO_REGS;
23418 return HI_REGS;
23421 if (TARGET_THUMB2 && regno < 8)
23422 return LO_REGS;
23424 if ( regno <= LAST_ARM_REGNUM
23425 || regno == FRAME_POINTER_REGNUM
23426 || regno == ARG_POINTER_REGNUM)
23427 return TARGET_THUMB2 ? HI_REGS : GENERAL_REGS;
23429 if (regno == CC_REGNUM || regno == VFPCC_REGNUM)
23430 return TARGET_THUMB2 ? CC_REG : NO_REGS;
23432 if (IS_VFP_REGNUM (regno))
23434 if (regno <= D7_VFP_REGNUM)
23435 return VFP_D0_D7_REGS;
23436 else if (regno <= LAST_LO_VFP_REGNUM)
23437 return VFP_LO_REGS;
23438 else
23439 return VFP_HI_REGS;
23442 if (IS_IWMMXT_REGNUM (regno))
23443 return IWMMXT_REGS;
23445 if (IS_IWMMXT_GR_REGNUM (regno))
23446 return IWMMXT_GR_REGS;
23448 return NO_REGS;
23451 /* Handle a special case when computing the offset
23452 of an argument from the frame pointer. */
23454 arm_debugger_arg_offset (int value, rtx addr)
23456 rtx_insn *insn;
23458 /* We are only interested if dbxout_parms() failed to compute the offset. */
23459 if (value != 0)
23460 return 0;
23462 /* We can only cope with the case where the address is held in a register. */
23463 if (!REG_P (addr))
23464 return 0;
23466 /* If we are using the frame pointer to point at the argument, then
23467 an offset of 0 is correct. */
23468 if (REGNO (addr) == (unsigned) HARD_FRAME_POINTER_REGNUM)
23469 return 0;
23471 /* If we are using the stack pointer to point at the
23472 argument, then an offset of 0 is correct. */
23473 /* ??? Check this is consistent with thumb2 frame layout. */
23474 if ((TARGET_THUMB || !frame_pointer_needed)
23475 && REGNO (addr) == SP_REGNUM)
23476 return 0;
23478 /* Oh dear. The argument is pointed to by a register rather
23479 than being held in a register, or being stored at a known
23480 offset from the frame pointer. Since GDB only understands
23481 those two kinds of argument we must translate the address
23482 held in the register into an offset from the frame pointer.
23483 We do this by searching through the insns for the function
23484 looking to see where this register gets its value. If the
23485 register is initialized from the frame pointer plus an offset
23486 then we are in luck and we can continue, otherwise we give up.
23488 This code is exercised by producing debugging information
23489 for a function with arguments like this:
23491 double func (double a, double b, int c, double d) {return d;}
23493 Without this code the stab for parameter 'd' will be set to
23494 an offset of 0 from the frame pointer, rather than 8. */
23496 /* The if() statement says:
23498 If the insn is a normal instruction
23499 and if the insn is setting the value in a register
23500 and if the register being set is the register holding the address of the argument
23501 and if the address is computed by an addition
23502 that involves adding to a register
23503 which is the frame pointer
23504 a constant integer
23506 then... */
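/* In RTL terms the loop below looks for an insn of (roughly) the shape
	(set (reg Rn) (plus:SI (reg fp) (const_int OFFSET)))
   where Rn is the register holding ADDR; OFFSET is then the value we want.
   (Illustrative pattern only, not an exact match.)  */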
23508 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
23510 if ( NONJUMP_INSN_P (insn)
23511 && GET_CODE (PATTERN (insn)) == SET
23512 && REGNO (XEXP (PATTERN (insn), 0)) == REGNO (addr)
23513 && GET_CODE (XEXP (PATTERN (insn), 1)) == PLUS
23514 && REG_P (XEXP (XEXP (PATTERN (insn), 1), 0))
23515 && REGNO (XEXP (XEXP (PATTERN (insn), 1), 0)) == (unsigned) HARD_FRAME_POINTER_REGNUM
23516 && CONST_INT_P (XEXP (XEXP (PATTERN (insn), 1), 1))
23519 value = INTVAL (XEXP (XEXP (PATTERN (insn), 1), 1));
23521 break;
23525 if (value == 0)
23527 debug_rtx (addr);
23528 warning (0, "unable to compute real location of stacked parameter");
23529 value = 8; /* XXX magic hack */
23532 return value;
23535 /* Implement TARGET_PROMOTED_TYPE. */
23537 static tree
23538 arm_promoted_type (const_tree t)
23540 if (SCALAR_FLOAT_TYPE_P (t)
23541 && TYPE_PRECISION (t) == 16
23542 && TYPE_MAIN_VARIANT (t) == arm_fp16_type_node)
23543 return float_type_node;
23544 return NULL_TREE;
23547 /* Implement TARGET_SCALAR_MODE_SUPPORTED_P.
23548 This simply adds HFmode as a supported mode; even though we don't
23549 implement arithmetic on this type directly, it's supported by
23550 optabs conversions, much the way the double-word arithmetic is
23551 special-cased in the default hook. */
23553 static bool
23554 arm_scalar_mode_supported_p (machine_mode mode)
23556 if (mode == HFmode)
23557 return (arm_fp16_format != ARM_FP16_FORMAT_NONE);
23558 else if (ALL_FIXED_POINT_MODE_P (mode))
23559 return true;
23560 else
23561 return default_scalar_mode_supported_p (mode);
23564 /* Set the value of FLT_EVAL_METHOD.
23565 ISO/IEC TS 18661-3 defines two values that we'd like to make use of:
23567 0: evaluate all operations and constants, whose semantic type has at
23568 most the range and precision of type float, to the range and
23569 precision of float; evaluate all other operations and constants to
23570 the range and precision of the semantic type;
23572 N, where _FloatN is a supported interchange floating type
23573 evaluate all operations and constants, whose semantic type has at
23574 most the range and precision of _FloatN type, to the range and
23575 precision of the _FloatN type; evaluate all other operations and
23576 constants to the range and precision of the semantic type;
23578 If we have the ARMv8.2-A extensions then we support _Float16 in native
23579 precision, so we should set this to 16. Otherwise, we support the type,
23580 but want to evaluate expressions in float precision, so set this to
23581 0. */
23583 static enum flt_eval_method
23584 arm_excess_precision (enum excess_precision_type type)
23586 switch (type)
23588 case EXCESS_PRECISION_TYPE_FAST:
23589 case EXCESS_PRECISION_TYPE_STANDARD:
23590 /* We can calculate either in 16-bit range and precision or
23591 32-bit range and precision. Make that decision based on whether
23592 we have native support for the ARMv8.2-A 16-bit floating-point
23593 instructions or not. */
23594 return (TARGET_VFP_FP16INST
23595 ? FLT_EVAL_METHOD_PROMOTE_TO_FLOAT16
23596 : FLT_EVAL_METHOD_PROMOTE_TO_FLOAT);
23597 case EXCESS_PRECISION_TYPE_IMPLICIT:
23598 return FLT_EVAL_METHOD_PROMOTE_TO_FLOAT16;
23599 default:
23600 gcc_unreachable ();
23602 return FLT_EVAL_METHOD_UNPREDICTABLE;
23606 /* Implement TARGET_FLOATN_MODE. Make very sure that we don't provide
23607 _Float16 if we are using anything other than ieee format for 16-bit
23608 floating point. Otherwise, punt to the default implementation. */
23609 static machine_mode
23610 arm_floatn_mode (int n, bool extended)
23612 if (!extended && n == 16)
23613 return arm_fp16_format == ARM_FP16_FORMAT_IEEE ? HFmode : VOIDmode;
23615 return default_floatn_mode (n, extended);
23619 /* Set up OPERANDS for a register copy from SRC to DEST, taking care
23620 not to early-clobber SRC registers in the process.
23622 We assume that the operands described by SRC and DEST represent a
23623 decomposed copy of OPERANDS[1] into OPERANDS[0]. COUNT is the
23624 number of components into which the copy has been decomposed. */
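/* For example (illustrative), copying the pair {d1, d2} into {d2, d3} must
   be emitted high part first (d3 <- d2, then d2 <- d1) so that d2 is read
   before it is overwritten; copying {d2, d3} into {d1, d2} is safe in the
   forward order.  */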
23625 void
23626 neon_disambiguate_copy (rtx *operands, rtx *dest, rtx *src, unsigned int count)
23628 unsigned int i;
23630 if (!reg_overlap_mentioned_p (operands[0], operands[1])
23631 || REGNO (operands[0]) < REGNO (operands[1]))
23633 for (i = 0; i < count; i++)
23635 operands[2 * i] = dest[i];
23636 operands[2 * i + 1] = src[i];
23639 else
23641 for (i = 0; i < count; i++)
23643 operands[2 * i] = dest[count - i - 1];
23644 operands[2 * i + 1] = src[count - i - 1];
23649 /* Split operands into moves from op[1] + op[2] into op[0]. */
23651 void
23652 neon_split_vcombine (rtx operands[3])
23654 unsigned int dest = REGNO (operands[0]);
23655 unsigned int src1 = REGNO (operands[1]);
23656 unsigned int src2 = REGNO (operands[2]);
23657 machine_mode halfmode = GET_MODE (operands[1]);
23658 unsigned int halfregs = HARD_REGNO_NREGS (src1, halfmode);
23659 rtx destlo, desthi;
23661 if (src1 == dest && src2 == dest + halfregs)
23663 /* No-op move. Can't split to nothing; emit something. */
23664 emit_note (NOTE_INSN_DELETED);
23665 return;
23668 /* Preserve register attributes for variable tracking. */
23669 destlo = gen_rtx_REG_offset (operands[0], halfmode, dest, 0);
23670 desthi = gen_rtx_REG_offset (operands[0], halfmode, dest + halfregs,
23671 GET_MODE_SIZE (halfmode));
23673 /* Special case of reversed high/low parts. Use VSWP. */
23674 if (src2 == dest && src1 == dest + halfregs)
23676 rtx x = gen_rtx_SET (destlo, operands[1]);
23677 rtx y = gen_rtx_SET (desthi, operands[2]);
23678 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, x, y)));
23679 return;
23682 if (!reg_overlap_mentioned_p (operands[2], destlo))
23684 /* Try to avoid unnecessary moves if part of the result
23685 is in the right place already. */
23686 if (src1 != dest)
23687 emit_move_insn (destlo, operands[1]);
23688 if (src2 != dest + halfregs)
23689 emit_move_insn (desthi, operands[2]);
23691 else
23693 if (src2 != dest + halfregs)
23694 emit_move_insn (desthi, operands[2]);
23695 if (src1 != dest)
23696 emit_move_insn (destlo, operands[1]);
23700 /* Return the number (counting from 0) of
23701 the least significant set bit in MASK. */
23703 inline static int
23704 number_of_first_bit_set (unsigned mask)
23706 return ctz_hwi (mask);
23709 /* Like emit_multi_reg_push, but allowing for a different set of
23710 registers to be described as saved. MASK is the set of registers
23711 to be saved; REAL_REGS is the set of registers to be described as
23712 saved. If REAL_REGS is 0, only describe the stack adjustment. */
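/* For example (illustrative), when the prologue has first copied r8 and r9
   into r4 and r5, MASK would name {r4, r5} (what the PUSH actually stores)
   while REAL_REGS would name {r8, r9} (what the unwind information should
   record as saved).  */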
23714 static rtx_insn *
23715 thumb1_emit_multi_reg_push (unsigned long mask, unsigned long real_regs)
23717 unsigned long regno;
23718 rtx par[10], tmp, reg;
23719 rtx_insn *insn;
23720 int i, j;
23722 /* Build the parallel of the registers actually being stored. */
23723 for (i = 0; mask; ++i, mask &= mask - 1)
23725 regno = ctz_hwi (mask);
23726 reg = gen_rtx_REG (SImode, regno);
23728 if (i == 0)
23729 tmp = gen_rtx_UNSPEC (BLKmode, gen_rtvec (1, reg), UNSPEC_PUSH_MULT);
23730 else
23731 tmp = gen_rtx_USE (VOIDmode, reg);
23733 par[i] = tmp;
23736 tmp = plus_constant (Pmode, stack_pointer_rtx, -4 * i);
23737 tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
23738 tmp = gen_frame_mem (BLKmode, tmp);
23739 tmp = gen_rtx_SET (tmp, par[0]);
23740 par[0] = tmp;
23742 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (i, par));
23743 insn = emit_insn (tmp);
23745 /* Always build the stack adjustment note for unwind info. */
23746 tmp = plus_constant (Pmode, stack_pointer_rtx, -4 * i);
23747 tmp = gen_rtx_SET (stack_pointer_rtx, tmp);
23748 par[0] = tmp;
23750 /* Build the parallel of the registers recorded as saved for unwind. */
23751 for (j = 0; real_regs; ++j, real_regs &= real_regs - 1)
23753 regno = ctz_hwi (real_regs);
23754 reg = gen_rtx_REG (SImode, regno);
23756 tmp = plus_constant (Pmode, stack_pointer_rtx, j * 4);
23757 tmp = gen_frame_mem (SImode, tmp);
23758 tmp = gen_rtx_SET (tmp, reg);
23759 RTX_FRAME_RELATED_P (tmp) = 1;
23760 par[j + 1] = tmp;
23763 if (j == 0)
23764 tmp = par[0];
23765 else
23767 RTX_FRAME_RELATED_P (par[0]) = 1;
23768 tmp = gen_rtx_SEQUENCE (VOIDmode, gen_rtvec_v (j + 1, par));
23771 add_reg_note (insn, REG_FRAME_RELATED_EXPR, tmp);
23773 return insn;
23776 /* Emit code to push or pop registers to or from the stack. F is the
23777 assembly file. MASK is the registers to pop. */
23778 static void
23779 thumb_pop (FILE *f, unsigned long mask)
23781 int regno;
23782 int lo_mask = mask & 0xFF;
23783 int pushed_words = 0;
23785 gcc_assert (mask);
23787 if (lo_mask == 0 && (mask & (1 << PC_REGNUM)))
23789 /* Special case. Do not generate a POP PC statement here, do it in
23790 thumb_exit() */
23791 thumb_exit (f, -1);
23792 return;
23795 fprintf (f, "\tpop\t{");
23797 /* Look at the low registers first. */
23798 for (regno = 0; regno <= LAST_LO_REGNUM; regno++, lo_mask >>= 1)
23800 if (lo_mask & 1)
23802 asm_fprintf (f, "%r", regno);
23804 if ((lo_mask & ~1) != 0)
23805 fprintf (f, ", ");
23807 pushed_words++;
23811 if (mask & (1 << PC_REGNUM))
23813 /* Catch popping the PC. */
23814 if (TARGET_INTERWORK || TARGET_BACKTRACE || crtl->calls_eh_return
23815 || IS_CMSE_ENTRY (arm_current_func_type ()))
23817 /* The PC is never popped directly; instead
23818 it is popped into r3 and then BX is used. */
23819 fprintf (f, "}\n");
23821 thumb_exit (f, -1);
23823 return;
23825 else
23827 if (mask & 0xFF)
23828 fprintf (f, ", ");
23830 asm_fprintf (f, "%r", PC_REGNUM);
23834 fprintf (f, "}\n");
23837 /* Generate code to return from a thumb function.
23838 If 'reg_containing_return_addr' is -1, then the return address is
23839 actually on the stack, at the stack pointer. */
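/* For example (illustrative), when interworking with the return address on
   the stack and nothing else to restore, the emitted sequence is roughly
	pop	{r0}
	bx	r0
   using whichever argument register is free to be clobbered on return.  */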
23840 static void
23841 thumb_exit (FILE *f, int reg_containing_return_addr)
23843 unsigned regs_available_for_popping;
23844 unsigned regs_to_pop;
23845 int pops_needed;
23846 unsigned available;
23847 unsigned required;
23848 machine_mode mode;
23849 int size;
23850 int restore_a4 = FALSE;
23852 /* Compute the registers we need to pop. */
23853 regs_to_pop = 0;
23854 pops_needed = 0;
23856 if (reg_containing_return_addr == -1)
23858 regs_to_pop |= 1 << LR_REGNUM;
23859 ++pops_needed;
23862 if (TARGET_BACKTRACE)
23864 /* Restore the (ARM) frame pointer and stack pointer. */
23865 regs_to_pop |= (1 << ARM_HARD_FRAME_POINTER_REGNUM) | (1 << SP_REGNUM);
23866 pops_needed += 2;
23869 /* If there is nothing to pop then just emit the BX instruction and
23870 return. */
23871 if (pops_needed == 0)
23873 if (crtl->calls_eh_return)
23874 asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);
23876 if (IS_CMSE_ENTRY (arm_current_func_type ()))
23878 asm_fprintf (f, "\tmsr\tAPSR_nzcvq, %r\n",
23879 reg_containing_return_addr);
23880 asm_fprintf (f, "\tbxns\t%r\n", reg_containing_return_addr);
23882 else
23883 asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
23884 return;
23886 /* Otherwise if we are not supporting interworking and we have not created
23887 a backtrace structure and the function was not entered in ARM mode then
23888 just pop the return address straight into the PC. */
23889 else if (!TARGET_INTERWORK
23890 && !TARGET_BACKTRACE
23891 && !is_called_in_ARM_mode (current_function_decl)
23892 && !crtl->calls_eh_return
23893 && !IS_CMSE_ENTRY (arm_current_func_type ()))
23895 asm_fprintf (f, "\tpop\t{%r}\n", PC_REGNUM);
23896 return;
23899 /* Find out how many of the (return) argument registers we can corrupt. */
23900 regs_available_for_popping = 0;
23902 /* If returning via __builtin_eh_return, the bottom three registers
23903 all contain information needed for the return. */
23904 if (crtl->calls_eh_return)
23905 size = 12;
23906 else
23908 /* If possible, deduce the registers used from the function's
23909 return value. This is more reliable than examining
23910 df_regs_ever_live_p () because that will be set if the register is
23911 ever used in the function, not just if the register is used
23912 to hold a return value. */
23914 if (crtl->return_rtx != 0)
23915 mode = GET_MODE (crtl->return_rtx);
23916 else
23917 mode = DECL_MODE (DECL_RESULT (current_function_decl));
23919 size = GET_MODE_SIZE (mode);
23921 if (size == 0)
23923 /* In a void function we can use any argument register.
23924 In a function that returns a structure on the stack
23925 we can use the second and third argument registers. */
23926 if (mode == VOIDmode)
23927 regs_available_for_popping =
23928 (1 << ARG_REGISTER (1))
23929 | (1 << ARG_REGISTER (2))
23930 | (1 << ARG_REGISTER (3));
23931 else
23932 regs_available_for_popping =
23933 (1 << ARG_REGISTER (2))
23934 | (1 << ARG_REGISTER (3));
23936 else if (size <= 4)
23937 regs_available_for_popping =
23938 (1 << ARG_REGISTER (2))
23939 | (1 << ARG_REGISTER (3));
23940 else if (size <= 8)
23941 regs_available_for_popping =
23942 (1 << ARG_REGISTER (3));
23945 /* Match registers to be popped with registers into which we pop them. */
23946 for (available = regs_available_for_popping,
23947 required = regs_to_pop;
23948 required != 0 && available != 0;
23949 available &= ~(available & - available),
23950 required &= ~(required & - required))
23951 -- pops_needed;
23953 /* If we have any popping registers left over, remove them. */
23954 if (available > 0)
23955 regs_available_for_popping &= ~available;
23957 /* Otherwise if we need another popping register we can use
23958 the fourth argument register. */
23959 else if (pops_needed)
23961 /* If we have not found any free argument registers and
23962 reg a4 contains the return address, we must move it. */
23963 if (regs_available_for_popping == 0
23964 && reg_containing_return_addr == LAST_ARG_REGNUM)
23966 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
23967 reg_containing_return_addr = LR_REGNUM;
23969 else if (size > 12)
23971 /* Register a4 is being used to hold part of the return value,
23972 but we have dire need of a free, low register. */
23973 restore_a4 = TRUE;
23975 asm_fprintf (f, "\tmov\t%r, %r\n",IP_REGNUM, LAST_ARG_REGNUM);
23978 if (reg_containing_return_addr != LAST_ARG_REGNUM)
23980 /* The fourth argument register is available. */
23981 regs_available_for_popping |= 1 << LAST_ARG_REGNUM;
23983 --pops_needed;
23987 /* Pop as many registers as we can. */
23988 thumb_pop (f, regs_available_for_popping);
23990 /* Process the registers we popped. */
23991 if (reg_containing_return_addr == -1)
23993 /* The return address was popped into the lowest numbered register. */
23994 regs_to_pop &= ~(1 << LR_REGNUM);
23996 reg_containing_return_addr =
23997 number_of_first_bit_set (regs_available_for_popping);
23999 /* Remove this register for the mask of available registers, so that
24000 the return address will not be corrupted by further pops. */
24001 regs_available_for_popping &= ~(1 << reg_containing_return_addr);
24004 /* If we popped other registers then handle them here. */
24005 if (regs_available_for_popping)
24007 int frame_pointer;
24009 /* Work out which register currently contains the frame pointer. */
24010 frame_pointer = number_of_first_bit_set (regs_available_for_popping);
24012 /* Move it into the correct place. */
24013 asm_fprintf (f, "\tmov\t%r, %r\n",
24014 ARM_HARD_FRAME_POINTER_REGNUM, frame_pointer);
24016 /* (Temporarily) remove it from the mask of popped registers. */
24017 regs_available_for_popping &= ~(1 << frame_pointer);
24018 regs_to_pop &= ~(1 << ARM_HARD_FRAME_POINTER_REGNUM);
24020 if (regs_available_for_popping)
24022 int stack_pointer;
24024 /* We popped the stack pointer as well,
24025 find the register that contains it. */
24026 stack_pointer = number_of_first_bit_set (regs_available_for_popping);
24028 /* Move it into the stack register. */
24029 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, stack_pointer);
24031 /* At this point we have popped all necessary registers, so
24032 do not worry about restoring regs_available_for_popping
24033 to its correct value:
24035 assert (pops_needed == 0)
24036 assert (regs_available_for_popping == (1 << frame_pointer))
24037 assert (regs_to_pop == (1 << STACK_POINTER)) */
24039 else
24041 /* Since we have just moved the popped value into the frame
24042 pointer, the popping register is available for reuse, and
24043 we know that we still have the stack pointer left to pop. */
24044 regs_available_for_popping |= (1 << frame_pointer);
24048 /* If we still have registers left on the stack, but we no longer have
24049 any registers into which we can pop them, then we must move the return
24050 address into the link register and make available the register that
24051 contained it. */
24052 if (regs_available_for_popping == 0 && pops_needed > 0)
24054 regs_available_for_popping |= 1 << reg_containing_return_addr;
24056 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM,
24057 reg_containing_return_addr);
24059 reg_containing_return_addr = LR_REGNUM;
24062 /* If we have registers left on the stack then pop some more.
24063 We know that at most we will want to pop FP and SP. */
24064 if (pops_needed > 0)
24066 int popped_into;
24067 int move_to;
24069 thumb_pop (f, regs_available_for_popping);
24071 /* We have popped either FP or SP.
24072 Move whichever one it is into the correct register. */
24073 popped_into = number_of_first_bit_set (regs_available_for_popping);
24074 move_to = number_of_first_bit_set (regs_to_pop);
24076 asm_fprintf (f, "\tmov\t%r, %r\n", move_to, popped_into);
24078 regs_to_pop &= ~(1 << move_to);
24080 --pops_needed;
24083 /* If we still have not popped everything then we must have only
24084 had one register available to us and we are now popping the SP. */
24085 if (pops_needed > 0)
24087 int popped_into;
24089 thumb_pop (f, regs_available_for_popping);
24091 popped_into = number_of_first_bit_set (regs_available_for_popping);
24093 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, popped_into);
24095 /* assert (regs_to_pop == (1 << STACK_POINTER))
24096 assert (pops_needed == 1) */
24100 /* If necessary restore the a4 register. */
24101 if (restore_a4)
24103 if (reg_containing_return_addr != LR_REGNUM)
24105 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
24106 reg_containing_return_addr = LR_REGNUM;
24109 asm_fprintf (f, "\tmov\t%r, %r\n", LAST_ARG_REGNUM, IP_REGNUM);
24112 if (crtl->calls_eh_return)
24113 asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);
24115 /* Return to caller. */
24116 if (IS_CMSE_ENTRY (arm_current_func_type ()))
24118 /* This is for the cases where LR is not being used to contain the return
24119 address. It may therefore contain information that we might not want
24120 to leak, hence it must be cleared. The value in R0 will never be a
24121 secret at this point, so it is safe to use it, see the clearing code
24122 in 'cmse_nonsecure_entry_clear_before_return'. */
24123 if (reg_containing_return_addr != LR_REGNUM)
24124 asm_fprintf (f, "\tmov\tlr, r0\n");
24126 asm_fprintf (f, "\tmsr\tAPSR_nzcvq, %r\n", reg_containing_return_addr);
24127 asm_fprintf (f, "\tbxns\t%r\n", reg_containing_return_addr);
24129 else
24130 asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
24133 /* Scan INSN just before assembler is output for it.
24134 For Thumb-1, we track the status of the condition codes; this
24135 information is used in the cbranchsi4_insn pattern. */
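/* For example (illustrative), after
	adds	r0, r0, r1
   the flags already describe a comparison of the result against zero, so a
   following cbranchsi4_insn that tests r0 against #0 may omit its explicit
   compare.  */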
24136 void
24137 thumb1_final_prescan_insn (rtx_insn *insn)
24139 if (flag_print_asm_name)
24140 asm_fprintf (asm_out_file, "%@ 0x%04x\n",
24141 INSN_ADDRESSES (INSN_UID (insn)));
24142 /* Don't overwrite the previous setter when we get to a cbranch. */
24143 if (INSN_CODE (insn) != CODE_FOR_cbranchsi4_insn)
24145 enum attr_conds conds;
24147 if (cfun->machine->thumb1_cc_insn)
24149 if (modified_in_p (cfun->machine->thumb1_cc_op0, insn)
24150 || modified_in_p (cfun->machine->thumb1_cc_op1, insn))
24151 CC_STATUS_INIT;
24153 conds = get_attr_conds (insn);
24154 if (conds == CONDS_SET)
24156 rtx set = single_set (insn);
24157 cfun->machine->thumb1_cc_insn = insn;
24158 cfun->machine->thumb1_cc_op0 = SET_DEST (set);
24159 cfun->machine->thumb1_cc_op1 = const0_rtx;
24160 cfun->machine->thumb1_cc_mode = CC_NOOVmode;
24161 if (INSN_CODE (insn) == CODE_FOR_thumb1_subsi3_insn)
24163 rtx src1 = XEXP (SET_SRC (set), 1);
24164 if (src1 == const0_rtx)
24165 cfun->machine->thumb1_cc_mode = CCmode;
24167 else if (REG_P (SET_DEST (set)) && REG_P (SET_SRC (set)))
24169 /* Record the src register operand instead of dest because
24170 cprop_hardreg pass propagates src. */
24171 cfun->machine->thumb1_cc_op0 = SET_SRC (set);
24174 else if (conds != CONDS_NOCOND)
24175 cfun->machine->thumb1_cc_insn = NULL_RTX;
24178 /* Check if unexpected far jump is used. */
24179 if (cfun->machine->lr_save_eliminated
24180 && get_attr_far_jump (insn) == FAR_JUMP_YES)
24181 internal_error("Unexpected thumb1 far jump");
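/* Return nonzero if VAL is an 8-bit constant shifted left by 0..24 bits
   (e.g. 0x1fe or 0x00ff0000); such constants can typically be built in
   Thumb-1 with a MOV of the byte followed by a left shift.  */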
24185 thumb_shiftable_const (unsigned HOST_WIDE_INT val)
24187 unsigned HOST_WIDE_INT mask = 0xff;
24188 int i;
24190 val = val & (unsigned HOST_WIDE_INT)0xffffffffu;
24191 if (val == 0) /* XXX */
24192 return 0;
24194 for (i = 0; i < 25; i++)
24195 if ((val & (mask << i)) == val)
24196 return 1;
24198 return 0;
24201 /* Returns nonzero if the current function contains,
24202 or might contain a far jump. */
24203 static int
24204 thumb_far_jump_used_p (void)
24206 rtx_insn *insn;
24207 bool far_jump = false;
24208 unsigned int func_size = 0;
24210 /* If we have already decided that far jumps may be used,
24211 do not bother checking again, and always return true even if
24212 it turns out that they are not being used. Once we have made
24213 the decision that far jumps are present (and that hence the link
24214 register will be pushed onto the stack) we cannot go back on it. */
24215 if (cfun->machine->far_jump_used)
24216 return 1;
24218 /* If this function is not being called from the prologue/epilogue
24219 generation code then it must be being called from the
24220 INITIAL_ELIMINATION_OFFSET macro. */
24221 if (!(ARM_DOUBLEWORD_ALIGN || reload_completed))
24223 /* In this case we know that we are being asked about the elimination
24224 of the arg pointer register. If that register is not being used,
24225 then there are no arguments on the stack, and we do not have to
24226 worry that a far jump might force the prologue to push the link
24227 register, changing the stack offsets. In this case we can just
24228 return false, since the presence of far jumps in the function will
24229 not affect stack offsets.
24231 If the arg pointer is live (or if it was live, but has now been
24232 eliminated and so set to dead) then we do have to test to see if
24233 the function might contain a far jump. This test can lead to some
24234 false negatives, since before reload is completed the length of
24235 branch instructions is not known, so gcc defaults to returning their
24236 longest length, which in turn sets the far jump attribute to true.
24238 A false negative will not result in bad code being generated, but it
24239 will result in a needless push and pop of the link register. We
24240 hope that this does not occur too often.
24242 If we need doubleword stack alignment this could affect the other
24243 elimination offsets so we can't risk getting it wrong. */
24244 if (df_regs_ever_live_p (ARG_POINTER_REGNUM))
24245 cfun->machine->arg_pointer_live = 1;
24246 else if (!cfun->machine->arg_pointer_live)
24247 return 0;
24250 /* We should not change far_jump_used during or after reload, as there is
24251 no chance to change stack frame layout. */
24252 if (reload_in_progress || reload_completed)
24253 return 0;
24255 /* Check to see if the function contains a branch
24256 insn with the far jump attribute set. */
24257 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
24259 if (JUMP_P (insn) && get_attr_far_jump (insn) == FAR_JUMP_YES)
24261 far_jump = true;
24263 func_size += get_attr_length (insn);
24266 /* The far_jump attribute will always be true for thumb1 before the
24267 shorten_branch pass, so checking it before shorten_branch isn't
24268 very useful.
24270 The following heuristic tries to estimate more accurately whether a far
24271 jump will actually be used. It is very conservative, as there is
24272 no chance to roll back a decision not to use far jumps.
24274 Thumb1 long branch offset is -2048 to 2046. The worst case is each
24275 2-byte insn is associated with a 4 byte constant pool. Using
24276 function size 2048/3 as the threshold is conservative enough. */
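/* Worked through (illustrative): FUNC_SIZE only counts instruction bytes,
   so in the worst case the body plus its constant pools spans about
   3 * FUNC_SIZE bytes (2 bytes of insn plus 4 bytes of pool per insn).
   Requiring 3 * FUNC_SIZE < 2048 therefore keeps every branch within the
   -2048..2046 range.  */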
24277 if (far_jump)
24279 if ((func_size * 3) >= 2048)
24281 /* Record the fact that we have decided that
24282 the function does use far jumps. */
24283 cfun->machine->far_jump_used = 1;
24284 return 1;
24288 return 0;
24291 /* Return nonzero if FUNC must be entered in ARM mode. */
24292 static bool
24293 is_called_in_ARM_mode (tree func)
24295 gcc_assert (TREE_CODE (func) == FUNCTION_DECL);
24297 /* Ignore the problem about functions whose address is taken. */
24298 if (TARGET_CALLEE_INTERWORKING && TREE_PUBLIC (func))
24299 return true;
24301 #ifdef ARM_PE
24302 return lookup_attribute ("interfacearm", DECL_ATTRIBUTES (func)) != NULL_TREE;
24303 #else
24304 return false;
24305 #endif
24308 /* Given the stack offsets and register mask in OFFSETS, decide how
24309 many additional registers to push instead of subtracting a constant
24310 from SP. For epilogues the principle is the same except we use pop.
24311 FOR_PROLOGUE indicates which we're generating. */
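/* For example (illustrative), when optimizing for size with an 8-byte local
   frame and only {r4, lr} to save, returning 2 here lets the prologue emit
	push	{r0, r1, r4, lr}
   which allocates the frame as a side effect, so no separate
   "sub sp, #8" is needed.  */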
24312 static int
24313 thumb1_extra_regs_pushed (arm_stack_offsets *offsets, bool for_prologue)
24315 HOST_WIDE_INT amount;
24316 unsigned long live_regs_mask = offsets->saved_regs_mask;
24317 /* Extract a mask of the ones we can give to the Thumb's push/pop
24318 instruction. */
24319 unsigned long l_mask = live_regs_mask & (for_prologue ? 0x40ff : 0xff);
24320 /* Then count how many other high registers will need to be pushed. */
24321 unsigned long high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
24322 int n_free, reg_base, size;
24324 if (!for_prologue && frame_pointer_needed)
24325 amount = offsets->locals_base - offsets->saved_regs;
24326 else
24327 amount = offsets->outgoing_args - offsets->saved_regs;
24329 /* If the stack frame size is 512 exactly, we can save one load
24330 instruction, which should make this a win even when optimizing
24331 for speed. */
24332 if (!optimize_size && amount != 512)
24333 return 0;
24335 /* Can't do this if there are high registers to push. */
24336 if (high_regs_pushed != 0)
24337 return 0;
24339 /* Shouldn't do it in the prologue if no registers would normally
24340 be pushed at all. In the epilogue, also allow it if we'll have
24341 a pop insn for the PC. */
24342 if (l_mask == 0
24343 && (for_prologue
24344 || TARGET_BACKTRACE
24345 || (live_regs_mask & 1 << LR_REGNUM) == 0
24346 || TARGET_INTERWORK
24347 || crtl->args.pretend_args_size != 0))
24348 return 0;
24350 /* Don't do this if thumb_expand_prologue wants to emit instructions
24351 between the push and the stack frame allocation. */
24352 if (for_prologue
24353 && ((flag_pic && arm_pic_register != INVALID_REGNUM)
24354 || (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)))
24355 return 0;
24357 reg_base = 0;
24358 n_free = 0;
24359 if (!for_prologue)
24361 size = arm_size_return_regs ();
24362 reg_base = ARM_NUM_INTS (size);
24363 live_regs_mask >>= reg_base;
24366 while (reg_base + n_free < 8 && !(live_regs_mask & 1)
24367 && (for_prologue || call_used_regs[reg_base + n_free]))
24369 live_regs_mask >>= 1;
24370 n_free++;
24373 if (n_free == 0)
24374 return 0;
24375 gcc_assert (amount / 4 * 4 == amount);
24377 if (amount >= 512 && (amount - n_free * 4) < 512)
24378 return (amount - 508) / 4;
24379 if (amount <= n_free * 4)
24380 return amount / 4;
24381 return 0;
24384 /* The bits which aren't usefully expanded as rtl. */
24385 const char *
24386 thumb1_unexpanded_epilogue (void)
24388 arm_stack_offsets *offsets;
24389 int regno;
24390 unsigned long live_regs_mask = 0;
24391 int high_regs_pushed = 0;
24392 int extra_pop;
24393 int had_to_push_lr;
24394 int size;
24396 if (cfun->machine->return_used_this_function != 0)
24397 return "";
24399 if (IS_NAKED (arm_current_func_type ()))
24400 return "";
24402 offsets = arm_get_frame_offsets ();
24403 live_regs_mask = offsets->saved_regs_mask;
24404 high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
24406 /* If possible, deduce the registers used from the function's return value.
24407 This is more reliable than examining df_regs_ever_live_p () because that
24408 will be set if the register is ever used in the function, not just if
24409 the register is used to hold a return value. */
24410 size = arm_size_return_regs ();
24412 extra_pop = thumb1_extra_regs_pushed (offsets, false);
24413 if (extra_pop > 0)
24415 unsigned long extra_mask = (1 << extra_pop) - 1;
24416 live_regs_mask |= extra_mask << ARM_NUM_INTS (size);
24419 /* The prolog may have pushed some high registers to use as
24420 work registers. e.g. the testsuite file:
24421 gcc/testsuite/gcc/gcc.c-torture/execute/complex-2.c
24422 compiles to produce:
24423 push {r4, r5, r6, r7, lr}
24424 mov r7, r9
24425 mov r6, r8
24426 push {r6, r7}
24427 as part of the prolog. We have to undo that pushing here. */
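/* For the example above the undo sequence is roughly (illustrative, assuming
   the return value needs at most r0/r1):
	pop	{r2, r3}
	mov	r8, r2
	mov	r9, r3
   i.e. pop into whatever low registers are free and move the values back up
   into the high registers.  */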
24429 if (high_regs_pushed)
24431 unsigned long mask = live_regs_mask & 0xff;
24432 int next_hi_reg;
24434 /* The available low registers depend on the size of the value we are
24435 returning. */
24436 if (size <= 12)
24437 mask |= 1 << 3;
24438 if (size <= 8)
24439 mask |= 1 << 2;
24441 if (mask == 0)
24442 /* Oh dear! We have no low registers into which we can pop
24443 high registers! */
24444 internal_error
24445 ("no low registers available for popping high registers");
24447 for (next_hi_reg = 8; next_hi_reg < 13; next_hi_reg++)
24448 if (live_regs_mask & (1 << next_hi_reg))
24449 break;
24451 while (high_regs_pushed)
24453 /* Find lo register(s) into which the high register(s) can
24454 be popped. */
24455 for (regno = 0; regno <= LAST_LO_REGNUM; regno++)
24457 if (mask & (1 << regno))
24458 high_regs_pushed--;
24459 if (high_regs_pushed == 0)
24460 break;
24463 mask &= (2 << regno) - 1; /* A noop if regno == 8 */
24465 /* Pop the values into the low register(s). */
24466 thumb_pop (asm_out_file, mask);
24468 /* Move the value(s) into the high registers. */
24469 for (regno = 0; regno <= LAST_LO_REGNUM; regno++)
24471 if (mask & (1 << regno))
24473 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", next_hi_reg,
24474 regno);
24476 for (next_hi_reg++; next_hi_reg < 13; next_hi_reg++)
24477 if (live_regs_mask & (1 << next_hi_reg))
24478 break;
24482 live_regs_mask &= ~0x0f00;
24485 had_to_push_lr = (live_regs_mask & (1 << LR_REGNUM)) != 0;
24486 live_regs_mask &= 0xff;
24488 if (crtl->args.pretend_args_size == 0 || TARGET_BACKTRACE)
24490 /* Pop the return address into the PC. */
24491 if (had_to_push_lr)
24492 live_regs_mask |= 1 << PC_REGNUM;
24494 /* Either no argument registers were pushed or a backtrace
24495 structure was created which includes an adjusted stack
24496 pointer, so just pop everything. */
24497 if (live_regs_mask)
24498 thumb_pop (asm_out_file, live_regs_mask);
24500 /* We have either just popped the return address into the
24501 PC or it was kept in LR for the entire function.
24502 Note that thumb_pop has already called thumb_exit if the
24503 PC was in the list. */
24504 if (!had_to_push_lr)
24505 thumb_exit (asm_out_file, LR_REGNUM);
24507 else
24509 /* Pop everything but the return address. */
24510 if (live_regs_mask)
24511 thumb_pop (asm_out_file, live_regs_mask);
24513 if (had_to_push_lr)
24515 if (size > 12)
24517 /* We have no free low regs, so save one. */
24518 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", IP_REGNUM,
24519 LAST_ARG_REGNUM);
24522 /* Get the return address into a temporary register. */
24523 thumb_pop (asm_out_file, 1 << LAST_ARG_REGNUM);
24525 if (size > 12)
24527 /* Move the return address to lr. */
24528 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LR_REGNUM,
24529 LAST_ARG_REGNUM);
24530 /* Restore the low register. */
24531 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LAST_ARG_REGNUM,
24532 IP_REGNUM);
24533 regno = LR_REGNUM;
24535 else
24536 regno = LAST_ARG_REGNUM;
24538 else
24539 regno = LR_REGNUM;
24541 /* Remove the argument registers that were pushed onto the stack. */
24542 asm_fprintf (asm_out_file, "\tadd\t%r, %r, #%d\n",
24543 SP_REGNUM, SP_REGNUM,
24544 crtl->args.pretend_args_size);
24546 thumb_exit (asm_out_file, regno);
24549 return "";
24552 /* Functions to save and restore machine-specific function data. */
24553 static struct machine_function *
24554 arm_init_machine_status (void)
24556 struct machine_function *machine;
24557 machine = ggc_cleared_alloc<machine_function> ();
24559 #if ARM_FT_UNKNOWN != 0
24560 machine->func_type = ARM_FT_UNKNOWN;
24561 #endif
24562 return machine;
24565 /* Return an RTX indicating where the return address to the
24566 calling function can be found. */
24568 arm_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
24570 if (count != 0)
24571 return NULL_RTX;
24573 return get_hard_reg_initial_val (Pmode, LR_REGNUM);
24576 /* Do anything needed before RTL is emitted for each function. */
24577 void
24578 arm_init_expanders (void)
24580 /* Arrange to initialize and mark the machine per-function status. */
24581 init_machine_status = arm_init_machine_status;
24583 /* This is to stop the combine pass optimizing away the alignment
24584 adjustment of va_arg. */
24585 /* ??? It is claimed that this should not be necessary. */
24586 if (cfun)
24587 mark_reg_pointer (arg_pointer_rtx, PARM_BOUNDARY);
24590 /* Check that FUNC is called with a different mode. */
24592 bool
24593 arm_change_mode_p (tree func)
24595 if (TREE_CODE (func) != FUNCTION_DECL)
24596 return false;
24598 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (func);
24600 if (!callee_tree)
24601 callee_tree = target_option_default_node;
24603 struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);
24604 int flags = callee_opts->x_target_flags;
24606 return (TARGET_THUMB_P (flags) != TARGET_THUMB);
24609 /* Like arm_compute_initial_elimination offset. Simpler because there
24610 isn't an ABI specified frame pointer for Thumb. Instead, we set it
24611 to point at the base of the local variables after static stack
24612 space for a function has been allocated. */
24614 HOST_WIDE_INT
24615 thumb_compute_initial_elimination_offset (unsigned int from, unsigned int to)
24617 arm_stack_offsets *offsets;
24619 offsets = arm_get_frame_offsets ();
24621 switch (from)
24623 case ARG_POINTER_REGNUM:
24624 switch (to)
24626 case STACK_POINTER_REGNUM:
24627 return offsets->outgoing_args - offsets->saved_args;
24629 case FRAME_POINTER_REGNUM:
24630 return offsets->soft_frame - offsets->saved_args;
24632 case ARM_HARD_FRAME_POINTER_REGNUM:
24633 return offsets->saved_regs - offsets->saved_args;
24635 case THUMB_HARD_FRAME_POINTER_REGNUM:
24636 return offsets->locals_base - offsets->saved_args;
24638 default:
24639 gcc_unreachable ();
24641 break;
24643 case FRAME_POINTER_REGNUM:
24644 switch (to)
24646 case STACK_POINTER_REGNUM:
24647 return offsets->outgoing_args - offsets->soft_frame;
24649 case ARM_HARD_FRAME_POINTER_REGNUM:
24650 return offsets->saved_regs - offsets->soft_frame;
24652 case THUMB_HARD_FRAME_POINTER_REGNUM:
24653 return offsets->locals_base - offsets->soft_frame;
24655 default:
24656 gcc_unreachable ();
24658 break;
24660 default:
24661 gcc_unreachable ();
24665 /* Generate the function's prologue. */
24667 void
24668 thumb1_expand_prologue (void)
24670 rtx_insn *insn;
24672 HOST_WIDE_INT amount;
24673 HOST_WIDE_INT size;
24674 arm_stack_offsets *offsets;
24675 unsigned long func_type;
24676 int regno;
24677 unsigned long live_regs_mask;
24678 unsigned long l_mask;
24679 unsigned high_regs_pushed = 0;
24680 bool lr_needs_saving;
24682 func_type = arm_current_func_type ();
24684 /* Naked functions don't have prologues. */
24685 if (IS_NAKED (func_type))
24687 if (flag_stack_usage_info)
24688 current_function_static_stack_size = 0;
24689 return;
24692 if (IS_INTERRUPT (func_type))
24694 error ("interrupt Service Routines cannot be coded in Thumb mode");
24695 return;
24698 if (is_called_in_ARM_mode (current_function_decl))
24699 emit_insn (gen_prologue_thumb1_interwork ());
24701 offsets = arm_get_frame_offsets ();
24702 live_regs_mask = offsets->saved_regs_mask;
24703 lr_needs_saving = live_regs_mask & (1 << LR_REGNUM);
24705 /* Extract a mask of the ones we can give to the Thumb's push instruction. */
24706 l_mask = live_regs_mask & 0x40ff;
24707 /* Then count how many other high registers will need to be pushed. */
24708 high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
24710 if (crtl->args.pretend_args_size)
24712 rtx x = GEN_INT (-crtl->args.pretend_args_size);
24714 if (cfun->machine->uses_anonymous_args)
24716 int num_pushes = ARM_NUM_INTS (crtl->args.pretend_args_size);
24717 unsigned long mask;
24719 mask = 1ul << (LAST_ARG_REGNUM + 1);
24720 mask -= 1ul << (LAST_ARG_REGNUM + 1 - num_pushes);
24722 insn = thumb1_emit_multi_reg_push (mask, 0);
24724 else
24726 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
24727 stack_pointer_rtx, x));
24729 RTX_FRAME_RELATED_P (insn) = 1;
24732 if (TARGET_BACKTRACE)
24734 HOST_WIDE_INT offset = 0;
24735 unsigned work_register;
24736 rtx work_reg, x, arm_hfp_rtx;
24738 /* We have been asked to create a stack backtrace structure.
24739 The code looks like this:
24741 0 .align 2
24742 0 func:
24743 0 sub SP, #16 Reserve space for 4 registers.
24744 2 push {R7} Push low registers.
24745 4 add R7, SP, #20 Get the stack pointer before the push.
24746 6 str R7, [SP, #8] Store the stack pointer
24747 (before reserving the space).
24748 8 mov R7, PC Get hold of the start of this code + 12.
24749 10 str R7, [SP, #16] Store it.
24750 12 mov R7, FP Get hold of the current frame pointer.
24751 14 str R7, [SP, #4] Store it.
24752 16 mov R7, LR Get hold of the current return address.
24753 18 str R7, [SP, #12] Store it.
24754 20 add R7, SP, #16 Point at the start of the
24755 backtrace structure.
24756 22 mov FP, R7 Put this value into the frame pointer. */
24758 work_register = thumb_find_work_register (live_regs_mask);
24759 work_reg = gen_rtx_REG (SImode, work_register);
24760 arm_hfp_rtx = gen_rtx_REG (SImode, ARM_HARD_FRAME_POINTER_REGNUM);
24762 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
24763 stack_pointer_rtx, GEN_INT (-16)));
24764 RTX_FRAME_RELATED_P (insn) = 1;
24766 if (l_mask)
24768 insn = thumb1_emit_multi_reg_push (l_mask, l_mask);
24769 RTX_FRAME_RELATED_P (insn) = 1;
24770 lr_needs_saving = false;
24772 offset = bit_count (l_mask) * UNITS_PER_WORD;
24775 x = GEN_INT (offset + 16 + crtl->args.pretend_args_size);
24776 emit_insn (gen_addsi3 (work_reg, stack_pointer_rtx, x));
24778 x = plus_constant (Pmode, stack_pointer_rtx, offset + 4);
24779 x = gen_frame_mem (SImode, x);
24780 emit_move_insn (x, work_reg);
24782 /* Make sure that the instruction fetching the PC is in the right place
24783 to calculate "start of backtrace creation code + 12". */
24784 /* ??? The stores using the common WORK_REG ought to be enough to
24785 prevent the scheduler from doing anything weird. Failing that
24786 we could always move all of the following into an UNSPEC_VOLATILE. */
24787 if (l_mask)
24789 x = gen_rtx_REG (SImode, PC_REGNUM);
24790 emit_move_insn (work_reg, x);
24792 x = plus_constant (Pmode, stack_pointer_rtx, offset + 12);
24793 x = gen_frame_mem (SImode, x);
24794 emit_move_insn (x, work_reg);
24796 emit_move_insn (work_reg, arm_hfp_rtx);
24798 x = plus_constant (Pmode, stack_pointer_rtx, offset);
24799 x = gen_frame_mem (SImode, x);
24800 emit_move_insn (x, work_reg);
24802 else
24804 emit_move_insn (work_reg, arm_hfp_rtx);
24806 x = plus_constant (Pmode, stack_pointer_rtx, offset);
24807 x = gen_frame_mem (SImode, x);
24808 emit_move_insn (x, work_reg);
24810 x = gen_rtx_REG (SImode, PC_REGNUM);
24811 emit_move_insn (work_reg, x);
24813 x = plus_constant (Pmode, stack_pointer_rtx, offset + 12);
24814 x = gen_frame_mem (SImode, x);
24815 emit_move_insn (x, work_reg);
24818 x = gen_rtx_REG (SImode, LR_REGNUM);
24819 emit_move_insn (work_reg, x);
24821 x = plus_constant (Pmode, stack_pointer_rtx, offset + 8);
24822 x = gen_frame_mem (SImode, x);
24823 emit_move_insn (x, work_reg);
24825 x = GEN_INT (offset + 12);
24826 emit_insn (gen_addsi3 (work_reg, stack_pointer_rtx, x));
24828 emit_move_insn (arm_hfp_rtx, work_reg);
24830 /* Optimization: If we are not pushing any low registers but we are going
24831 to push some high registers then delay our first push. This will just
24832 be a push of LR and we can combine it with the push of the first high
24833 register. */
24834 else if ((l_mask & 0xff) != 0
24835 || (high_regs_pushed == 0 && lr_needs_saving))
24837 unsigned long mask = l_mask;
24838 mask |= (1 << thumb1_extra_regs_pushed (offsets, true)) - 1;
24839 insn = thumb1_emit_multi_reg_push (mask, mask);
24840 RTX_FRAME_RELATED_P (insn) = 1;
24841 lr_needs_saving = false;
24844 if (high_regs_pushed)
24846 unsigned pushable_regs;
24847 unsigned next_hi_reg;
24848 unsigned arg_regs_num = TARGET_AAPCS_BASED ? crtl->args.info.aapcs_ncrn
24849 : crtl->args.info.nregs;
24850 unsigned arg_regs_mask = (1 << arg_regs_num) - 1;
24852 for (next_hi_reg = 12; next_hi_reg > LAST_LO_REGNUM; next_hi_reg--)
24853 if (live_regs_mask & (1 << next_hi_reg))
24854 break;
24856 /* Here we need to mask out registers used for passing arguments
24857 even if they could otherwise be pushed: using them to stash the
24858 high registers could clobber argument values that are still live. */
24859 pushable_regs = l_mask & (~arg_regs_mask);
24860 if (lr_needs_saving)
24861 pushable_regs &= ~(1 << LR_REGNUM);
24863 if (pushable_regs == 0)
24864 pushable_regs = 1 << thumb_find_work_register (live_regs_mask);
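/* Note: the Thumb-1 PUSH instruction can only encode the low registers
   (and LR), so each live high register is first copied into one of the
   pushable low registers chosen above and then pushed from there.  */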
24866 while (high_regs_pushed > 0)
24868 unsigned long real_regs_mask = 0;
24869 unsigned long push_mask = 0;
24871 for (regno = LR_REGNUM; regno >= 0; regno --)
24873 if (pushable_regs & (1 << regno))
24875 emit_move_insn (gen_rtx_REG (SImode, regno),
24876 gen_rtx_REG (SImode, next_hi_reg));
24878 high_regs_pushed --;
24879 real_regs_mask |= (1 << next_hi_reg);
24880 push_mask |= (1 << regno);
24882 if (high_regs_pushed)
24884 for (next_hi_reg --; next_hi_reg > LAST_LO_REGNUM;
24885 next_hi_reg --)
24886 if (live_regs_mask & (1 << next_hi_reg))
24887 break;
24889 else
24890 break;
24894 /* If we had to find a work register and we have not yet
24895 saved the LR then add it to the list of regs to push. */
24896 if (lr_needs_saving)
24898 push_mask |= 1 << LR_REGNUM;
24899 real_regs_mask |= 1 << LR_REGNUM;
24900 lr_needs_saving = false;
24903 insn = thumb1_emit_multi_reg_push (push_mask, real_regs_mask);
24904 RTX_FRAME_RELATED_P (insn) = 1;
24908 /* Load the pic register before setting the frame pointer,
24909 so we can use r7 as a temporary work register. */
24910 if (flag_pic && arm_pic_register != INVALID_REGNUM)
24911 arm_load_pic_register (live_regs_mask);
24913 if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
24914 emit_move_insn (gen_rtx_REG (Pmode, ARM_HARD_FRAME_POINTER_REGNUM),
24915 stack_pointer_rtx);
24917 size = offsets->outgoing_args - offsets->saved_args;
24918 if (flag_stack_usage_info)
24919 current_function_static_stack_size = size;
24921 /* If we have a frame, then do stack checking. FIXME: not implemented. */
24922 if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK && size)
24923 sorry ("-fstack-check=specific for Thumb-1");
24925 amount = offsets->outgoing_args - offsets->saved_regs;
24926 amount -= 4 * thumb1_extra_regs_pushed (offsets, true);
24927 if (amount)
24929 if (amount < 512)
24931 insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
24932 GEN_INT (- amount)));
24933 RTX_FRAME_RELATED_P (insn) = 1;
24935 else
24937 rtx reg, dwarf;
24939 /* The stack decrement is too big for an immediate value in a single
24940 insn. In theory we could issue multiple subtracts, but after
24941 three of them it becomes more space efficient to place the full
24942 value in the constant pool and load into a register. (Also the
24943 ARM debugger really likes to see only one stack decrement per
24944 function). So instead we look for a scratch register into which
24945 we can load the decrement, and then we subtract this from the
24946 stack pointer. Unfortunately on the thumb the only available
24947 scratch registers are the argument registers, and we cannot use
24948 these as they may hold arguments to the function. Instead we
24949 attempt to locate a call preserved register which is used by this
24950 function. If we can find one, then we know that it will have
24951 been pushed at the start of the prologue and so we can corrupt
24952 it now. */
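/* Illustrative sketch only (the scratch register depends on
   LIVE_REGS_MASK): for a 1024-byte decrement with r4 live, the code
   emitted below is roughly
       ldr  r4, =-1024      @ loaded via the literal pool
       add  sp, sp, r4
   plus a REG_FRAME_RELATED_EXPR note describing the net SP change.  */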
24953 for (regno = LAST_ARG_REGNUM + 1; regno <= LAST_LO_REGNUM; regno++)
24954 if (live_regs_mask & (1 << regno))
24955 break;
24957 gcc_assert (regno <= LAST_LO_REGNUM);
24959 reg = gen_rtx_REG (SImode, regno);
24961 emit_insn (gen_movsi (reg, GEN_INT (- amount)));
24963 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
24964 stack_pointer_rtx, reg));
24966 dwarf = gen_rtx_SET (stack_pointer_rtx,
24967 plus_constant (Pmode, stack_pointer_rtx,
24968 -amount));
24969 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
24970 RTX_FRAME_RELATED_P (insn) = 1;
24974 if (frame_pointer_needed)
24975 thumb_set_frame_pointer (offsets);
24977 /* If we are profiling, make sure no instructions are scheduled before
24978 the call to mcount. Similarly if the user has requested no
24979 scheduling in the prolog. Similarly if we want non-call exceptions
24980 using the EABI unwinder, to prevent faulting instructions from being
24981 swapped with a stack adjustment. */
24982 if (crtl->profile || !TARGET_SCHED_PROLOG
24983 || (arm_except_unwind_info (&global_options) == UI_TARGET
24984 && cfun->can_throw_non_call_exceptions))
24985 emit_insn (gen_blockage ());
24987 cfun->machine->lr_save_eliminated = !thumb_force_lr_save ();
24988 if (live_regs_mask & 0xff)
24989 cfun->machine->lr_save_eliminated = 0;
24992 /* Clear caller saved registers not used to pass return values and leaked
24993 condition flags before exiting a cmse_nonsecure_entry function. */
24995 void
24996 cmse_nonsecure_entry_clear_before_return (void)
24998 uint64_t to_clear_mask[2];
24999 uint32_t padding_bits_to_clear = 0;
25000 uint32_t * padding_bits_to_clear_ptr = &padding_bits_to_clear;
25001 int regno, maxregno = IP_REGNUM;
25002 tree result_type;
25003 rtx result_rtl;
25005 to_clear_mask[0] = (1ULL << (NUM_ARG_REGS)) - 1;
25006 to_clear_mask[0] |= (1ULL << IP_REGNUM);
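/* to_clear_mask is a 128-bit register bitmap spread over two 64-bit
   words: bit (REGNO % 64) of word (REGNO / 64) marks register REGNO
   for clearing before the function returns.  */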
25008 /* If we are not dealing with -mfloat-abi=soft we will need to clear VFP
25009 registers. We also check that TARGET_HARD_FLOAT and !TARGET_THUMB1 hold
25010 to make sure the instructions used to clear them are present. */
25011 if (TARGET_HARD_FLOAT && !TARGET_THUMB1)
25013 uint64_t float_mask = (1ULL << (D7_VFP_REGNUM + 1)) - 1;
25014 maxregno = LAST_VFP_REGNUM;
25016 float_mask &= ~((1ULL << FIRST_VFP_REGNUM) - 1);
25017 to_clear_mask[0] |= float_mask;
25019 float_mask = (1ULL << (maxregno - 63)) - 1;
25020 to_clear_mask[1] = float_mask;
25022 /* Make sure we don't clear the two scratch registers used to clear the
25023 relevant FPSCR bits in output_return_instruction. */
25024 emit_use (gen_rtx_REG (SImode, IP_REGNUM));
25025 to_clear_mask[0] &= ~(1ULL << IP_REGNUM);
25026 emit_use (gen_rtx_REG (SImode, 4));
25027 to_clear_mask[0] &= ~(1ULL << 4);
25030 /* If the user has defined registers to be caller saved, these are no longer
25031 restored by the function before returning and must thus be cleared for
25032 security purposes. */
25033 for (regno = NUM_ARG_REGS; regno < LAST_VFP_REGNUM; regno++)
25035 /* We do not touch registers that can be used to pass arguments as per
25036 the AAPCS, since these should never be made callee-saved by user
25037 options. */
25038 if (IN_RANGE (regno, FIRST_VFP_REGNUM, D7_VFP_REGNUM))
25039 continue;
25040 if (IN_RANGE (regno, IP_REGNUM, PC_REGNUM))
25041 continue;
25042 if (call_used_regs[regno])
25043 to_clear_mask[regno / 64] |= (1ULL << (regno % 64));
25046 /* Make sure we do not clear the registers used to return the result in. */
25047 result_type = TREE_TYPE (DECL_RESULT (current_function_decl));
25048 if (!VOID_TYPE_P (result_type))
25050 result_rtl = arm_function_value (result_type, current_function_decl, 0);
25052 /* No need to check that we return in registers, because we don't
25053 support returning on stack yet. */
25054 to_clear_mask[0]
25055 &= ~compute_not_to_clear_mask (result_type, result_rtl, 0,
25056 padding_bits_to_clear_ptr);
25059 if (padding_bits_to_clear != 0)
25061 rtx reg_rtx;
25062 /* Padding bits to clear is not 0, so we know we are returning a
25063 composite type, which only uses r0. Make sure that r1-r3 are
25064 cleared too; we will use r1 as a scratch register. */
25065 gcc_assert ((to_clear_mask[0] & 0xe) == 0xe);
25067 reg_rtx = gen_rtx_REG (SImode, R1_REGNUM);
25069 /* Fill the lower half of the negated padding_bits_to_clear. */
25070 emit_move_insn (reg_rtx,
25071 GEN_INT ((((~padding_bits_to_clear) << 16u) >> 16u)));
25073 /* Also fill the top half of the negated padding_bits_to_clear. */
25074 if (((~padding_bits_to_clear) >> 16) > 0)
25075 emit_insn (gen_rtx_SET (gen_rtx_ZERO_EXTRACT (SImode, reg_rtx,
25076 GEN_INT (16),
25077 GEN_INT (16)),
25078 GEN_INT ((~padding_bits_to_clear) >> 16)));
25080 emit_insn (gen_andsi3 (gen_rtx_REG (SImode, R0_REGNUM),
25081 gen_rtx_REG (SImode, R0_REGNUM),
25082 reg_rtx));
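/* Worked example: if padding_bits_to_clear is 0x0000ff00, r1 is loaded
   with ~0x0000ff00 == 0xffff00ff and the AND above zeroes only the
   padding byte of the value returned in r0.  */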
25085 for (regno = R0_REGNUM; regno <= maxregno; regno++)
25087 if (!(to_clear_mask[regno / 64] & (1ULL << (regno % 64))))
25088 continue;
25090 if (IS_VFP_REGNUM (regno))
25092 /* If regno is an even vfp register and its successor is also to
25093 be cleared, use vmov. */
25094 if (TARGET_VFP_DOUBLE
25095 && VFP_REGNO_OK_FOR_DOUBLE (regno)
25096 && to_clear_mask[regno / 64] & (1ULL << ((regno % 64) + 1)))
25098 emit_move_insn (gen_rtx_REG (DFmode, regno),
25099 CONST1_RTX (DFmode));
25100 emit_use (gen_rtx_REG (DFmode, regno));
25101 regno++;
25103 else
25105 emit_move_insn (gen_rtx_REG (SFmode, regno),
25106 CONST1_RTX (SFmode));
25107 emit_use (gen_rtx_REG (SFmode, regno));
25110 else
25112 if (TARGET_THUMB1)
25114 if (regno == R0_REGNUM)
25115 emit_move_insn (gen_rtx_REG (SImode, regno),
25116 const0_rtx);
25117 else
25118 /* R0 has either been cleared above, or it holds a return
25119 value; either way it does not contain secret
25120 information. */
25121 emit_move_insn (gen_rtx_REG (SImode, regno),
25122 gen_rtx_REG (SImode, R0_REGNUM));
25123 emit_use (gen_rtx_REG (SImode, regno));
25125 else
25127 emit_move_insn (gen_rtx_REG (SImode, regno),
25128 gen_rtx_REG (SImode, LR_REGNUM));
25129 emit_use (gen_rtx_REG (SImode, regno));
25135 /* Generate pattern *pop_multiple_with_stack_update_and_return if a single
25136 POP instruction can be generated. LR should be replaced by PC. All
25137 the checks required are already done by USE_RETURN_INSN (). Hence,
25138 all we really need to check here is whether a single register or
25139 multiple registers are to be popped. */
25140 void
25141 thumb2_expand_return (bool simple_return)
25143 int i, num_regs;
25144 unsigned long saved_regs_mask;
25145 arm_stack_offsets *offsets;
25147 offsets = arm_get_frame_offsets ();
25148 saved_regs_mask = offsets->saved_regs_mask;
25150 for (i = 0, num_regs = 0; i <= LAST_ARM_REGNUM; i++)
25151 if (saved_regs_mask & (1 << i))
25152 num_regs++;
25154 if (!simple_return && saved_regs_mask)
25156 /* TODO: Verify that this path is never taken for cmse_nonsecure_entry
25157 functions or adapt code to handle according to ACLE. This path should
25158 not be reachable for cmse_nonsecure_entry functions though we prefer
25159 to assert it for now to ensure that future code changes do not silently
25160 change this behavior. */
25161 gcc_assert (!IS_CMSE_ENTRY (arm_current_func_type ()));
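/* When only one register was saved (normally just LR), it is popped
   straight into PC: the PARALLEL below combines the return with a
   post-incremented load from the stack, i.e. a single "pop {pc}".  */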
25162 if (num_regs == 1)
25164 rtx par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
25165 rtx reg = gen_rtx_REG (SImode, PC_REGNUM);
25166 rtx addr = gen_rtx_MEM (SImode,
25167 gen_rtx_POST_INC (SImode,
25168 stack_pointer_rtx));
25169 set_mem_alias_set (addr, get_frame_alias_set ());
25170 XVECEXP (par, 0, 0) = ret_rtx;
25171 XVECEXP (par, 0, 1) = gen_rtx_SET (reg, addr);
25172 RTX_FRAME_RELATED_P (XVECEXP (par, 0, 1)) = 1;
25173 emit_jump_insn (par);
25175 else
25177 saved_regs_mask &= ~ (1 << LR_REGNUM);
25178 saved_regs_mask |= (1 << PC_REGNUM);
25179 arm_emit_multi_reg_pop (saved_regs_mask);
25182 else
25184 if (IS_CMSE_ENTRY (arm_current_func_type ()))
25185 cmse_nonsecure_entry_clear_before_return ();
25186 emit_jump_insn (simple_return_rtx);
25190 void
25191 thumb1_expand_epilogue (void)
25193 HOST_WIDE_INT amount;
25194 arm_stack_offsets *offsets;
25195 int regno;
25197 /* Naked functions don't have prologues. */
25198 if (IS_NAKED (arm_current_func_type ()))
25199 return;
25201 offsets = arm_get_frame_offsets ();
25202 amount = offsets->outgoing_args - offsets->saved_regs;
25204 if (frame_pointer_needed)
25206 emit_insn (gen_movsi (stack_pointer_rtx, hard_frame_pointer_rtx));
25207 amount = offsets->locals_base - offsets->saved_regs;
25209 amount -= 4 * thumb1_extra_regs_pushed (offsets, false);
25211 gcc_assert (amount >= 0);
25212 if (amount)
25214 emit_insn (gen_blockage ());
25216 if (amount < 512)
25217 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
25218 GEN_INT (amount)));
25219 else
25221 /* r3 is always free in the epilogue. */
25222 rtx reg = gen_rtx_REG (SImode, LAST_ARG_REGNUM);
25224 emit_insn (gen_movsi (reg, GEN_INT (amount)));
25225 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, reg));
25229 /* Emit a USE (stack_pointer_rtx), so that
25230 the stack adjustment will not be deleted. */
25231 emit_insn (gen_force_register_use (stack_pointer_rtx));
25233 if (crtl->profile || !TARGET_SCHED_PROLOG)
25234 emit_insn (gen_blockage ());
25236 /* Emit a clobber for each insn that will be restored in the epilogue,
25237 so that flow2 will get register lifetimes correct. */
25238 for (regno = 0; regno < 13; regno++)
25239 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
25240 emit_clobber (gen_rtx_REG (SImode, regno));
25242 if (! df_regs_ever_live_p (LR_REGNUM))
25243 emit_use (gen_rtx_REG (SImode, LR_REGNUM));
25245 /* Clear all caller-saved regs that are not used to return. */
25246 if (IS_CMSE_ENTRY (arm_current_func_type ()))
25247 cmse_nonsecure_entry_clear_before_return ();
25250 /* Epilogue code for APCS frame. */
25251 static void
25252 arm_expand_epilogue_apcs_frame (bool really_return)
25254 unsigned long func_type;
25255 unsigned long saved_regs_mask;
25256 int num_regs = 0;
25257 int i;
25258 int floats_from_frame = 0;
25259 arm_stack_offsets *offsets;
25261 gcc_assert (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM);
25262 func_type = arm_current_func_type ();
25264 /* Get frame offsets for ARM. */
25265 offsets = arm_get_frame_offsets ();
25266 saved_regs_mask = offsets->saved_regs_mask;
25268 /* Find the offset of the floating-point save area in the frame. */
25269 floats_from_frame
25270 = (offsets->saved_args
25271 + arm_compute_static_chain_stack_bytes ()
25272 - offsets->frame);
25274 /* Compute how many core registers are saved and how far away the floats are. */
25275 for (i = 0; i <= LAST_ARM_REGNUM; i++)
25276 if (saved_regs_mask & (1 << i))
25278 num_regs++;
25279 floats_from_frame += 4;
25282 if (TARGET_HARD_FLOAT)
25284 int start_reg;
25285 rtx ip_rtx = gen_rtx_REG (SImode, IP_REGNUM);
25287 /* The offset is from IP_REGNUM. */
25288 int saved_size = arm_get_vfp_saved_size ();
25289 if (saved_size > 0)
25291 rtx_insn *insn;
25292 floats_from_frame += saved_size;
25293 insn = emit_insn (gen_addsi3 (ip_rtx,
25294 hard_frame_pointer_rtx,
25295 GEN_INT (-floats_from_frame)));
25296 arm_add_cfa_adjust_cfa_note (insn, -floats_from_frame,
25297 ip_rtx, hard_frame_pointer_rtx);
25300 /* Generate VFP register multi-pop. */
25301 start_reg = FIRST_VFP_REGNUM;
25303 for (i = FIRST_VFP_REGNUM; i < LAST_VFP_REGNUM; i += 2)
25304 /* Look for a case where a reg does not need restoring. */
25305 if ((!df_regs_ever_live_p (i) || call_used_regs[i])
25306 && (!df_regs_ever_live_p (i + 1)
25307 || call_used_regs[i + 1]))
25309 if (start_reg != i)
25310 arm_emit_vfp_multi_reg_pop (start_reg,
25311 (i - start_reg) / 2,
25312 gen_rtx_REG (SImode,
25313 IP_REGNUM));
25314 start_reg = i + 2;
25317 /* Restore the remaining regs that we have discovered (or possibly
25318 even all of them, if the conditional in the for loop never
25319 fired). */
25320 if (start_reg != i)
25321 arm_emit_vfp_multi_reg_pop (start_reg,
25322 (i - start_reg) / 2,
25323 gen_rtx_REG (SImode, IP_REGNUM));
25326 if (TARGET_IWMMXT)
25328 /* The frame pointer is guaranteed to be non-double-word aligned, as
25329 it is set to double-word-aligned old_stack_pointer - 4. */
25330 rtx_insn *insn;
25331 int lrm_count = (num_regs % 2) ? (num_regs + 2) : (num_regs + 1);
25333 for (i = LAST_IWMMXT_REGNUM; i >= FIRST_IWMMXT_REGNUM; i--)
25334 if (df_regs_ever_live_p (i) && !call_used_regs[i])
25336 rtx addr = gen_frame_mem (V2SImode,
25337 plus_constant (Pmode, hard_frame_pointer_rtx,
25338 - lrm_count * 4));
25339 insn = emit_insn (gen_movsi (gen_rtx_REG (V2SImode, i), addr));
25340 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
25341 gen_rtx_REG (V2SImode, i),
25342 NULL_RTX);
25343 lrm_count += 2;
25347 /* saved_regs_mask should contain IP, which holds the old stack pointer
25348 saved at the time the frame was created. Since SP and IP are adjacent
25349 registers, we can restore the value directly into SP. */
25350 gcc_assert (saved_regs_mask & (1 << IP_REGNUM));
25351 saved_regs_mask &= ~(1 << IP_REGNUM);
25352 saved_regs_mask |= (1 << SP_REGNUM);
25354 /* There are two registers left in saved_regs_mask - LR and PC. We
25355 only need to restore LR (the return address), but to
25356 save time we can load it directly into PC, unless we need a
25357 special function exit sequence, or we are not really returning. */
25358 if (really_return
25359 && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
25360 && !crtl->calls_eh_return)
25361 /* Delete LR from the register mask, so that LR on
25362 the stack is loaded into the PC in the register mask. */
25363 saved_regs_mask &= ~(1 << LR_REGNUM);
25364 else
25365 saved_regs_mask &= ~(1 << PC_REGNUM);
25367 num_regs = bit_count (saved_regs_mask);
25368 if ((offsets->outgoing_args != (1 + num_regs)) || cfun->calls_alloca)
25370 rtx_insn *insn;
25371 emit_insn (gen_blockage ());
25372 /* Unwind the stack to just below the saved registers. */
25373 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
25374 hard_frame_pointer_rtx,
25375 GEN_INT (- 4 * num_regs)));
25377 arm_add_cfa_adjust_cfa_note (insn, - 4 * num_regs,
25378 stack_pointer_rtx, hard_frame_pointer_rtx);
25381 arm_emit_multi_reg_pop (saved_regs_mask);
25383 if (IS_INTERRUPT (func_type))
25385 /* Interrupt handlers will have pushed the
25386 IP onto the stack, so restore it now. */
25387 rtx_insn *insn;
25388 rtx addr = gen_rtx_MEM (SImode,
25389 gen_rtx_POST_INC (SImode,
25390 stack_pointer_rtx));
25391 set_mem_alias_set (addr, get_frame_alias_set ());
25392 insn = emit_insn (gen_movsi (gen_rtx_REG (SImode, IP_REGNUM), addr));
25393 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
25394 gen_rtx_REG (SImode, IP_REGNUM),
25395 NULL_RTX);
25398 if (!really_return || (saved_regs_mask & (1 << PC_REGNUM)))
25399 return;
25401 if (crtl->calls_eh_return)
25402 emit_insn (gen_addsi3 (stack_pointer_rtx,
25403 stack_pointer_rtx,
25404 gen_rtx_REG (SImode, ARM_EH_STACKADJ_REGNUM)));
25406 if (IS_STACKALIGN (func_type))
25407 /* Restore the original stack pointer. Before prologue, the stack was
25408 realigned and the original stack pointer saved in r0. For details,
25409 see comment in arm_expand_prologue. */
25410 emit_insn (gen_movsi (stack_pointer_rtx, gen_rtx_REG (SImode, R0_REGNUM)));
25412 emit_jump_insn (simple_return_rtx);
25415 /* Generate RTL to represent ARM epilogue. Really_return is true if the
25416 function is not a sibcall. */
25417 void
25418 arm_expand_epilogue (bool really_return)
25420 unsigned long func_type;
25421 unsigned long saved_regs_mask;
25422 int num_regs = 0;
25423 int i;
25424 int amount;
25425 arm_stack_offsets *offsets;
25427 func_type = arm_current_func_type ();
25429 /* Naked functions don't have an epilogue. Hence, generate a return pattern
25430 and let output_return_instruction take care of any instruction emission. */
25431 if (IS_NAKED (func_type)
25432 || (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN))
25434 if (really_return)
25435 emit_jump_insn (simple_return_rtx);
25436 return;
25439 /* If we are throwing an exception, then we really must be doing a
25440 return, so we can't tail-call. */
25441 gcc_assert (!crtl->calls_eh_return || really_return);
25443 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
25445 arm_expand_epilogue_apcs_frame (really_return);
25446 return;
25449 /* Get frame offsets for ARM. */
25450 offsets = arm_get_frame_offsets ();
25451 saved_regs_mask = offsets->saved_regs_mask;
25452 num_regs = bit_count (saved_regs_mask);
25454 if (frame_pointer_needed)
25456 rtx_insn *insn;
25457 /* Restore stack pointer if necessary. */
25458 if (TARGET_ARM)
25460 /* In ARM mode, frame pointer points to first saved register.
25461 Restore stack pointer to last saved register. */
25462 amount = offsets->frame - offsets->saved_regs;
25464 /* Force out any pending memory operations that reference stacked data
25465 before stack de-allocation occurs. */
25466 emit_insn (gen_blockage ());
25467 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
25468 hard_frame_pointer_rtx,
25469 GEN_INT (amount)));
25470 arm_add_cfa_adjust_cfa_note (insn, amount,
25471 stack_pointer_rtx,
25472 hard_frame_pointer_rtx);
25474 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is not
25475 deleted. */
25476 emit_insn (gen_force_register_use (stack_pointer_rtx));
25478 else
25480 /* In Thumb-2 mode, the frame pointer points to the last saved
25481 register. */
25482 amount = offsets->locals_base - offsets->saved_regs;
25483 if (amount)
25485 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
25486 hard_frame_pointer_rtx,
25487 GEN_INT (amount)));
25488 arm_add_cfa_adjust_cfa_note (insn, amount,
25489 hard_frame_pointer_rtx,
25490 hard_frame_pointer_rtx);
25493 /* Force out any pending memory operations that reference stacked data
25494 before stack de-allocation occurs. */
25495 emit_insn (gen_blockage ());
25496 insn = emit_insn (gen_movsi (stack_pointer_rtx,
25497 hard_frame_pointer_rtx));
25498 arm_add_cfa_adjust_cfa_note (insn, 0,
25499 stack_pointer_rtx,
25500 hard_frame_pointer_rtx);
25501 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is not
25502 deleted. */
25503 emit_insn (gen_force_register_use (stack_pointer_rtx));
25506 else
25508 /* Pop off outgoing args and local frame to adjust stack pointer to
25509 last saved register. */
25510 amount = offsets->outgoing_args - offsets->saved_regs;
25511 if (amount)
25513 rtx_insn *tmp;
25514 /* Force out any pending memory operations that reference stacked data
25515 before stack de-allocation occurs. */
25516 emit_insn (gen_blockage ());
25517 tmp = emit_insn (gen_addsi3 (stack_pointer_rtx,
25518 stack_pointer_rtx,
25519 GEN_INT (amount)));
25520 arm_add_cfa_adjust_cfa_note (tmp, amount,
25521 stack_pointer_rtx, stack_pointer_rtx);
25522 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is
25523 not deleted. */
25524 emit_insn (gen_force_register_use (stack_pointer_rtx));
25528 if (TARGET_HARD_FLOAT)
25530 /* Generate VFP register multi-pop. */
25531 int end_reg = LAST_VFP_REGNUM + 1;
25533 /* Scan the registers in reverse order. We need to match
25534 any groupings made in the prologue and generate matching
25535 vldm operations. The groups must be matched because,
25536 unlike pop, vldm can only restore consecutive registers. */
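/* For illustration: if the prologue saved, say, d8-d9 and d11-d12, the
   gap at d10 splits the restore into two separate vldm pops, one for
   each consecutive group.  */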
25537 for (i = LAST_VFP_REGNUM - 1; i >= FIRST_VFP_REGNUM; i -= 2)
25538 /* Look for a case where a reg does not need restoring. */
25539 if ((!df_regs_ever_live_p (i) || call_used_regs[i])
25540 && (!df_regs_ever_live_p (i + 1)
25541 || call_used_regs[i + 1]))
25543 /* Restore the regs discovered so far (from reg+2 to
25544 end_reg). */
25545 if (end_reg > i + 2)
25546 arm_emit_vfp_multi_reg_pop (i + 2,
25547 (end_reg - (i + 2)) / 2,
25548 stack_pointer_rtx);
25549 end_reg = i;
25552 /* Restore the remaining regs that we have discovered (or possibly
25553 even all of them, if the conditional in the for loop never
25554 fired). */
25555 if (end_reg > i + 2)
25556 arm_emit_vfp_multi_reg_pop (i + 2,
25557 (end_reg - (i + 2)) / 2,
25558 stack_pointer_rtx);
25561 if (TARGET_IWMMXT)
25562 for (i = FIRST_IWMMXT_REGNUM; i <= LAST_IWMMXT_REGNUM; i++)
25563 if (df_regs_ever_live_p (i) && !call_used_regs[i])
25565 rtx_insn *insn;
25566 rtx addr = gen_rtx_MEM (V2SImode,
25567 gen_rtx_POST_INC (SImode,
25568 stack_pointer_rtx));
25569 set_mem_alias_set (addr, get_frame_alias_set ());
25570 insn = emit_insn (gen_movsi (gen_rtx_REG (V2SImode, i), addr));
25571 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
25572 gen_rtx_REG (V2SImode, i),
25573 NULL_RTX);
25574 arm_add_cfa_adjust_cfa_note (insn, UNITS_PER_WORD,
25575 stack_pointer_rtx, stack_pointer_rtx);
25578 if (saved_regs_mask)
25580 rtx insn;
25581 bool return_in_pc = false;
25583 if (ARM_FUNC_TYPE (func_type) != ARM_FT_INTERWORKED
25584 && (TARGET_ARM || ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL)
25585 && !IS_CMSE_ENTRY (func_type)
25586 && !IS_STACKALIGN (func_type)
25587 && really_return
25588 && crtl->args.pretend_args_size == 0
25589 && saved_regs_mask & (1 << LR_REGNUM)
25590 && !crtl->calls_eh_return)
25592 saved_regs_mask &= ~(1 << LR_REGNUM);
25593 saved_regs_mask |= (1 << PC_REGNUM);
25594 return_in_pc = true;
25597 if (num_regs == 1 && (!IS_INTERRUPT (func_type) || !return_in_pc))
25599 for (i = 0; i <= LAST_ARM_REGNUM; i++)
25600 if (saved_regs_mask & (1 << i))
25602 rtx addr = gen_rtx_MEM (SImode,
25603 gen_rtx_POST_INC (SImode,
25604 stack_pointer_rtx));
25605 set_mem_alias_set (addr, get_frame_alias_set ());
25607 if (i == PC_REGNUM)
25609 insn = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
25610 XVECEXP (insn, 0, 0) = ret_rtx;
25611 XVECEXP (insn, 0, 1) = gen_rtx_SET (gen_rtx_REG (SImode, i),
25612 addr);
25613 RTX_FRAME_RELATED_P (XVECEXP (insn, 0, 1)) = 1;
25614 insn = emit_jump_insn (insn);
25616 else
25618 insn = emit_insn (gen_movsi (gen_rtx_REG (SImode, i),
25619 addr));
25620 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
25621 gen_rtx_REG (SImode, i),
25622 NULL_RTX);
25623 arm_add_cfa_adjust_cfa_note (insn, UNITS_PER_WORD,
25624 stack_pointer_rtx,
25625 stack_pointer_rtx);
25629 else
25631 if (TARGET_LDRD
25632 && current_tune->prefer_ldrd_strd
25633 && !optimize_function_for_size_p (cfun))
25635 if (TARGET_THUMB2)
25636 thumb2_emit_ldrd_pop (saved_regs_mask);
25637 else if (TARGET_ARM && !IS_INTERRUPT (func_type))
25638 arm_emit_ldrd_pop (saved_regs_mask);
25639 else
25640 arm_emit_multi_reg_pop (saved_regs_mask);
25642 else
25643 arm_emit_multi_reg_pop (saved_regs_mask);
25646 if (return_in_pc)
25647 return;
25650 amount
25651 = crtl->args.pretend_args_size + arm_compute_static_chain_stack_bytes ();
25652 if (amount)
25654 int i, j;
25655 rtx dwarf = NULL_RTX;
25656 rtx_insn *tmp =
25657 emit_insn (gen_addsi3 (stack_pointer_rtx,
25658 stack_pointer_rtx,
25659 GEN_INT (amount)));
25661 RTX_FRAME_RELATED_P (tmp) = 1;
25663 if (cfun->machine->uses_anonymous_args)
25665 /* Restore pretend args. See arm_expand_prologue for how the
25666 pretend args are saved on the stack. */
25667 int num_regs = crtl->args.pretend_args_size / 4;
25668 saved_regs_mask = (0xf0 >> num_regs) & 0xf;
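/* Worked example: with 8 bytes of pretend args (num_regs == 2) the mask
   is (0xf0 >> 2) & 0xf == 0xc, i.e. r2 and r3, exactly the argument
   registers the prologue pushed.  */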
25669 for (j = 0, i = 0; j < num_regs; i++)
25670 if (saved_regs_mask & (1 << i))
25672 rtx reg = gen_rtx_REG (SImode, i);
25673 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
25674 j++;
25676 REG_NOTES (tmp) = dwarf;
25678 arm_add_cfa_adjust_cfa_note (tmp, amount,
25679 stack_pointer_rtx, stack_pointer_rtx);
25682 /* Clear all caller-saved regs that are not used to return. */
25683 if (IS_CMSE_ENTRY (arm_current_func_type ()))
25685 /* CMSE_ENTRY always returns. */
25686 gcc_assert (really_return);
25687 cmse_nonsecure_entry_clear_before_return ();
25690 if (!really_return)
25691 return;
25693 if (crtl->calls_eh_return)
25694 emit_insn (gen_addsi3 (stack_pointer_rtx,
25695 stack_pointer_rtx,
25696 gen_rtx_REG (SImode, ARM_EH_STACKADJ_REGNUM)));
25698 if (IS_STACKALIGN (func_type))
25699 /* Restore the original stack pointer. Before prologue, the stack was
25700 realigned and the original stack pointer saved in r0. For details,
25701 see comment in arm_expand_prologue. */
25702 emit_insn (gen_movsi (stack_pointer_rtx, gen_rtx_REG (SImode, R0_REGNUM)));
25704 emit_jump_insn (simple_return_rtx);
25707 /* Implementation of insn prologue_thumb1_interwork. This is the first
25708 "instruction" of a function called in ARM mode. Swap to thumb mode. */
25710 const char *
25711 thumb1_output_interwork (void)
25713 const char * name;
25714 FILE *f = asm_out_file;
25716 gcc_assert (MEM_P (DECL_RTL (current_function_decl)));
25717 gcc_assert (GET_CODE (XEXP (DECL_RTL (current_function_decl), 0))
25718 == SYMBOL_REF);
25719 name = XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0);
25721 /* Generate code sequence to switch us into Thumb mode. */
25722 /* The .code 32 directive has already been emitted by
25723 ASM_DECLARE_FUNCTION_NAME. */
25724 asm_fprintf (f, "\torr\t%r, %r, #1\n", IP_REGNUM, PC_REGNUM);
25725 asm_fprintf (f, "\tbx\t%r\n", IP_REGNUM);
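/* Illustration of the sequence just emitted: with the orr at address X,
   PC reads as X + 8, so ip = (X + 8) | 1 and the bx transfers control to
   the instruction following it, executed in Thumb state.  */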
25727 /* Generate a label, so that the debugger will notice the
25728 change in instruction sets. This label is also used by
25729 the assembler to bypass the ARM code when this function
25730 is called from a Thumb encoded function elsewhere in the
25731 same file. Hence the definition of STUB_NAME here must
25732 agree with the definition in gas/config/tc-arm.c. */
25734 #define STUB_NAME ".real_start_of"
25736 fprintf (f, "\t.code\t16\n");
25737 #ifdef ARM_PE
25738 if (arm_dllexport_name_p (name))
25739 name = arm_strip_name_encoding (name);
25740 #endif
25741 asm_fprintf (f, "\t.globl %s%U%s\n", STUB_NAME, name);
25742 fprintf (f, "\t.thumb_func\n");
25743 asm_fprintf (f, "%s%U%s:\n", STUB_NAME, name);
25745 return "";
25748 /* Handle the case of a double word load into a low register from
25749 a computed memory address. The computed address may involve a
25750 register which is overwritten by the load. */
25751 const char *
25752 thumb_load_double_from_address (rtx *operands)
25754 rtx addr;
25755 rtx base;
25756 rtx offset;
25757 rtx arg1;
25758 rtx arg2;
25760 gcc_assert (REG_P (operands[0]));
25761 gcc_assert (MEM_P (operands[1]));
25763 /* Get the memory address. */
25764 addr = XEXP (operands[1], 0);
25766 /* Work out how the memory address is computed. */
25767 switch (GET_CODE (addr))
25769 case REG:
25770 operands[2] = adjust_address (operands[1], SImode, 4);
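/* If the destination low register is the same as the address register,
   load the high word first so the address is not clobbered before the
   second load; otherwise load the low word first.  */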
25772 if (REGNO (operands[0]) == REGNO (addr))
25774 output_asm_insn ("ldr\t%H0, %2", operands);
25775 output_asm_insn ("ldr\t%0, %1", operands);
25777 else
25779 output_asm_insn ("ldr\t%0, %1", operands);
25780 output_asm_insn ("ldr\t%H0, %2", operands);
25782 break;
25784 case CONST:
25785 /* Compute <address> + 4 for the high order load. */
25786 operands[2] = adjust_address (operands[1], SImode, 4);
25788 output_asm_insn ("ldr\t%0, %1", operands);
25789 output_asm_insn ("ldr\t%H0, %2", operands);
25790 break;
25792 case PLUS:
25793 arg1 = XEXP (addr, 0);
25794 arg2 = XEXP (addr, 1);
25796 if (CONSTANT_P (arg1))
25797 base = arg2, offset = arg1;
25798 else
25799 base = arg1, offset = arg2;
25801 gcc_assert (REG_P (base));
25803 /* Catch the case of <address> = <reg> + <reg> */
25804 if (REG_P (offset))
25806 int reg_offset = REGNO (offset);
25807 int reg_base = REGNO (base);
25808 int reg_dest = REGNO (operands[0]);
25810 /* Add the base and offset registers together into the
25811 higher destination register. */
25812 asm_fprintf (asm_out_file, "\tadd\t%r, %r, %r",
25813 reg_dest + 1, reg_base, reg_offset);
25815 /* Load the lower destination register from the address in
25816 the higher destination register. */
25817 asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #0]",
25818 reg_dest, reg_dest + 1);
25820 /* Load the higher destination register from its own address
25821 plus 4. */
25822 asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #4]",
25823 reg_dest + 1, reg_dest + 1);
25825 else
25827 /* Compute <address> + 4 for the high order load. */
25828 operands[2] = adjust_address (operands[1], SImode, 4);
25830 /* If the computed address is held in the low order register
25831 then load the high order register first, otherwise always
25832 load the low order register first. */
25833 if (REGNO (operands[0]) == REGNO (base))
25835 output_asm_insn ("ldr\t%H0, %2", operands);
25836 output_asm_insn ("ldr\t%0, %1", operands);
25838 else
25840 output_asm_insn ("ldr\t%0, %1", operands);
25841 output_asm_insn ("ldr\t%H0, %2", operands);
25844 break;
25846 case LABEL_REF:
25847 /* With no registers to worry about we can just load the value
25848 directly. */
25849 operands[2] = adjust_address (operands[1], SImode, 4);
25851 output_asm_insn ("ldr\t%H0, %2", operands);
25852 output_asm_insn ("ldr\t%0, %1", operands);
25853 break;
25855 default:
25856 gcc_unreachable ();
25859 return "";
25862 const char *
25863 thumb_output_move_mem_multiple (int n, rtx *operands)
25865 switch (n)
25867 case 2:
25868 if (REGNO (operands[4]) > REGNO (operands[5]))
25869 std::swap (operands[4], operands[5]);
25871 output_asm_insn ("ldmia\t%1!, {%4, %5}", operands);
25872 output_asm_insn ("stmia\t%0!, {%4, %5}", operands);
25873 break;
25875 case 3:
25876 if (REGNO (operands[4]) > REGNO (operands[5]))
25877 std::swap (operands[4], operands[5]);
25878 if (REGNO (operands[5]) > REGNO (operands[6]))
25879 std::swap (operands[5], operands[6]);
25880 if (REGNO (operands[4]) > REGNO (operands[5]))
25881 std::swap (operands[4], operands[5]);
25883 output_asm_insn ("ldmia\t%1!, {%4, %5, %6}", operands);
25884 output_asm_insn ("stmia\t%0!, {%4, %5, %6}", operands);
25885 break;
25887 default:
25888 gcc_unreachable ();
25891 return "";
25894 /* Output a call-via instruction for thumb state. */
25895 const char *
25896 thumb_call_via_reg (rtx reg)
25898 int regno = REGNO (reg);
25899 rtx *labelp;
25901 gcc_assert (regno < LR_REGNUM);
25903 /* If we are in the normal text section we can use a single instance
25904 per compilation unit. If we are doing function sections, then we need
25905 an entry per section, since we can't rely on reachability. */
25906 if (in_section == text_section)
25908 thumb_call_reg_needed = 1;
25910 if (thumb_call_via_label[regno] == NULL)
25911 thumb_call_via_label[regno] = gen_label_rtx ();
25912 labelp = thumb_call_via_label + regno;
25914 else
25916 if (cfun->machine->call_via[regno] == NULL)
25917 cfun->machine->call_via[regno] = gen_label_rtx ();
25918 labelp = cfun->machine->call_via + regno;
25921 output_asm_insn ("bl\t%a0", labelp);
25922 return "";
25925 /* Routines for generating rtl. */
25926 void
25927 thumb_expand_movmemqi (rtx *operands)
25929 rtx out = copy_to_mode_reg (SImode, XEXP (operands[0], 0));
25930 rtx in = copy_to_mode_reg (SImode, XEXP (operands[1], 0));
25931 HOST_WIDE_INT len = INTVAL (operands[2]);
25932 HOST_WIDE_INT offset = 0;
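/* For illustration: a 23-byte copy is emitted as a 12-byte block move,
   an 8-byte block move, then a halfword and a byte tail
   (12 + 8 + 2 + 1 == 23).  */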
25934 while (len >= 12)
25936 emit_insn (gen_movmem12b (out, in, out, in));
25937 len -= 12;
25940 if (len >= 8)
25942 emit_insn (gen_movmem8b (out, in, out, in));
25943 len -= 8;
25946 if (len >= 4)
25948 rtx reg = gen_reg_rtx (SImode);
25949 emit_insn (gen_movsi (reg, gen_rtx_MEM (SImode, in)));
25950 emit_insn (gen_movsi (gen_rtx_MEM (SImode, out), reg));
25951 len -= 4;
25952 offset += 4;
25955 if (len >= 2)
25957 rtx reg = gen_reg_rtx (HImode);
25958 emit_insn (gen_movhi (reg, gen_rtx_MEM (HImode,
25959 plus_constant (Pmode, in,
25960 offset))));
25961 emit_insn (gen_movhi (gen_rtx_MEM (HImode, plus_constant (Pmode, out,
25962 offset)),
25963 reg));
25964 len -= 2;
25965 offset += 2;
25968 if (len)
25970 rtx reg = gen_reg_rtx (QImode);
25971 emit_insn (gen_movqi (reg, gen_rtx_MEM (QImode,
25972 plus_constant (Pmode, in,
25973 offset))));
25974 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, out,
25975 offset)),
25976 reg));
25980 void
25981 thumb_reload_out_hi (rtx *operands)
25983 emit_insn (gen_thumb_movhi_clobber (operands[0], operands[1], operands[2]));
25986 /* Return the length of a function name prefix
25987 that starts with the character 'c'. */
25988 static int
25989 arm_get_strip_length (int c)
25991 switch (c)
25993 ARM_NAME_ENCODING_LENGTHS
25994 default: return 0;
25998 /* Return a pointer to a function's name with any
25999 and all prefix encodings stripped from it. */
26000 const char *
26001 arm_strip_name_encoding (const char *name)
26003 int skip;
26005 while ((skip = arm_get_strip_length (* name)))
26006 name += skip;
26008 return name;
26011 /* If there is a '*' anywhere in the name's prefix, then
26012 emit the stripped name verbatim, otherwise prepend an
26013 underscore if leading underscores are being used. */
26014 void
26015 arm_asm_output_labelref (FILE *stream, const char *name)
26017 int skip;
26018 int verbatim = 0;
26020 while ((skip = arm_get_strip_length (* name)))
26022 verbatim |= (*name == '*');
26023 name += skip;
26026 if (verbatim)
26027 fputs (name, stream);
26028 else
26029 asm_fprintf (stream, "%U%s", name);
26032 /* This function is used to emit an EABI tag and its associated value.
26033 We emit the numerical value of the tag in case the assembler does not
26034 support textual tags (e.g. gas prior to 2.20). If requested we include
26035 the tag name in a comment so that anyone reading the assembler output
26036 will know which tag is being set.
26038 This function is not static because arm-c.c needs it too. */
26040 void
26041 arm_emit_eabi_attribute (const char *name, int num, int val)
26043 asm_fprintf (asm_out_file, "\t.eabi_attribute %d, %d", num, val);
26044 if (flag_verbose_asm || flag_debug_asm)
26045 asm_fprintf (asm_out_file, "\t%s %s", ASM_COMMENT_START, name);
26046 asm_fprintf (asm_out_file, "\n");
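/* For illustration, assuming '@' is the assembler comment start and
   -fverbose-asm is in effect: arm_emit_eabi_attribute
   ("Tag_ABI_enum_size", 26, 1) emits
       .eabi_attribute 26, 1    @ Tag_ABI_enum_size  */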
26049 /* This function is used to print CPU tuning information as comment
26050 in assembler file. Pointers are not printed for now. */
26052 void
26053 arm_print_tune_info (void)
26055 asm_fprintf (asm_out_file, "\t" ASM_COMMENT_START ".tune parameters\n");
26056 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "constant_limit:\t%d\n",
26057 current_tune->constant_limit);
26058 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26059 "max_insns_skipped:\t%d\n", current_tune->max_insns_skipped);
26060 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26061 "prefetch.num_slots:\t%d\n", current_tune->prefetch.num_slots);
26062 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26063 "prefetch.l1_cache_size:\t%d\n",
26064 current_tune->prefetch.l1_cache_size);
26065 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26066 "prefetch.l1_cache_line_size:\t%d\n",
26067 current_tune->prefetch.l1_cache_line_size);
26068 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26069 "prefer_constant_pool:\t%d\n",
26070 (int) current_tune->prefer_constant_pool);
26071 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26072 "branch_cost:\t(s:speed, p:predictable)\n");
26073 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "\t\ts&p\tcost\n");
26074 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "\t\t00\t%d\n",
26075 current_tune->branch_cost (false, false));
26076 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "\t\t01\t%d\n",
26077 current_tune->branch_cost (false, true));
26078 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "\t\t10\t%d\n",
26079 current_tune->branch_cost (true, false));
26080 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "\t\t11\t%d\n",
26081 current_tune->branch_cost (true, true));
26082 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26083 "prefer_ldrd_strd:\t%d\n",
26084 (int) current_tune->prefer_ldrd_strd);
26085 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26086 "logical_op_non_short_circuit:\t[%d,%d]\n",
26087 (int) current_tune->logical_op_non_short_circuit_thumb,
26088 (int) current_tune->logical_op_non_short_circuit_arm);
26089 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26090 "prefer_neon_for_64bits:\t%d\n",
26091 (int) current_tune->prefer_neon_for_64bits);
26092 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26093 "disparage_flag_setting_t16_encodings:\t%d\n",
26094 (int) current_tune->disparage_flag_setting_t16_encodings);
26095 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26096 "string_ops_prefer_neon:\t%d\n",
26097 (int) current_tune->string_ops_prefer_neon);
26098 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26099 "max_insns_inline_memset:\t%d\n",
26100 current_tune->max_insns_inline_memset);
26101 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "fusible_ops:\t%u\n",
26102 current_tune->fusible_ops);
26103 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "sched_autopref:\t%d\n",
26104 (int) current_tune->sched_autopref);
26107 static void
26108 arm_file_start (void)
26110 int val;
26112 if (TARGET_BPABI)
26114 /* We don't have a specified CPU. Use the architecture to
26115 generate the tags.
26117 Note: it might be better to do this unconditionally, then the
26118 assembler would not need to know about all new CPU names as
26119 they are added. */
26120 if (!arm_active_target.core_name)
26122 /* armv7ve doesn't support any extensions. */
26123 if (strcmp (arm_active_target.arch_name, "armv7ve") == 0)
26125 /* Keep backward compatibility for assemblers
26126 which don't support armv7ve. */
26127 asm_fprintf (asm_out_file, "\t.arch armv7-a\n");
26128 asm_fprintf (asm_out_file, "\t.arch_extension virt\n");
26129 asm_fprintf (asm_out_file, "\t.arch_extension idiv\n");
26130 asm_fprintf (asm_out_file, "\t.arch_extension sec\n");
26131 asm_fprintf (asm_out_file, "\t.arch_extension mp\n");
26133 else
26135 const char* pos = strchr (arm_active_target.arch_name, '+');
26136 if (pos)
26138 char buf[32];
26139 gcc_assert (strlen (arm_active_target.arch_name)
26140 <= sizeof (buf) / sizeof (*pos));
26141 strncpy (buf, arm_active_target.arch_name,
26142 (pos - arm_active_target.arch_name) * sizeof (*pos));
26143 buf[pos - arm_active_target.arch_name] = '\0';
26144 asm_fprintf (asm_out_file, "\t.arch %s\n", buf);
26145 asm_fprintf (asm_out_file, "\t.arch_extension %s\n", pos + 1);
26147 else
26148 asm_fprintf (asm_out_file, "\t.arch %s\n",
26149 arm_active_target.arch_name);
26152 else if (strncmp (arm_active_target.core_name, "generic", 7) == 0)
26153 asm_fprintf (asm_out_file, "\t.arch %s\n",
26154 arm_active_target.core_name + 8);
26155 else
26157 const char* truncated_name
26158 = arm_rewrite_selected_cpu (arm_active_target.core_name);
26159 asm_fprintf (asm_out_file, "\t.cpu %s\n", truncated_name);
26162 if (print_tune_info)
26163 arm_print_tune_info ();
26165 if (! TARGET_SOFT_FLOAT)
26167 if (TARGET_HARD_FLOAT && TARGET_VFP_SINGLE)
26168 arm_emit_eabi_attribute ("Tag_ABI_HardFP_use", 27, 1);
26170 if (TARGET_HARD_FLOAT_ABI)
26171 arm_emit_eabi_attribute ("Tag_ABI_VFP_args", 28, 1);
26174 /* Some of these attributes only apply when the corresponding features
26175 are used. However we don't have any easy way of figuring this out.
26176 Conservatively record the setting that would have been used. */
26178 if (flag_rounding_math)
26179 arm_emit_eabi_attribute ("Tag_ABI_FP_rounding", 19, 1);
26181 if (!flag_unsafe_math_optimizations)
26183 arm_emit_eabi_attribute ("Tag_ABI_FP_denormal", 20, 1);
26184 arm_emit_eabi_attribute ("Tag_ABI_FP_exceptions", 21, 1);
26186 if (flag_signaling_nans)
26187 arm_emit_eabi_attribute ("Tag_ABI_FP_user_exceptions", 22, 1);
26189 arm_emit_eabi_attribute ("Tag_ABI_FP_number_model", 23,
26190 flag_finite_math_only ? 1 : 3);
26192 arm_emit_eabi_attribute ("Tag_ABI_align8_needed", 24, 1);
26193 arm_emit_eabi_attribute ("Tag_ABI_align8_preserved", 25, 1);
26194 arm_emit_eabi_attribute ("Tag_ABI_enum_size", 26,
26195 flag_short_enums ? 1 : 2);
26197 /* Tag_ABI_optimization_goals. */
26198 if (optimize_size)
26199 val = 4;
26200 else if (optimize >= 2)
26201 val = 2;
26202 else if (optimize)
26203 val = 1;
26204 else
26205 val = 6;
26206 arm_emit_eabi_attribute ("Tag_ABI_optimization_goals", 30, val);
26208 arm_emit_eabi_attribute ("Tag_CPU_unaligned_access", 34,
26209 unaligned_access);
26211 if (arm_fp16_format)
26212 arm_emit_eabi_attribute ("Tag_ABI_FP_16bit_format", 38,
26213 (int) arm_fp16_format);
26215 if (arm_lang_output_object_attributes_hook)
26216 arm_lang_output_object_attributes_hook();
26219 default_file_start ();
26222 static void
26223 arm_file_end (void)
26225 int regno;
26227 if (NEED_INDICATE_EXEC_STACK)
26228 /* Add .note.GNU-stack. */
26229 file_end_indicate_exec_stack ();
26231 if (! thumb_call_reg_needed)
26232 return;
26234 switch_to_section (text_section);
26235 asm_fprintf (asm_out_file, "\t.code 16\n");
26236 ASM_OUTPUT_ALIGN (asm_out_file, 1);
26238 for (regno = 0; regno < LR_REGNUM; regno++)
26240 rtx label = thumb_call_via_label[regno];
26242 if (label != 0)
26244 targetm.asm_out.internal_label (asm_out_file, "L",
26245 CODE_LABEL_NUMBER (label));
26246 asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
26251 #ifndef ARM_PE
26252 /* Symbols in the text segment can be accessed without indirecting via the
26253 constant pool; it may take an extra binary operation, but this is still
26254 faster than indirecting via memory. Don't do this when not optimizing,
26255 since we won't be calculating all of the offsets necessary to do this
26256 simplification. */
26258 static void
26259 arm_encode_section_info (tree decl, rtx rtl, int first)
26261 if (optimize > 0 && TREE_CONSTANT (decl))
26262 SYMBOL_REF_FLAG (XEXP (rtl, 0)) = 1;
26264 default_encode_section_info (decl, rtl, first);
26266 #endif /* !ARM_PE */
26268 static void
26269 arm_internal_label (FILE *stream, const char *prefix, unsigned long labelno)
26271 if (arm_ccfsm_state == 3 && (unsigned) arm_target_label == labelno
26272 && !strcmp (prefix, "L"))
26274 arm_ccfsm_state = 0;
26275 arm_target_insn = NULL;
26277 default_internal_label (stream, prefix, labelno);
26280 /* Output code to add DELTA to the first argument, and then jump
26281 to FUNCTION. Used for C++ multiple inheritance. */
26283 static void
26284 arm_thumb1_mi_thunk (FILE *file, tree, HOST_WIDE_INT delta,
26285 HOST_WIDE_INT, tree function)
26287 static int thunk_label = 0;
26288 char label[256];
26289 char labelpc[256];
26290 int mi_delta = delta;
26291 const char *const mi_op = mi_delta < 0 ? "sub" : "add";
26292 int shift = 0;
26293 int this_regno = (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function)
26294 ? 1 : 0);
26295 if (mi_delta < 0)
26296 mi_delta = - mi_delta;
26298 final_start_function (emit_barrier (), file, 1);
26300 if (TARGET_THUMB1)
26302 int labelno = thunk_label++;
26303 ASM_GENERATE_INTERNAL_LABEL (label, "LTHUMBFUNC", labelno);
26304 /* Thunks are entered in arm mode when available. */
26305 if (TARGET_THUMB1_ONLY)
26307 /* push r3 so we can use it as a temporary. */
26308 /* TODO: Omit this save if r3 is not used. */
26309 fputs ("\tpush {r3}\n", file);
26310 fputs ("\tldr\tr3, ", file);
26312 else
26314 fputs ("\tldr\tr12, ", file);
26316 assemble_name (file, label);
26317 fputc ('\n', file);
26318 if (flag_pic)
26320 /* If we are generating PIC, the ldr instruction below loads
26321 "(target - 7) - .LTHUNKPCn" into r12. The pc reads as
26322 the address of the add + 8, so we have:
26324 r12 = (target - 7) - .LTHUNKPCn + (.LTHUNKPCn + 8)
26325 = target + 1.
26327 Note that we have "+ 1" because some versions of GNU ld
26328 don't set the low bit of the result for R_ARM_REL32
26329 relocations against thumb function symbols.
26330 On ARMv6M this is +4, not +8. */
26331 ASM_GENERATE_INTERNAL_LABEL (labelpc, "LTHUNKPC", labelno);
26332 assemble_name (file, labelpc);
26333 fputs (":\n", file);
26334 if (TARGET_THUMB1_ONLY)
26336 /* This is 2 insns after the start of the thunk, so we know it
26337 is 4-byte aligned. */
26338 fputs ("\tadd\tr3, pc, r3\n", file);
26339 fputs ("\tmov r12, r3\n", file);
26341 else
26342 fputs ("\tadd\tr12, pc, r12\n", file);
26344 else if (TARGET_THUMB1_ONLY)
26345 fputs ("\tmov r12, r3\n", file);
26347 if (TARGET_THUMB1_ONLY)
26349 if (mi_delta > 255)
26351 fputs ("\tldr\tr3, ", file);
26352 assemble_name (file, label);
26353 fputs ("+4\n", file);
26354 asm_fprintf (file, "\t%ss\t%r, %r, r3\n",
26355 mi_op, this_regno, this_regno);
26357 else if (mi_delta != 0)
26359 /* Thumb1 unified syntax requires s suffix in instruction name when
26360 one of the operands is immediate. */
26361 asm_fprintf (file, "\t%ss\t%r, %r, #%d\n",
26362 mi_op, this_regno, this_regno,
26363 mi_delta);
26366 else
26368 /* TODO: Use movw/movt for large constants when available. */
26369 while (mi_delta != 0)
26371 if ((mi_delta & (3 << shift)) == 0)
26372 shift += 2;
26373 else
26375 asm_fprintf (file, "\t%s\t%r, %r, #%d\n",
26376 mi_op, this_regno, this_regno,
26377 mi_delta & (0xff << shift));
26378 mi_delta &= ~(0xff << shift);
26379 shift += 8;
26383 if (TARGET_THUMB1)
26385 if (TARGET_THUMB1_ONLY)
26386 fputs ("\tpop\t{r3}\n", file);
26388 fprintf (file, "\tbx\tr12\n");
26389 ASM_OUTPUT_ALIGN (file, 2);
26390 assemble_name (file, label);
26391 fputs (":\n", file);
26392 if (flag_pic)
26394 /* Output ".word .LTHUNKn-[3,7]-.LTHUNKPCn". */
26395 rtx tem = XEXP (DECL_RTL (function), 0);
26396 /* For TARGET_THUMB1_ONLY the thunk is in Thumb mode, so the PC
26397 pipeline offset is four rather than eight. Adjust the offset
26398 accordingly. */
26399 tem = plus_constant (GET_MODE (tem), tem,
26400 TARGET_THUMB1_ONLY ? -3 : -7);
26401 tem = gen_rtx_MINUS (GET_MODE (tem),
26402 tem,
26403 gen_rtx_SYMBOL_REF (Pmode,
26404 ggc_strdup (labelpc)));
26405 assemble_integer (tem, 4, BITS_PER_WORD, 1);
26407 else
26408 /* Output ".word .LTHUNKn". */
26409 assemble_integer (XEXP (DECL_RTL (function), 0), 4, BITS_PER_WORD, 1);
26411 if (TARGET_THUMB1_ONLY && mi_delta > 255)
26412 assemble_integer (GEN_INT(mi_delta), 4, BITS_PER_WORD, 1);
26414 else
26416 fputs ("\tb\t", file);
26417 assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
26418 if (NEED_PLT_RELOC)
26419 fputs ("(PLT)", file);
26420 fputc ('\n', file);
26423 final_end_function ();
26426 /* MI thunk handling for TARGET_32BIT. */
26428 static void
26429 arm32_output_mi_thunk (FILE *file, tree, HOST_WIDE_INT delta,
26430 HOST_WIDE_INT vcall_offset, tree function)
26432 /* On ARM, this_regno is R0 or R1 depending on
26433 whether the function returns an aggregate or not. */
26435 int this_regno = (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)),
26436 function)
26437 ? R1_REGNUM : R0_REGNUM);
26439 rtx temp = gen_rtx_REG (Pmode, IP_REGNUM);
26440 rtx this_rtx = gen_rtx_REG (Pmode, this_regno);
26441 reload_completed = 1;
26442 emit_note (NOTE_INSN_PROLOGUE_END);
26444 /* Add DELTA to THIS_RTX. */
26445 if (delta != 0)
26446 arm_split_constant (PLUS, Pmode, NULL_RTX,
26447 delta, this_rtx, this_rtx, false);
26449 /* Add *(*THIS_RTX + VCALL_OFFSET) to THIS_RTX. */
26450 if (vcall_offset != 0)
26452 /* Load *THIS_RTX. */
26453 emit_move_insn (temp, gen_rtx_MEM (Pmode, this_rtx));
26454 /* Compute *THIS_RTX + VCALL_OFFSET. */
26455 arm_split_constant (PLUS, Pmode, NULL_RTX, vcall_offset, temp, temp,
26456 false);
26457 /* Compute *(*THIS_RTX + VCALL_OFFSET). */
26458 emit_move_insn (temp, gen_rtx_MEM (Pmode, temp));
26459 emit_insn (gen_add3_insn (this_rtx, this_rtx, temp));
26462 /* Generate a tail call to the target function. */
26463 if (!TREE_USED (function))
26465 assemble_external (function);
26466 TREE_USED (function) = 1;
26468 rtx funexp = XEXP (DECL_RTL (function), 0);
26469 funexp = gen_rtx_MEM (FUNCTION_MODE, funexp);
26470 rtx_insn * insn = emit_call_insn (gen_sibcall (funexp, const0_rtx, NULL_RTX));
26471 SIBLING_CALL_P (insn) = 1;
26473 insn = get_insns ();
26474 shorten_branches (insn);
26475 final_start_function (insn, file, 1);
26476 final (insn, file, 1);
26477 final_end_function ();
26479 /* Stop pretending this is a post-reload pass. */
26480 reload_completed = 0;
26483 /* Output code to add DELTA to the first argument, and then jump
26484 to FUNCTION. Used for C++ multiple inheritance. */
26486 static void
26487 arm_output_mi_thunk (FILE *file, tree thunk, HOST_WIDE_INT delta,
26488 HOST_WIDE_INT vcall_offset, tree function)
26490 if (TARGET_32BIT)
26491 arm32_output_mi_thunk (file, thunk, delta, vcall_offset, function);
26492 else
26493 arm_thumb1_mi_thunk (file, thunk, delta, vcall_offset, function);
26497 arm_emit_vector_const (FILE *file, rtx x)
26499 int i;
26500 const char * pattern;
26502 gcc_assert (GET_CODE (x) == CONST_VECTOR);
26504 switch (GET_MODE (x))
26506 case V2SImode: pattern = "%08x"; break;
26507 case V4HImode: pattern = "%04x"; break;
26508 case V8QImode: pattern = "%02x"; break;
26509 default: gcc_unreachable ();
26512 fprintf (file, "0x");
26513 for (i = CONST_VECTOR_NUNITS (x); i--;)
26515 rtx element;
26517 element = CONST_VECTOR_ELT (x, i);
26518 fprintf (file, pattern, INTVAL (element));
26521 return 1;
26524 /* Emit a fp16 constant appropriately padded to occupy a 4-byte word.
26525 HFmode constant pool entries are actually loaded with ldr. */
26526 void
26527 arm_emit_fp16_const (rtx c)
26529 long bits;
26531 bits = real_to_target (NULL, CONST_DOUBLE_REAL_VALUE (c), HFmode);
26532 if (WORDS_BIG_ENDIAN)
26533 assemble_zeros (2);
26534 assemble_integer (GEN_INT (bits), 2, BITS_PER_WORD, 1);
26535 if (!WORDS_BIG_ENDIAN)
26536 assemble_zeros (2);
26539 const char *
26540 arm_output_load_gr (rtx *operands)
26542 rtx reg;
26543 rtx offset;
26544 rtx wcgr;
26545 rtx sum;
26547 if (!MEM_P (operands [1])
26548 || GET_CODE (sum = XEXP (operands [1], 0)) != PLUS
26549 || !REG_P (reg = XEXP (sum, 0))
26550 || !CONST_INT_P (offset = XEXP (sum, 1))
26551 || ((INTVAL (offset) < 1024) && (INTVAL (offset) > -1024)))
26552 return "wldrw%?\t%0, %1";
26554 /* Fix up an out-of-range load of a GR register. */
26555 output_asm_insn ("str%?\t%0, [sp, #-4]!\t@ Start of GR load expansion", & reg);
26556 wcgr = operands[0];
26557 operands[0] = reg;
26558 output_asm_insn ("ldr%?\t%0, %1", operands);
26560 operands[0] = wcgr;
26561 operands[1] = reg;
26562 output_asm_insn ("tmcr%?\t%0, %1", operands);
26563 output_asm_insn ("ldr%?\t%0, [sp], #4\t@ End of GR load expansion", & reg);
26565 return "";
26568 /* Worker function for TARGET_SETUP_INCOMING_VARARGS.
26570 On the ARM, PRETEND_SIZE is set in order to have the prologue push the last
26571 named arg and all anonymous args onto the stack.
26572 XXX I know the prologue shouldn't be pushing registers, but it is faster
26573 that way. */
26575 static void
26576 arm_setup_incoming_varargs (cumulative_args_t pcum_v,
26577 machine_mode mode,
26578 tree type,
26579 int *pretend_size,
26580 int second_time ATTRIBUTE_UNUSED)
26582 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
26583 int nregs;
26585 cfun->machine->uses_anonymous_args = 1;
26586 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
26588 nregs = pcum->aapcs_ncrn;
26589 if (nregs & 1)
26591 int res = arm_needs_doubleword_align (mode, type);
26592 if (res < 0 && warn_psabi)
26593 inform (input_location, "parameter passing for argument of "
26594 "type %qT changed in GCC 7.1", type);
26595 else if (res > 0)
26596 nregs++;
26599 else
26600 nregs = pcum->nregs;
26602 if (nregs < NUM_ARG_REGS)
26603 *pretend_size = (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
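/* Worked example (AAPCS): for "int f (int a, ...)" the named argument
   occupies r0, so nregs == 1 and *pretend_size becomes
   3 * UNITS_PER_WORD == 12, i.e. the prologue spills r1-r3.  */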
26606 /* We can't rely on the caller doing the proper promotion when
26607 using APCS or ATPCS. */
26609 static bool
26610 arm_promote_prototypes (const_tree t ATTRIBUTE_UNUSED)
26612 return !TARGET_AAPCS_BASED;
26615 static machine_mode
26616 arm_promote_function_mode (const_tree type ATTRIBUTE_UNUSED,
26617 machine_mode mode,
26618 int *punsignedp ATTRIBUTE_UNUSED,
26619 const_tree fntype ATTRIBUTE_UNUSED,
26620 int for_return ATTRIBUTE_UNUSED)
26622 if (GET_MODE_CLASS (mode) == MODE_INT
26623 && GET_MODE_SIZE (mode) < 4)
26624 return SImode;
26626 return mode;
26630 static bool
26631 arm_default_short_enums (void)
26633 return ARM_DEFAULT_SHORT_ENUMS;
26637 /* AAPCS requires that anonymous bitfields affect structure alignment. */
26639 static bool
26640 arm_align_anon_bitfield (void)
26642 return TARGET_AAPCS_BASED;
26646 /* The generic C++ ABI says 64-bit (long long). The EABI says 32-bit. */
26648 static tree
26649 arm_cxx_guard_type (void)
26651 return TARGET_AAPCS_BASED ? integer_type_node : long_long_integer_type_node;
26655 /* The EABI says test the least significant bit of a guard variable. */
26657 static bool
26658 arm_cxx_guard_mask_bit (void)
26660 return TARGET_AAPCS_BASED;
26664 /* The EABI specifies that all array cookies are 8 bytes long. */
26666 static tree
26667 arm_get_cookie_size (tree type)
26669 tree size;
26671 if (!TARGET_AAPCS_BASED)
26672 return default_cxx_get_cookie_size (type);
26674 size = build_int_cst (sizetype, 8);
26675 return size;
26679 /* The EABI says that array cookies should also contain the element size. */
26681 static bool
26682 arm_cookie_has_size (void)
26684 return TARGET_AAPCS_BASED;
26688 /* The EABI says constructors and destructors should return a pointer to
26689 the object constructed/destroyed. */
26691 static bool
26692 arm_cxx_cdtor_returns_this (void)
26694 return TARGET_AAPCS_BASED;
26697 /* The EABI says that an inline function may never be the key
26698 method. */
26700 static bool
26701 arm_cxx_key_method_may_be_inline (void)
26703 return !TARGET_AAPCS_BASED;
26706 static void
26707 arm_cxx_determine_class_data_visibility (tree decl)
26709 if (!TARGET_AAPCS_BASED
26710 || !TARGET_DLLIMPORT_DECL_ATTRIBUTES)
26711 return;
26713 /* In general, \S 3.2.5.5 of the ARM EABI requires that class data
26714 is exported. However, on systems without dynamic vague linkage,
26715 \S 3.2.5.6 says that COMDAT class data has hidden linkage. */
26716 if (!TARGET_ARM_DYNAMIC_VAGUE_LINKAGE_P && DECL_COMDAT (decl))
26717 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
26718 else
26719 DECL_VISIBILITY (decl) = VISIBILITY_DEFAULT;
26720 DECL_VISIBILITY_SPECIFIED (decl) = 1;
26723 static bool
26724 arm_cxx_class_data_always_comdat (void)
26726 /* \S 3.2.5.4 of the ARM C++ ABI says that class data only have
26727 vague linkage if the class has no key function. */
26728 return !TARGET_AAPCS_BASED;
26732 /* The EABI says __aeabi_atexit should be used to register static
26733 destructors. */
26735 static bool
26736 arm_cxx_use_aeabi_atexit (void)
26738 return TARGET_AAPCS_BASED;
26742 void
26743 arm_set_return_address (rtx source, rtx scratch)
26745 arm_stack_offsets *offsets;
26746 HOST_WIDE_INT delta;
26747 rtx addr;
26748 unsigned long saved_regs;
26750 offsets = arm_get_frame_offsets ();
26751 saved_regs = offsets->saved_regs_mask;
26753 if ((saved_regs & (1 << LR_REGNUM)) == 0)
26754 emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
26755 else
26757 if (frame_pointer_needed)
26758 addr = plus_constant (Pmode, hard_frame_pointer_rtx, -4);
26759 else
26761 /* LR will be the first saved register. */
26762 delta = offsets->outgoing_args - (offsets->frame + 4);
26765 if (delta >= 4096)
26767 emit_insn (gen_addsi3 (scratch, stack_pointer_rtx,
26768 GEN_INT (delta & ~4095)));
26769 addr = scratch;
26770 delta &= 4095;
26772 else
26773 addr = stack_pointer_rtx;
26775 addr = plus_constant (Pmode, addr, delta);
26777 /* The store needs to be marked as frame related in order to prevent
26778 DSE from deleting it as dead if it is based on fp. */
26779 rtx insn = emit_move_insn (gen_frame_mem (Pmode, addr), source);
26780 RTX_FRAME_RELATED_P (insn) = 1;
26781 add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (Pmode, LR_REGNUM));
26786 void
26787 thumb_set_return_address (rtx source, rtx scratch)
26789 arm_stack_offsets *offsets;
26790 HOST_WIDE_INT delta;
26791 HOST_WIDE_INT limit;
26792 int reg;
26793 rtx addr;
26794 unsigned long mask;
26796 emit_use (source);
26798 offsets = arm_get_frame_offsets ();
26799 mask = offsets->saved_regs_mask;
26800 if (mask & (1 << LR_REGNUM))
26802 limit = 1024;
26803 /* Find the saved regs. */
26804 if (frame_pointer_needed)
26806 delta = offsets->soft_frame - offsets->saved_args;
26807 reg = THUMB_HARD_FRAME_POINTER_REGNUM;
26808 if (TARGET_THUMB1)
26809 limit = 128;
26811 else
26813 delta = offsets->outgoing_args - offsets->saved_args;
26814 reg = SP_REGNUM;
26816 /* Allow for the stack frame. */
26817 if (TARGET_THUMB1 && TARGET_BACKTRACE)
26818 delta -= 16;
26819 /* The link register is always the first saved register. */
26820 delta -= 4;
26822 /* Construct the address. */
26823 addr = gen_rtx_REG (SImode, reg);
26824 if (delta > limit)
26826 emit_insn (gen_movsi (scratch, GEN_INT (delta)));
26827 emit_insn (gen_addsi3 (scratch, scratch, stack_pointer_rtx));
26828 addr = scratch;
26830 else
26831 addr = plus_constant (Pmode, addr, delta);
26833 /* The store needs to be marked as frame related in order to prevent
26834 DSE from deleting it as dead if it is based on fp. */
26835 rtx insn = emit_move_insn (gen_frame_mem (Pmode, addr), source);
26836 RTX_FRAME_RELATED_P (insn) = 1;
26837 add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (Pmode, LR_REGNUM));
26839 else
26840 emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
26843 /* Implements target hook vector_mode_supported_p. */
26844 bool
26845 arm_vector_mode_supported_p (machine_mode mode)
26847 /* Neon also supports V2SImode, etc. listed in the clause below. */
26848 if (TARGET_NEON && (mode == V2SFmode || mode == V4SImode || mode == V8HImode
26849 || mode == V4HFmode || mode == V16QImode || mode == V4SFmode
26850 || mode == V2DImode || mode == V8HFmode))
26851 return true;
26853 if ((TARGET_NEON || TARGET_IWMMXT)
26854 && ((mode == V2SImode)
26855 || (mode == V4HImode)
26856 || (mode == V8QImode)))
26857 return true;
26859 if (TARGET_INT_SIMD && (mode == V4UQQmode || mode == V4QQmode
26860 || mode == V2UHQmode || mode == V2HQmode || mode == V2UHAmode
26861 || mode == V2HAmode))
26862 return true;
26864 return false;
26867 /* Implements target hook array_mode_supported_p. */
26869 static bool
26870 arm_array_mode_supported_p (machine_mode mode,
26871 unsigned HOST_WIDE_INT nelems)
26873 if (TARGET_NEON
26874 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
26875 && (nelems >= 2 && nelems <= 4))
26876 return true;
26878 return false;
26881 /* Use the option -mvectorize-with-neon-double to override the use of quadword
26882 registers when autovectorizing for Neon, at least until multiple vector
26883 widths are supported properly by the middle-end. */
26885 static machine_mode
26886 arm_preferred_simd_mode (machine_mode mode)
26888 if (TARGET_NEON)
26889 switch (mode)
26891 case SFmode:
26892 return TARGET_NEON_VECTORIZE_DOUBLE ? V2SFmode : V4SFmode;
26893 case SImode:
26894 return TARGET_NEON_VECTORIZE_DOUBLE ? V2SImode : V4SImode;
26895 case HImode:
26896 return TARGET_NEON_VECTORIZE_DOUBLE ? V4HImode : V8HImode;
26897 case QImode:
26898 return TARGET_NEON_VECTORIZE_DOUBLE ? V8QImode : V16QImode;
26899 case DImode:
26900 if (!TARGET_NEON_VECTORIZE_DOUBLE)
26901 return V2DImode;
26902 break;
26904 default:;
26907 if (TARGET_REALLY_IWMMXT)
26908 switch (mode)
26910 case SImode:
26911 return V2SImode;
26912 case HImode:
26913 return V4HImode;
26914 case QImode:
26915 return V8QImode;
26917 default:;
26920 return word_mode;
26923 /* Implement TARGET_CLASS_LIKELY_SPILLED_P.
26925 We need to define this for LO_REGS on Thumb-1. Otherwise we can end up
26926 using r0-r4 for function arguments, r7 for the stack frame, and not have
26927 enough left over to do doubleword arithmetic. For Thumb-2 all the
26928 potentially problematic instructions accept high registers so this is not
26929 necessary. Care needs to be taken to avoid adding new Thumb-2 patterns
26930 that require many low registers. */
26931 static bool
26932 arm_class_likely_spilled_p (reg_class_t rclass)
26934 if ((TARGET_THUMB1 && rclass == LO_REGS)
26935 || rclass == CC_REG)
26936 return true;
26938 return false;
26941 /* Implements target hook small_register_classes_for_mode_p. */
26942 bool
26943 arm_small_register_classes_for_mode_p (machine_mode mode ATTRIBUTE_UNUSED)
26945 return TARGET_THUMB1;
26948 /* Implement TARGET_SHIFT_TRUNCATION_MASK. SImode shifts use normal
26949 ARM insns and therefore guarantee that the shift count is modulo 256.
26950 DImode shifts (those implemented by lib1funcs.S or by optabs.c)
26951 guarantee no particular behavior for out-of-range counts. */
26953 static unsigned HOST_WIDE_INT
26954 arm_shift_truncation_mask (machine_mode mode)
26956 return mode == SImode ? 255 : 0;
26960 /* Map internal gcc register numbers to DWARF2 register numbers. */
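/* As an illustration of the mapping below: core registers r0-r15 keep
   their own numbers, S0-S31 use the legacy VFP range starting at 64, the
   iWMMXt control and data registers start at 104 and 112 respectively,
   and D16-D31 land in the 256-based range described in
   arm_dwarf_register_span.  */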
26962 unsigned int
26963 arm_dbx_register_number (unsigned int regno)
26965 if (regno < 16)
26966 return regno;
26968 if (IS_VFP_REGNUM (regno))
26970 /* See comment in arm_dwarf_register_span. */
26971 if (VFP_REGNO_OK_FOR_SINGLE (regno))
26972 return 64 + regno - FIRST_VFP_REGNUM;
26973 else
26974 return 256 + (regno - FIRST_VFP_REGNUM) / 2;
26977 if (IS_IWMMXT_GR_REGNUM (regno))
26978 return 104 + regno - FIRST_IWMMXT_GR_REGNUM;
26980 if (IS_IWMMXT_REGNUM (regno))
26981 return 112 + regno - FIRST_IWMMXT_REGNUM;
26983 return DWARF_FRAME_REGISTERS;
26986 /* Dwarf models VFPv3 registers as 32 64-bit registers.
26987 GCC models them as 64 32-bit registers, so we need to describe this to
26988 the DWARF generation code. Other registers can use the default. */
26989 static rtx
26990 arm_dwarf_register_span (rtx rtl)
26992 machine_mode mode;
26993 unsigned regno;
26994 rtx parts[16];
26995 int nregs;
26996 int i;
26998 regno = REGNO (rtl);
26999 if (!IS_VFP_REGNUM (regno))
27000 return NULL_RTX;
27002 /* XXX FIXME: The EABI defines two VFP register ranges:
27003 64-95: Legacy VFPv2 numbering for S0-S31 (obsolescent)
27004 256-287: D0-D31
27005 The recommended encoding for S0-S31 is a DW_OP_bit_piece of the
27006 corresponding D register. Until GDB supports this, we shall use the
27007 legacy encodings. We also use these encodings for D0-D15 for
27008 compatibility with older debuggers. */
27009 mode = GET_MODE (rtl);
27010 if (GET_MODE_SIZE (mode) < 8)
27011 return NULL_RTX;
27013 if (VFP_REGNO_OK_FOR_SINGLE (regno))
27015 nregs = GET_MODE_SIZE (mode) / 4;
27016 for (i = 0; i < nregs; i += 2)
27017 if (TARGET_BIG_END)
27019 parts[i] = gen_rtx_REG (SImode, regno + i + 1);
27020 parts[i + 1] = gen_rtx_REG (SImode, regno + i);
27022 else
27024 parts[i] = gen_rtx_REG (SImode, regno + i);
27025 parts[i + 1] = gen_rtx_REG (SImode, regno + i + 1);
27028 else
27030 nregs = GET_MODE_SIZE (mode) / 8;
27031 for (i = 0; i < nregs; i++)
27032 parts[i] = gen_rtx_REG (DImode, regno + i);
27035 return gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (nregs , parts));
27038 #if ARM_UNWIND_INFO
27039 /* Emit unwind directives for a store-multiple instruction or stack pointer
27040 push during alignment.
27041 These should only ever be generated by the function prologue code, so
27042 expect them to have a particular form.
27043 The store-multiple instruction sometimes pushes pc as the last register,
27044 although it should not be tracked into unwind information, or for -Os
27045 sometimes pushes some dummy registers before the first register that needs
27046 to be tracked in unwind information; such dummy registers are there just
27047 to avoid a separate stack adjustment, and will not be restored in the
27048 epilogue. */
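/* A hand-written illustration (not a real compiler dump): a prologue push
   equivalent to "push {r4, r5, lr}" results in
       .save {r4, r5, lr}
   while a push that also stacks pc purely as padding additionally emits
       .pad #4
   before the .save directive.  */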
27050 static void
27051 arm_unwind_emit_sequence (FILE * asm_out_file, rtx p)
27053 int i;
27054 HOST_WIDE_INT offset;
27055 HOST_WIDE_INT nregs;
27056 int reg_size;
27057 unsigned reg;
27058 unsigned lastreg;
27059 unsigned padfirst = 0, padlast = 0;
27060 rtx e;
27062 e = XVECEXP (p, 0, 0);
27063 gcc_assert (GET_CODE (e) == SET);
27065 /* First insn will adjust the stack pointer. */
27066 gcc_assert (GET_CODE (e) == SET
27067 && REG_P (SET_DEST (e))
27068 && REGNO (SET_DEST (e)) == SP_REGNUM
27069 && GET_CODE (SET_SRC (e)) == PLUS);
27071 offset = -INTVAL (XEXP (SET_SRC (e), 1));
27072 nregs = XVECLEN (p, 0) - 1;
27073 gcc_assert (nregs);
27075 reg = REGNO (SET_SRC (XVECEXP (p, 0, 1)));
27076 if (reg < 16)
27078 /* For -Os dummy registers can be pushed at the beginning to
27079 avoid separate stack pointer adjustment. */
27080 e = XVECEXP (p, 0, 1);
27081 e = XEXP (SET_DEST (e), 0);
27082 if (GET_CODE (e) == PLUS)
27083 padfirst = INTVAL (XEXP (e, 1));
27084 gcc_assert (padfirst == 0 || optimize_size);
27085 /* The function prologue may also push pc, but not annotate it as it is
27086 never restored. We turn this into a stack pointer adjustment. */
27087 e = XVECEXP (p, 0, nregs);
27088 e = XEXP (SET_DEST (e), 0);
27089 if (GET_CODE (e) == PLUS)
27090 padlast = offset - INTVAL (XEXP (e, 1)) - 4;
27091 else
27092 padlast = offset - 4;
27093 gcc_assert (padlast == 0 || padlast == 4);
27094 if (padlast == 4)
27095 fprintf (asm_out_file, "\t.pad #4\n");
27096 reg_size = 4;
27097 fprintf (asm_out_file, "\t.save {");
27099 else if (IS_VFP_REGNUM (reg))
27101 reg_size = 8;
27102 fprintf (asm_out_file, "\t.vsave {");
27104 else
27105 /* Unknown register type. */
27106 gcc_unreachable ();
27108 /* If the stack increment doesn't match the size of the saved registers,
27109 something has gone horribly wrong. */
27110 gcc_assert (offset == padfirst + nregs * reg_size + padlast);
27112 offset = padfirst;
27113 lastreg = 0;
27114 /* The remaining insns will describe the stores. */
27115 for (i = 1; i <= nregs; i++)
27117 /* Expect (set (mem <addr>) (reg)).
27118 Where <addr> is (reg:SP) or (plus (reg:SP) (const_int)). */
27119 e = XVECEXP (p, 0, i);
27120 gcc_assert (GET_CODE (e) == SET
27121 && MEM_P (SET_DEST (e))
27122 && REG_P (SET_SRC (e)));
27124 reg = REGNO (SET_SRC (e));
27125 gcc_assert (reg >= lastreg);
27127 if (i != 1)
27128 fprintf (asm_out_file, ", ");
27129 /* We can't use %r for vfp because we need to use the
27130 double precision register names. */
27131 if (IS_VFP_REGNUM (reg))
27132 asm_fprintf (asm_out_file, "d%d", (reg - FIRST_VFP_REGNUM) / 2);
27133 else
27134 asm_fprintf (asm_out_file, "%r", reg);
27136 if (flag_checking)
27138 /* Check that the addresses are consecutive. */
27139 e = XEXP (SET_DEST (e), 0);
27140 if (GET_CODE (e) == PLUS)
27141 gcc_assert (REG_P (XEXP (e, 0))
27142 && REGNO (XEXP (e, 0)) == SP_REGNUM
27143 && CONST_INT_P (XEXP (e, 1))
27144 && offset == INTVAL (XEXP (e, 1)));
27145 else
27146 gcc_assert (i == 1
27147 && REG_P (e)
27148 && REGNO (e) == SP_REGNUM);
27149 offset += reg_size;
27152 fprintf (asm_out_file, "}\n");
27153 if (padfirst)
27154 fprintf (asm_out_file, "\t.pad #%d\n", padfirst);
27157 /* Emit unwind directives for a SET. */
27159 static void
27160 arm_unwind_emit_set (FILE * asm_out_file, rtx p)
27162 rtx e0;
27163 rtx e1;
27164 unsigned reg;
27166 e0 = XEXP (p, 0);
27167 e1 = XEXP (p, 1);
27168 switch (GET_CODE (e0))
27170 case MEM:
27171 /* Pushing a single register. */
27172 if (GET_CODE (XEXP (e0, 0)) != PRE_DEC
27173 || !REG_P (XEXP (XEXP (e0, 0), 0))
27174 || REGNO (XEXP (XEXP (e0, 0), 0)) != SP_REGNUM)
27175 abort ();
27177 asm_fprintf (asm_out_file, "\t.save ");
27178 if (IS_VFP_REGNUM (REGNO (e1)))
27179 asm_fprintf(asm_out_file, "{d%d}\n",
27180 (REGNO (e1) - FIRST_VFP_REGNUM) / 2);
27181 else
27182 asm_fprintf(asm_out_file, "{%r}\n", REGNO (e1));
27183 break;
27185 case REG:
27186 if (REGNO (e0) == SP_REGNUM)
27188 /* A stack increment. */
27189 if (GET_CODE (e1) != PLUS
27190 || !REG_P (XEXP (e1, 0))
27191 || REGNO (XEXP (e1, 0)) != SP_REGNUM
27192 || !CONST_INT_P (XEXP (e1, 1)))
27193 abort ();
27195 asm_fprintf (asm_out_file, "\t.pad #%wd\n",
27196 -INTVAL (XEXP (e1, 1)));
27198 else if (REGNO (e0) == HARD_FRAME_POINTER_REGNUM)
27200 HOST_WIDE_INT offset;
27202 if (GET_CODE (e1) == PLUS)
27204 if (!REG_P (XEXP (e1, 0))
27205 || !CONST_INT_P (XEXP (e1, 1)))
27206 abort ();
27207 reg = REGNO (XEXP (e1, 0));
27208 offset = INTVAL (XEXP (e1, 1));
27209 asm_fprintf (asm_out_file, "\t.setfp %r, %r, #%wd\n",
27210 HARD_FRAME_POINTER_REGNUM, reg,
27211 offset);
27213 else if (REG_P (e1))
27215 reg = REGNO (e1);
27216 asm_fprintf (asm_out_file, "\t.setfp %r, %r\n",
27217 HARD_FRAME_POINTER_REGNUM, reg);
27219 else
27220 abort ();
27222 else if (REG_P (e1) && REGNO (e1) == SP_REGNUM)
27224 /* Move from sp to reg. */
27225 asm_fprintf (asm_out_file, "\t.movsp %r\n", REGNO (e0));
27227 else if (GET_CODE (e1) == PLUS
27228 && REG_P (XEXP (e1, 0))
27229 && REGNO (XEXP (e1, 0)) == SP_REGNUM
27230 && CONST_INT_P (XEXP (e1, 1)))
27232 /* Set reg to offset from sp. */
27233 asm_fprintf (asm_out_file, "\t.movsp %r, #%d\n",
27234 REGNO (e0), (int)INTVAL(XEXP (e1, 1)));
27236 else
27237 abort ();
27238 break;
27240 default:
27241 abort ();
27246 /* Emit unwind directives for the given insn. */
27248 static void
27249 arm_unwind_emit (FILE * asm_out_file, rtx_insn *insn)
27251 rtx note, pat;
27252 bool handled_one = false;
27254 if (arm_except_unwind_info (&global_options) != UI_TARGET)
27255 return;
27257 if (!(flag_unwind_tables || crtl->uses_eh_lsda)
27258 && (TREE_NOTHROW (current_function_decl)
27259 || crtl->all_throwers_are_sibcalls))
27260 return;
27262 if (NOTE_P (insn) || !RTX_FRAME_RELATED_P (insn))
27263 return;
27265 for (note = REG_NOTES (insn); note ; note = XEXP (note, 1))
27267 switch (REG_NOTE_KIND (note))
27269 case REG_FRAME_RELATED_EXPR:
27270 pat = XEXP (note, 0);
27271 goto found;
27273 case REG_CFA_REGISTER:
27274 pat = XEXP (note, 0);
27275 if (pat == NULL)
27277 pat = PATTERN (insn);
27278 if (GET_CODE (pat) == PARALLEL)
27279 pat = XVECEXP (pat, 0, 0);
27282 /* Only emitted for IS_STACKALIGN re-alignment. */
27284 rtx dest, src;
27285 unsigned reg;
27287 src = SET_SRC (pat);
27288 dest = SET_DEST (pat);
27290 gcc_assert (src == stack_pointer_rtx);
27291 reg = REGNO (dest);
27292 asm_fprintf (asm_out_file, "\t.unwind_raw 0, 0x%x @ vsp = r%d\n",
27293 reg + 0x90, reg);
27295 handled_one = true;
27296 break;
27298 /* The INSN is generated in the epilogue. It is set as RTX_FRAME_RELATED_P
27299 to get correct dwarf information for shrink-wrap. We should not
27300 emit unwind information for it because these are used either for
27301 pretend arguments or notes to adjust sp and restore registers from
27302 the stack. */
27303 case REG_CFA_DEF_CFA:
27304 case REG_CFA_ADJUST_CFA:
27305 case REG_CFA_RESTORE:
27306 return;
27308 case REG_CFA_EXPRESSION:
27309 case REG_CFA_OFFSET:
27310 /* ??? Only handling here what we actually emit. */
27311 gcc_unreachable ();
27313 default:
27314 break;
27317 if (handled_one)
27318 return;
27319 pat = PATTERN (insn);
27320 found:
27322 switch (GET_CODE (pat))
27324 case SET:
27325 arm_unwind_emit_set (asm_out_file, pat);
27326 break;
27328 case SEQUENCE:
27329 /* Store multiple. */
27330 arm_unwind_emit_sequence (asm_out_file, pat);
27331 break;
27333 default:
27334 abort();
27339 /* Output a reference from a function exception table to the type_info
27340 object X. The EABI specifies that the symbol should be relocated by
27341 an R_ARM_TARGET2 relocation. */
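/* For instance (illustrative), a catch handler for "int" would typically
   cause this to print
       .word   _ZTIi(TARGET2)
   so that the reference is resolved via an R_ARM_TARGET2 relocation.  */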
27343 static bool
27344 arm_output_ttype (rtx x)
27346 fputs ("\t.word\t", asm_out_file);
27347 output_addr_const (asm_out_file, x);
27348 /* Use special relocations for symbol references. */
27349 if (!CONST_INT_P (x))
27350 fputs ("(TARGET2)", asm_out_file);
27351 fputc ('\n', asm_out_file);
27353 return TRUE;
27356 /* Implement TARGET_ASM_EMIT_EXCEPT_PERSONALITY. */
27358 static void
27359 arm_asm_emit_except_personality (rtx personality)
27361 fputs ("\t.personality\t", asm_out_file);
27362 output_addr_const (asm_out_file, personality);
27363 fputc ('\n', asm_out_file);
27365 #endif /* ARM_UNWIND_INFO */
27367 /* Implement TARGET_ASM_INITIALIZE_SECTIONS. */
27369 static void
27370 arm_asm_init_sections (void)
27372 #if ARM_UNWIND_INFO
27373 exception_section = get_unnamed_section (0, output_section_asm_op,
27374 "\t.handlerdata");
27375 #endif /* ARM_UNWIND_INFO */
27377 #ifdef OBJECT_FORMAT_ELF
27378 if (target_pure_code)
27379 text_section->unnamed.data = "\t.section .text,\"0x20000006\",%progbits";
27380 #endif
27383 /* Output unwind directives for the start/end of a function. */
27385 void
27386 arm_output_fn_unwind (FILE * f, bool prologue)
27388 if (arm_except_unwind_info (&global_options) != UI_TARGET)
27389 return;
27391 if (prologue)
27392 fputs ("\t.fnstart\n", f);
27393 else
27395 /* If this function will never be unwound, then mark it as such.
27396 The same condition is used in arm_unwind_emit to suppress
27397 the frame annotations. */
27398 if (!(flag_unwind_tables || crtl->uses_eh_lsda)
27399 && (TREE_NOTHROW (current_function_decl)
27400 || crtl->all_throwers_are_sibcalls))
27401 fputs("\t.cantunwind\n", f);
27403 fputs ("\t.fnend\n", f);
27407 static bool
27408 arm_emit_tls_decoration (FILE *fp, rtx x)
27410 enum tls_reloc reloc;
27411 rtx val;
27413 val = XVECEXP (x, 0, 0);
27414 reloc = (enum tls_reloc) INTVAL (XVECEXP (x, 0, 1));
27416 output_addr_const (fp, val);
27418 switch (reloc)
27420 case TLS_GD32:
27421 fputs ("(tlsgd)", fp);
27422 break;
27423 case TLS_LDM32:
27424 fputs ("(tlsldm)", fp);
27425 break;
27426 case TLS_LDO32:
27427 fputs ("(tlsldo)", fp);
27428 break;
27429 case TLS_IE32:
27430 fputs ("(gottpoff)", fp);
27431 break;
27432 case TLS_LE32:
27433 fputs ("(tpoff)", fp);
27434 break;
27435 case TLS_DESCSEQ:
27436 fputs ("(tlsdesc)", fp);
27437 break;
27438 default:
27439 gcc_unreachable ();
27442 switch (reloc)
27444 case TLS_GD32:
27445 case TLS_LDM32:
27446 case TLS_IE32:
27447 case TLS_DESCSEQ:
27448 fputs (" + (. - ", fp);
27449 output_addr_const (fp, XVECEXP (x, 0, 2));
27450 /* For DESCSEQ the 3rd operand encodes thumbness, and is added. */
27451 fputs (reloc == TLS_DESCSEQ ? " + " : " - ", fp);
27452 output_addr_const (fp, XVECEXP (x, 0, 3));
27453 fputc (')', fp);
27454 break;
27455 default:
27456 break;
27459 return TRUE;
27462 /* ARM implementation of TARGET_ASM_OUTPUT_DWARF_DTPREL. */
27464 static void
27465 arm_output_dwarf_dtprel (FILE *file, int size, rtx x)
27467 gcc_assert (size == 4);
27468 fputs ("\t.word\t", file);
27469 output_addr_const (file, x);
27470 fputs ("(tlsldo)", file);
27473 /* Implement TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
27475 static bool
27476 arm_output_addr_const_extra (FILE *fp, rtx x)
27478 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
27479 return arm_emit_tls_decoration (fp, x);
27480 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_PIC_LABEL)
27482 char label[256];
27483 int labelno = INTVAL (XVECEXP (x, 0, 0));
27485 ASM_GENERATE_INTERNAL_LABEL (label, "LPIC", labelno);
27486 assemble_name_raw (fp, label);
27488 return TRUE;
27490 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_GOTSYM_OFF)
27492 assemble_name (fp, "_GLOBAL_OFFSET_TABLE_");
27493 if (GOT_PCREL)
27494 fputs ("+.", fp);
27495 fputs ("-(", fp);
27496 output_addr_const (fp, XVECEXP (x, 0, 0));
27497 fputc (')', fp);
27498 return TRUE;
27500 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_SYMBOL_OFFSET)
27502 output_addr_const (fp, XVECEXP (x, 0, 0));
27503 if (GOT_PCREL)
27504 fputs ("+.", fp);
27505 fputs ("-(", fp);
27506 output_addr_const (fp, XVECEXP (x, 0, 1));
27507 fputc (')', fp);
27508 return TRUE;
27510 else if (GET_CODE (x) == CONST_VECTOR)
27511 return arm_emit_vector_const (fp, x);
27513 return FALSE;
27516 /* Output assembly for a shift instruction.
27517 SET_FLAGS determines how the instruction modifies the condition codes.
27518 0 - Do not set condition codes.
27519 1 - Set condition codes.
27520 2 - Use smallest instruction. */
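/* A rough example (assumed operands, not from a real dump): with
   SET_FLAGS == 1 and operand 3 describing an arithmetic right shift,
   shift_op returns "asr" and the template printed below is along the
   lines of "asr%.\t%0, %1, %2", i.e. the flag-setting form.  */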
27521 const char *
27522 arm_output_shift(rtx * operands, int set_flags)
27524 char pattern[100];
27525 static const char flag_chars[3] = {'?', '.', '!'};
27526 const char *shift;
27527 HOST_WIDE_INT val;
27528 char c;
27530 c = flag_chars[set_flags];
27531 shift = shift_op(operands[3], &val);
27532 if (shift)
27534 if (val != -1)
27535 operands[2] = GEN_INT(val);
27536 sprintf (pattern, "%s%%%c\t%%0, %%1, %%2", shift, c);
27538 else
27539 sprintf (pattern, "mov%%%c\t%%0, %%1", c);
27541 output_asm_insn (pattern, operands);
27542 return "";
27545 /* Output assembly for a WMMX immediate shift instruction. */
27546 const char *
27547 arm_output_iwmmxt_shift_immediate (const char *insn_name, rtx *operands, bool wror_or_wsra)
27549 int shift = INTVAL (operands[2]);
27550 char templ[50];
27551 machine_mode opmode = GET_MODE (operands[0]);
27553 gcc_assert (shift >= 0);
27555 /* Handle an out-of-range shift, i.e. a shift value > 63 (for the D qualifier),
27556 > 31 (for the W qualifier) or > 15 (for the H qualifier). */
27557 if (((opmode == V4HImode) && (shift > 15))
27558 || ((opmode == V2SImode) && (shift > 31))
27559 || ((opmode == DImode) && (shift > 63)))
27561 if (wror_or_wsra)
27563 sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, 32);
27564 output_asm_insn (templ, operands);
27565 if (opmode == DImode)
27567 sprintf (templ, "%s\t%%0, %%0, #%d", insn_name, 32);
27568 output_asm_insn (templ, operands);
27571 else
27573 /* The destination register will contain all zeros. */
27574 sprintf (templ, "wzero\t%%0");
27575 output_asm_insn (templ, operands);
27577 return "";
27580 if ((opmode == DImode) && (shift > 32))
27582 sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, 32);
27583 output_asm_insn (templ, operands);
27584 sprintf (templ, "%s\t%%0, %%0, #%d", insn_name, shift - 32);
27585 output_asm_insn (templ, operands);
27587 else
27589 sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, shift);
27590 output_asm_insn (templ, operands);
27592 return "";
27595 /* Output assembly for a WMMX tinsr instruction. */
27596 const char *
27597 arm_output_iwmmxt_tinsr (rtx *operands)
27599 int mask = INTVAL (operands[3]);
27600 int i;
27601 char templ[50];
27602 int units = mode_nunits[GET_MODE (operands[0])];
27603 gcc_assert ((mask & (mask - 1)) == 0);
27604 for (i = 0; i < units; ++i)
27606 if ((mask & 0x01) == 1)
27608 break;
27610 mask >>= 1;
27612 gcc_assert (i < units);
27614 switch (GET_MODE (operands[0]))
27616 case V8QImode:
27617 sprintf (templ, "tinsrb%%?\t%%0, %%2, #%d", i);
27618 break;
27619 case V4HImode:
27620 sprintf (templ, "tinsrh%%?\t%%0, %%2, #%d", i);
27621 break;
27622 case V2SImode:
27623 sprintf (templ, "tinsrw%%?\t%%0, %%2, #%d", i);
27624 break;
27625 default:
27626 gcc_unreachable ();
27627 break;
27629 output_asm_insn (templ, operands);
27631 return "";
27634 /* Output a Thumb-1 casesi dispatch sequence. */
27635 const char *
27636 thumb1_output_casesi (rtx *operands)
27638 rtx diff_vec = PATTERN (NEXT_INSN (as_a <rtx_insn *> (operands[0])));
27640 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
27642 switch (GET_MODE(diff_vec))
27644 case QImode:
27645 return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ?
27646 "bl\t%___gnu_thumb1_case_uqi" : "bl\t%___gnu_thumb1_case_sqi");
27647 case HImode:
27648 return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ?
27649 "bl\t%___gnu_thumb1_case_uhi" : "bl\t%___gnu_thumb1_case_shi");
27650 case SImode:
27651 return "bl\t%___gnu_thumb1_case_si";
27652 default:
27653 gcc_unreachable ();
27657 /* Output a Thumb-2 casesi instruction. */
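/* Schematically (illustrative), for a QImode dispatch table this emits
       cmp   rIDX, rRANGE
       bhi   .Ldefault
       tbb   [pc, rIDX]
   where rIDX and rRANGE stand for operands 0 and 1 and .Ldefault for
   operand 3.  */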
27658 const char *
27659 thumb2_output_casesi (rtx *operands)
27661 rtx diff_vec = PATTERN (NEXT_INSN (as_a <rtx_insn *> (operands[2])));
27663 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
27665 output_asm_insn ("cmp\t%0, %1", operands);
27666 output_asm_insn ("bhi\t%l3", operands);
27667 switch (GET_MODE(diff_vec))
27669 case QImode:
27670 return "tbb\t[%|pc, %0]";
27671 case HImode:
27672 return "tbh\t[%|pc, %0, lsl #1]";
27673 case SImode:
27674 if (flag_pic)
27676 output_asm_insn ("adr\t%4, %l2", operands);
27677 output_asm_insn ("ldr\t%5, [%4, %0, lsl #2]", operands);
27678 output_asm_insn ("add\t%4, %4, %5", operands);
27679 return "bx\t%4";
27681 else
27683 output_asm_insn ("adr\t%4, %l2", operands);
27684 return "ldr\t%|pc, [%4, %0, lsl #2]";
27686 default:
27687 gcc_unreachable ();
27691 /* Implement TARGET_SCHED_ISSUE_RATE. Lookup the issue rate in the
27692 per-core tuning structs. */
27693 static int
27694 arm_issue_rate (void)
27696 return current_tune->issue_rate;
27699 /* Return how many instructions the scheduler should look ahead to choose
27700 the best one. */
27701 static int
27702 arm_first_cycle_multipass_dfa_lookahead (void)
27704 int issue_rate = arm_issue_rate ();
27706 return issue_rate > 1 && !sched_fusion ? issue_rate : 0;
27709 /* Enable modeling of L2 auto-prefetcher. */
27710 static int
27711 arm_first_cycle_multipass_dfa_lookahead_guard (rtx_insn *insn, int ready_index)
27713 return autopref_multipass_dfa_lookahead_guard (insn, ready_index);
27716 const char *
27717 arm_mangle_type (const_tree type)
27719 /* The ARM ABI documents (10th October 2008) say that "__va_list"
27720 has to be mangled as if it is in the "std" namespace. */
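/* For example (sketch): a declaration "void f (__builtin_va_list)" is
   then expected to mangle as _Z1fSt9__va_list rather than using a
   target-specific name.  */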
27721 if (TARGET_AAPCS_BASED
27722 && lang_hooks.types_compatible_p (CONST_CAST_TREE (type), va_list_type))
27723 return "St9__va_list";
27725 /* Half-precision float. */
27726 if (TREE_CODE (type) == REAL_TYPE && TYPE_PRECISION (type) == 16)
27727 return "Dh";
27729 /* Try mangling as a Neon type; TYPE_NAME is non-NULL if this is a
27730 builtin type. */
27731 if (TYPE_NAME (type) != NULL)
27732 return arm_mangle_builtin_type (type);
27734 /* Use the default mangling. */
27735 return NULL;
27738 /* Order of allocation of core registers for Thumb: this allocation is
27739 written over the corresponding initial entries of the array
27740 initialized with REG_ALLOC_ORDER. We allocate all low registers
27741 first. Saving and restoring a low register is usually cheaper than
27742 using a call-clobbered high register. */
27744 static const int thumb_core_reg_alloc_order[] =
27746 3, 2, 1, 0, 4, 5, 6, 7,
27747 12, 14, 8, 9, 10, 11
27750 /* Adjust register allocation order when compiling for Thumb. */
27752 void
27753 arm_order_regs_for_local_alloc (void)
27755 const int arm_reg_alloc_order[] = REG_ALLOC_ORDER;
27756 memcpy(reg_alloc_order, arm_reg_alloc_order, sizeof (reg_alloc_order));
27757 if (TARGET_THUMB)
27758 memcpy (reg_alloc_order, thumb_core_reg_alloc_order,
27759 sizeof (thumb_core_reg_alloc_order));
27762 /* Implement TARGET_FRAME_POINTER_REQUIRED. */
27764 bool
27765 arm_frame_pointer_required (void)
27767 if (SUBTARGET_FRAME_POINTER_REQUIRED)
27768 return true;
27770 /* If the function receives nonlocal gotos, it needs to save the frame
27771 pointer in the nonlocal_goto_save_area object. */
27772 if (cfun->has_nonlocal_label)
27773 return true;
27775 /* The frame pointer is required for non-leaf APCS frames. */
27776 if (TARGET_ARM && TARGET_APCS_FRAME && !crtl->is_leaf)
27777 return true;
27779 /* If we are probing the stack in the prologue, we will have a faulting
27780 instruction prior to the stack adjustment and this requires a frame
27781 pointer if we want to catch the exception using the EABI unwinder. */
27782 if (!IS_INTERRUPT (arm_current_func_type ())
27783 && flag_stack_check == STATIC_BUILTIN_STACK_CHECK
27784 && arm_except_unwind_info (&global_options) == UI_TARGET
27785 && cfun->can_throw_non_call_exceptions)
27787 HOST_WIDE_INT size = get_frame_size ();
27789 /* That's irrelevant if there is no stack adjustment. */
27790 if (size <= 0)
27791 return false;
27793 /* That's relevant only if there is a stack probe. */
27794 if (crtl->is_leaf && !cfun->calls_alloca)
27796 /* We don't have the final size of the frame so adjust. */
27797 size += 32 * UNITS_PER_WORD;
27798 if (size > PROBE_INTERVAL && size > STACK_CHECK_PROTECT)
27799 return true;
27801 else
27802 return true;
27805 return false;
27808 /* Only thumb1 can't support conditional execution, so return true if
27809 the target is not thumb1. */
27810 static bool
27811 arm_have_conditional_execution (void)
27813 return !TARGET_THUMB1;
27816 /* The AAPCS sets the maximum alignment of a vector to 64 bits. */
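/* So, for example, a 128-bit Advanced SIMD vector type ends up with
   8-byte alignment under AAPCS even though its size alone would suggest
   16 bytes (illustrative reading of the rule implemented below).  */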
27817 static HOST_WIDE_INT
27818 arm_vector_alignment (const_tree type)
27820 HOST_WIDE_INT align = tree_to_shwi (TYPE_SIZE (type));
27822 if (TARGET_AAPCS_BASED)
27823 align = MIN (align, 64);
27825 return align;
27828 static unsigned int
27829 arm_autovectorize_vector_sizes (void)
27831 return TARGET_NEON_VECTORIZE_DOUBLE ? 0 : (16 | 8);
27834 static bool
27835 arm_vector_alignment_reachable (const_tree type, bool is_packed)
27837 /* Vectors which aren't in packed structures will not be less aligned than
27838 the natural alignment of their element type, so this is safe. */
27839 if (TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access)
27840 return !is_packed;
27842 return default_builtin_vector_alignment_reachable (type, is_packed);
27845 static bool
27846 arm_builtin_support_vector_misalignment (machine_mode mode,
27847 const_tree type, int misalignment,
27848 bool is_packed)
27850 if (TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access)
27852 HOST_WIDE_INT align = TYPE_ALIGN_UNIT (type);
27854 if (is_packed)
27855 return align == 1;
27857 /* If the misalignment is unknown, we should be able to handle the access
27858 so long as it is not to a member of a packed data structure. */
27859 if (misalignment == -1)
27860 return true;
27862 /* Return true if the misalignment is a multiple of the natural alignment
27863 of the vector's element type. This is probably always going to be
27864 true in practice, since we've already established that this isn't a
27865 packed access. */
27866 return ((misalignment % align) == 0);
27869 return default_builtin_support_vector_misalignment (mode, type, misalignment,
27870 is_packed);
27873 static void
27874 arm_conditional_register_usage (void)
27876 int regno;
27878 if (TARGET_THUMB1 && optimize_size)
27880 /* When optimizing for size on Thumb-1, it's better not
27881 to use the HI regs, because of the overhead of
27882 stacking them. */
27883 for (regno = FIRST_HI_REGNUM; regno <= LAST_HI_REGNUM; ++regno)
27884 fixed_regs[regno] = call_used_regs[regno] = 1;
27887 /* The link register can be clobbered by any branch insn,
27888 but we have no way to track that at present, so mark
27889 it as unavailable. */
27890 if (TARGET_THUMB1)
27891 fixed_regs[LR_REGNUM] = call_used_regs[LR_REGNUM] = 1;
27893 if (TARGET_32BIT && TARGET_HARD_FLOAT)
27895 /* VFPv3 registers are disabled when earlier VFP
27896 versions are selected due to the definition of
27897 LAST_VFP_REGNUM. */
27898 for (regno = FIRST_VFP_REGNUM;
27899 regno <= LAST_VFP_REGNUM; ++ regno)
27901 fixed_regs[regno] = 0;
27902 call_used_regs[regno] = regno < FIRST_VFP_REGNUM + 16
27903 || regno >= FIRST_VFP_REGNUM + 32;
27907 if (TARGET_REALLY_IWMMXT)
27909 regno = FIRST_IWMMXT_GR_REGNUM;
27910 /* The 2002/10/09 revision of the XScale ABI has wCG0
27911 and wCG1 as call-preserved registers. The 2002/11/21
27912 revision changed this so that all wCG registers are
27913 scratch registers. */
27914 for (regno = FIRST_IWMMXT_GR_REGNUM;
27915 regno <= LAST_IWMMXT_GR_REGNUM; ++ regno)
27916 fixed_regs[regno] = 0;
27917 /* The XScale ABI has wR0 - wR9 as scratch registers,
27918 the rest as call-preserved registers. */
27919 for (regno = FIRST_IWMMXT_REGNUM;
27920 regno <= LAST_IWMMXT_REGNUM; ++ regno)
27922 fixed_regs[regno] = 0;
27923 call_used_regs[regno] = regno < FIRST_IWMMXT_REGNUM + 10;
27927 if ((unsigned) PIC_OFFSET_TABLE_REGNUM != INVALID_REGNUM)
27929 fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
27930 call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
27932 else if (TARGET_APCS_STACK)
27934 fixed_regs[10] = 1;
27935 call_used_regs[10] = 1;
27937 /* -mcaller-super-interworking reserves r11 for calls to
27938 _interwork_r11_call_via_rN(). Making the register global
27939 is an easy way of ensuring that it remains valid for all
27940 calls. */
27941 if (TARGET_APCS_FRAME || TARGET_CALLER_INTERWORKING
27942 || TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME)
27944 fixed_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
27945 call_used_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
27946 if (TARGET_CALLER_INTERWORKING)
27947 global_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
27949 SUBTARGET_CONDITIONAL_REGISTER_USAGE
27952 static reg_class_t
27953 arm_preferred_rename_class (reg_class_t rclass)
27955 /* Thumb-2 instructions using LO_REGS may be smaller than instructions
27956 using GENERAL_REGS. During the register rename pass, we prefer LO_REGS,
27957 and code size can be reduced. */
27958 if (TARGET_THUMB2 && rclass == GENERAL_REGS)
27959 return LO_REGS;
27960 else
27961 return NO_REGS;
27964 /* Compute the attribute "length" of insn "*push_multi".
27965 So this function MUST be kept in sync with that insn pattern. */
27967 arm_attr_length_push_multi(rtx parallel_op, rtx first_op)
27969 int i, regno, hi_reg;
27970 int num_saves = XVECLEN (parallel_op, 0);
27972 /* ARM mode. */
27973 if (TARGET_ARM)
27974 return 4;
27975 /* Thumb1 mode. */
27976 if (TARGET_THUMB1)
27977 return 2;
27979 /* Thumb2 mode. */
27980 regno = REGNO (first_op);
27981 /* For PUSH/STM under Thumb2 mode, we can use 16-bit encodings if the register
27982 list fits in 8 bits. Normally this means all registers in the list must be
27983 LO_REGS, that is (R0-R7). If any HI_REGS are used, then we must use 32-bit
27984 encodings. There is one exception for PUSH: LR, although in HI_REGS, can be
27985 used with the 16-bit encoding. */
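/* E.g. (illustrative): "push {r0-r7, lr}" fits the 16-bit encoding, whereas
   "push {r0, r8}" forces the 32-bit encoding, hence the lengths 2 and 4
   returned below.  */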
27986 hi_reg = (REGNO_REG_CLASS (regno) == HI_REGS) && (regno != LR_REGNUM);
27987 for (i = 1; i < num_saves && !hi_reg; i++)
27989 regno = REGNO (XEXP (XVECEXP (parallel_op, 0, i), 0));
27990 hi_reg |= (REGNO_REG_CLASS (regno) == HI_REGS) && (regno != LR_REGNUM);
27993 if (!hi_reg)
27994 return 2;
27995 return 4;
27998 /* Compute the attribute "length" of insn. Currently, this function is used
27999 for "*load_multiple_with_writeback", "*pop_multiple_with_return" and
28000 "*pop_multiple_with_writeback_and_return". OPERANDS is the toplevel PARALLEL
28001 rtx, RETURN_PC is true if OPERANDS contains a return insn. WRITE_BACK_P is
28002 true if OPERANDS contains an insn which explicitly updates the base register. */
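/* Likewise illustrative: "pop {r0-r7, pc}" can use the 16-bit encoding,
   while popping into a high register such as r8, or an LDM whose base
   register is a high register, needs the 32-bit encoding.  */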
28005 arm_attr_length_pop_multi (rtx *operands, bool return_pc, bool write_back_p)
28007 /* ARM mode. */
28008 if (TARGET_ARM)
28009 return 4;
28010 /* Thumb1 mode. */
28011 if (TARGET_THUMB1)
28012 return 2;
28014 rtx parallel_op = operands[0];
28015 /* Initialize to elements number of PARALLEL. */
28016 unsigned indx = XVECLEN (parallel_op, 0) - 1;
28017 /* Initialize the value to base register. */
28018 unsigned regno = REGNO (operands[1]);
28019 /* Skip return and write back pattern.
28020 We only need register pop pattern for later analysis. */
28021 unsigned first_indx = 0;
28022 first_indx += return_pc ? 1 : 0;
28023 first_indx += write_back_p ? 1 : 0;
28025 /* A pop operation can be done through LDM or POP. If the base register is SP
28026 and write back is used, then LDM is an alias of POP. */
28027 bool pop_p = (regno == SP_REGNUM && write_back_p);
28028 bool ldm_p = !pop_p;
28030 /* Check base register for LDM. */
28031 if (ldm_p && REGNO_REG_CLASS (regno) == HI_REGS)
28032 return 4;
28034 /* Check each register in the list. */
28035 for (; indx >= first_indx; indx--)
28037 regno = REGNO (XEXP (XVECEXP (parallel_op, 0, indx), 0));
28038 /* For POP, PC in HI_REGS can be used with 16-bit encoding. See similar
28039 comment in arm_attr_length_push_multi. */
28040 if (REGNO_REG_CLASS (regno) == HI_REGS
28041 && (regno != PC_REGNUM || ldm_p))
28042 return 4;
28045 return 2;
28048 /* Compute the number of instructions emitted by output_move_double. */
28050 arm_count_output_move_double_insns (rtx *operands)
28052 int count;
28053 rtx ops[2];
28054 /* output_move_double may modify the operands array, so call it
28055 here on a copy of the array. */
28056 ops[0] = operands[0];
28057 ops[1] = operands[1];
28058 output_move_double (ops, false, &count);
28059 return count;
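/* If OPERAND is a CONST_DOUBLE whose reciprocal is an exact power of two
   with exponent in [0, 31], return that exponent (the number of fraction
   bits usable by the fixed-point conversion patterns); otherwise return 0.
   This description is inferred from the code below rather than copied from
   existing documentation.  */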
28063 vfp3_const_double_for_fract_bits (rtx operand)
28065 REAL_VALUE_TYPE r0;
28067 if (!CONST_DOUBLE_P (operand))
28068 return 0;
28070 r0 = *CONST_DOUBLE_REAL_VALUE (operand);
28071 if (exact_real_inverse (DFmode, &r0)
28072 && !REAL_VALUE_NEGATIVE (r0))
28074 if (exact_real_truncate (DFmode, &r0))
28076 HOST_WIDE_INT value = real_to_integer (&r0);
28077 value = value & 0xffffffff;
28078 if ((value != 0) && ( (value & (value - 1)) == 0))
28080 int ret = exact_log2 (value);
28081 gcc_assert (IN_RANGE (ret, 0, 31));
28082 return ret;
28086 return 0;
28089 /* If X is a CONST_DOUBLE with a value that is a power of 2 whose
28090 log2 is in [1, 32], return that log2. Otherwise return -1.
28091 This is used in the patterns for vcvt.s32.f32 floating-point to
28092 fixed-point conversions. */
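/* Example (illustrative): a CONST_DOUBLE of 256.0 yields 8, while 3.0,
   -4.0 or 2^33 all yield -1.  */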
28095 vfp3_const_double_for_bits (rtx x)
28097 const REAL_VALUE_TYPE *r;
28099 if (!CONST_DOUBLE_P (x))
28100 return -1;
28102 r = CONST_DOUBLE_REAL_VALUE (x);
28104 if (REAL_VALUE_NEGATIVE (*r)
28105 || REAL_VALUE_ISNAN (*r)
28106 || REAL_VALUE_ISINF (*r)
28107 || !real_isinteger (r, SFmode))
28108 return -1;
28110 HOST_WIDE_INT hwint = exact_log2 (real_to_integer (r));
28112 /* The exact_log2 above will have returned -1 if this is
28113 not an exact log2. */
28114 if (!IN_RANGE (hwint, 1, 32))
28115 return -1;
28117 return hwint;
28121 /* Emit a memory barrier around an atomic sequence according to MODEL. */
28123 static void
28124 arm_pre_atomic_barrier (enum memmodel model)
28126 if (need_atomic_barrier_p (model, true))
28127 emit_insn (gen_memory_barrier ());
28130 static void
28131 arm_post_atomic_barrier (enum memmodel model)
28133 if (need_atomic_barrier_p (model, false))
28134 emit_insn (gen_memory_barrier ());
28137 /* Emit the load-exclusive and store-exclusive instructions.
28138 Use acquire and release versions if necessary. */
28140 static void
28141 arm_emit_load_exclusive (machine_mode mode, rtx rval, rtx mem, bool acq)
28143 rtx (*gen) (rtx, rtx);
28145 if (acq)
28147 switch (mode)
28149 case QImode: gen = gen_arm_load_acquire_exclusiveqi; break;
28150 case HImode: gen = gen_arm_load_acquire_exclusivehi; break;
28151 case SImode: gen = gen_arm_load_acquire_exclusivesi; break;
28152 case DImode: gen = gen_arm_load_acquire_exclusivedi; break;
28153 default:
28154 gcc_unreachable ();
28157 else
28159 switch (mode)
28161 case QImode: gen = gen_arm_load_exclusiveqi; break;
28162 case HImode: gen = gen_arm_load_exclusivehi; break;
28163 case SImode: gen = gen_arm_load_exclusivesi; break;
28164 case DImode: gen = gen_arm_load_exclusivedi; break;
28165 default:
28166 gcc_unreachable ();
28170 emit_insn (gen (rval, mem));
28173 static void
28174 arm_emit_store_exclusive (machine_mode mode, rtx bval, rtx rval,
28175 rtx mem, bool rel)
28177 rtx (*gen) (rtx, rtx, rtx);
28179 if (rel)
28181 switch (mode)
28183 case QImode: gen = gen_arm_store_release_exclusiveqi; break;
28184 case HImode: gen = gen_arm_store_release_exclusivehi; break;
28185 case SImode: gen = gen_arm_store_release_exclusivesi; break;
28186 case DImode: gen = gen_arm_store_release_exclusivedi; break;
28187 default:
28188 gcc_unreachable ();
28191 else
28193 switch (mode)
28195 case QImode: gen = gen_arm_store_exclusiveqi; break;
28196 case HImode: gen = gen_arm_store_exclusivehi; break;
28197 case SImode: gen = gen_arm_store_exclusivesi; break;
28198 case DImode: gen = gen_arm_store_exclusivedi; break;
28199 default:
28200 gcc_unreachable ();
28204 emit_insn (gen (bval, rval, mem));
28207 /* Mark the previous jump instruction as unlikely. */
28209 static void
28210 emit_unlikely_jump (rtx insn)
28212 int very_unlikely = REG_BR_PROB_BASE / 100 - 1;
28214 rtx_insn *jump = emit_jump_insn (insn);
28215 add_int_reg_note (jump, REG_BR_PROB, very_unlikely);
28218 /* Expand a compare and swap pattern. */
28220 void
28221 arm_expand_compare_and_swap (rtx operands[])
28223 rtx bval, bdst, rval, mem, oldval, newval, is_weak, mod_s, mod_f, x;
28224 machine_mode mode;
28225 rtx (*gen) (rtx, rtx, rtx, rtx, rtx, rtx, rtx, rtx);
28227 bval = operands[0];
28228 rval = operands[1];
28229 mem = operands[2];
28230 oldval = operands[3];
28231 newval = operands[4];
28232 is_weak = operands[5];
28233 mod_s = operands[6];
28234 mod_f = operands[7];
28235 mode = GET_MODE (mem);
28237 /* Normally the succ memory model must be stronger than fail, but in the
28238 unlikely event of fail being ACQUIRE and succ being RELEASE we need to
28239 promote succ to ACQ_REL so that we don't lose the acquire semantics. */
28241 if (TARGET_HAVE_LDACQ
28242 && is_mm_acquire (memmodel_from_int (INTVAL (mod_f)))
28243 && is_mm_release (memmodel_from_int (INTVAL (mod_s))))
28244 mod_s = GEN_INT (MEMMODEL_ACQ_REL);
28246 switch (mode)
28248 case QImode:
28249 case HImode:
28250 /* For narrow modes, we're going to perform the comparison in SImode,
28251 so do the zero-extension now. */
28252 rval = gen_reg_rtx (SImode);
28253 oldval = convert_modes (SImode, mode, oldval, true);
28254 /* FALLTHRU */
28256 case SImode:
28257 /* Force the value into a register if needed. We waited until after
28258 the zero-extension above to do this properly. */
28259 if (!arm_add_operand (oldval, SImode))
28260 oldval = force_reg (SImode, oldval);
28261 break;
28263 case DImode:
28264 if (!cmpdi_operand (oldval, mode))
28265 oldval = force_reg (mode, oldval);
28266 break;
28268 default:
28269 gcc_unreachable ();
28272 if (TARGET_THUMB1)
28274 switch (mode)
28276 case QImode: gen = gen_atomic_compare_and_swapt1qi_1; break;
28277 case HImode: gen = gen_atomic_compare_and_swapt1hi_1; break;
28278 case SImode: gen = gen_atomic_compare_and_swapt1si_1; break;
28279 case DImode: gen = gen_atomic_compare_and_swapt1di_1; break;
28280 default:
28281 gcc_unreachable ();
28284 else
28286 switch (mode)
28288 case QImode: gen = gen_atomic_compare_and_swap32qi_1; break;
28289 case HImode: gen = gen_atomic_compare_and_swap32hi_1; break;
28290 case SImode: gen = gen_atomic_compare_and_swap32si_1; break;
28291 case DImode: gen = gen_atomic_compare_and_swap32di_1; break;
28292 default:
28293 gcc_unreachable ();
28297 bdst = TARGET_THUMB1 ? bval : gen_rtx_REG (CC_Zmode, CC_REGNUM);
28298 emit_insn (gen (bdst, rval, mem, oldval, newval, is_weak, mod_s, mod_f));
28300 if (mode == QImode || mode == HImode)
28301 emit_move_insn (operands[1], gen_lowpart (mode, rval));
28303 /* In all cases, we arrange for success to be signaled by Z set.
28304 This arrangement allows for the boolean result to be used directly
28305 in a subsequent branch, post optimization. For Thumb-1 targets, the
28306 boolean negation of the result is also stored in bval because the Thumb-1
28307 backend lacks dependency tracking for the CC flag, since flag-setting is
28308 not represented at the RTL level. */
28309 if (TARGET_THUMB1)
28310 emit_insn (gen_cstoresi_eq0_thumb1 (bval, bdst));
28311 else
28313 x = gen_rtx_EQ (SImode, bdst, const0_rtx);
28314 emit_insn (gen_rtx_SET (bval, x));
28318 /* Split a compare and swap pattern. It is IMPLEMENTATION DEFINED whether
28319 another memory store between the load-exclusive and store-exclusive can
28320 reset the monitor from Exclusive to Open state. This means we must wait
28321 until after reload to split the pattern, lest we get a register spill in
28322 the middle of the atomic sequence. Success of the compare and swap is
28323 indicated by the Z flag being set for 32-bit targets and by neg_bval being zero
28324 for Thumb-1 targets (i.e. the negation of the boolean value returned by
28325 the atomic_compare_and_swapmode standard pattern in operand 0). */
28327 void
28328 arm_split_compare_and_swap (rtx operands[])
28330 rtx rval, mem, oldval, newval, neg_bval;
28331 machine_mode mode;
28332 enum memmodel mod_s, mod_f;
28333 bool is_weak;
28334 rtx_code_label *label1, *label2;
28335 rtx x, cond;
28337 rval = operands[1];
28338 mem = operands[2];
28339 oldval = operands[3];
28340 newval = operands[4];
28341 is_weak = (operands[5] != const0_rtx);
28342 mod_s = memmodel_from_int (INTVAL (operands[6]));
28343 mod_f = memmodel_from_int (INTVAL (operands[7]));
28344 neg_bval = TARGET_THUMB1 ? operands[0] : operands[8];
28345 mode = GET_MODE (mem);
28347 bool is_armv8_sync = arm_arch8 && is_mm_sync (mod_s);
28349 bool use_acquire = TARGET_HAVE_LDACQ
28350 && !(is_mm_relaxed (mod_s) || is_mm_consume (mod_s)
28351 || is_mm_release (mod_s));
28353 bool use_release = TARGET_HAVE_LDACQ
28354 && !(is_mm_relaxed (mod_s) || is_mm_consume (mod_s)
28355 || is_mm_acquire (mod_s));
28357 /* For ARMv8, the load-acquire is too weak for __sync memory orders. Instead,
28358 a full barrier is emitted after the store-release. */
28359 if (is_armv8_sync)
28360 use_acquire = false;
28362 /* Checks whether a barrier is needed and emits one accordingly. */
28363 if (!(use_acquire || use_release))
28364 arm_pre_atomic_barrier (mod_s);
28366 label1 = NULL;
28367 if (!is_weak)
28369 label1 = gen_label_rtx ();
28370 emit_label (label1);
28372 label2 = gen_label_rtx ();
28374 arm_emit_load_exclusive (mode, rval, mem, use_acquire);
28376 /* Z is set to 0 for 32bit targets (resp. rval set to 1) if oldval != rval,
28377 as required to communicate with arm_expand_compare_and_swap. */
28378 if (TARGET_32BIT)
28380 cond = arm_gen_compare_reg (NE, rval, oldval, neg_bval);
28381 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
28382 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
28383 gen_rtx_LABEL_REF (Pmode, label2), pc_rtx);
28384 emit_unlikely_jump (gen_rtx_SET (pc_rtx, x));
28386 else
28388 emit_move_insn (neg_bval, const1_rtx);
28389 cond = gen_rtx_NE (VOIDmode, rval, oldval);
28390 if (thumb1_cmpneg_operand (oldval, SImode))
28391 emit_unlikely_jump (gen_cbranchsi4_scratch (neg_bval, rval, oldval,
28392 label2, cond));
28393 else
28394 emit_unlikely_jump (gen_cbranchsi4_insn (cond, rval, oldval, label2));
28397 arm_emit_store_exclusive (mode, neg_bval, mem, newval, use_release);
28399 /* Weak or strong, we want EQ to be true for success, so that we
28400 match the flags that we got from the compare above. */
28401 if (TARGET_32BIT)
28403 cond = gen_rtx_REG (CCmode, CC_REGNUM);
28404 x = gen_rtx_COMPARE (CCmode, neg_bval, const0_rtx);
28405 emit_insn (gen_rtx_SET (cond, x));
28408 if (!is_weak)
28410 /* Z is set to boolean value of !neg_bval, as required to communicate
28411 with arm_expand_compare_and_swap. */
28412 x = gen_rtx_NE (VOIDmode, neg_bval, const0_rtx);
28413 emit_unlikely_jump (gen_cbranchsi4 (x, neg_bval, const0_rtx, label1));
28416 if (!is_mm_relaxed (mod_f))
28417 emit_label (label2);
28419 /* Checks whether a barrier is needed and emits one accordingly. */
28420 if (is_armv8_sync
28421 || !(use_acquire || use_release))
28422 arm_post_atomic_barrier (mod_s);
28424 if (is_mm_relaxed (mod_f))
28425 emit_label (label2);
28428 /* Split an atomic operation pattern. Operation is given by CODE and is one
28429 of PLUS, MINUS, IOR, XOR, SET (for an exchange operation) or NOT (for a nand
28430 operation). Operation is performed on the content at MEM and on VALUE
28431 following the memory model MODEL_RTX. The content at MEM before and after
28432 the operation is returned in OLD_OUT and NEW_OUT respectively while the
28433 success of the operation is returned in COND. Using a scratch register or
28434 an operand register for these determines what result is returned for that
28435 pattern. */
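/* Schematically (illustrative only), for a 32-bit PLUS without
   acquire/release support this becomes a loop of roughly the form
     1: ldrex   old, [mem]
        add     new, old, value
        strex   tmp, new, [mem]
        cmp     tmp, #0
        bne     1b
   bracketed by memory barriers as required by MODEL_RTX.  */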
28437 void
28438 arm_split_atomic_op (enum rtx_code code, rtx old_out, rtx new_out, rtx mem,
28439 rtx value, rtx model_rtx, rtx cond)
28441 enum memmodel model = memmodel_from_int (INTVAL (model_rtx));
28442 machine_mode mode = GET_MODE (mem);
28443 machine_mode wmode = (mode == DImode ? DImode : SImode);
28444 rtx_code_label *label;
28445 bool all_low_regs, bind_old_new;
28446 rtx x;
28448 bool is_armv8_sync = arm_arch8 && is_mm_sync (model);
28450 bool use_acquire = TARGET_HAVE_LDACQ
28451 && !(is_mm_relaxed (model) || is_mm_consume (model)
28452 || is_mm_release (model));
28454 bool use_release = TARGET_HAVE_LDACQ
28455 && !(is_mm_relaxed (model) || is_mm_consume (model)
28456 || is_mm_acquire (model));
28458 /* For ARMv8, a load-acquire is too weak for __sync memory orders. Instead,
28459 a full barrier is emitted after the store-release. */
28460 if (is_armv8_sync)
28461 use_acquire = false;
28463 /* Checks whether a barrier is needed and emits one accordingly. */
28464 if (!(use_acquire || use_release))
28465 arm_pre_atomic_barrier (model);
28467 label = gen_label_rtx ();
28468 emit_label (label);
28470 if (new_out)
28471 new_out = gen_lowpart (wmode, new_out);
28472 if (old_out)
28473 old_out = gen_lowpart (wmode, old_out);
28474 else
28475 old_out = new_out;
28476 value = simplify_gen_subreg (wmode, value, mode, 0);
28478 arm_emit_load_exclusive (mode, old_out, mem, use_acquire);
28480 /* Does the operation require destination and first operand to use the same
28481 register? This is decided by register constraints of relevant insn
28482 patterns in thumb1.md. */
28483 gcc_assert (!new_out || REG_P (new_out));
28484 all_low_regs = REG_P (value) && REGNO_REG_CLASS (REGNO (value)) == LO_REGS
28485 && new_out && REGNO_REG_CLASS (REGNO (new_out)) == LO_REGS
28486 && REGNO_REG_CLASS (REGNO (old_out)) == LO_REGS;
28487 bind_old_new =
28488 (TARGET_THUMB1
28489 && code != SET
28490 && code != MINUS
28491 && (code != PLUS || (!all_low_regs && !satisfies_constraint_L (value))));
28493 /* We want to return the old value while putting the result of the operation
28494 in the same register as the old value so copy the old value over to the
28495 destination register and use that register for the operation. */
28496 if (old_out && bind_old_new)
28498 emit_move_insn (new_out, old_out);
28499 old_out = new_out;
28502 switch (code)
28504 case SET:
28505 new_out = value;
28506 break;
28508 case NOT:
28509 x = gen_rtx_AND (wmode, old_out, value);
28510 emit_insn (gen_rtx_SET (new_out, x));
28511 x = gen_rtx_NOT (wmode, new_out);
28512 emit_insn (gen_rtx_SET (new_out, x));
28513 break;
28515 case MINUS:
28516 if (CONST_INT_P (value))
28518 value = GEN_INT (-INTVAL (value));
28519 code = PLUS;
28521 /* FALLTHRU */
28523 case PLUS:
28524 if (mode == DImode)
28526 /* DImode plus/minus need to clobber flags. */
28527 /* The adddi3 and subdi3 patterns are incorrectly written so that
28528 they require matching operands, even when we could easily support
28529 three operands. Thankfully, this can be fixed up post-splitting,
28530 as the individual add+adc patterns do accept three operands and
28531 post-reload cprop can make these moves go away. */
28532 emit_move_insn (new_out, old_out);
28533 if (code == PLUS)
28534 x = gen_adddi3 (new_out, new_out, value);
28535 else
28536 x = gen_subdi3 (new_out, new_out, value);
28537 emit_insn (x);
28538 break;
28540 /* FALLTHRU */
28542 default:
28543 x = gen_rtx_fmt_ee (code, wmode, old_out, value);
28544 emit_insn (gen_rtx_SET (new_out, x));
28545 break;
28548 arm_emit_store_exclusive (mode, cond, mem, gen_lowpart (mode, new_out),
28549 use_release);
28551 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
28552 emit_unlikely_jump (gen_cbranchsi4 (x, cond, const0_rtx, label));
28554 /* Checks whether a barrier is needed and emits one accordingly. */
28555 if (is_armv8_sync
28556 || !(use_acquire || use_release))
28557 arm_post_atomic_barrier (model);
28560 #define MAX_VECT_LEN 16
28562 struct expand_vec_perm_d
28564 rtx target, op0, op1;
28565 unsigned char perm[MAX_VECT_LEN];
28566 machine_mode vmode;
28567 unsigned char nelt;
28568 bool one_vector_p;
28569 bool testing_p;
28572 /* Generate a variable permutation. */
28574 static void
28575 arm_expand_vec_perm_1 (rtx target, rtx op0, rtx op1, rtx sel)
28577 machine_mode vmode = GET_MODE (target);
28578 bool one_vector_p = rtx_equal_p (op0, op1);
28580 gcc_checking_assert (vmode == V8QImode || vmode == V16QImode);
28581 gcc_checking_assert (GET_MODE (op0) == vmode);
28582 gcc_checking_assert (GET_MODE (op1) == vmode);
28583 gcc_checking_assert (GET_MODE (sel) == vmode);
28584 gcc_checking_assert (TARGET_NEON);
28586 if (one_vector_p)
28588 if (vmode == V8QImode)
28589 emit_insn (gen_neon_vtbl1v8qi (target, op0, sel));
28590 else
28591 emit_insn (gen_neon_vtbl1v16qi (target, op0, sel));
28593 else
28595 rtx pair;
28597 if (vmode == V8QImode)
28599 pair = gen_reg_rtx (V16QImode);
28600 emit_insn (gen_neon_vcombinev8qi (pair, op0, op1));
28601 pair = gen_lowpart (TImode, pair);
28602 emit_insn (gen_neon_vtbl2v8qi (target, pair, sel));
28604 else
28606 pair = gen_reg_rtx (OImode);
28607 emit_insn (gen_neon_vcombinev16qi (pair, op0, op1));
28608 emit_insn (gen_neon_vtbl2v16qi (target, pair, sel));
28613 void
28614 arm_expand_vec_perm (rtx target, rtx op0, rtx op1, rtx sel)
28616 machine_mode vmode = GET_MODE (target);
28617 unsigned int i, nelt = GET_MODE_NUNITS (vmode);
28618 bool one_vector_p = rtx_equal_p (op0, op1);
28619 rtx rmask[MAX_VECT_LEN], mask;
28621 /* TODO: ARM's VTBL indexing is little-endian. In order to handle GCC's
28622 numbering of elements for big-endian, we must reverse the order. */
28623 gcc_checking_assert (!BYTES_BIG_ENDIAN);
28625 /* The VTBL instruction does not use a modulo index, so we must take care
28626 of that ourselves. */
28627 mask = GEN_INT (one_vector_p ? nelt - 1 : 2 * nelt - 1);
28628 for (i = 0; i < nelt; ++i)
28629 rmask[i] = mask;
28630 mask = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rmask));
28631 sel = expand_simple_binop (vmode, AND, sel, mask, NULL, 0, OPTAB_LIB_WIDEN);
28633 arm_expand_vec_perm_1 (target, op0, op1, sel);
28636 /* Map lane ordering between architectural lane order, and GCC lane order,
28637 taking into account ABI. See comment above output_move_neon for details. */
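/* E.g. (illustrative): on a big-endian target lane 0 of a V8QImode value
   maps to architectural lane 7, while on a little-endian target the
   mapping is the identity.  */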
28639 static int
28640 neon_endian_lane_map (machine_mode mode, int lane)
28642 if (BYTES_BIG_ENDIAN)
28644 int nelems = GET_MODE_NUNITS (mode);
28645 /* Reverse lane order. */
28646 lane = (nelems - 1 - lane);
28647 /* Reverse D register order, to match ABI. */
28648 if (GET_MODE_SIZE (mode) == 16)
28649 lane = lane ^ (nelems / 2);
28651 return lane;
28654 /* Some permutations index into pairs of vectors, this is a helper function
28655 to map indexes into those pairs of vectors. */
28657 static int
28658 neon_pair_endian_lane_map (machine_mode mode, int lane)
28660 int nelem = GET_MODE_NUNITS (mode);
28661 if (BYTES_BIG_ENDIAN)
28662 lane =
28663 neon_endian_lane_map (mode, lane & (nelem - 1)) + (lane & nelem);
28664 return lane;
28667 /* Generate or test for an insn that supports a constant permutation. */
28669 /* Recognize patterns for the VUZP insns. */
28671 static bool
28672 arm_evpc_neon_vuzp (struct expand_vec_perm_d *d)
28674 unsigned int i, odd, mask, nelt = d->nelt;
28675 rtx out0, out1, in0, in1;
28676 rtx (*gen)(rtx, rtx, rtx, rtx);
28677 int first_elem;
28678 int swap_nelt;
28680 if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
28681 return false;
28683 /* arm_expand_vec_perm_const_1 () helpfully swaps the operands for the
28684 big-endian pattern on 64-bit vectors, so we correct for that. */
28685 swap_nelt = BYTES_BIG_ENDIAN && !d->one_vector_p
28686 && GET_MODE_SIZE (d->vmode) == 8 ? d->nelt : 0;
28688 first_elem = d->perm[neon_endian_lane_map (d->vmode, 0)] ^ swap_nelt;
28690 if (first_elem == neon_endian_lane_map (d->vmode, 0))
28691 odd = 0;
28692 else if (first_elem == neon_endian_lane_map (d->vmode, 1))
28693 odd = 1;
28694 else
28695 return false;
28696 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
28698 for (i = 0; i < nelt; i++)
28700 unsigned elt =
28701 (neon_pair_endian_lane_map (d->vmode, i) * 2 + odd) & mask;
28702 if ((d->perm[i] ^ swap_nelt) != neon_pair_endian_lane_map (d->vmode, elt))
28703 return false;
28706 /* Success! */
28707 if (d->testing_p)
28708 return true;
28710 switch (d->vmode)
28712 case V16QImode: gen = gen_neon_vuzpv16qi_internal; break;
28713 case V8QImode: gen = gen_neon_vuzpv8qi_internal; break;
28714 case V8HImode: gen = gen_neon_vuzpv8hi_internal; break;
28715 case V4HImode: gen = gen_neon_vuzpv4hi_internal; break;
28716 case V8HFmode: gen = gen_neon_vuzpv8hf_internal; break;
28717 case V4HFmode: gen = gen_neon_vuzpv4hf_internal; break;
28718 case V4SImode: gen = gen_neon_vuzpv4si_internal; break;
28719 case V2SImode: gen = gen_neon_vuzpv2si_internal; break;
28720 case V2SFmode: gen = gen_neon_vuzpv2sf_internal; break;
28721 case V4SFmode: gen = gen_neon_vuzpv4sf_internal; break;
28722 default:
28723 gcc_unreachable ();
28726 in0 = d->op0;
28727 in1 = d->op1;
28728 if (swap_nelt != 0)
28729 std::swap (in0, in1);
28731 out0 = d->target;
28732 out1 = gen_reg_rtx (d->vmode);
28733 if (odd)
28734 std::swap (out0, out1);
28736 emit_insn (gen (out0, in0, in1, out1));
28737 return true;
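/* Illustrative example (not from the original source): on little-endian,
   a V4SImode permutation of op0 = {a0,a1,a2,a3} and op1 = {b0,b1,b2,b3}
   with selector {0,2,4,6} is recognized here with odd == 0 and expands
   to a single VUZP whose first output register holds {a0,a2,b0,b2}.  */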
28740 /* Recognize patterns for the VZIP insns. */
28742 static bool
28743 arm_evpc_neon_vzip (struct expand_vec_perm_d *d)
28745 unsigned int i, high, mask, nelt = d->nelt;
28746 rtx out0, out1, in0, in1;
28747 rtx (*gen)(rtx, rtx, rtx, rtx);
28748 int first_elem;
28749 bool is_swapped;
28751 if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
28752 return false;
28754 is_swapped = BYTES_BIG_ENDIAN;
28756 first_elem = d->perm[neon_endian_lane_map (d->vmode, 0) ^ is_swapped];
28758 high = nelt / 2;
28759 if (first_elem == neon_endian_lane_map (d->vmode, high))
28761 else if (first_elem == neon_endian_lane_map (d->vmode, 0))
28762 high = 0;
28763 else
28764 return false;
28765 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
28767 for (i = 0; i < nelt / 2; i++)
28769 unsigned elt =
28770 neon_pair_endian_lane_map (d->vmode, i + high) & mask;
28771 if (d->perm[neon_pair_endian_lane_map (d->vmode, 2 * i + is_swapped)]
28772 != elt)
28773 return false;
28774 elt =
28775 neon_pair_endian_lane_map (d->vmode, i + nelt + high) & mask;
28776 if (d->perm[neon_pair_endian_lane_map (d->vmode, 2 * i + !is_swapped)]
28777 != elt)
28778 return false;
28781 /* Success! */
28782 if (d->testing_p)
28783 return true;
28785 switch (d->vmode)
28787 case V16QImode: gen = gen_neon_vzipv16qi_internal; break;
28788 case V8QImode: gen = gen_neon_vzipv8qi_internal; break;
28789 case V8HImode: gen = gen_neon_vzipv8hi_internal; break;
28790 case V4HImode: gen = gen_neon_vzipv4hi_internal; break;
28791 case V8HFmode: gen = gen_neon_vzipv8hf_internal; break;
28792 case V4HFmode: gen = gen_neon_vzipv4hf_internal; break;
28793 case V4SImode: gen = gen_neon_vzipv4si_internal; break;
28794 case V2SImode: gen = gen_neon_vzipv2si_internal; break;
28795 case V2SFmode: gen = gen_neon_vzipv2sf_internal; break;
28796 case V4SFmode: gen = gen_neon_vzipv4sf_internal; break;
28797 default:
28798 gcc_unreachable ();
28801 in0 = d->op0;
28802 in1 = d->op1;
28803 if (is_swapped)
28804 std::swap (in0, in1);
28806 out0 = d->target;
28807 out1 = gen_reg_rtx (d->vmode);
28808 if (high)
28809 std::swap (out0, out1);
28811 emit_insn (gen (out0, in0, in1, out1));
28812 return true;
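/* Illustrative example (not from the original source): on little-endian,
   a V4SImode selector of {0,4,1,5} over {a0,a1,a2,a3} and {b0,b1,b2,b3}
   is recognized here with high == 0 and expands to a single VZIP whose
   first output register holds {a0,b0,a1,b1}.  */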
28815 /* Recognize patterns for the VREV insns. */
28817 static bool
28818 arm_evpc_neon_vrev (struct expand_vec_perm_d *d)
28820 unsigned int i, j, diff, nelt = d->nelt;
28821 rtx (*gen)(rtx, rtx);
28823 if (!d->one_vector_p)
28824 return false;
28826 diff = d->perm[0];
28827 switch (diff)
28829 case 7:
28830 switch (d->vmode)
28832 case V16QImode: gen = gen_neon_vrev64v16qi; break;
28833 case V8QImode: gen = gen_neon_vrev64v8qi; break;
28834 default:
28835 return false;
28837 break;
28838 case 3:
28839 switch (d->vmode)
28841 case V16QImode: gen = gen_neon_vrev32v16qi; break;
28842 case V8QImode: gen = gen_neon_vrev32v8qi; break;
28843 case V8HImode: gen = gen_neon_vrev64v8hi; break;
28844 case V4HImode: gen = gen_neon_vrev64v4hi; break;
28845 case V8HFmode: gen = gen_neon_vrev64v8hf; break;
28846 case V4HFmode: gen = gen_neon_vrev64v4hf; break;
28847 default:
28848 return false;
28850 break;
28851 case 1:
28852 switch (d->vmode)
28854 case V16QImode: gen = gen_neon_vrev16v16qi; break;
28855 case V8QImode: gen = gen_neon_vrev16v8qi; break;
28856 case V8HImode: gen = gen_neon_vrev32v8hi; break;
28857 case V4HImode: gen = gen_neon_vrev32v4hi; break;
28858 case V4SImode: gen = gen_neon_vrev64v4si; break;
28859 case V2SImode: gen = gen_neon_vrev64v2si; break;
28860 case V4SFmode: gen = gen_neon_vrev64v4sf; break;
28861 case V2SFmode: gen = gen_neon_vrev64v2sf; break;
28862 default:
28863 return false;
28865 break;
28866 default:
28867 return false;
28870 for (i = 0; i < nelt ; i += diff + 1)
28871 for (j = 0; j <= diff; j += 1)
28873 /* This is guaranteed to hold because diff is one of
28874 7, 3 or 1, so there are always enough elements left
28875 to complete the group. A permutation whose diff is
28876 anything other than these values means something has
28877 gone wrong by the time we get here. */
28878 gcc_assert (i + j < nelt);
28879 if (d->perm[i + j] != i + diff - j)
28880 return false;
28883 /* Success! */
28884 if (d->testing_p)
28885 return true;
28887 emit_insn (gen (d->target, d->op0));
28888 return true;
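/* Illustrative example (not from the original source): a single-operand
   V8QImode selector of {1,0,3,2,5,4,7,6} gives diff == 1 and is expanded
   as VREV16, swapping the bytes within each 16-bit halfword.  */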
28891 /* Recognize patterns for the VTRN insns. */
28893 static bool
28894 arm_evpc_neon_vtrn (struct expand_vec_perm_d *d)
28896 unsigned int i, odd, mask, nelt = d->nelt;
28897 rtx out0, out1, in0, in1;
28898 rtx (*gen)(rtx, rtx, rtx, rtx);
28900 if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
28901 return false;
28903 /* Note that these are little-endian tests. Adjust for big-endian later. */
28904 if (d->perm[0] == 0)
28905 odd = 0;
28906 else if (d->perm[0] == 1)
28907 odd = 1;
28908 else
28909 return false;
28910 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
28912 for (i = 0; i < nelt; i += 2)
28914 if (d->perm[i] != i + odd)
28915 return false;
28916 if (d->perm[i + 1] != ((i + nelt + odd) & mask))
28917 return false;
28920 /* Success! */
28921 if (d->testing_p)
28922 return true;
28924 switch (d->vmode)
28926 case V16QImode: gen = gen_neon_vtrnv16qi_internal; break;
28927 case V8QImode: gen = gen_neon_vtrnv8qi_internal; break;
28928 case V8HImode: gen = gen_neon_vtrnv8hi_internal; break;
28929 case V4HImode: gen = gen_neon_vtrnv4hi_internal; break;
28930 case V8HFmode: gen = gen_neon_vtrnv8hf_internal; break;
28931 case V4HFmode: gen = gen_neon_vtrnv4hf_internal; break;
28932 case V4SImode: gen = gen_neon_vtrnv4si_internal; break;
28933 case V2SImode: gen = gen_neon_vtrnv2si_internal; break;
28934 case V2SFmode: gen = gen_neon_vtrnv2sf_internal; break;
28935 case V4SFmode: gen = gen_neon_vtrnv4sf_internal; break;
28936 default:
28937 gcc_unreachable ();
28940 in0 = d->op0;
28941 in1 = d->op1;
28942 if (BYTES_BIG_ENDIAN)
28944 std::swap (in0, in1);
28945 odd = !odd;
28948 out0 = d->target;
28949 out1 = gen_reg_rtx (d->vmode);
28950 if (odd)
28951 std::swap (out0, out1);
28953 emit_insn (gen (out0, in0, in1, out1));
28954 return true;
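/* Illustrative example (not from the original source): on little-endian,
   a V4SImode selector of {0,4,2,6} over {a0,a1,a2,a3} and {b0,b1,b2,b3}
   is recognized here with odd == 0 and expands to a single VTRN whose
   first output register holds {a0,b0,a2,b2}.  */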
28957 /* Recognize patterns for the VEXT insns. */
28959 static bool
28960 arm_evpc_neon_vext (struct expand_vec_perm_d *d)
28962 unsigned int i, nelt = d->nelt;
28963 rtx (*gen) (rtx, rtx, rtx, rtx);
28964 rtx offset;
28966 unsigned int location;
28968 unsigned int next = d->perm[0] + 1;
28970 /* TODO: Handle GCC's numbering of elements for big-endian. */
28971 if (BYTES_BIG_ENDIAN)
28972 return false;
28974 /* Check if the extracted indexes are increasing by one. */
28975 for (i = 1; i < nelt; next++, i++)
28977 /* If we hit the most significant element of the 2nd vector in
28978 the previous iteration, no need to test further. */
28979 if (next == 2 * nelt)
28980 return false;
28982 /* If we are operating on only one vector: it could be a
28983 rotation. If there are only two elements of size < 64, let
28984 arm_evpc_neon_vrev catch it. */
28985 if (d->one_vector_p && (next == nelt))
28987 if ((nelt == 2) && (d->vmode != V2DImode))
28988 return false;
28989 else
28990 next = 0;
28993 if (d->perm[i] != next)
28994 return false;
28997 location = d->perm[0];
28999 switch (d->vmode)
29001 case V16QImode: gen = gen_neon_vextv16qi; break;
29002 case V8QImode: gen = gen_neon_vextv8qi; break;
29003 case V4HImode: gen = gen_neon_vextv4hi; break;
29004 case V8HImode: gen = gen_neon_vextv8hi; break;
29005 case V2SImode: gen = gen_neon_vextv2si; break;
29006 case V4SImode: gen = gen_neon_vextv4si; break;
29007 case V4HFmode: gen = gen_neon_vextv4hf; break;
29008 case V8HFmode: gen = gen_neon_vextv8hf; break;
29009 case V2SFmode: gen = gen_neon_vextv2sf; break;
29010 case V4SFmode: gen = gen_neon_vextv4sf; break;
29011 case V2DImode: gen = gen_neon_vextv2di; break;
29012 default:
29013 return false;
29016 /* Success! */
29017 if (d->testing_p)
29018 return true;
29020 offset = GEN_INT (location);
29021 emit_insn (gen (d->target, d->op0, d->op1, offset));
29022 return true;
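/* Illustrative example (not from the original source): a V4SImode
   selector of {1,2,3,4} over {a0,a1,a2,a3} and {b0,b1,b2,b3} increases
   by one from location == 1, so it expands to VEXT with offset 1,
   producing {a1,a2,a3,b0}.  */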
29025 /* The NEON VTBL instruction is a fully variable permutation that's even
29026 stronger than what we expose via VEC_PERM_EXPR. What it doesn't do
29027 is mask the index operand as VEC_PERM_EXPR requires. Therefore we
29028 can do slightly better by expanding this as a constant where we don't
29029 have to apply a mask. */
29031 static bool
29032 arm_evpc_neon_vtbl (struct expand_vec_perm_d *d)
29034 rtx rperm[MAX_VECT_LEN], sel;
29035 machine_mode vmode = d->vmode;
29036 unsigned int i, nelt = d->nelt;
29038 /* TODO: ARM's VTBL indexing is little-endian. In order to handle GCC's
29039 numbering of elements for big-endian, we must reverse the order. */
29040 if (BYTES_BIG_ENDIAN)
29041 return false;
29043 if (d->testing_p)
29044 return true;
29046 /* Generic code will try constant permutation twice. Once with the
29047 original mode and again with the elements lowered to QImode.
29048 So wait and don't do the selector expansion ourselves. */
29049 if (vmode != V8QImode && vmode != V16QImode)
29050 return false;
29052 for (i = 0; i < nelt; ++i)
29053 rperm[i] = GEN_INT (d->perm[i]);
29054 sel = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rperm));
29055 sel = force_reg (vmode, sel);
29057 arm_expand_vec_perm_1 (d->target, d->op0, d->op1, sel);
29058 return true;
29061 static bool
29062 arm_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
29064 /* Check if the input mask matches vext before reordering the
29065 operands. */
29066 if (TARGET_NEON)
29067 if (arm_evpc_neon_vext (d))
29068 return true;
29070 /* The pattern matching functions above are written to look for a small
29071 number to begin the sequence (0, 1, N/2). If we begin with an index
29072 from the second operand, we can swap the operands. */
29073 if (d->perm[0] >= d->nelt)
29075 unsigned i, nelt = d->nelt;
29077 for (i = 0; i < nelt; ++i)
29078 d->perm[i] = (d->perm[i] + nelt) & (2 * nelt - 1);
29080 std::swap (d->op0, d->op1);
29083 if (TARGET_NEON)
29085 if (arm_evpc_neon_vuzp (d))
29086 return true;
29087 if (arm_evpc_neon_vzip (d))
29088 return true;
29089 if (arm_evpc_neon_vrev (d))
29090 return true;
29091 if (arm_evpc_neon_vtrn (d))
29092 return true;
29093 return arm_evpc_neon_vtbl (d);
29095 return false;
29098 /* Expand a vec_perm_const pattern. */
29100 bool
29101 arm_expand_vec_perm_const (rtx target, rtx op0, rtx op1, rtx sel)
29103 struct expand_vec_perm_d d;
29104 int i, nelt, which;
29106 d.target = target;
29107 d.op0 = op0;
29108 d.op1 = op1;
29110 d.vmode = GET_MODE (target);
29111 gcc_assert (VECTOR_MODE_P (d.vmode));
29112 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
29113 d.testing_p = false;
29115 for (i = which = 0; i < nelt; ++i)
29117 rtx e = XVECEXP (sel, 0, i);
29118 int ei = INTVAL (e) & (2 * nelt - 1);
29119 which |= (ei < nelt ? 1 : 2);
29120 d.perm[i] = ei;
29123 switch (which)
29125 default:
29126 gcc_unreachable();
29128 case 3:
29129 d.one_vector_p = false;
29130 if (!rtx_equal_p (op0, op1))
29131 break;
29133 /* The elements of PERM do not suggest that only the first operand
29134 is used, but both operands are identical. Allow easier matching
29135 of the permutation by folding the permutation into the single
29136 input vector. */
29137 /* FALLTHRU */
29138 case 2:
29139 for (i = 0; i < nelt; ++i)
29140 d.perm[i] &= nelt - 1;
29141 d.op0 = op1;
29142 d.one_vector_p = true;
29143 break;
29145 case 1:
29146 d.op1 = op0;
29147 d.one_vector_p = true;
29148 break;
29151 return arm_expand_vec_perm_const_1 (&d);
29154 /* Implement TARGET_VECTORIZE_VEC_PERM_CONST_OK. */
29156 static bool
29157 arm_vectorize_vec_perm_const_ok (machine_mode vmode,
29158 const unsigned char *sel)
29160 struct expand_vec_perm_d d;
29161 unsigned int i, nelt, which;
29162 bool ret;
29164 d.vmode = vmode;
29165 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
29166 d.testing_p = true;
29167 memcpy (d.perm, sel, nelt);
29169 /* Categorize the set of elements in the selector. */
29170 for (i = which = 0; i < nelt; ++i)
29172 unsigned char e = d.perm[i];
29173 gcc_assert (e < 2 * nelt);
29174 which |= (e < nelt ? 1 : 2);
29177 /* For all elements from second vector, fold the elements to first. */
29178 if (which == 2)
29179 for (i = 0; i < nelt; ++i)
29180 d.perm[i] -= nelt;
29182 /* Check whether the mask can be applied to the vector type. */
29183 d.one_vector_p = (which != 3);
29185 d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
29186 d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
29187 if (!d.one_vector_p)
29188 d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);
29190 start_sequence ();
29191 ret = arm_expand_vec_perm_const_1 (&d);
29192 end_sequence ();
29194 return ret;
29197 bool
29198 arm_autoinc_modes_ok_p (machine_mode mode, enum arm_auto_incmodes code)
29200 /* If we are soft float and we either have ldrd or the access is no
29201 wider than a word, then all auto increment forms are ok. */
29202 if (TARGET_SOFT_FLOAT && (TARGET_LDRD || GET_MODE_SIZE (mode) <= 4))
29203 return true;
29205 switch (code)
29207 /* Post-increment is supported for all modes; pre-decrement is
29208 supported for all modes except vector modes. */
29209 case ARM_POST_INC:
29210 case ARM_PRE_DEC:
29211 if (VECTOR_MODE_P (mode))
29213 if (code != ARM_PRE_DEC)
29214 return true;
29215 else
29216 return false;
29219 return true;
29221 case ARM_POST_DEC:
29222 case ARM_PRE_INC:
29223 /* Without LDRD and mode size greater than
29224 word size, there is no point in auto-incrementing
29225 because ldm and stm will not have these forms. */
29226 if (!TARGET_LDRD && GET_MODE_SIZE (mode) > 4)
29227 return false;
29229 /* Vector and floating point modes do not support
29230 these auto increment forms. */
29231 if (FLOAT_MODE_P (mode) || VECTOR_MODE_P (mode))
29232 return false;
29234 return true;
29236 default:
29237 return false;
29241 return false;
29244 /* The default expansion of general 64-bit shifts in core-regs is suboptimal
29245 on ARM, since we know that shifts by negative amounts are no-ops.
29246 Additionally, the default expansion code is not available or suitable
29247 for post-reload insn splits (this can occur when the register allocator
29248 chooses not to do a shift in NEON).
29250 This function is used in both initial expand and post-reload splits, and
29251 handles all kinds of 64-bit shifts.
29253 Input requirements:
29254 - It is safe for the input and output to be the same register, but
29255 early-clobber rules apply for the shift amount and scratch registers.
29256 - Shift by register requires both scratch registers. In all other cases
29257 the scratch registers may be NULL.
29258 - Ashiftrt by a register also clobbers the CC register. */
29259 void
29260 arm_emit_coreregs_64bit_shift (enum rtx_code code, rtx out, rtx in,
29261 rtx amount, rtx scratch1, rtx scratch2)
29263 rtx out_high = gen_highpart (SImode, out);
29264 rtx out_low = gen_lowpart (SImode, out);
29265 rtx in_high = gen_highpart (SImode, in);
29266 rtx in_low = gen_lowpart (SImode, in);
29268 /* Terminology:
29269 in = the register pair containing the input value.
29270 out = the destination register pair.
29271 up = the high- or low-part of each pair.
29272 down = the opposite part to "up".
29273 In a shift, we can consider bits to shift from "up"-stream to
29274 "down"-stream, so in a left-shift "up" is the low-part and "down"
29275 is the high-part of each register pair. */
29277 rtx out_up = code == ASHIFT ? out_low : out_high;
29278 rtx out_down = code == ASHIFT ? out_high : out_low;
29279 rtx in_up = code == ASHIFT ? in_low : in_high;
29280 rtx in_down = code == ASHIFT ? in_high : in_low;
29282 gcc_assert (code == ASHIFT || code == ASHIFTRT || code == LSHIFTRT);
29283 gcc_assert (out
29284 && (REG_P (out) || GET_CODE (out) == SUBREG)
29285 && GET_MODE (out) == DImode);
29286 gcc_assert (in
29287 && (REG_P (in) || GET_CODE (in) == SUBREG)
29288 && GET_MODE (in) == DImode);
29289 gcc_assert (amount
29290 && (((REG_P (amount) || GET_CODE (amount) == SUBREG)
29291 && GET_MODE (amount) == SImode)
29292 || CONST_INT_P (amount)));
29293 gcc_assert (scratch1 == NULL
29294 || (GET_CODE (scratch1) == SCRATCH)
29295 || (GET_MODE (scratch1) == SImode
29296 && REG_P (scratch1)));
29297 gcc_assert (scratch2 == NULL
29298 || (GET_CODE (scratch2) == SCRATCH)
29299 || (GET_MODE (scratch2) == SImode
29300 && REG_P (scratch2)));
29301 gcc_assert (!REG_P (out) || !REG_P (amount)
29302 || !HARD_REGISTER_P (out)
29303 || (REGNO (out) != REGNO (amount)
29304 && REGNO (out) + 1 != REGNO (amount)));
29306 /* Macros to make the following code more readable. */
29307 #define SUB_32(DEST,SRC) \
29308 gen_addsi3 ((DEST), (SRC), GEN_INT (-32))
29309 #define RSB_32(DEST,SRC) \
29310 gen_subsi3 ((DEST), GEN_INT (32), (SRC))
29311 #define SUB_S_32(DEST,SRC) \
29312 gen_addsi3_compare0 ((DEST), (SRC), \
29313 GEN_INT (-32))
29314 #define SET(DEST,SRC) \
29315 gen_rtx_SET ((DEST), (SRC))
29316 #define SHIFT(CODE,SRC,AMOUNT) \
29317 gen_rtx_fmt_ee ((CODE), SImode, (SRC), (AMOUNT))
29318 #define LSHIFT(CODE,SRC,AMOUNT) \
29319 gen_rtx_fmt_ee ((CODE) == ASHIFT ? ASHIFT : LSHIFTRT, \
29320 SImode, (SRC), (AMOUNT))
29321 #define REV_LSHIFT(CODE,SRC,AMOUNT) \
29322 gen_rtx_fmt_ee ((CODE) == ASHIFT ? LSHIFTRT : ASHIFT, \
29323 SImode, (SRC), (AMOUNT))
29324 #define ORR(A,B) \
29325 gen_rtx_IOR (SImode, (A), (B))
29326 #define BRANCH(COND,LABEL) \
29327 gen_arm_cond_branch ((LABEL), \
29328 gen_rtx_ ## COND (CCmode, cc_reg, \
29329 const0_rtx), \
29330 cc_reg)
29332 /* Shifts by register and shifts by constant are handled separately. */
29333 if (CONST_INT_P (amount))
29335 /* We have a shift-by-constant. */
29337 /* First, handle out-of-range shift amounts.
29338 In both cases we try to match the result that an ARM shift-by-register
29339 instruction would give. This helps reduce execution
29340 differences between optimization levels, but it won't stop other
29341 parts of the compiler doing different things. This is undefined
29342 behavior, in any case. */
29343 if (INTVAL (amount) <= 0)
29344 emit_insn (gen_movdi (out, in));
29345 else if (INTVAL (amount) >= 64)
29347 if (code == ASHIFTRT)
29349 rtx const31_rtx = GEN_INT (31);
29350 emit_insn (SET (out_down, SHIFT (code, in_up, const31_rtx)));
29351 emit_insn (SET (out_up, SHIFT (code, in_up, const31_rtx)));
29353 else
29354 emit_insn (gen_movdi (out, const0_rtx));
29357 /* Now handle valid shifts. */
29358 else if (INTVAL (amount) < 32)
29360 /* Shifts by a constant less than 32. */
29361 rtx reverse_amount = GEN_INT (32 - INTVAL (amount));
29363 /* Clearing the out register in DImode first avoids lots
29364 of spilling and results in less stack usage.
29365 Later this redundant insn is completely removed.
29366 Do that only if "in" and "out" are different registers. */
29367 if (REG_P (out) && REG_P (in) && REGNO (out) != REGNO (in))
29368 emit_insn (SET (out, const0_rtx));
29369 emit_insn (SET (out_down, LSHIFT (code, in_down, amount)));
29370 emit_insn (SET (out_down,
29371 ORR (REV_LSHIFT (code, in_up, reverse_amount),
29372 out_down)));
29373 emit_insn (SET (out_up, SHIFT (code, in_up, amount)));
29375 else
29377 /* Shifts by a constant greater than 31. */
29378 rtx adj_amount = GEN_INT (INTVAL (amount) - 32);
29380 if (REG_P (out) && REG_P (in) && REGNO (out) != REGNO (in))
29381 emit_insn (SET (out, const0_rtx));
29382 emit_insn (SET (out_down, SHIFT (code, in_up, adj_amount)));
29383 if (code == ASHIFTRT)
29384 emit_insn (gen_ashrsi3 (out_up, in_up,
29385 GEN_INT (31)));
29386 else
29387 emit_insn (SET (out_up, const0_rtx));
29390 else
29392 /* We have a shift-by-register. */
29393 rtx cc_reg = gen_rtx_REG (CC_NOOVmode, CC_REGNUM);
29395 /* This alternative requires the scratch registers. */
29396 gcc_assert (scratch1 && REG_P (scratch1));
29397 gcc_assert (scratch2 && REG_P (scratch2));
29399 /* We will need the values "amount-32" and "32-amount" later.
29400 Swapping them around now allows the later code to be more general. */
29401 switch (code)
29403 case ASHIFT:
29404 emit_insn (SUB_32 (scratch1, amount));
29405 emit_insn (RSB_32 (scratch2, amount));
29406 break;
29407 case ASHIFTRT:
29408 emit_insn (RSB_32 (scratch1, amount));
29409 /* Also set CC = amount > 32. */
29410 emit_insn (SUB_S_32 (scratch2, amount));
29411 break;
29412 case LSHIFTRT:
29413 emit_insn (RSB_32 (scratch1, amount));
29414 emit_insn (SUB_32 (scratch2, amount));
29415 break;
29416 default:
29417 gcc_unreachable ();
29420 /* Emit code like this:
29422 arithmetic-left:
29423 out_down = in_down << amount;
29424 out_down = (in_up << (amount - 32)) | out_down;
29425 out_down = ((unsigned)in_up >> (32 - amount)) | out_down;
29426 out_up = in_up << amount;
29428 arithmetic-right:
29429 out_down = in_down >> amount;
29430 out_down = (in_up << (32 - amount)) | out_down;
29431 if (amount >= 32)
29432 out_down = ((signed)in_up >> (amount - 32)) | out_down;
29433 out_up = (signed)in_up >> amount;
29435 logical-right:
29436 out_down = in_down >> amount;
29437 out_down = (in_up << (32 - amount)) | out_down;
29438 if (amount >= 32)
29439 out_down = ((unsigned)in_up >> (amount - 32)) | out_down;
29440 out_up = (unsigned)in_up >> amount;
29442 The ARM and Thumb2 variants are the same but implemented slightly
29443 differently. If this were only called during expand we could just
29444 use the Thumb2 case and let combine do the right thing, but this
29445 can also be called from post-reload splitters. */
29447 emit_insn (SET (out_down, LSHIFT (code, in_down, amount)));
29449 if (!TARGET_THUMB2)
29451 /* Emit code for ARM mode. */
29452 emit_insn (SET (out_down,
29453 ORR (SHIFT (ASHIFT, in_up, scratch1), out_down)));
29454 if (code == ASHIFTRT)
29456 rtx_code_label *done_label = gen_label_rtx ();
29457 emit_jump_insn (BRANCH (LT, done_label));
29458 emit_insn (SET (out_down, ORR (SHIFT (ASHIFTRT, in_up, scratch2),
29459 out_down)));
29460 emit_label (done_label);
29462 else
29463 emit_insn (SET (out_down, ORR (SHIFT (LSHIFTRT, in_up, scratch2),
29464 out_down)));
29466 else
29468 /* Emit code for Thumb2 mode.
29469 Thumb2 can't do shift and or in one insn. */
29470 emit_insn (SET (scratch1, SHIFT (ASHIFT, in_up, scratch1)));
29471 emit_insn (gen_iorsi3 (out_down, out_down, scratch1));
29473 if (code == ASHIFTRT)
29475 rtx_code_label *done_label = gen_label_rtx ();
29476 emit_jump_insn (BRANCH (LT, done_label));
29477 emit_insn (SET (scratch2, SHIFT (ASHIFTRT, in_up, scratch2)));
29478 emit_insn (SET (out_down, ORR (out_down, scratch2)));
29479 emit_label (done_label);
29481 else
29483 emit_insn (SET (scratch2, SHIFT (LSHIFTRT, in_up, scratch2)));
29484 emit_insn (gen_iorsi3 (out_down, out_down, scratch2));
29488 emit_insn (SET (out_up, SHIFT (code, in_up, amount)));
29491 #undef SUB_32
29492 #undef RSB_32
29493 #undef SUB_S_32
29494 #undef SET
29495 #undef SHIFT
29496 #undef LSHIFT
29497 #undef REV_LSHIFT
29498 #undef ORR
29499 #undef BRANCH
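/* Worked example (illustrative, not from the original source): for a
   constant DImode left shift by 40, the "greater than 31" path above
   emits, in effect,
       out_high = in_low << 8;
       out_low  = 0;
   i.e. only the low input word contributes, shifted by 40 - 32.  */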
29502 /* Returns true if the pattern is a valid symbolic address, which is either a
29503 symbol_ref or (symbol_ref + addend).
29505 According to the ARM ELF ABI, the initial addend of REL-type relocations
29506 applied to MOVW and MOVT instructions is formed by interpreting the 16-bit
29507 literal field of the instruction as a 16-bit signed value in the range
29508 -32768 <= A < 32768. */
29510 bool
29511 arm_valid_symbolic_address_p (rtx addr)
29513 rtx xop0, xop1 = NULL_RTX;
29514 rtx tmp = addr;
29516 if (GET_CODE (tmp) == SYMBOL_REF || GET_CODE (tmp) == LABEL_REF)
29517 return true;
29519 /* (const (plus: symbol_ref const_int)) */
29520 if (GET_CODE (addr) == CONST)
29521 tmp = XEXP (addr, 0);
29523 if (GET_CODE (tmp) == PLUS)
29525 xop0 = XEXP (tmp, 0);
29526 xop1 = XEXP (tmp, 1);
29528 if (GET_CODE (xop0) == SYMBOL_REF && CONST_INT_P (xop1))
29529 return IN_RANGE (INTVAL (xop1), -0x8000, 0x7fff);
29532 return false;
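/* Illustrative examples (not from the original source):
     (symbol_ref "foo")                                   -> true
     (const (plus (symbol_ref "foo") (const_int 12)))     -> true
     (const (plus (symbol_ref "foo") (const_int 65536)))  -> false
   the last because the addend does not fit the signed 16-bit REL range.  */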
29535 /* Returns true if *COMPARISON is a comparison operation we can handle,
29536 canonicalizing it and putting the operands into a valid form. */
29537 bool
29538 arm_validize_comparison (rtx *comparison, rtx * op1, rtx * op2)
29540 enum rtx_code code = GET_CODE (*comparison);
29541 int code_int;
29542 machine_mode mode = (GET_MODE (*op1) == VOIDmode)
29543 ? GET_MODE (*op2) : GET_MODE (*op1);
29545 gcc_assert (GET_MODE (*op1) != VOIDmode || GET_MODE (*op2) != VOIDmode);
29547 if (code == UNEQ || code == LTGT)
29548 return false;
29550 code_int = (int)code;
29551 arm_canonicalize_comparison (&code_int, op1, op2, 0);
29552 PUT_CODE (*comparison, (enum rtx_code)code_int);
29554 switch (mode)
29556 case SImode:
29557 if (!arm_add_operand (*op1, mode))
29558 *op1 = force_reg (mode, *op1);
29559 if (!arm_add_operand (*op2, mode))
29560 *op2 = force_reg (mode, *op2);
29561 return true;
29563 case DImode:
29564 if (!cmpdi_operand (*op1, mode))
29565 *op1 = force_reg (mode, *op1);
29566 if (!cmpdi_operand (*op2, mode))
29567 *op2 = force_reg (mode, *op2);
29568 return true;
29570 case HFmode:
29571 if (!TARGET_VFP_FP16INST)
29572 break;
29573 /* FP16 comparisons are done in SF mode. */
29574 mode = SFmode;
29575 *op1 = convert_to_mode (mode, *op1, 1);
29576 *op2 = convert_to_mode (mode, *op2, 1);
29577 /* Fall through. */
29578 case SFmode:
29579 case DFmode:
29580 if (!vfp_compare_operand (*op1, mode))
29581 *op1 = force_reg (mode, *op1);
29582 if (!vfp_compare_operand (*op2, mode))
29583 *op2 = force_reg (mode, *op2);
29584 return true;
29585 default:
29586 break;
29589 return false;
29593 /* Maximum number of instructions to set block of memory. */
29594 static int
29595 arm_block_set_max_insns (void)
29597 if (optimize_function_for_size_p (cfun))
29598 return 4;
29599 else
29600 return current_tune->max_insns_inline_memset;
29603 /* Return TRUE if it's profitable to set block of memory for
29604 non-vectorized case. VAL is the value to set the memory
29605 with. LENGTH is the number of bytes to set. ALIGN is the
29606 alignment of the destination memory in bytes. UNALIGNED_P
29607 is TRUE if we can only set the memory with instructions
29608 meeting alignment requirements. USE_STRD_P is TRUE if we
29609 can use strd to set the memory. */
29610 static bool
29611 arm_block_set_non_vect_profit_p (rtx val,
29612 unsigned HOST_WIDE_INT length,
29613 unsigned HOST_WIDE_INT align,
29614 bool unaligned_p, bool use_strd_p)
29616 int num = 0;
29617 /* For leftovers of 0-7 bytes, we can set the memory block using
29618 strb/strh/str with the minimum number of instructions. */
29619 const int leftover[8] = {0, 1, 1, 2, 1, 2, 2, 3};
29621 if (unaligned_p)
29623 num = arm_const_inline_cost (SET, val);
29624 num += length / align + length % align;
29626 else if (use_strd_p)
29628 num = arm_const_double_inline_cost (val);
29629 num += (length >> 3) + leftover[length & 7];
29631 else
29633 num = arm_const_inline_cost (SET, val);
29634 num += (length >> 2) + leftover[length & 3];
29637 /* We may be able to combine last pair STRH/STRB into a single STR
29638 by shifting one byte back. */
29639 if (unaligned_access && length > 3 && (length & 3) == 3)
29640 num--;
29642 return (num <= arm_block_set_max_insns ());
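/* Worked example (illustrative, not from the original source): for an
   aligned, non-strd memset of 15 bytes the store count is
   (15 >> 2) + leftover[15 & 3] == 3 + 2 == 5 stores (one fewer if the
   final STRH/STRB pair can be merged), plus the cost of loading the
   constant; the block is expanded inline only if that total does not
   exceed arm_block_set_max_insns ().  */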
29645 /* Return TRUE if it's profitable to set block of memory for
29646 vectorized case. LENGTH is the number of bytes to set.
29647 ALIGN is the alignment of destination memory in bytes.
29648 MODE is the vector mode used to set the memory. */
29649 static bool
29650 arm_block_set_vect_profit_p (unsigned HOST_WIDE_INT length,
29651 unsigned HOST_WIDE_INT align,
29652 machine_mode mode)
29654 int num;
29655 bool unaligned_p = ((align & 3) != 0);
29656 unsigned int nelt = GET_MODE_NUNITS (mode);
29658 /* Instruction loading constant value. */
29659 num = 1;
29660 /* Instructions storing the memory. */
29661 num += (length + nelt - 1) / nelt;
29662 /* Instructions adjusting the address expression. We only need to
29663 adjust the address expression if the destination is 4-byte aligned
29664 and the leftover bytes can only be stored by a misaligned store. */
29665 if (!unaligned_p && (length & 3) != 0)
29666 num++;
29668 /* Store the first 16 bytes using vst1:v16qi for the aligned case. */
29669 if (!unaligned_p && mode == V16QImode)
29670 num--;
29672 return (num <= arm_block_set_max_insns ());
29675 /* Set a block of memory using vectorization instructions for the
29676 unaligned case. We fill the first LENGTH bytes of the memory
29677 area starting from DSTBASE with byte constant VALUE. ALIGN is
29678 the alignment requirement of memory. Return TRUE if succeeded. */
29679 static bool
29680 arm_block_set_unaligned_vect (rtx dstbase,
29681 unsigned HOST_WIDE_INT length,
29682 unsigned HOST_WIDE_INT value,
29683 unsigned HOST_WIDE_INT align)
29685 unsigned int i, j, nelt_v16, nelt_v8, nelt_mode;
29686 rtx dst, mem;
29687 rtx val_elt, val_vec, reg;
29688 rtx rval[MAX_VECT_LEN];
29689 rtx (*gen_func) (rtx, rtx);
29690 machine_mode mode;
29691 unsigned HOST_WIDE_INT v = value;
29692 unsigned int offset = 0;
29693 gcc_assert ((align & 0x3) != 0);
29694 nelt_v8 = GET_MODE_NUNITS (V8QImode);
29695 nelt_v16 = GET_MODE_NUNITS (V16QImode);
29696 if (length >= nelt_v16)
29698 mode = V16QImode;
29699 gen_func = gen_movmisalignv16qi;
29701 else
29703 mode = V8QImode;
29704 gen_func = gen_movmisalignv8qi;
29706 nelt_mode = GET_MODE_NUNITS (mode);
29707 gcc_assert (length >= nelt_mode);
29708 /* Skip if it isn't profitable. */
29709 if (!arm_block_set_vect_profit_p (length, align, mode))
29710 return false;
29712 dst = copy_addr_to_reg (XEXP (dstbase, 0));
29713 mem = adjust_automodify_address (dstbase, mode, dst, offset);
29715 v = sext_hwi (v, BITS_PER_WORD);
29716 val_elt = GEN_INT (v);
29717 for (j = 0; j < nelt_mode; j++)
29718 rval[j] = val_elt;
29720 reg = gen_reg_rtx (mode);
29721 val_vec = gen_rtx_CONST_VECTOR (mode, gen_rtvec_v (nelt_mode, rval));
29722 /* Emit instruction loading the constant value. */
29723 emit_move_insn (reg, val_vec);
29725 /* Handle nelt_mode bytes in a vector. */
29726 for (i = 0; (i + nelt_mode <= length); i += nelt_mode)
29728 emit_insn ((*gen_func) (mem, reg));
29729 if (i + 2 * nelt_mode <= length)
29731 emit_insn (gen_add2_insn (dst, GEN_INT (nelt_mode)));
29732 offset += nelt_mode;
29733 mem = adjust_automodify_address (dstbase, mode, dst, offset);
29737 /* If at least nelt_v8 bytes are left over, we must be in
29738 V16QI mode. */
29739 gcc_assert ((i + nelt_v8) > length || mode == V16QImode);
29741 /* Handle (8, 16) bytes leftover. */
29742 if (i + nelt_v8 < length)
29744 emit_insn (gen_add2_insn (dst, GEN_INT (length - i)));
29745 offset += length - i;
29746 mem = adjust_automodify_address (dstbase, mode, dst, offset);
29748 /* We are shifting bytes back, set the alignment accordingly. */
29749 if ((length & 1) != 0 && align >= 2)
29750 set_mem_align (mem, BITS_PER_UNIT);
29752 emit_insn (gen_movmisalignv16qi (mem, reg));
29754 /* Handle (0, 8] bytes leftover. */
29755 else if (i < length && i + nelt_v8 >= length)
29757 if (mode == V16QImode)
29758 reg = gen_lowpart (V8QImode, reg);
29760 emit_insn (gen_add2_insn (dst, GEN_INT ((length - i)
29761 + (nelt_mode - nelt_v8))));
29762 offset += (length - i) + (nelt_mode - nelt_v8);
29763 mem = adjust_automodify_address (dstbase, V8QImode, dst, offset);
29765 /* We are shifting bytes back, set the alignment accordingly. */
29766 if ((length & 1) != 0 && align >= 2)
29767 set_mem_align (mem, BITS_PER_UNIT);
29769 emit_insn (gen_movmisalignv8qi (mem, reg));
29772 return true;
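/* Illustrative example (not from the original source): for a 27-byte
   unaligned memset the loop above emits one V16QImode misaligned store
   for bytes 0-15; the (8, 16) leftover case then advances the address
   and re-stores bytes 11-26 with a second, overlapping V16QImode store
   instead of falling back to scalar stores.  */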
29775 /* Set a block of memory using vectorization instructions for the
29776 aligned case. We fill the first LENGTH bytes of the memory area
29777 starting from DSTBASE with byte constant VALUE. ALIGN is the
29778 alignment requirement of memory. Return TRUE if succeeded. */
29779 static bool
29780 arm_block_set_aligned_vect (rtx dstbase,
29781 unsigned HOST_WIDE_INT length,
29782 unsigned HOST_WIDE_INT value,
29783 unsigned HOST_WIDE_INT align)
29785 unsigned int i, j, nelt_v8, nelt_v16, nelt_mode;
29786 rtx dst, addr, mem;
29787 rtx val_elt, val_vec, reg;
29788 rtx rval[MAX_VECT_LEN];
29789 machine_mode mode;
29790 unsigned HOST_WIDE_INT v = value;
29791 unsigned int offset = 0;
29793 gcc_assert ((align & 0x3) == 0);
29794 nelt_v8 = GET_MODE_NUNITS (V8QImode);
29795 nelt_v16 = GET_MODE_NUNITS (V16QImode);
29796 if (length >= nelt_v16 && unaligned_access && !BYTES_BIG_ENDIAN)
29797 mode = V16QImode;
29798 else
29799 mode = V8QImode;
29801 nelt_mode = GET_MODE_NUNITS (mode);
29802 gcc_assert (length >= nelt_mode);
29803 /* Skip if it isn't profitable. */
29804 if (!arm_block_set_vect_profit_p (length, align, mode))
29805 return false;
29807 dst = copy_addr_to_reg (XEXP (dstbase, 0));
29809 v = sext_hwi (v, BITS_PER_WORD);
29810 val_elt = GEN_INT (v);
29811 for (j = 0; j < nelt_mode; j++)
29812 rval[j] = val_elt;
29814 reg = gen_reg_rtx (mode);
29815 val_vec = gen_rtx_CONST_VECTOR (mode, gen_rtvec_v (nelt_mode, rval));
29816 /* Emit instruction loading the constant value. */
29817 emit_move_insn (reg, val_vec);
29819 i = 0;
29820 /* Handle first 16 bytes specially using vst1:v16qi instruction. */
29821 if (mode == V16QImode)
29823 mem = adjust_automodify_address (dstbase, mode, dst, offset);
29824 emit_insn (gen_movmisalignv16qi (mem, reg));
29825 i += nelt_mode;
29826 /* Handle (8, 16) bytes leftover using vst1:v16qi again. */
29827 if (i + nelt_v8 < length && i + nelt_v16 > length)
29829 emit_insn (gen_add2_insn (dst, GEN_INT (length - nelt_mode)));
29830 offset += length - nelt_mode;
29831 mem = adjust_automodify_address (dstbase, mode, dst, offset);
29832 /* We are shifting bytes back, set the alignment accordingly. */
29833 if ((length & 0x3) == 0)
29834 set_mem_align (mem, BITS_PER_UNIT * 4);
29835 else if ((length & 0x1) == 0)
29836 set_mem_align (mem, BITS_PER_UNIT * 2);
29837 else
29838 set_mem_align (mem, BITS_PER_UNIT);
29840 emit_insn (gen_movmisalignv16qi (mem, reg));
29841 return true;
29843 /* Fall through for bytes leftover. */
29844 mode = V8QImode;
29845 nelt_mode = GET_MODE_NUNITS (mode);
29846 reg = gen_lowpart (V8QImode, reg);
29849 /* Handle 8 bytes in a vector. */
29850 for (; (i + nelt_mode <= length); i += nelt_mode)
29852 addr = plus_constant (Pmode, dst, i);
29853 mem = adjust_automodify_address (dstbase, mode, addr, offset + i);
29854 emit_move_insn (mem, reg);
29857 /* Handle single word leftover by shifting 4 bytes back. We can
29858 use aligned access for this case. */
29859 if (i + UNITS_PER_WORD == length)
29861 addr = plus_constant (Pmode, dst, i - UNITS_PER_WORD);
29862 offset += i - UNITS_PER_WORD;
29863 mem = adjust_automodify_address (dstbase, mode, addr, offset);
29864 /* We are shifting 4 bytes back, set the alignment accordingly. */
29865 if (align > UNITS_PER_WORD)
29866 set_mem_align (mem, BITS_PER_UNIT * UNITS_PER_WORD);
29868 emit_move_insn (mem, reg);
29870 /* Handle (0, 4), (4, 8) bytes leftover by shifting bytes back.
29871 We have to use unaligned access for this case. */
29872 else if (i < length)
29874 emit_insn (gen_add2_insn (dst, GEN_INT (length - nelt_mode)));
29875 offset += length - nelt_mode;
29876 mem = adjust_automodify_address (dstbase, mode, dst, offset);
29877 /* We are shifting bytes back, set the alignment accordingly. */
29878 if ((length & 1) == 0)
29879 set_mem_align (mem, BITS_PER_UNIT * 2);
29880 else
29881 set_mem_align (mem, BITS_PER_UNIT);
29883 emit_insn (gen_movmisalignv8qi (mem, reg));
29886 return true;
29889 /* Set a block of memory using plain strh/strb instructions, only
29890 using instructions allowed by ALIGN on the processor. We fill the
29891 first LENGTH bytes of the memory area starting from DSTBASE
29892 with byte constant VALUE. ALIGN is the alignment requirement
29893 of memory. */
29894 static bool
29895 arm_block_set_unaligned_non_vect (rtx dstbase,
29896 unsigned HOST_WIDE_INT length,
29897 unsigned HOST_WIDE_INT value,
29898 unsigned HOST_WIDE_INT align)
29900 unsigned int i;
29901 rtx dst, addr, mem;
29902 rtx val_exp, val_reg, reg;
29903 machine_mode mode;
29904 HOST_WIDE_INT v = value;
29906 gcc_assert (align == 1 || align == 2);
29908 if (align == 2)
29909 v |= (value << BITS_PER_UNIT);
29911 v = sext_hwi (v, BITS_PER_WORD);
29912 val_exp = GEN_INT (v);
29913 /* Skip if it isn't profitable. */
29914 if (!arm_block_set_non_vect_profit_p (val_exp, length,
29915 align, true, false))
29916 return false;
29918 dst = copy_addr_to_reg (XEXP (dstbase, 0));
29919 mode = (align == 2 ? HImode : QImode);
29920 val_reg = force_reg (SImode, val_exp);
29921 reg = gen_lowpart (mode, val_reg);
29923 for (i = 0; (i + GET_MODE_SIZE (mode) <= length); i += GET_MODE_SIZE (mode))
29925 addr = plus_constant (Pmode, dst, i);
29926 mem = adjust_automodify_address (dstbase, mode, addr, i);
29927 emit_move_insn (mem, reg);
29930 /* Handle single byte leftover. */
29931 if (i + 1 == length)
29933 reg = gen_lowpart (QImode, val_reg);
29934 addr = plus_constant (Pmode, dst, i);
29935 mem = adjust_automodify_address (dstbase, QImode, addr, i);
29936 emit_move_insn (mem, reg);
29937 i++;
29940 gcc_assert (i == length);
29941 return true;
29944 /* Set a block of memory using plain strd/str/strh/strb instructions,
29945 using unaligned stores on processors which support unaligned
29946 semantics for those instructions. We fill the first LENGTH bytes
29947 of the memory area starting from DSTBASE with byte constant VALUE.
29948 ALIGN is the alignment requirement of memory. */
29949 static bool
29950 arm_block_set_aligned_non_vect (rtx dstbase,
29951 unsigned HOST_WIDE_INT length,
29952 unsigned HOST_WIDE_INT value,
29953 unsigned HOST_WIDE_INT align)
29955 unsigned int i;
29956 rtx dst, addr, mem;
29957 rtx val_exp, val_reg, reg;
29958 unsigned HOST_WIDE_INT v;
29959 bool use_strd_p;
29961 use_strd_p = (length >= 2 * UNITS_PER_WORD && (align & 3) == 0
29962 && TARGET_LDRD && current_tune->prefer_ldrd_strd);
29964 v = (value | (value << 8) | (value << 16) | (value << 24));
29965 if (length < UNITS_PER_WORD)
29966 v &= (0xFFFFFFFF >> (UNITS_PER_WORD - length) * BITS_PER_UNIT);
29968 if (use_strd_p)
29969 v |= (v << BITS_PER_WORD);
29970 else
29971 v = sext_hwi (v, BITS_PER_WORD);
29973 val_exp = GEN_INT (v);
29974 /* Skip if it isn't profitable. */
29975 if (!arm_block_set_non_vect_profit_p (val_exp, length,
29976 align, false, use_strd_p))
29978 if (!use_strd_p)
29979 return false;
29981 /* Try without strd. */
29982 v = (v >> BITS_PER_WORD);
29983 v = sext_hwi (v, BITS_PER_WORD);
29984 val_exp = GEN_INT (v);
29985 use_strd_p = false;
29986 if (!arm_block_set_non_vect_profit_p (val_exp, length,
29987 align, false, use_strd_p))
29988 return false;
29991 i = 0;
29992 dst = copy_addr_to_reg (XEXP (dstbase, 0));
29993 /* Handle double words using strd if possible. */
29994 if (use_strd_p)
29996 val_reg = force_reg (DImode, val_exp);
29997 reg = val_reg;
29998 for (; (i + 8 <= length); i += 8)
30000 addr = plus_constant (Pmode, dst, i);
30001 mem = adjust_automodify_address (dstbase, DImode, addr, i);
30002 emit_move_insn (mem, reg);
30005 else
30006 val_reg = force_reg (SImode, val_exp);
30008 /* Handle words. */
30009 reg = (use_strd_p ? gen_lowpart (SImode, val_reg) : val_reg);
30010 for (; (i + 4 <= length); i += 4)
30012 addr = plus_constant (Pmode, dst, i);
30013 mem = adjust_automodify_address (dstbase, SImode, addr, i);
30014 if ((align & 3) == 0)
30015 emit_move_insn (mem, reg);
30016 else
30017 emit_insn (gen_unaligned_storesi (mem, reg));
30020 /* Merge last pair of STRH and STRB into a STR if possible. */
30021 if (unaligned_access && i > 0 && (i + 3) == length)
30023 addr = plus_constant (Pmode, dst, i - 1);
30024 mem = adjust_automodify_address (dstbase, SImode, addr, i - 1);
30025 /* We are shifting one byte back, set the alignment accordingly. */
30026 if ((align & 1) == 0)
30027 set_mem_align (mem, BITS_PER_UNIT);
30029 /* Most likely this is an unaligned access, and we can't tell at
30030 compilation time. */
30031 emit_insn (gen_unaligned_storesi (mem, reg));
30032 return true;
30035 /* Handle half word leftover. */
30036 if (i + 2 <= length)
30038 reg = gen_lowpart (HImode, val_reg);
30039 addr = plus_constant (Pmode, dst, i);
30040 mem = adjust_automodify_address (dstbase, HImode, addr, i);
30041 if ((align & 1) == 0)
30042 emit_move_insn (mem, reg);
30043 else
30044 emit_insn (gen_unaligned_storehi (mem, reg));
30046 i += 2;
30049 /* Handle single byte leftover. */
30050 if (i + 1 == length)
30052 reg = gen_lowpart (QImode, val_reg);
30053 addr = plus_constant (Pmode, dst, i);
30054 mem = adjust_automodify_address (dstbase, QImode, addr, i);
30055 emit_move_insn (mem, reg);
30058 return true;
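/* Worked example (illustrative, not from the original source): an
   11-byte memset of a word-aligned buffer emits two word stores for
   bytes 0-7; with unaligned access available and 3 bytes remaining,
   the trailing STRH/STRB pair is merged into one unaligned word store
   covering bytes 7-10.  */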
30061 /* Set a block of memory using vectorization instructions for both
30062 aligned and unaligned cases. We fill the first LENGTH bytes of
30063 the memory area starting from DSTBASE with byte constant VALUE.
30064 ALIGN is the alignment requirement of memory. */
30065 static bool
30066 arm_block_set_vect (rtx dstbase,
30067 unsigned HOST_WIDE_INT length,
30068 unsigned HOST_WIDE_INT value,
30069 unsigned HOST_WIDE_INT align)
30071 /* Check whether we need to use unaligned store instruction. */
30072 if (((align & 3) != 0 || (length & 3) != 0)
30073 /* Check whether unaligned store instruction is available. */
30074 && (!unaligned_access || BYTES_BIG_ENDIAN))
30075 return false;
30077 if ((align & 3) == 0)
30078 return arm_block_set_aligned_vect (dstbase, length, value, align);
30079 else
30080 return arm_block_set_unaligned_vect (dstbase, length, value, align);
30083 /* Expand a block-set (memset) operation. First we try to do that using
30084 vectorization instructions, then with ARM unaligned access and
30085 double-word stores if profitable. OPERANDS[0] is the destination,
30086 OPERANDS[1] is the number of bytes, OPERANDS[2] is the value to
30087 initialize the memory, OPERANDS[3] is the known alignment of the
30088 destination. */
30089 bool
30090 arm_gen_setmem (rtx *operands)
30092 rtx dstbase = operands[0];
30093 unsigned HOST_WIDE_INT length;
30094 unsigned HOST_WIDE_INT value;
30095 unsigned HOST_WIDE_INT align;
30097 if (!CONST_INT_P (operands[2]) || !CONST_INT_P (operands[1]))
30098 return false;
30100 length = UINTVAL (operands[1]);
30101 if (length > 64)
30102 return false;
30104 value = (UINTVAL (operands[2]) & 0xFF);
30105 align = UINTVAL (operands[3]);
30106 if (TARGET_NEON && length >= 8
30107 && current_tune->string_ops_prefer_neon
30108 && arm_block_set_vect (dstbase, length, value, align))
30109 return true;
30111 if (!unaligned_access && (align & 3) != 0)
30112 return arm_block_set_unaligned_non_vect (dstbase, length, value, align);
30114 return arm_block_set_aligned_non_vect (dstbase, length, value, align);
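/* Note (illustrative, not from the original source): blocks longer than
   64 bytes are never expanded inline here and fall back to the library
   memset; shorter blocks try the NEON strategy first and then one of the
   scalar strategies above, depending on alignment.  */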
30118 static bool
30119 arm_macro_fusion_p (void)
30121 return current_tune->fusible_ops != tune_params::FUSE_NOTHING;
30124 /* Return true if the two back-to-back sets PREV_SET, CURR_SET are suitable
30125 for MOVW / MOVT macro fusion. */
30127 static bool
30128 arm_sets_movw_movt_fusible_p (rtx prev_set, rtx curr_set)
30130 /* We are trying to fuse
30131 movw imm / movt imm
30132 instructions as a group that gets scheduled together. */
30134 rtx set_dest = SET_DEST (curr_set);
30136 if (GET_MODE (set_dest) != SImode)
30137 return false;
30139 /* We are trying to match:
30140 prev (movw) == (set (reg r0) (const_int imm16))
30141 curr (movt) == (set (zero_extract (reg r0)
30142 (const_int 16)
30143 (const_int 16))
30144 (const_int imm16_1))
30146 prev (movw) == (set (reg r1)
30147 (high (symbol_ref ("SYM"))))
30148 curr (movt) == (set (reg r0)
30149 (lo_sum (reg r1)
30150 (symbol_ref ("SYM")))) */
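/* As an illustration (not from the original source), the fused pair
   corresponds to an instruction sequence such as:
       movw  r0, #:lower16:SYM
       movt  r0, #:upper16:SYM
   which together materialize a 32-bit constant or address.  */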
30152 if (GET_CODE (set_dest) == ZERO_EXTRACT)
30154 if (CONST_INT_P (SET_SRC (curr_set))
30155 && CONST_INT_P (SET_SRC (prev_set))
30156 && REG_P (XEXP (set_dest, 0))
30157 && REG_P (SET_DEST (prev_set))
30158 && REGNO (XEXP (set_dest, 0)) == REGNO (SET_DEST (prev_set)))
30159 return true;
30162 else if (GET_CODE (SET_SRC (curr_set)) == LO_SUM
30163 && REG_P (SET_DEST (curr_set))
30164 && REG_P (SET_DEST (prev_set))
30165 && GET_CODE (SET_SRC (prev_set)) == HIGH
30166 && REGNO (SET_DEST (curr_set)) == REGNO (SET_DEST (prev_set)))
30167 return true;
30169 return false;
30172 static bool
30173 aarch_macro_fusion_pair_p (rtx_insn* prev, rtx_insn* curr)
30175 rtx prev_set = single_set (prev);
30176 rtx curr_set = single_set (curr);
30178 if (!prev_set
30179 || !curr_set)
30180 return false;
30182 if (any_condjump_p (curr))
30183 return false;
30185 if (!arm_macro_fusion_p ())
30186 return false;
30188 if (current_tune->fusible_ops & tune_params::FUSE_AES_AESMC
30189 && aarch_crypto_can_dual_issue (prev, curr))
30190 return true;
30192 if (current_tune->fusible_ops & tune_params::FUSE_MOVW_MOVT
30193 && arm_sets_movw_movt_fusible_p (prev_set, curr_set))
30194 return true;
30196 return false;
30199 /* Return true iff the instruction fusion described by OP is enabled. */
30200 bool
30201 arm_fusion_enabled_p (tune_params::fuse_ops op)
30203 return current_tune->fusible_ops & op;
30206 /* Implement TARGET_SCHED_CAN_SPECULATE_INSN. Return true if INSN can be
30207 scheduled for speculative execution. Reject the long-running division
30208 and square-root instructions. */
30210 static bool
30211 arm_sched_can_speculate_insn (rtx_insn *insn)
30213 switch (get_attr_type (insn))
30215 case TYPE_SDIV:
30216 case TYPE_UDIV:
30217 case TYPE_FDIVS:
30218 case TYPE_FDIVD:
30219 case TYPE_FSQRTS:
30220 case TYPE_FSQRTD:
30221 case TYPE_NEON_FP_SQRT_S:
30222 case TYPE_NEON_FP_SQRT_D:
30223 case TYPE_NEON_FP_SQRT_S_Q:
30224 case TYPE_NEON_FP_SQRT_D_Q:
30225 case TYPE_NEON_FP_DIV_S:
30226 case TYPE_NEON_FP_DIV_D:
30227 case TYPE_NEON_FP_DIV_S_Q:
30228 case TYPE_NEON_FP_DIV_D_Q:
30229 return false;
30230 default:
30231 return true;
30235 /* Implement the TARGET_ASAN_SHADOW_OFFSET hook. */
30237 static unsigned HOST_WIDE_INT
30238 arm_asan_shadow_offset (void)
30240 return HOST_WIDE_INT_1U << 29;
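/* Note (illustrative, not from the original source): with the default
   ASan shadow scale of 3, this places the shadow byte for an address A
   at (A >> 3) + 0x20000000, since 1 << 29 == 0x20000000.  */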
30244 /* This is a temporary fix for PR60655. Ideally we need
30245 to handle most of these cases in the generic part but
30246 currently we reject minus (..) (sym_ref). We try to
30247 ameliorate the case with minus (sym_ref1) (sym_ref2)
30248 where they are in the same section. */
30250 static bool
30251 arm_const_not_ok_for_debug_p (rtx p)
30253 tree decl_op0 = NULL;
30254 tree decl_op1 = NULL;
30256 if (GET_CODE (p) == MINUS)
30258 if (GET_CODE (XEXP (p, 1)) == SYMBOL_REF)
30260 decl_op1 = SYMBOL_REF_DECL (XEXP (p, 1));
30261 if (decl_op1
30262 && GET_CODE (XEXP (p, 0)) == SYMBOL_REF
30263 && (decl_op0 = SYMBOL_REF_DECL (XEXP (p, 0))))
30265 if ((VAR_P (decl_op1)
30266 || TREE_CODE (decl_op1) == CONST_DECL)
30267 && (VAR_P (decl_op0)
30268 || TREE_CODE (decl_op0) == CONST_DECL))
30269 return (get_variable_section (decl_op1, false)
30270 != get_variable_section (decl_op0, false));
30272 if (TREE_CODE (decl_op1) == LABEL_DECL
30273 && TREE_CODE (decl_op0) == LABEL_DECL)
30274 return (DECL_CONTEXT (decl_op1)
30275 != DECL_CONTEXT (decl_op0));
30278 return true;
30282 return false;
30285 /* Return TRUE if X is a reference to a value in a constant pool. */
30286 extern bool
30287 arm_is_constant_pool_ref (rtx x)
30289 return (MEM_P (x)
30290 && GET_CODE (XEXP (x, 0)) == SYMBOL_REF
30291 && CONSTANT_POOL_ADDRESS_P (XEXP (x, 0)));
30294 /* Remember the last target of arm_set_current_function. */
30295 static GTY(()) tree arm_previous_fndecl;
30297 /* Restore or save the TREE_TARGET_GLOBALS from or to NEW_TREE. */
30299 void
30300 save_restore_target_globals (tree new_tree)
30302 /* If we have a previous state, use it. */
30303 if (TREE_TARGET_GLOBALS (new_tree))
30304 restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
30305 else if (new_tree == target_option_default_node)
30306 restore_target_globals (&default_target_globals);
30307 else
30309 /* Call target_reinit and save the state for TARGET_GLOBALS. */
30310 TREE_TARGET_GLOBALS (new_tree) = save_target_globals_default_opts ();
30313 arm_option_params_internal ();
30316 /* Invalidate arm_previous_fndecl. */
30318 void
30319 arm_reset_previous_fndecl (void)
30321 arm_previous_fndecl = NULL_TREE;
30324 /* Establish appropriate back-end context for processing the function
30325 FNDECL. The argument might be NULL to indicate processing at top
30326 level, outside of any function scope. */
30328 static void
30329 arm_set_current_function (tree fndecl)
30331 if (!fndecl || fndecl == arm_previous_fndecl)
30332 return;
30334 tree old_tree = (arm_previous_fndecl
30335 ? DECL_FUNCTION_SPECIFIC_TARGET (arm_previous_fndecl)
30336 : NULL_TREE);
30338 tree new_tree = DECL_FUNCTION_SPECIFIC_TARGET (fndecl);
30340 /* If current function has no attributes but previous one did,
30341 use the default node. */
30342 if (! new_tree && old_tree)
30343 new_tree = target_option_default_node;
30345 /* If there is nothing to do, return. #pragma GCC reset or #pragma GCC pop to
30346 the default have been handled by save_restore_target_globals from
30347 arm_pragma_target_parse. */
30348 if (old_tree == new_tree)
30349 return;
30351 arm_previous_fndecl = fndecl;
30353 /* First set the target options. */
30354 cl_target_option_restore (&global_options, TREE_TARGET_OPTION (new_tree));
30356 save_restore_target_globals (new_tree);
30359 /* Implement TARGET_OPTION_PRINT. */
30361 static void
30362 arm_option_print (FILE *file, int indent, struct cl_target_option *ptr)
30364 int flags = ptr->x_target_flags;
30365 const char *fpu_name;
30367 fpu_name = (ptr->x_arm_fpu_index == TARGET_FPU_auto
30368 ? "auto" : all_fpus[ptr->x_arm_fpu_index].name);
30370 fprintf (file, "%*sselected arch %s\n", indent, "",
30371 TARGET_THUMB2_P (flags) ? "thumb2" :
30372 TARGET_THUMB_P (flags) ? "thumb1" :
30373 "arm");
30375 fprintf (file, "%*sselected fpu %s\n", indent, "", fpu_name);
30378 /* Hook to determine if one function can safely inline another. */
30380 static bool
30381 arm_can_inline_p (tree caller, tree callee)
30383 tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
30384 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);
30385 bool can_inline = true;
30387 struct cl_target_option *caller_opts
30388 = TREE_TARGET_OPTION (caller_tree ? caller_tree
30389 : target_option_default_node);
30391 struct cl_target_option *callee_opts
30392 = TREE_TARGET_OPTION (callee_tree ? callee_tree
30393 : target_option_default_node);
30395 if (callee_opts == caller_opts)
30396 return true;
30398 /* Callee's ISA features should be a subset of the caller's. */
30399 struct arm_build_target caller_target;
30400 struct arm_build_target callee_target;
30401 caller_target.isa = sbitmap_alloc (isa_num_bits);
30402 callee_target.isa = sbitmap_alloc (isa_num_bits);
30404 arm_configure_build_target (&caller_target, caller_opts, &global_options_set,
30405 false);
30406 arm_configure_build_target (&callee_target, callee_opts, &global_options_set,
30407 false);
30408 if (!bitmap_subset_p (callee_target.isa, caller_target.isa))
30409 can_inline = false;
30411 sbitmap_free (caller_target.isa);
30412 sbitmap_free (callee_target.isa);
30414 /* OK to inline between different modes.
30415 Functions with mode-specific instructions, e.g. using asm,
30416 must be explicitly protected with noinline. */
30417 return can_inline;
30420 /* Hook to fix a function's alignment when affected by a target attribute. */
30422 static void
30423 arm_relayout_function (tree fndecl)
30425 if (DECL_USER_ALIGN (fndecl))
30426 return;
30428 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (fndecl);
30430 if (!callee_tree)
30431 callee_tree = target_option_default_node;
30433 struct cl_target_option *opts = TREE_TARGET_OPTION (callee_tree);
30434 SET_DECL_ALIGN
30435 (fndecl,
30436 FUNCTION_ALIGNMENT (FUNCTION_BOUNDARY_P (opts->x_target_flags)));
30439 /* Inner function to process the attribute((target(...))); it takes an
30440 argument and sets the current options from that argument. If we have a
30441 list, recursively go over the list. */
30443 static bool
30444 arm_valid_target_attribute_rec (tree args, struct gcc_options *opts)
30446 if (TREE_CODE (args) == TREE_LIST)
30448 bool ret = true;
30450 for (; args; args = TREE_CHAIN (args))
30451 if (TREE_VALUE (args)
30452 && !arm_valid_target_attribute_rec (TREE_VALUE (args), opts))
30453 ret = false;
30454 return ret;
30457 else if (TREE_CODE (args) != STRING_CST)
30459 error ("attribute %<target%> argument not a string");
30460 return false;
30463 char *argstr = ASTRDUP (TREE_STRING_POINTER (args));
30464 char *q;
30466 while ((q = strtok (argstr, ",")) != NULL)
30468 while (ISSPACE (*q)) ++q;
30470 argstr = NULL;
30471 if (!strncmp (q, "thumb", 5))
30472 opts->x_target_flags |= MASK_THUMB;
30474 else if (!strncmp (q, "arm", 3))
30475 opts->x_target_flags &= ~MASK_THUMB;
30477 else if (!strncmp (q, "fpu=", 4))
30479 int fpu_index;
30480 if (! opt_enum_arg_to_value (OPT_mfpu_, q+4,
30481 &fpu_index, CL_TARGET))
30483 error ("invalid fpu for attribute(target(\"%s\"))", q);
30484 return false;
30486 if (fpu_index == TARGET_FPU_auto)
30488 /* This doesn't really make sense until we support
30489 general dynamic selection of the architecture and all
30490 sub-features. */
30491 sorry ("auto fpu selection not currently permitted here");
30492 return false;
30494 opts->x_arm_fpu_index = (enum fpu_type) fpu_index;
30496 else
30498 error ("attribute(target(\"%s\")) is unknown", q);
30499 return false;
30503 return true;
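/* Illustrative usage (not from the original source) of the strings this
   parser accepts:

     int f (void) __attribute__ ((target ("thumb,fpu=vfpv4")));
     int g (void) __attribute__ ((target ("arm")));

   Anything other than "thumb", "arm" or "fpu=<name>" is rejected with an
   error.  */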
30506 /* Return a TARGET_OPTION_NODE tree of the target options listed or NULL. */
30508 tree
30509 arm_valid_target_attribute_tree (tree args, struct gcc_options *opts,
30510 struct gcc_options *opts_set)
30512 struct cl_target_option cl_opts;
30514 if (!arm_valid_target_attribute_rec (args, opts))
30515 return NULL_TREE;
30517 cl_target_option_save (&cl_opts, opts);
30518 arm_configure_build_target (&arm_active_target, &cl_opts, opts_set, false);
30519 arm_option_check_internal (opts);
30520 /* Do any overrides, such as global options arch=xxx. */
30521 arm_option_override_internal (opts, opts_set);
30523 return build_target_option_node (opts);
30526 static void
30527 add_attribute (const char * mode, tree *attributes)
30529 size_t len = strlen (mode);
30530 tree value = build_string (len, mode);
30532 TREE_TYPE (value) = build_array_type (char_type_node,
30533 build_index_type (size_int (len)));
30535 *attributes = tree_cons (get_identifier ("target"),
30536 build_tree_list (NULL_TREE, value),
30537 *attributes);
30540 /* For testing. Insert thumb or arm modes alternately on functions. */
30542 static void
30543 arm_insert_attributes (tree fndecl, tree * attributes)
30545 const char *mode;
30547 if (! TARGET_FLIP_THUMB)
30548 return;
30550 if (TREE_CODE (fndecl) != FUNCTION_DECL || DECL_EXTERNAL(fndecl)
30551 || DECL_BUILT_IN (fndecl) || DECL_ARTIFICIAL (fndecl))
30552 return;
30554 /* Nested definitions must inherit mode. */
30555 if (current_function_decl)
30557 mode = TARGET_THUMB ? "thumb" : "arm";
30558 add_attribute (mode, attributes);
30559 return;
30562 /* If there is already a setting don't change it. */
30563 if (lookup_attribute ("target", *attributes) != NULL)
30564 return;
30566 mode = thumb_flipper ? "thumb" : "arm";
30567 add_attribute (mode, attributes);
30569 thumb_flipper = !thumb_flipper;
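/* Roughly speaking, with -mflip-thumb a file containing the top-level
   definitions f1, f2 and f3 is compiled as if they had been given
   alternating attributes, e.g. f1 as target("thumb"), f2 as target("arm"),
   f3 as target("thumb"), while nested functions simply inherit the mode of
   their enclosing function and explicit target attributes are left
   untouched.  */
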
/* Hook to validate attribute((target("string"))).  */

static bool
arm_valid_target_attribute_p (tree fndecl, tree ARG_UNUSED (name),
                              tree args, int ARG_UNUSED (flags))
{
  bool ret = true;
  struct gcc_options func_options;
  tree cur_tree, new_optimize;
  gcc_assert ((fndecl != NULL_TREE) && (args != NULL_TREE));

  /* Get the optimization options of the current function.  */
  tree func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);

  /* If the function changed the optimization levels as well as setting target
     options, start with the optimizations specified.  */
  if (!func_optimize)
    func_optimize = optimization_default_node;

  /* Init func_options.  */
  memset (&func_options, 0, sizeof (func_options));
  init_options_struct (&func_options, NULL);
  lang_hooks.init_options_struct (&func_options);

  /* Initialize func_options to the defaults.  */
  cl_optimization_restore (&func_options,
                           TREE_OPTIMIZATION (func_optimize));

  cl_target_option_restore (&func_options,
                            TREE_TARGET_OPTION (target_option_default_node));

  /* Set func_options flags with new target mode.  */
  cur_tree = arm_valid_target_attribute_tree (args, &func_options,
                                              &global_options_set);

  if (cur_tree == NULL_TREE)
    ret = false;

  new_optimize = build_optimization_node (&func_options);

  DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = cur_tree;

  DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize;

  finalize_options_struct (&func_options);

  return ret;
}

/* Match an ISA feature bitmap to a named FPU.  We always use the
   first entry that exactly matches the feature set, so that we
   effectively canonicalize the FPU name for the assembler.  */
static const char*
arm_identify_fpu_from_isa (sbitmap isa)
{
  auto_sbitmap fpubits (isa_num_bits);
  auto_sbitmap cand_fpubits (isa_num_bits);

  bitmap_and (fpubits, isa, isa_all_fpubits);

  /* If there are no ISA feature bits relating to the FPU, we must be
     doing soft-float.  */
  if (bitmap_empty_p (fpubits))
    return "softvfp";

  for (unsigned int i = 0; i < ARRAY_SIZE (all_fpus); i++)
    {
      arm_initialize_isa (cand_fpubits, all_fpus[i].isa_bits);
      if (bitmap_equal_p (fpubits, cand_fpubits))
        return all_fpus[i].name;
    }

  /* We must find an entry, or things have gone wrong.  */
  gcc_unreachable ();
}

void
arm_declare_function_name (FILE *stream, const char *name, tree decl)
{
  fprintf (stream, "\t.syntax unified\n");

  if (TARGET_THUMB)
    {
      if (is_called_in_ARM_mode (decl)
          || (TARGET_THUMB1 && !TARGET_THUMB1_ONLY
              && cfun->is_thunk))
        fprintf (stream, "\t.code 32\n");
      else if (TARGET_THUMB1)
        fprintf (stream, "\t.code\t16\n\t.thumb_func\n");
      else
        fprintf (stream, "\t.thumb\n\t.thumb_func\n");
    }
  else
    fprintf (stream, "\t.arm\n");

  asm_fprintf (asm_out_file, "\t.fpu %s\n",
               (TARGET_SOFT_FLOAT
                ? "softvfp"
                : arm_identify_fpu_from_isa (arm_active_target.isa)));

  if (TARGET_POKE_FUNCTION_NAME)
    arm_poke_function_name (stream, (const char *) name);
}

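/* As an illustration, for a Thumb-2 function compiled with a VFPv4 FPU
   the directives emitted above would look roughly like:

       .syntax unified
       .thumb
       .thumb_func
       .fpu vfpv4

   with "softvfp" named instead when compiling for soft-float.  */
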
/* If MEM is in the form of [base+offset], extract the two parts of the
   address and set BASE and OFFSET accordingly; otherwise return false
   after clearing BASE and OFFSET.  */

static bool
extract_base_offset_in_addr (rtx mem, rtx *base, rtx *offset)
{
  rtx addr;

  gcc_assert (MEM_P (mem));

  addr = XEXP (mem, 0);

  /* Strip off const from addresses like (const (addr)).  */
  if (GET_CODE (addr) == CONST)
    addr = XEXP (addr, 0);

  if (GET_CODE (addr) == REG)
    {
      *base = addr;
      *offset = const0_rtx;
      return true;
    }

  if (GET_CODE (addr) == PLUS
      && GET_CODE (XEXP (addr, 0)) == REG
      && CONST_INT_P (XEXP (addr, 1)))
    {
      *base = XEXP (addr, 0);
      *offset = XEXP (addr, 1);
      return true;
    }

  *base = NULL_RTX;
  *offset = NULL_RTX;

  return false;
}

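/* For example, (mem (reg r1)) yields BASE = r1 and OFFSET = 0,
   (mem (plus (reg r1) (const_int 8))) yields BASE = r1 and OFFSET = 8,
   while (mem (plus (reg r1) (reg r2))) makes the function above return
   false with BASE and OFFSET cleared.  */
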
/* If INSN is a load or store of an address in the form of [base+offset],
   extract the two parts and set them in BASE and OFFSET.  IS_LOAD is set
   to TRUE if it's a load.  Return TRUE if INSN is such an instruction,
   otherwise return FALSE.  */

static bool
fusion_load_store (rtx_insn *insn, rtx *base, rtx *offset, bool *is_load)
{
  rtx x, dest, src;

  gcc_assert (INSN_P (insn));
  x = PATTERN (insn);
  if (GET_CODE (x) != SET)
    return false;

  src = SET_SRC (x);
  dest = SET_DEST (x);
  if (GET_CODE (src) == REG && GET_CODE (dest) == MEM)
    {
      *is_load = false;
      extract_base_offset_in_addr (dest, base, offset);
    }
  else if (GET_CODE (src) == MEM && GET_CODE (dest) == REG)
    {
      *is_load = true;
      extract_base_offset_in_addr (src, base, offset);
    }
  else
    return false;

  return (*base != NULL_RTX && *offset != NULL_RTX);
}

/* Implement the TARGET_SCHED_FUSION_PRIORITY hook.

   Currently we only support fusing ldr and str instructions, so FUSION_PRI
   and PRI are only calculated for these instructions.  For other
   instructions, FUSION_PRI and PRI are simply set to MAX_PRI.  In the
   future, other kinds of instruction fusion can be supported by returning
   different priorities.

   It's important that irrelevant instructions get the largest FUSION_PRI.  */

static void
arm_sched_fusion_priority (rtx_insn *insn, int max_pri,
                           int *fusion_pri, int *pri)
{
  int tmp, off_val;
  bool is_load;
  rtx base, offset;

  gcc_assert (INSN_P (insn));

  tmp = max_pri - 1;
  if (!fusion_load_store (insn, &base, &offset, &is_load))
    {
      *pri = tmp;
      *fusion_pri = tmp;
      return;
    }

  /* Load goes first.  */
  if (is_load)
    *fusion_pri = tmp - 1;
  else
    *fusion_pri = tmp - 2;

  tmp /= 2;

  /* INSN with smaller base register goes first.  */
  tmp -= ((REGNO (base) & 0xff) << 20);

  /* INSN with smaller offset goes first.  */
  off_val = (int)(INTVAL (offset));
  if (off_val >= 0)
    tmp -= (off_val & 0xfffff);
  else
    tmp += ((- off_val) & 0xfffff);

  *pri = tmp;
  return;
}

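/* A rough worked example: two loads from the same base register with
   offsets #4 and #8 both receive FUSION_PRI = max_pri - 2, marking them
   as fusion candidates, and only the offset is then subtracted from the
   shared base value, so the #4 load is left with the slightly larger PRI
   and tends to be placed first, matching the "smaller offset goes first"
   rule above.  */
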
/* Construct and return a PARALLEL RTX vector with elements numbering the
   lanes of either the high (HIGH == TRUE) or low (HIGH == FALSE) half of
   the vector - from the perspective of the architecture.  This does not
   line up with GCC's perspective on lane numbers, so we end up with
   different masks depending on our target endian-ness.  The diagram
   below may help.  We must draw the distinction when building masks
   which select one half of the vector.  An instruction selecting
   architectural low-lanes for a big-endian target must be described using
   a mask selecting GCC high-lanes.

                     Big-Endian              Little-Endian

   GCC                0   1   2   3           3   2   1   0
                    | x | x | x | x |       | x | x | x | x |
   Architecture       3   2   1   0           3   2   1   0

   Low Mask:          { 2, 3 }                { 0, 1 }
   High Mask:         { 0, 1 }                { 2, 3 }
*/

rtx
arm_simd_vect_par_cnst_half (machine_mode mode, bool high)
{
  int nunits = GET_MODE_NUNITS (mode);
  rtvec v = rtvec_alloc (nunits / 2);
  int high_base = nunits / 2;
  int low_base = 0;
  int base;
  rtx t1;
  int i;

  if (BYTES_BIG_ENDIAN)
    base = high ? low_base : high_base;
  else
    base = high ? high_base : low_base;

  for (i = 0; i < nunits / 2; i++)
    RTVEC_ELT (v, i) = GEN_INT (base + i);

  t1 = gen_rtx_PARALLEL (mode, v);
  return t1;
}

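/* For instance, with MODE == V4SImode and HIGH == true this returns
   (parallel [(const_int 2) (const_int 3)]) on a little-endian target and
   (parallel [(const_int 0) (const_int 1)]) on a big-endian target,
   matching the "High Mask" row of the diagram above.  */
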
/* Check OP for validity as a PARALLEL RTX vector with elements
   numbering the lanes of either the high (HIGH == TRUE) or low lanes,
   from the perspective of the architecture.  See the diagram above
   arm_simd_vect_par_cnst_half for more details.  */

bool
arm_simd_check_vect_par_cnst_half_p (rtx op, machine_mode mode,
                                     bool high)
{
  rtx ideal = arm_simd_vect_par_cnst_half (mode, high);
  HOST_WIDE_INT count_op = XVECLEN (op, 0);
  HOST_WIDE_INT count_ideal = XVECLEN (ideal, 0);
  int i = 0;

  if (!VECTOR_MODE_P (mode))
    return false;

  if (count_op != count_ideal)
    return false;

  for (i = 0; i < count_ideal; i++)
    {
      rtx elt_op = XVECEXP (op, 0, i);
      rtx elt_ideal = XVECEXP (ideal, 0, i);

      if (!CONST_INT_P (elt_op)
          || INTVAL (elt_ideal) != INTVAL (elt_op))
        return false;
    }
  return true;
}

/* Can output mi_thunk for all cases except for non-zero vcall_offset
   in Thumb1.  */
static bool
arm_can_output_mi_thunk (const_tree, HOST_WIDE_INT, HOST_WIDE_INT vcall_offset,
                         const_tree)
{
  /* For now, we punt and do not handle this for TARGET_THUMB1.  */
  if (vcall_offset && TARGET_THUMB1)
    return false;

  /* Otherwise ok.  */
  return true;
}

/* Generate RTL for a conditional branch with rtx comparison CODE in
   mode CC_MODE.  The destination of the unlikely conditional branch
   is LABEL_REF.  */

void
arm_gen_unlikely_cbranch (enum rtx_code code, machine_mode cc_mode,
                          rtx label_ref)
{
  rtx x;
  x = gen_rtx_fmt_ee (code, VOIDmode,
                      gen_rtx_REG (cc_mode, CC_REGNUM),
                      const0_rtx);

  x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
                            gen_rtx_LABEL_REF (VOIDmode, label_ref),
                            pc_rtx);
  emit_unlikely_jump (gen_rtx_SET (pc_rtx, x));
}

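/* For example, arm_gen_unlikely_cbranch (NE, CC_Zmode, label) emits a
   jump whose pattern is roughly

       (set (pc) (if_then_else (ne (reg:CC_Z CC_REGNUM) (const_int 0))
                               (label_ref label)
                               (pc)))

   which emit_unlikely_jump marks as very unlikely to be taken.  */
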
/* Implement the TARGET_ASM_ELF_FLAGS_NUMERIC hook.

   For pure-code sections there is no letter code for this attribute, so
   output all the section flags numerically when this is needed.  */

static bool
arm_asm_elf_flags_numeric (unsigned int flags, unsigned int *num)
{
  if (flags & SECTION_ARM_PURECODE)
    {
      *num = 0x20000000;

      if (!(flags & SECTION_DEBUG))
        *num |= 0x2;
      if (flags & SECTION_EXCLUDE)
        *num |= 0x80000000;
      if (flags & SECTION_WRITE)
        *num |= 0x1;
      if (flags & SECTION_CODE)
        *num |= 0x4;
      if (flags & SECTION_MERGE)
        *num |= 0x10;
      if (flags & SECTION_STRINGS)
        *num |= 0x20;
      if (flags & SECTION_TLS)
        *num |= 0x400;
      if (HAVE_COMDAT_GROUP && (flags & SECTION_LINKONCE))
        *num |= 0x200;

      return true;
    }

  return false;
}

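/* As a worked example, a non-debug executable pure-code section (flags
   containing SECTION_ARM_PURECODE and SECTION_CODE) produces
   0x20000000 | 0x2 | 0x4 == 0x20000006, i.e. SHF_ARM_PURECODE plus
   SHF_ALLOC and SHF_EXECINSTR, which is then emitted numerically in the
   .section directive.  */
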
/* Implement the TARGET_ASM_FUNCTION_SECTION hook.

   If pure-code is passed as an option, make sure all functions are in
   sections that have the SHF_ARM_PURECODE attribute.  */

static section *
arm_function_section (tree decl, enum node_frequency freq,
                      bool startup, bool exit)
{
  const char * section_name;
  section * sec;

  if (!decl || TREE_CODE (decl) != FUNCTION_DECL)
    return default_function_section (decl, freq, startup, exit);

  if (!target_pure_code)
    return default_function_section (decl, freq, startup, exit);

  section_name = DECL_SECTION_NAME (decl);

  /* If a function is not in a named section then it falls under the 'default'
     text section, also known as '.text'.  We can preserve previous behavior as
     the default text section already has the SHF_ARM_PURECODE section
     attribute.  */
  if (!section_name)
    {
      section *default_sec = default_function_section (decl, freq, startup,
                                                       exit);

      /* If default_sec is not null, then it must be a special section like for
         example .text.startup.  We set the pure-code attribute and return the
         same section to preserve existing behavior.  */
      if (default_sec)
        default_sec->common.flags |= SECTION_ARM_PURECODE;
      return default_sec;
    }

  /* Otherwise look whether a section has already been created with
     'section_name'.  */
  sec = get_named_section (decl, section_name, 0);
  if (!sec)
    /* If that is not the case passing NULL as the section's name to
       'get_named_section' will create a section with the declaration's
       section name.  */
    sec = get_named_section (decl, NULL, 0);

  /* Set the SHF_ARM_PURECODE attribute.  */
  sec->common.flags |= SECTION_ARM_PURECODE;

  return sec;
}

/* Implements the TARGET_SECTION_TYPE_FLAGS hook.

   If DECL is a function declaration and pure-code is passed as an option
   then add the SHF_ARM_PURECODE attribute to the section flags.  NAME is the
   section's name and RELOC indicates whether the declaration's initializer
   may contain runtime relocations.  */

static unsigned int
arm_elf_section_type_flags (tree decl, const char *name, int reloc)
{
  unsigned int flags = default_section_type_flags (decl, name, reloc);

  if (decl && TREE_CODE (decl) == FUNCTION_DECL && target_pure_code)
    flags |= SECTION_ARM_PURECODE;

  return flags;
}

/* Generate call to __aeabi_[mode]divmod (op0, op1).  */

static void
arm_expand_divmod_libfunc (rtx libfunc, machine_mode mode,
                           rtx op0, rtx op1,
                           rtx *quot_p, rtx *rem_p)
{
  if (mode == SImode)
    gcc_assert (!TARGET_IDIV);

  machine_mode libval_mode = smallest_mode_for_size (2 * GET_MODE_BITSIZE (mode),
                                                     MODE_INT);

  rtx libval = emit_library_call_value (libfunc, NULL_RTX, LCT_CONST,
                                        libval_mode, 2,
                                        op0, GET_MODE (op0),
                                        op1, GET_MODE (op1));

  rtx quotient = simplify_gen_subreg (mode, libval, libval_mode, 0);
  rtx remainder = simplify_gen_subreg (mode, libval, libval_mode,
                                       GET_MODE_SIZE (mode));

  gcc_assert (quotient);
  gcc_assert (remainder);

  *quot_p = quotient;
  *rem_p = remainder;
}

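/* For SImode, for instance, the sequence above calls the divmod libfunc
   (__aeabi_idivmod or __aeabi_uidivmod under the ARM run-time ABI),
   treats the r0/r1 result as a single DImode value and then splits it
   back with the two subregs: the subreg at byte offset 0 recovers the
   quotient (returned in r0) and the one at offset 4 the remainder (r1).  */
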
/* This function checks for the availability of the coprocessor builtin passed
   in BUILTIN for the current target.  Returns true if it is available and
   false otherwise.  If a BUILTIN is passed for which this function has not
   been implemented it will abort via gcc_unreachable.  */

bool
arm_coproc_builtin_available (enum unspecv builtin)
{
  /* None of these builtins are available in Thumb mode if the target only
     supports Thumb-1.  */
  if (TARGET_THUMB1)
    return false;

  switch (builtin)
    {
    case VUNSPEC_CDP:
    case VUNSPEC_LDC:
    case VUNSPEC_LDCL:
    case VUNSPEC_STC:
    case VUNSPEC_STCL:
    case VUNSPEC_MCR:
    case VUNSPEC_MRC:
      if (arm_arch4)
        return true;
      break;
    case VUNSPEC_CDP2:
    case VUNSPEC_LDC2:
    case VUNSPEC_LDC2L:
    case VUNSPEC_STC2:
    case VUNSPEC_STC2L:
    case VUNSPEC_MCR2:
    case VUNSPEC_MRC2:
      /* Only present in ARMv5*, ARMv6 (but not ARMv6-M), ARMv7* and
         ARMv8-{A,M}.  */
      if (arm_arch5)
        return true;
      break;
    case VUNSPEC_MCRR:
    case VUNSPEC_MRRC:
      /* Only present in ARMv5TE, ARMv6 (but not ARMv6-M), ARMv7* and
         ARMv8-{A,M}.  */
      if (arm_arch6 || arm_arch5te)
        return true;
      break;
    case VUNSPEC_MCRR2:
    case VUNSPEC_MRRC2:
      if (arm_arch6)
        return true;
      break;
    default:
      gcc_unreachable ();
    }
  return false;
}

/* This function returns true if OP is a valid memory operand for the ldc and
   stc coprocessor instructions and false otherwise.  */

bool
arm_coproc_ldc_stc_legitimate_address (rtx op)
{
  HOST_WIDE_INT range;
  /* Has to be a memory operand.  */
  if (!MEM_P (op))
    return false;

  op = XEXP (op, 0);

  /* We accept registers.  */
  if (REG_P (op))
    return true;

  switch GET_CODE (op)
    {
    case PLUS:
      {
        /* Or registers with an offset.  */
        if (!REG_P (XEXP (op, 0)))
          return false;

        op = XEXP (op, 1);

        /* The offset must be an immediate though.  */
        if (!CONST_INT_P (op))
          return false;

        range = INTVAL (op);

        /* Within the range of [-1020,1020].  */
        if (!IN_RANGE (range, -1020, 1020))
          return false;

        /* And a multiple of 4.  */
        return (range % 4) == 0;
      }
    case PRE_INC:
    case POST_INC:
    case PRE_DEC:
    case POST_DEC:
      return REG_P (XEXP (op, 0));
    default:
      gcc_unreachable ();
    }
  return false;
}

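/* In assembly terms the accepted operands roughly correspond to [rN],
   [rN, #imm] with imm a multiple of 4 in [-1020, 1020], and the
   pre/post-increment and pre/post-decrement forms of a plain register
   address.  */
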
#include "gt-arm.h"