[ARM] PR target/71270 fix neon_valid_immediate for big-endian
[official-gcc.git] / gcc / config / arm / arm.c
blob934f5d141f720d1c6c0a86c12eed1c62e8cbb983
1 /* Output routines for GCC for ARM.
2 Copyright (C) 1991-2017 Free Software Foundation, Inc.
3 Contributed by Pieter `Tiggr' Schoenmakers (rcpieter@win.tue.nl)
4 and Martin Simmons (@harleqn.co.uk).
5 More major hacks by Richard Earnshaw (rearnsha@arm.com).
7 This file is part of GCC.
9 GCC is free software; you can redistribute it and/or modify it
10 under the terms of the GNU General Public License as published
11 by the Free Software Foundation; either version 3, or (at your
12 option) any later version.
14 GCC is distributed in the hope that it will be useful, but WITHOUT
15 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
16 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
17 License for more details.
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3. If not see
21 <http://www.gnu.org/licenses/>. */
23 #include "config.h"
24 #include "system.h"
25 #include "coretypes.h"
26 #include "backend.h"
27 #include "target.h"
28 #include "rtl.h"
29 #include "tree.h"
30 #include "memmodel.h"
31 #include "cfghooks.h"
32 #include "df.h"
33 #include "tm_p.h"
34 #include "stringpool.h"
35 #include "optabs.h"
36 #include "regs.h"
37 #include "emit-rtl.h"
38 #include "recog.h"
39 #include "cgraph.h"
40 #include "diagnostic-core.h"
41 #include "alias.h"
42 #include "fold-const.h"
43 #include "stor-layout.h"
44 #include "calls.h"
45 #include "varasm.h"
46 #include "output.h"
47 #include "insn-attr.h"
48 #include "flags.h"
49 #include "reload.h"
50 #include "explow.h"
51 #include "expr.h"
52 #include "cfgrtl.h"
53 #include "sched-int.h"
54 #include "common/common-target.h"
55 #include "langhooks.h"
56 #include "intl.h"
57 #include "libfuncs.h"
58 #include "params.h"
59 #include "opts.h"
60 #include "dumpfile.h"
61 #include "target-globals.h"
62 #include "builtins.h"
63 #include "tm-constrs.h"
64 #include "rtl-iter.h"
65 #include "optabs-libfuncs.h"
66 #include "gimplify.h"
68 /* This file should be included last. */
69 #include "target-def.h"
71 /* Forward definitions of types. */
72 typedef struct minipool_node Mnode;
73 typedef struct minipool_fixup Mfix;
75 void (*arm_lang_output_object_attributes_hook)(void);
77 struct four_ints
79 int i[4];
82 /* Forward function declarations. */
83 static bool arm_const_not_ok_for_debug_p (rtx);
84 static bool arm_needs_doubleword_align (machine_mode, const_tree);
85 static int arm_compute_static_chain_stack_bytes (void);
86 static arm_stack_offsets *arm_get_frame_offsets (void);
87 static void arm_add_gc_roots (void);
88 static int arm_gen_constant (enum rtx_code, machine_mode, rtx,
89 unsigned HOST_WIDE_INT, rtx, rtx, int, int);
90 static unsigned bit_count (unsigned long);
91 static unsigned bitmap_popcount (const sbitmap);
92 static int arm_address_register_rtx_p (rtx, int);
93 static int arm_legitimate_index_p (machine_mode, rtx, RTX_CODE, int);
94 static bool is_called_in_ARM_mode (tree);
95 static int thumb2_legitimate_index_p (machine_mode, rtx, int);
96 static int thumb1_base_register_rtx_p (rtx, machine_mode, int);
97 static rtx arm_legitimize_address (rtx, rtx, machine_mode);
98 static reg_class_t arm_preferred_reload_class (rtx, reg_class_t);
99 static rtx thumb_legitimize_address (rtx, rtx, machine_mode);
100 inline static int thumb1_index_register_rtx_p (rtx, int);
101 static int thumb_far_jump_used_p (void);
102 static bool thumb_force_lr_save (void);
103 static unsigned arm_size_return_regs (void);
104 static bool arm_assemble_integer (rtx, unsigned int, int);
105 static void arm_print_operand (FILE *, rtx, int);
106 static void arm_print_operand_address (FILE *, machine_mode, rtx);
107 static bool arm_print_operand_punct_valid_p (unsigned char code);
108 static const char *fp_const_from_val (REAL_VALUE_TYPE *);
109 static arm_cc get_arm_condition_code (rtx);
110 static const char *output_multi_immediate (rtx *, const char *, const char *,
111 int, HOST_WIDE_INT);
112 static const char *shift_op (rtx, HOST_WIDE_INT *);
113 static struct machine_function *arm_init_machine_status (void);
114 static void thumb_exit (FILE *, int);
115 static HOST_WIDE_INT get_jump_table_size (rtx_jump_table_data *);
116 static Mnode *move_minipool_fix_forward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
117 static Mnode *add_minipool_forward_ref (Mfix *);
118 static Mnode *move_minipool_fix_backward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
119 static Mnode *add_minipool_backward_ref (Mfix *);
120 static void assign_minipool_offsets (Mfix *);
121 static void arm_print_value (FILE *, rtx);
122 static void dump_minipool (rtx_insn *);
123 static int arm_barrier_cost (rtx_insn *);
124 static Mfix *create_fix_barrier (Mfix *, HOST_WIDE_INT);
125 static void push_minipool_barrier (rtx_insn *, HOST_WIDE_INT);
126 static void push_minipool_fix (rtx_insn *, HOST_WIDE_INT, rtx *,
127 machine_mode, rtx);
128 static void arm_reorg (void);
129 static void note_invalid_constants (rtx_insn *, HOST_WIDE_INT, int);
130 static unsigned long arm_compute_save_reg0_reg12_mask (void);
131 static unsigned long arm_compute_save_reg_mask (void);
132 static unsigned long arm_isr_value (tree);
133 static unsigned long arm_compute_func_type (void);
134 static tree arm_handle_fndecl_attribute (tree *, tree, tree, int, bool *);
135 static tree arm_handle_pcs_attribute (tree *, tree, tree, int, bool *);
136 static tree arm_handle_isr_attribute (tree *, tree, tree, int, bool *);
137 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
138 static tree arm_handle_notshared_attribute (tree *, tree, tree, int, bool *);
139 #endif
140 static tree arm_handle_cmse_nonsecure_entry (tree *, tree, tree, int, bool *);
141 static tree arm_handle_cmse_nonsecure_call (tree *, tree, tree, int, bool *);
142 static void arm_output_function_epilogue (FILE *, HOST_WIDE_INT);
143 static void arm_output_function_prologue (FILE *, HOST_WIDE_INT);
144 static int arm_comp_type_attributes (const_tree, const_tree);
145 static void arm_set_default_type_attributes (tree);
146 static int arm_adjust_cost (rtx_insn *, int, rtx_insn *, int, unsigned int);
147 static int arm_sched_reorder (FILE *, int, rtx_insn **, int *, int);
148 static int optimal_immediate_sequence (enum rtx_code code,
149 unsigned HOST_WIDE_INT val,
150 struct four_ints *return_sequence);
151 static int optimal_immediate_sequence_1 (enum rtx_code code,
152 unsigned HOST_WIDE_INT val,
153 struct four_ints *return_sequence,
154 int i);
155 static int arm_get_strip_length (int);
156 static bool arm_function_ok_for_sibcall (tree, tree);
157 static machine_mode arm_promote_function_mode (const_tree,
158 machine_mode, int *,
159 const_tree, int);
160 static bool arm_return_in_memory (const_tree, const_tree);
161 static rtx arm_function_value (const_tree, const_tree, bool);
162 static rtx arm_libcall_value_1 (machine_mode);
163 static rtx arm_libcall_value (machine_mode, const_rtx);
164 static bool arm_function_value_regno_p (const unsigned int);
165 static void arm_internal_label (FILE *, const char *, unsigned long);
166 static void arm_output_mi_thunk (FILE *, tree, HOST_WIDE_INT, HOST_WIDE_INT,
167 tree);
168 static bool arm_have_conditional_execution (void);
169 static bool arm_cannot_force_const_mem (machine_mode, rtx);
170 static bool arm_legitimate_constant_p (machine_mode, rtx);
171 static bool arm_rtx_costs (rtx, machine_mode, int, int, int *, bool);
172 static int arm_address_cost (rtx, machine_mode, addr_space_t, bool);
173 static int arm_register_move_cost (machine_mode, reg_class_t, reg_class_t);
174 static int arm_memory_move_cost (machine_mode, reg_class_t, bool);
175 static void emit_constant_insn (rtx cond, rtx pattern);
176 static rtx_insn *emit_set_insn (rtx, rtx);
177 static rtx emit_multi_reg_push (unsigned long, unsigned long);
178 static int arm_arg_partial_bytes (cumulative_args_t, machine_mode,
179 tree, bool);
180 static rtx arm_function_arg (cumulative_args_t, machine_mode,
181 const_tree, bool);
182 static void arm_function_arg_advance (cumulative_args_t, machine_mode,
183 const_tree, bool);
184 static unsigned int arm_function_arg_boundary (machine_mode, const_tree);
185 static rtx aapcs_allocate_return_reg (machine_mode, const_tree,
186 const_tree);
187 static rtx aapcs_libcall_value (machine_mode);
188 static int aapcs_select_return_coproc (const_tree, const_tree);
190 #ifdef OBJECT_FORMAT_ELF
191 static void arm_elf_asm_constructor (rtx, int) ATTRIBUTE_UNUSED;
192 static void arm_elf_asm_destructor (rtx, int) ATTRIBUTE_UNUSED;
193 #endif
194 #ifndef ARM_PE
195 static void arm_encode_section_info (tree, rtx, int);
196 #endif
198 static void arm_file_end (void);
199 static void arm_file_start (void);
200 static void arm_insert_attributes (tree, tree *);
202 static void arm_setup_incoming_varargs (cumulative_args_t, machine_mode,
203 tree, int *, int);
204 static bool arm_pass_by_reference (cumulative_args_t,
205 machine_mode, const_tree, bool);
206 static bool arm_promote_prototypes (const_tree);
207 static bool arm_default_short_enums (void);
208 static bool arm_align_anon_bitfield (void);
209 static bool arm_return_in_msb (const_tree);
210 static bool arm_must_pass_in_stack (machine_mode, const_tree);
211 static bool arm_return_in_memory (const_tree, const_tree);
212 #if ARM_UNWIND_INFO
213 static void arm_unwind_emit (FILE *, rtx_insn *);
214 static bool arm_output_ttype (rtx);
215 static void arm_asm_emit_except_personality (rtx);
216 #endif
217 static void arm_asm_init_sections (void);
218 static rtx arm_dwarf_register_span (rtx);
220 static tree arm_cxx_guard_type (void);
221 static bool arm_cxx_guard_mask_bit (void);
222 static tree arm_get_cookie_size (tree);
223 static bool arm_cookie_has_size (void);
224 static bool arm_cxx_cdtor_returns_this (void);
225 static bool arm_cxx_key_method_may_be_inline (void);
226 static void arm_cxx_determine_class_data_visibility (tree);
227 static bool arm_cxx_class_data_always_comdat (void);
228 static bool arm_cxx_use_aeabi_atexit (void);
229 static void arm_init_libfuncs (void);
230 static tree arm_build_builtin_va_list (void);
231 static void arm_expand_builtin_va_start (tree, rtx);
232 static tree arm_gimplify_va_arg_expr (tree, tree, gimple_seq *, gimple_seq *);
233 static void arm_option_override (void);
234 static void arm_option_restore (struct gcc_options *,
235 struct cl_target_option *);
236 static void arm_override_options_after_change (void);
237 static void arm_option_print (FILE *, int, struct cl_target_option *);
238 static void arm_set_current_function (tree);
239 static bool arm_can_inline_p (tree, tree);
240 static void arm_relayout_function (tree);
241 static bool arm_valid_target_attribute_p (tree, tree, tree, int);
242 static unsigned HOST_WIDE_INT arm_shift_truncation_mask (machine_mode);
243 static bool arm_macro_fusion_p (void);
244 static bool arm_cannot_copy_insn_p (rtx_insn *);
245 static int arm_issue_rate (void);
246 static int arm_first_cycle_multipass_dfa_lookahead (void);
247 static int arm_first_cycle_multipass_dfa_lookahead_guard (rtx_insn *, int);
248 static void arm_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
249 static bool arm_output_addr_const_extra (FILE *, rtx);
250 static bool arm_allocate_stack_slots_for_args (void);
251 static bool arm_warn_func_return (tree);
252 static tree arm_promoted_type (const_tree t);
253 static bool arm_scalar_mode_supported_p (machine_mode);
254 static bool arm_frame_pointer_required (void);
255 static bool arm_can_eliminate (const int, const int);
256 static void arm_asm_trampoline_template (FILE *);
257 static void arm_trampoline_init (rtx, tree, rtx);
258 static rtx arm_trampoline_adjust_address (rtx);
259 static rtx_insn *arm_pic_static_addr (rtx orig, rtx reg);
260 static bool cortex_a9_sched_adjust_cost (rtx_insn *, int, rtx_insn *, int *);
261 static bool xscale_sched_adjust_cost (rtx_insn *, int, rtx_insn *, int *);
262 static bool fa726te_sched_adjust_cost (rtx_insn *, int, rtx_insn *, int *);
263 static bool arm_array_mode_supported_p (machine_mode,
264 unsigned HOST_WIDE_INT);
265 static machine_mode arm_preferred_simd_mode (machine_mode);
266 static bool arm_class_likely_spilled_p (reg_class_t);
267 static HOST_WIDE_INT arm_vector_alignment (const_tree type);
268 static bool arm_vector_alignment_reachable (const_tree type, bool is_packed);
269 static bool arm_builtin_support_vector_misalignment (machine_mode mode,
270 const_tree type,
271 int misalignment,
272 bool is_packed);
273 static void arm_conditional_register_usage (void);
274 static enum flt_eval_method arm_excess_precision (enum excess_precision_type);
275 static reg_class_t arm_preferred_rename_class (reg_class_t rclass);
276 static unsigned int arm_autovectorize_vector_sizes (void);
277 static int arm_default_branch_cost (bool, bool);
278 static int arm_cortex_a5_branch_cost (bool, bool);
279 static int arm_cortex_m_branch_cost (bool, bool);
280 static int arm_cortex_m7_branch_cost (bool, bool);
282 static bool arm_vectorize_vec_perm_const_ok (machine_mode vmode,
283 const unsigned char *sel);
285 static bool aarch_macro_fusion_pair_p (rtx_insn*, rtx_insn*);
287 static int arm_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
288 tree vectype,
289 int misalign ATTRIBUTE_UNUSED);
290 static unsigned arm_add_stmt_cost (void *data, int count,
291 enum vect_cost_for_stmt kind,
292 struct _stmt_vec_info *stmt_info,
293 int misalign,
294 enum vect_cost_model_location where);
296 static void arm_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
297 bool op0_preserve_value);
298 static unsigned HOST_WIDE_INT arm_asan_shadow_offset (void);
300 static void arm_sched_fusion_priority (rtx_insn *, int, int *, int*);
301 static bool arm_can_output_mi_thunk (const_tree, HOST_WIDE_INT, HOST_WIDE_INT,
302 const_tree);
303 static section *arm_function_section (tree, enum node_frequency, bool, bool);
304 static bool arm_asm_elf_flags_numeric (unsigned int flags, unsigned int *num);
305 static unsigned int arm_elf_section_type_flags (tree decl, const char *name,
306 int reloc);
307 static void arm_expand_divmod_libfunc (rtx, machine_mode, rtx, rtx, rtx *, rtx *);
308 static machine_mode arm_floatn_mode (int, bool);
310 /* Table of machine attributes. */
311 static const struct attribute_spec arm_attribute_table[] =
313 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
314 affects_type_identity } */
315 /* Function calls made to this symbol must be done indirectly, because
316 it may lie outside of the 26 bit addressing range of a normal function
317 call. */
318 { "long_call", 0, 0, false, true, true, NULL, false },
319 /* Whereas these functions are always known to reside within the 26 bit
320 addressing range. */
321 { "short_call", 0, 0, false, true, true, NULL, false },
322 /* Specify the procedure call conventions for a function. */
323 { "pcs", 1, 1, false, true, true, arm_handle_pcs_attribute,
324 false },
325 /* Interrupt Service Routines have special prologue and epilogue requirements. */
326 { "isr", 0, 1, false, false, false, arm_handle_isr_attribute,
327 false },
328 { "interrupt", 0, 1, false, false, false, arm_handle_isr_attribute,
329 false },
330 { "naked", 0, 0, true, false, false, arm_handle_fndecl_attribute,
331 false },
332 #ifdef ARM_PE
333 /* ARM/PE has three new attributes:
334 interfacearm - ?
335 dllexport - for exporting a function/variable that will live in a dll
336 dllimport - for importing a function/variable from a dll
338 Microsoft allows multiple declspecs in one __declspec, separating
339 them with spaces. We do NOT support this. Instead, use __declspec
340 multiple times.
342 { "dllimport", 0, 0, true, false, false, NULL, false },
343 { "dllexport", 0, 0, true, false, false, NULL, false },
344 { "interfacearm", 0, 0, true, false, false, arm_handle_fndecl_attribute,
345 false },
346 #elif TARGET_DLLIMPORT_DECL_ATTRIBUTES
347 { "dllimport", 0, 0, false, false, false, handle_dll_attribute, false },
348 { "dllexport", 0, 0, false, false, false, handle_dll_attribute, false },
349 { "notshared", 0, 0, false, true, false, arm_handle_notshared_attribute,
350 false },
351 #endif
352 /* ARMv8-M Security Extensions support. */
353 { "cmse_nonsecure_entry", 0, 0, true, false, false,
354 arm_handle_cmse_nonsecure_entry, false },
355 { "cmse_nonsecure_call", 0, 0, true, false, false,
356 arm_handle_cmse_nonsecure_call, true },
357 { NULL, 0, 0, false, false, false, NULL, false }
360 /* Initialize the GCC target structure. */
361 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
362 #undef TARGET_MERGE_DECL_ATTRIBUTES
363 #define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
364 #endif
366 #undef TARGET_LEGITIMIZE_ADDRESS
367 #define TARGET_LEGITIMIZE_ADDRESS arm_legitimize_address
369 #undef TARGET_ATTRIBUTE_TABLE
370 #define TARGET_ATTRIBUTE_TABLE arm_attribute_table
372 #undef TARGET_INSERT_ATTRIBUTES
373 #define TARGET_INSERT_ATTRIBUTES arm_insert_attributes
375 #undef TARGET_ASM_FILE_START
376 #define TARGET_ASM_FILE_START arm_file_start
377 #undef TARGET_ASM_FILE_END
378 #define TARGET_ASM_FILE_END arm_file_end
380 #undef TARGET_ASM_ALIGNED_SI_OP
381 #define TARGET_ASM_ALIGNED_SI_OP NULL
382 #undef TARGET_ASM_INTEGER
383 #define TARGET_ASM_INTEGER arm_assemble_integer
385 #undef TARGET_PRINT_OPERAND
386 #define TARGET_PRINT_OPERAND arm_print_operand
387 #undef TARGET_PRINT_OPERAND_ADDRESS
388 #define TARGET_PRINT_OPERAND_ADDRESS arm_print_operand_address
389 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
390 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P arm_print_operand_punct_valid_p
392 #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
393 #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA arm_output_addr_const_extra
395 #undef TARGET_ASM_FUNCTION_PROLOGUE
396 #define TARGET_ASM_FUNCTION_PROLOGUE arm_output_function_prologue
398 #undef TARGET_ASM_FUNCTION_EPILOGUE
399 #define TARGET_ASM_FUNCTION_EPILOGUE arm_output_function_epilogue
401 #undef TARGET_CAN_INLINE_P
402 #define TARGET_CAN_INLINE_P arm_can_inline_p
404 #undef TARGET_RELAYOUT_FUNCTION
405 #define TARGET_RELAYOUT_FUNCTION arm_relayout_function
407 #undef TARGET_OPTION_OVERRIDE
408 #define TARGET_OPTION_OVERRIDE arm_option_override
410 #undef TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE
411 #define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE arm_override_options_after_change
413 #undef TARGET_OPTION_RESTORE
414 #define TARGET_OPTION_RESTORE arm_option_restore
416 #undef TARGET_OPTION_PRINT
417 #define TARGET_OPTION_PRINT arm_option_print
419 #undef TARGET_COMP_TYPE_ATTRIBUTES
420 #define TARGET_COMP_TYPE_ATTRIBUTES arm_comp_type_attributes
422 #undef TARGET_SCHED_MACRO_FUSION_P
423 #define TARGET_SCHED_MACRO_FUSION_P arm_macro_fusion_p
425 #undef TARGET_SCHED_MACRO_FUSION_PAIR_P
426 #define TARGET_SCHED_MACRO_FUSION_PAIR_P aarch_macro_fusion_pair_p
428 #undef TARGET_SET_DEFAULT_TYPE_ATTRIBUTES
429 #define TARGET_SET_DEFAULT_TYPE_ATTRIBUTES arm_set_default_type_attributes
431 #undef TARGET_SCHED_ADJUST_COST
432 #define TARGET_SCHED_ADJUST_COST arm_adjust_cost
434 #undef TARGET_SET_CURRENT_FUNCTION
435 #define TARGET_SET_CURRENT_FUNCTION arm_set_current_function
437 #undef TARGET_OPTION_VALID_ATTRIBUTE_P
438 #define TARGET_OPTION_VALID_ATTRIBUTE_P arm_valid_target_attribute_p
440 #undef TARGET_SCHED_REORDER
441 #define TARGET_SCHED_REORDER arm_sched_reorder
443 #undef TARGET_REGISTER_MOVE_COST
444 #define TARGET_REGISTER_MOVE_COST arm_register_move_cost
446 #undef TARGET_MEMORY_MOVE_COST
447 #define TARGET_MEMORY_MOVE_COST arm_memory_move_cost
449 #undef TARGET_ENCODE_SECTION_INFO
450 #ifdef ARM_PE
451 #define TARGET_ENCODE_SECTION_INFO arm_pe_encode_section_info
452 #else
453 #define TARGET_ENCODE_SECTION_INFO arm_encode_section_info
454 #endif
456 #undef TARGET_STRIP_NAME_ENCODING
457 #define TARGET_STRIP_NAME_ENCODING arm_strip_name_encoding
459 #undef TARGET_ASM_INTERNAL_LABEL
460 #define TARGET_ASM_INTERNAL_LABEL arm_internal_label
462 #undef TARGET_FLOATN_MODE
463 #define TARGET_FLOATN_MODE arm_floatn_mode
465 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
466 #define TARGET_FUNCTION_OK_FOR_SIBCALL arm_function_ok_for_sibcall
468 #undef TARGET_FUNCTION_VALUE
469 #define TARGET_FUNCTION_VALUE arm_function_value
471 #undef TARGET_LIBCALL_VALUE
472 #define TARGET_LIBCALL_VALUE arm_libcall_value
474 #undef TARGET_FUNCTION_VALUE_REGNO_P
475 #define TARGET_FUNCTION_VALUE_REGNO_P arm_function_value_regno_p
477 #undef TARGET_ASM_OUTPUT_MI_THUNK
478 #define TARGET_ASM_OUTPUT_MI_THUNK arm_output_mi_thunk
479 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
480 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK arm_can_output_mi_thunk
482 #undef TARGET_RTX_COSTS
483 #define TARGET_RTX_COSTS arm_rtx_costs
484 #undef TARGET_ADDRESS_COST
485 #define TARGET_ADDRESS_COST arm_address_cost
487 #undef TARGET_SHIFT_TRUNCATION_MASK
488 #define TARGET_SHIFT_TRUNCATION_MASK arm_shift_truncation_mask
489 #undef TARGET_VECTOR_MODE_SUPPORTED_P
490 #define TARGET_VECTOR_MODE_SUPPORTED_P arm_vector_mode_supported_p
491 #undef TARGET_ARRAY_MODE_SUPPORTED_P
492 #define TARGET_ARRAY_MODE_SUPPORTED_P arm_array_mode_supported_p
493 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
494 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE arm_preferred_simd_mode
495 #undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
496 #define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
497 arm_autovectorize_vector_sizes
499 #undef TARGET_MACHINE_DEPENDENT_REORG
500 #define TARGET_MACHINE_DEPENDENT_REORG arm_reorg
502 #undef TARGET_INIT_BUILTINS
503 #define TARGET_INIT_BUILTINS arm_init_builtins
504 #undef TARGET_EXPAND_BUILTIN
505 #define TARGET_EXPAND_BUILTIN arm_expand_builtin
506 #undef TARGET_BUILTIN_DECL
507 #define TARGET_BUILTIN_DECL arm_builtin_decl
509 #undef TARGET_INIT_LIBFUNCS
510 #define TARGET_INIT_LIBFUNCS arm_init_libfuncs
512 #undef TARGET_PROMOTE_FUNCTION_MODE
513 #define TARGET_PROMOTE_FUNCTION_MODE arm_promote_function_mode
514 #undef TARGET_PROMOTE_PROTOTYPES
515 #define TARGET_PROMOTE_PROTOTYPES arm_promote_prototypes
516 #undef TARGET_PASS_BY_REFERENCE
517 #define TARGET_PASS_BY_REFERENCE arm_pass_by_reference
518 #undef TARGET_ARG_PARTIAL_BYTES
519 #define TARGET_ARG_PARTIAL_BYTES arm_arg_partial_bytes
520 #undef TARGET_FUNCTION_ARG
521 #define TARGET_FUNCTION_ARG arm_function_arg
522 #undef TARGET_FUNCTION_ARG_ADVANCE
523 #define TARGET_FUNCTION_ARG_ADVANCE arm_function_arg_advance
524 #undef TARGET_FUNCTION_ARG_BOUNDARY
525 #define TARGET_FUNCTION_ARG_BOUNDARY arm_function_arg_boundary
527 #undef TARGET_SETUP_INCOMING_VARARGS
528 #define TARGET_SETUP_INCOMING_VARARGS arm_setup_incoming_varargs
530 #undef TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS
531 #define TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS arm_allocate_stack_slots_for_args
533 #undef TARGET_ASM_TRAMPOLINE_TEMPLATE
534 #define TARGET_ASM_TRAMPOLINE_TEMPLATE arm_asm_trampoline_template
535 #undef TARGET_TRAMPOLINE_INIT
536 #define TARGET_TRAMPOLINE_INIT arm_trampoline_init
537 #undef TARGET_TRAMPOLINE_ADJUST_ADDRESS
538 #define TARGET_TRAMPOLINE_ADJUST_ADDRESS arm_trampoline_adjust_address
540 #undef TARGET_WARN_FUNC_RETURN
541 #define TARGET_WARN_FUNC_RETURN arm_warn_func_return
543 #undef TARGET_DEFAULT_SHORT_ENUMS
544 #define TARGET_DEFAULT_SHORT_ENUMS arm_default_short_enums
546 #undef TARGET_ALIGN_ANON_BITFIELD
547 #define TARGET_ALIGN_ANON_BITFIELD arm_align_anon_bitfield
549 #undef TARGET_NARROW_VOLATILE_BITFIELD
550 #define TARGET_NARROW_VOLATILE_BITFIELD hook_bool_void_false
552 #undef TARGET_CXX_GUARD_TYPE
553 #define TARGET_CXX_GUARD_TYPE arm_cxx_guard_type
555 #undef TARGET_CXX_GUARD_MASK_BIT
556 #define TARGET_CXX_GUARD_MASK_BIT arm_cxx_guard_mask_bit
558 #undef TARGET_CXX_GET_COOKIE_SIZE
559 #define TARGET_CXX_GET_COOKIE_SIZE arm_get_cookie_size
561 #undef TARGET_CXX_COOKIE_HAS_SIZE
562 #define TARGET_CXX_COOKIE_HAS_SIZE arm_cookie_has_size
564 #undef TARGET_CXX_CDTOR_RETURNS_THIS
565 #define TARGET_CXX_CDTOR_RETURNS_THIS arm_cxx_cdtor_returns_this
567 #undef TARGET_CXX_KEY_METHOD_MAY_BE_INLINE
568 #define TARGET_CXX_KEY_METHOD_MAY_BE_INLINE arm_cxx_key_method_may_be_inline
570 #undef TARGET_CXX_USE_AEABI_ATEXIT
571 #define TARGET_CXX_USE_AEABI_ATEXIT arm_cxx_use_aeabi_atexit
573 #undef TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY
574 #define TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY \
575 arm_cxx_determine_class_data_visibility
577 #undef TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT
578 #define TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT arm_cxx_class_data_always_comdat
580 #undef TARGET_RETURN_IN_MSB
581 #define TARGET_RETURN_IN_MSB arm_return_in_msb
583 #undef TARGET_RETURN_IN_MEMORY
584 #define TARGET_RETURN_IN_MEMORY arm_return_in_memory
586 #undef TARGET_MUST_PASS_IN_STACK
587 #define TARGET_MUST_PASS_IN_STACK arm_must_pass_in_stack
589 #if ARM_UNWIND_INFO
590 #undef TARGET_ASM_UNWIND_EMIT
591 #define TARGET_ASM_UNWIND_EMIT arm_unwind_emit
593 /* EABI unwinding tables use a different format for the typeinfo tables. */
594 #undef TARGET_ASM_TTYPE
595 #define TARGET_ASM_TTYPE arm_output_ttype
597 #undef TARGET_ARM_EABI_UNWINDER
598 #define TARGET_ARM_EABI_UNWINDER true
600 #undef TARGET_ASM_EMIT_EXCEPT_PERSONALITY
601 #define TARGET_ASM_EMIT_EXCEPT_PERSONALITY arm_asm_emit_except_personality
603 #endif /* ARM_UNWIND_INFO */
605 #undef TARGET_ASM_INIT_SECTIONS
606 #define TARGET_ASM_INIT_SECTIONS arm_asm_init_sections
608 #undef TARGET_DWARF_REGISTER_SPAN
609 #define TARGET_DWARF_REGISTER_SPAN arm_dwarf_register_span
611 #undef TARGET_CANNOT_COPY_INSN_P
612 #define TARGET_CANNOT_COPY_INSN_P arm_cannot_copy_insn_p
614 #ifdef HAVE_AS_TLS
615 #undef TARGET_HAVE_TLS
616 #define TARGET_HAVE_TLS true
617 #endif
619 #undef TARGET_HAVE_CONDITIONAL_EXECUTION
620 #define TARGET_HAVE_CONDITIONAL_EXECUTION arm_have_conditional_execution
622 #undef TARGET_LEGITIMATE_CONSTANT_P
623 #define TARGET_LEGITIMATE_CONSTANT_P arm_legitimate_constant_p
625 #undef TARGET_CANNOT_FORCE_CONST_MEM
626 #define TARGET_CANNOT_FORCE_CONST_MEM arm_cannot_force_const_mem
628 #undef TARGET_MAX_ANCHOR_OFFSET
629 #define TARGET_MAX_ANCHOR_OFFSET 4095
631 /* The minimum is set such that the total size of the block
632 for a particular anchor is -4088 + 1 + 4095 bytes, which is
633 divisible by eight, ensuring natural spacing of anchors. */
634 #undef TARGET_MIN_ANCHOR_OFFSET
635 #define TARGET_MIN_ANCHOR_OFFSET -4088
637 #undef TARGET_SCHED_ISSUE_RATE
638 #define TARGET_SCHED_ISSUE_RATE arm_issue_rate
640 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
641 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
642 arm_first_cycle_multipass_dfa_lookahead
644 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD
645 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD \
646 arm_first_cycle_multipass_dfa_lookahead_guard
648 #undef TARGET_MANGLE_TYPE
649 #define TARGET_MANGLE_TYPE arm_mangle_type
651 #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
652 #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV arm_atomic_assign_expand_fenv
654 #undef TARGET_BUILD_BUILTIN_VA_LIST
655 #define TARGET_BUILD_BUILTIN_VA_LIST arm_build_builtin_va_list
656 #undef TARGET_EXPAND_BUILTIN_VA_START
657 #define TARGET_EXPAND_BUILTIN_VA_START arm_expand_builtin_va_start
658 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
659 #define TARGET_GIMPLIFY_VA_ARG_EXPR arm_gimplify_va_arg_expr
661 #ifdef HAVE_AS_TLS
662 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
663 #define TARGET_ASM_OUTPUT_DWARF_DTPREL arm_output_dwarf_dtprel
664 #endif
666 #undef TARGET_LEGITIMATE_ADDRESS_P
667 #define TARGET_LEGITIMATE_ADDRESS_P arm_legitimate_address_p
669 #undef TARGET_PREFERRED_RELOAD_CLASS
670 #define TARGET_PREFERRED_RELOAD_CLASS arm_preferred_reload_class
672 #undef TARGET_PROMOTED_TYPE
673 #define TARGET_PROMOTED_TYPE arm_promoted_type
675 #undef TARGET_SCALAR_MODE_SUPPORTED_P
676 #define TARGET_SCALAR_MODE_SUPPORTED_P arm_scalar_mode_supported_p
678 #undef TARGET_FRAME_POINTER_REQUIRED
679 #define TARGET_FRAME_POINTER_REQUIRED arm_frame_pointer_required
681 #undef TARGET_CAN_ELIMINATE
682 #define TARGET_CAN_ELIMINATE arm_can_eliminate
684 #undef TARGET_CONDITIONAL_REGISTER_USAGE
685 #define TARGET_CONDITIONAL_REGISTER_USAGE arm_conditional_register_usage
687 #undef TARGET_CLASS_LIKELY_SPILLED_P
688 #define TARGET_CLASS_LIKELY_SPILLED_P arm_class_likely_spilled_p
690 #undef TARGET_VECTORIZE_BUILTINS
691 #define TARGET_VECTORIZE_BUILTINS
693 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
694 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
695 arm_builtin_vectorized_function
697 #undef TARGET_VECTOR_ALIGNMENT
698 #define TARGET_VECTOR_ALIGNMENT arm_vector_alignment
700 #undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
701 #define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE \
702 arm_vector_alignment_reachable
704 #undef TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT
705 #define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT \
706 arm_builtin_support_vector_misalignment
708 #undef TARGET_PREFERRED_RENAME_CLASS
709 #define TARGET_PREFERRED_RENAME_CLASS \
710 arm_preferred_rename_class
712 #undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
713 #define TARGET_VECTORIZE_VEC_PERM_CONST_OK \
714 arm_vectorize_vec_perm_const_ok
716 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
717 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
718 arm_builtin_vectorization_cost
719 #undef TARGET_VECTORIZE_ADD_STMT_COST
720 #define TARGET_VECTORIZE_ADD_STMT_COST arm_add_stmt_cost
722 #undef TARGET_CANONICALIZE_COMPARISON
723 #define TARGET_CANONICALIZE_COMPARISON \
724 arm_canonicalize_comparison
726 #undef TARGET_ASAN_SHADOW_OFFSET
727 #define TARGET_ASAN_SHADOW_OFFSET arm_asan_shadow_offset
729 #undef MAX_INSN_PER_IT_BLOCK
730 #define MAX_INSN_PER_IT_BLOCK (arm_restrict_it ? 1 : 4)
732 #undef TARGET_CAN_USE_DOLOOP_P
733 #define TARGET_CAN_USE_DOLOOP_P can_use_doloop_if_innermost
735 #undef TARGET_CONST_NOT_OK_FOR_DEBUG_P
736 #define TARGET_CONST_NOT_OK_FOR_DEBUG_P arm_const_not_ok_for_debug_p
738 #undef TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS
739 #define TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS true
741 #undef TARGET_SCHED_FUSION_PRIORITY
742 #define TARGET_SCHED_FUSION_PRIORITY arm_sched_fusion_priority
744 #undef TARGET_ASM_FUNCTION_SECTION
745 #define TARGET_ASM_FUNCTION_SECTION arm_function_section
747 #undef TARGET_ASM_ELF_FLAGS_NUMERIC
748 #define TARGET_ASM_ELF_FLAGS_NUMERIC arm_asm_elf_flags_numeric
750 #undef TARGET_SECTION_TYPE_FLAGS
751 #define TARGET_SECTION_TYPE_FLAGS arm_elf_section_type_flags
753 #undef TARGET_EXPAND_DIVMOD_LIBFUNC
754 #define TARGET_EXPAND_DIVMOD_LIBFUNC arm_expand_divmod_libfunc
756 #undef TARGET_C_EXCESS_PRECISION
757 #define TARGET_C_EXCESS_PRECISION arm_excess_precision
759 struct gcc_target targetm = TARGET_INITIALIZER;
761 /* Obstack for minipool constant handling. */
762 static struct obstack minipool_obstack;
763 static char * minipool_startobj;
765 /* The maximum number of insns skipped which
766 will be conditionalised if possible. */
767 static int max_insns_skipped = 5;
769 extern FILE * asm_out_file;
771 /* True if we are currently building a constant table. */
772 int making_const_table;
774 /* The processor for which instructions should be scheduled. */
775 enum processor_type arm_tune = TARGET_CPU_arm_none;
777 /* The current tuning set. */
778 const struct tune_params *current_tune;
780 /* Which floating point hardware to schedule for. */
781 int arm_fpu_attr;
783 /* Used for Thumb call_via trampolines. */
784 rtx thumb_call_via_label[14];
785 static int thumb_call_reg_needed;
787 /* The bits in this mask specify which instruction scheduling options should
788 be used. */
789 unsigned int tune_flags = 0;
791 /* The highest ARM architecture version supported by the
792 target. */
793 enum base_architecture arm_base_arch = BASE_ARCH_0;
795 /* Active target architecture and tuning. */
797 struct arm_build_target arm_active_target;
799 /* The following are used in the arm.md file as equivalents to bits
800 in the above two flag variables. */
802 /* Nonzero if this chip supports the ARM Architecture 3M extensions. */
803 int arm_arch3m = 0;
805 /* Nonzero if this chip supports the ARM Architecture 4 extensions. */
806 int arm_arch4 = 0;
808 /* Nonzero if this chip supports the ARM Architecture 4t extensions. */
809 int arm_arch4t = 0;
811 /* Nonzero if this chip supports the ARM Architecture 5 extensions. */
812 int arm_arch5 = 0;
814 /* Nonzero if this chip supports the ARM Architecture 5E extensions. */
815 int arm_arch5e = 0;
817 /* Nonzero if this chip supports the ARM Architecture 5TE extensions. */
818 int arm_arch5te = 0;
820 /* Nonzero if this chip supports the ARM Architecture 6 extensions. */
821 int arm_arch6 = 0;
823 /* Nonzero if this chip supports the ARM 6K extensions. */
824 int arm_arch6k = 0;
826 /* Nonzero if this chip supports the ARM 6KZ extensions. */
827 int arm_arch6kz = 0;
829 /* Nonzero if instructions present in ARMv6-M can be used. */
830 int arm_arch6m = 0;
832 /* Nonzero if this chip supports the ARM 7 extensions. */
833 int arm_arch7 = 0;
835 /* Nonzero if this chip supports the ARM 7ve extensions. */
836 int arm_arch7ve = 0;
838 /* Nonzero if instructions not present in the 'M' profile can be used. */
839 int arm_arch_notm = 0;
841 /* Nonzero if instructions present in ARMv7E-M can be used. */
842 int arm_arch7em = 0;
844 /* Nonzero if instructions present in ARMv8 can be used. */
845 int arm_arch8 = 0;
847 /* Nonzero if this chip supports the ARMv8.1 extensions. */
848 int arm_arch8_1 = 0;
850 /* Nonzero if this chip supports the ARM Architecture 8.2 extensions. */
851 int arm_arch8_2 = 0;
853 /* Nonzero if this chip supports the FP16 instructions extension of ARM
854 Architecture 8.2. */
855 int arm_fp16_inst = 0;
857 /* Nonzero if this chip can benefit from load scheduling. */
858 int arm_ld_sched = 0;
860 /* Nonzero if this chip is a StrongARM. */
861 int arm_tune_strongarm = 0;
863 /* Nonzero if this chip supports Intel Wireless MMX technology. */
864 int arm_arch_iwmmxt = 0;
866 /* Nonzero if this chip supports Intel Wireless MMX2 technology. */
867 int arm_arch_iwmmxt2 = 0;
869 /* Nonzero if this chip is an XScale. */
870 int arm_arch_xscale = 0;
872 /* Nonzero if tuning for XScale */
873 int arm_tune_xscale = 0;
875 /* Nonzero if we want to tune for stores that access the write-buffer.
876 This typically means an ARM6 or ARM7 with MMU or MPU. */
877 int arm_tune_wbuf = 0;
879 /* Nonzero if tuning for Cortex-A9. */
880 int arm_tune_cortex_a9 = 0;
882 /* Nonzero if we should define __THUMB_INTERWORK__ in the
883 preprocessor.
884 XXX This is a bit of a hack, it's intended to help work around
885 problems in GLD which doesn't understand that armv5t code is
886 interworking clean. */
887 int arm_cpp_interwork = 0;
889 /* Nonzero if chip supports Thumb 1. */
890 int arm_arch_thumb1;
892 /* Nonzero if chip supports Thumb 2. */
893 int arm_arch_thumb2;
895 /* Nonzero if chip supports integer division instruction. */
896 int arm_arch_arm_hwdiv;
897 int arm_arch_thumb_hwdiv;
899 /* Nonzero if chip disallows volatile memory access in IT block. */
900 int arm_arch_no_volatile_ce;
902 /* Nonzero if we should use Neon to handle 64-bits operations rather
903 than core registers. */
904 int prefer_neon_for_64bits = 0;
906 /* Nonzero if we shouldn't use literal pools. */
907 bool arm_disable_literal_pool = false;
909 /* The register number to be used for the PIC offset register. */
910 unsigned arm_pic_register = INVALID_REGNUM;
912 enum arm_pcs arm_pcs_default;
914 /* For an explanation of these variables, see final_prescan_insn below. */
915 int arm_ccfsm_state;
916 /* arm_current_cc is also used for Thumb-2 cond_exec blocks. */
917 enum arm_cond_code arm_current_cc;
919 rtx arm_target_insn;
920 int arm_target_label;
921 /* The number of conditionally executed insns, including the current insn. */
922 int arm_condexec_count = 0;
923 /* A bitmask specifying the patterns for the IT block.
924 Zero means do not output an IT block before this insn. */
925 int arm_condexec_mask = 0;
926 /* The number of bits used in arm_condexec_mask. */
927 int arm_condexec_masklen = 0;
929 /* Nonzero if chip supports the ARMv8 CRC instructions. */
930 int arm_arch_crc = 0;
932 /* Nonzero if chip supports the ARMv8-M security extensions. */
933 int arm_arch_cmse = 0;
935 /* Nonzero if the core has a very small, high-latency, multiply unit. */
936 int arm_m_profile_small_mul = 0;
938 /* The condition codes of the ARM, and the inverse function. */
939 static const char * const arm_condition_codes[] =
941 "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
942 "hi", "ls", "ge", "lt", "gt", "le", "al", "nv"
945 /* The register numbers in sequence, for passing to arm_gen_load_multiple. */
946 int arm_regs_in_sequence[] =
948 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
951 #define ARM_LSL_NAME "lsl"
952 #define streq(string1, string2) (strcmp (string1, string2) == 0)
954 #define THUMB2_WORK_REGS (0xff & ~( (1 << THUMB_HARD_FRAME_POINTER_REGNUM) \
955 | (1 << SP_REGNUM) | (1 << PC_REGNUM) \
956 | (1 << PIC_OFFSET_TABLE_REGNUM)))
958 /* Initialization code. */
960 struct processors
962 const char *const name;
963 enum processor_type core;
964 unsigned int tune_flags;
965 const char *arch;
966 enum base_architecture base_arch;
967 enum isa_feature isa_bits[isa_num_bits];
968 const struct tune_params *const tune;
972 #define ARM_PREFETCH_NOT_BENEFICIAL { 0, -1, -1 }
973 #define ARM_PREFETCH_BENEFICIAL(num_slots,l1_size,l1_line_size) \
975 num_slots, \
976 l1_size, \
977 l1_line_size \
980 /* arm generic vectorizer costs. */
981 static const
982 struct cpu_vec_costs arm_default_vec_cost = {
983 1, /* scalar_stmt_cost. */
984 1, /* scalar load_cost. */
985 1, /* scalar_store_cost. */
986 1, /* vec_stmt_cost. */
987 1, /* vec_to_scalar_cost. */
988 1, /* scalar_to_vec_cost. */
989 1, /* vec_align_load_cost. */
990 1, /* vec_unalign_load_cost. */
991 1, /* vec_unalign_store_cost. */
992 1, /* vec_store_cost. */
993 3, /* cond_taken_branch_cost. */
994 1, /* cond_not_taken_branch_cost. */
997 /* Cost tables for AArch32 + AArch64 cores should go in aarch-cost-tables.h */
998 #include "aarch-cost-tables.h"
1002 const struct cpu_cost_table cortexa9_extra_costs =
1004 /* ALU */
1006 0, /* arith. */
1007 0, /* logical. */
1008 0, /* shift. */
1009 COSTS_N_INSNS (1), /* shift_reg. */
1010 COSTS_N_INSNS (1), /* arith_shift. */
1011 COSTS_N_INSNS (2), /* arith_shift_reg. */
1012 0, /* log_shift. */
1013 COSTS_N_INSNS (1), /* log_shift_reg. */
1014 COSTS_N_INSNS (1), /* extend. */
1015 COSTS_N_INSNS (2), /* extend_arith. */
1016 COSTS_N_INSNS (1), /* bfi. */
1017 COSTS_N_INSNS (1), /* bfx. */
1018 0, /* clz. */
1019 0, /* rev. */
1020 0, /* non_exec. */
1021 true /* non_exec_costs_exec. */
1024 /* MULT SImode */
1026 COSTS_N_INSNS (3), /* simple. */
1027 COSTS_N_INSNS (3), /* flag_setting. */
1028 COSTS_N_INSNS (2), /* extend. */
1029 COSTS_N_INSNS (3), /* add. */
1030 COSTS_N_INSNS (2), /* extend_add. */
1031 COSTS_N_INSNS (30) /* idiv. No HW div on Cortex A9. */
1033 /* MULT DImode */
1035 0, /* simple (N/A). */
1036 0, /* flag_setting (N/A). */
1037 COSTS_N_INSNS (4), /* extend. */
1038 0, /* add (N/A). */
1039 COSTS_N_INSNS (4), /* extend_add. */
1040 0 /* idiv (N/A). */
1043 /* LD/ST */
1045 COSTS_N_INSNS (2), /* load. */
1046 COSTS_N_INSNS (2), /* load_sign_extend. */
1047 COSTS_N_INSNS (2), /* ldrd. */
1048 COSTS_N_INSNS (2), /* ldm_1st. */
1049 1, /* ldm_regs_per_insn_1st. */
1050 2, /* ldm_regs_per_insn_subsequent. */
1051 COSTS_N_INSNS (5), /* loadf. */
1052 COSTS_N_INSNS (5), /* loadd. */
1053 COSTS_N_INSNS (1), /* load_unaligned. */
1054 COSTS_N_INSNS (2), /* store. */
1055 COSTS_N_INSNS (2), /* strd. */
1056 COSTS_N_INSNS (2), /* stm_1st. */
1057 1, /* stm_regs_per_insn_1st. */
1058 2, /* stm_regs_per_insn_subsequent. */
1059 COSTS_N_INSNS (1), /* storef. */
1060 COSTS_N_INSNS (1), /* stored. */
1061 COSTS_N_INSNS (1), /* store_unaligned. */
1062 COSTS_N_INSNS (1), /* loadv. */
1063 COSTS_N_INSNS (1) /* storev. */
1066 /* FP SFmode */
1068 COSTS_N_INSNS (14), /* div. */
1069 COSTS_N_INSNS (4), /* mult. */
1070 COSTS_N_INSNS (7), /* mult_addsub. */
1071 COSTS_N_INSNS (30), /* fma. */
1072 COSTS_N_INSNS (3), /* addsub. */
1073 COSTS_N_INSNS (1), /* fpconst. */
1074 COSTS_N_INSNS (1), /* neg. */
1075 COSTS_N_INSNS (3), /* compare. */
1076 COSTS_N_INSNS (3), /* widen. */
1077 COSTS_N_INSNS (3), /* narrow. */
1078 COSTS_N_INSNS (3), /* toint. */
1079 COSTS_N_INSNS (3), /* fromint. */
1080 COSTS_N_INSNS (3) /* roundint. */
1082 /* FP DFmode */
1084 COSTS_N_INSNS (24), /* div. */
1085 COSTS_N_INSNS (5), /* mult. */
1086 COSTS_N_INSNS (8), /* mult_addsub. */
1087 COSTS_N_INSNS (30), /* fma. */
1088 COSTS_N_INSNS (3), /* addsub. */
1089 COSTS_N_INSNS (1), /* fpconst. */
1090 COSTS_N_INSNS (1), /* neg. */
1091 COSTS_N_INSNS (3), /* compare. */
1092 COSTS_N_INSNS (3), /* widen. */
1093 COSTS_N_INSNS (3), /* narrow. */
1094 COSTS_N_INSNS (3), /* toint. */
1095 COSTS_N_INSNS (3), /* fromint. */
1096 COSTS_N_INSNS (3) /* roundint. */
1099 /* Vector */
1101 COSTS_N_INSNS (1) /* alu. */
1105 const struct cpu_cost_table cortexa8_extra_costs =
1107 /* ALU */
1109 0, /* arith. */
1110 0, /* logical. */
1111 COSTS_N_INSNS (1), /* shift. */
1112 0, /* shift_reg. */
1113 COSTS_N_INSNS (1), /* arith_shift. */
1114 0, /* arith_shift_reg. */
1115 COSTS_N_INSNS (1), /* log_shift. */
1116 0, /* log_shift_reg. */
1117 0, /* extend. */
1118 0, /* extend_arith. */
1119 0, /* bfi. */
1120 0, /* bfx. */
1121 0, /* clz. */
1122 0, /* rev. */
1123 0, /* non_exec. */
1124 true /* non_exec_costs_exec. */
1127 /* MULT SImode */
1129 COSTS_N_INSNS (1), /* simple. */
1130 COSTS_N_INSNS (1), /* flag_setting. */
1131 COSTS_N_INSNS (1), /* extend. */
1132 COSTS_N_INSNS (1), /* add. */
1133 COSTS_N_INSNS (1), /* extend_add. */
1134 COSTS_N_INSNS (30) /* idiv. No HW div on Cortex A8. */
1136 /* MULT DImode */
1138 0, /* simple (N/A). */
1139 0, /* flag_setting (N/A). */
1140 COSTS_N_INSNS (2), /* extend. */
1141 0, /* add (N/A). */
1142 COSTS_N_INSNS (2), /* extend_add. */
1143 0 /* idiv (N/A). */
1146 /* LD/ST */
1148 COSTS_N_INSNS (1), /* load. */
1149 COSTS_N_INSNS (1), /* load_sign_extend. */
1150 COSTS_N_INSNS (1), /* ldrd. */
1151 COSTS_N_INSNS (1), /* ldm_1st. */
1152 1, /* ldm_regs_per_insn_1st. */
1153 2, /* ldm_regs_per_insn_subsequent. */
1154 COSTS_N_INSNS (1), /* loadf. */
1155 COSTS_N_INSNS (1), /* loadd. */
1156 COSTS_N_INSNS (1), /* load_unaligned. */
1157 COSTS_N_INSNS (1), /* store. */
1158 COSTS_N_INSNS (1), /* strd. */
1159 COSTS_N_INSNS (1), /* stm_1st. */
1160 1, /* stm_regs_per_insn_1st. */
1161 2, /* stm_regs_per_insn_subsequent. */
1162 COSTS_N_INSNS (1), /* storef. */
1163 COSTS_N_INSNS (1), /* stored. */
1164 COSTS_N_INSNS (1), /* store_unaligned. */
1165 COSTS_N_INSNS (1), /* loadv. */
1166 COSTS_N_INSNS (1) /* storev. */
1169 /* FP SFmode */
1171 COSTS_N_INSNS (36), /* div. */
1172 COSTS_N_INSNS (11), /* mult. */
1173 COSTS_N_INSNS (20), /* mult_addsub. */
1174 COSTS_N_INSNS (30), /* fma. */
1175 COSTS_N_INSNS (9), /* addsub. */
1176 COSTS_N_INSNS (3), /* fpconst. */
1177 COSTS_N_INSNS (3), /* neg. */
1178 COSTS_N_INSNS (6), /* compare. */
1179 COSTS_N_INSNS (4), /* widen. */
1180 COSTS_N_INSNS (4), /* narrow. */
1181 COSTS_N_INSNS (8), /* toint. */
1182 COSTS_N_INSNS (8), /* fromint. */
1183 COSTS_N_INSNS (8) /* roundint. */
1185 /* FP DFmode */
1187 COSTS_N_INSNS (64), /* div. */
1188 COSTS_N_INSNS (16), /* mult. */
1189 COSTS_N_INSNS (25), /* mult_addsub. */
1190 COSTS_N_INSNS (30), /* fma. */
1191 COSTS_N_INSNS (9), /* addsub. */
1192 COSTS_N_INSNS (3), /* fpconst. */
1193 COSTS_N_INSNS (3), /* neg. */
1194 COSTS_N_INSNS (6), /* compare. */
1195 COSTS_N_INSNS (6), /* widen. */
1196 COSTS_N_INSNS (6), /* narrow. */
1197 COSTS_N_INSNS (8), /* toint. */
1198 COSTS_N_INSNS (8), /* fromint. */
1199 COSTS_N_INSNS (8) /* roundint. */
1202 /* Vector */
1204 COSTS_N_INSNS (1) /* alu. */
1208 const struct cpu_cost_table cortexa5_extra_costs =
1210 /* ALU */
1212 0, /* arith. */
1213 0, /* logical. */
1214 COSTS_N_INSNS (1), /* shift. */
1215 COSTS_N_INSNS (1), /* shift_reg. */
1216 COSTS_N_INSNS (1), /* arith_shift. */
1217 COSTS_N_INSNS (1), /* arith_shift_reg. */
1218 COSTS_N_INSNS (1), /* log_shift. */
1219 COSTS_N_INSNS (1), /* log_shift_reg. */
1220 COSTS_N_INSNS (1), /* extend. */
1221 COSTS_N_INSNS (1), /* extend_arith. */
1222 COSTS_N_INSNS (1), /* bfi. */
1223 COSTS_N_INSNS (1), /* bfx. */
1224 COSTS_N_INSNS (1), /* clz. */
1225 COSTS_N_INSNS (1), /* rev. */
1226 0, /* non_exec. */
1227 true /* non_exec_costs_exec. */
1231 /* MULT SImode */
1233 0, /* simple. */
1234 COSTS_N_INSNS (1), /* flag_setting. */
1235 COSTS_N_INSNS (1), /* extend. */
1236 COSTS_N_INSNS (1), /* add. */
1237 COSTS_N_INSNS (1), /* extend_add. */
1238 COSTS_N_INSNS (7) /* idiv. */
1240 /* MULT DImode */
1242 0, /* simple (N/A). */
1243 0, /* flag_setting (N/A). */
1244 COSTS_N_INSNS (1), /* extend. */
1245 0, /* add. */
1246 COSTS_N_INSNS (2), /* extend_add. */
1247 0 /* idiv (N/A). */
1250 /* LD/ST */
1252 COSTS_N_INSNS (1), /* load. */
1253 COSTS_N_INSNS (1), /* load_sign_extend. */
1254 COSTS_N_INSNS (6), /* ldrd. */
1255 COSTS_N_INSNS (1), /* ldm_1st. */
1256 1, /* ldm_regs_per_insn_1st. */
1257 2, /* ldm_regs_per_insn_subsequent. */
1258 COSTS_N_INSNS (2), /* loadf. */
1259 COSTS_N_INSNS (4), /* loadd. */
1260 COSTS_N_INSNS (1), /* load_unaligned. */
1261 COSTS_N_INSNS (1), /* store. */
1262 COSTS_N_INSNS (3), /* strd. */
1263 COSTS_N_INSNS (1), /* stm_1st. */
1264 1, /* stm_regs_per_insn_1st. */
1265 2, /* stm_regs_per_insn_subsequent. */
1266 COSTS_N_INSNS (2), /* storef. */
1267 COSTS_N_INSNS (2), /* stored. */
1268 COSTS_N_INSNS (1), /* store_unaligned. */
1269 COSTS_N_INSNS (1), /* loadv. */
1270 COSTS_N_INSNS (1) /* storev. */
1273 /* FP SFmode */
1275 COSTS_N_INSNS (15), /* div. */
1276 COSTS_N_INSNS (3), /* mult. */
1277 COSTS_N_INSNS (7), /* mult_addsub. */
1278 COSTS_N_INSNS (7), /* fma. */
1279 COSTS_N_INSNS (3), /* addsub. */
1280 COSTS_N_INSNS (3), /* fpconst. */
1281 COSTS_N_INSNS (3), /* neg. */
1282 COSTS_N_INSNS (3), /* compare. */
1283 COSTS_N_INSNS (3), /* widen. */
1284 COSTS_N_INSNS (3), /* narrow. */
1285 COSTS_N_INSNS (3), /* toint. */
1286 COSTS_N_INSNS (3), /* fromint. */
1287 COSTS_N_INSNS (3) /* roundint. */
1289 /* FP DFmode */
1291 COSTS_N_INSNS (30), /* div. */
1292 COSTS_N_INSNS (6), /* mult. */
1293 COSTS_N_INSNS (10), /* mult_addsub. */
1294 COSTS_N_INSNS (7), /* fma. */
1295 COSTS_N_INSNS (3), /* addsub. */
1296 COSTS_N_INSNS (3), /* fpconst. */
1297 COSTS_N_INSNS (3), /* neg. */
1298 COSTS_N_INSNS (3), /* compare. */
1299 COSTS_N_INSNS (3), /* widen. */
1300 COSTS_N_INSNS (3), /* narrow. */
1301 COSTS_N_INSNS (3), /* toint. */
1302 COSTS_N_INSNS (3), /* fromint. */
1303 COSTS_N_INSNS (3) /* roundint. */
1306 /* Vector */
1308 COSTS_N_INSNS (1) /* alu. */
1313 const struct cpu_cost_table cortexa7_extra_costs =
1315 /* ALU */
1317 0, /* arith. */
1318 0, /* logical. */
1319 COSTS_N_INSNS (1), /* shift. */
1320 COSTS_N_INSNS (1), /* shift_reg. */
1321 COSTS_N_INSNS (1), /* arith_shift. */
1322 COSTS_N_INSNS (1), /* arith_shift_reg. */
1323 COSTS_N_INSNS (1), /* log_shift. */
1324 COSTS_N_INSNS (1), /* log_shift_reg. */
1325 COSTS_N_INSNS (1), /* extend. */
1326 COSTS_N_INSNS (1), /* extend_arith. */
1327 COSTS_N_INSNS (1), /* bfi. */
1328 COSTS_N_INSNS (1), /* bfx. */
1329 COSTS_N_INSNS (1), /* clz. */
1330 COSTS_N_INSNS (1), /* rev. */
1331 0, /* non_exec. */
1332 true /* non_exec_costs_exec. */
1336 /* MULT SImode */
1338 0, /* simple. */
1339 COSTS_N_INSNS (1), /* flag_setting. */
1340 COSTS_N_INSNS (1), /* extend. */
1341 COSTS_N_INSNS (1), /* add. */
1342 COSTS_N_INSNS (1), /* extend_add. */
1343 COSTS_N_INSNS (7) /* idiv. */
1345 /* MULT DImode */
1347 0, /* simple (N/A). */
1348 0, /* flag_setting (N/A). */
1349 COSTS_N_INSNS (1), /* extend. */
1350 0, /* add. */
1351 COSTS_N_INSNS (2), /* extend_add. */
1352 0 /* idiv (N/A). */
1355 /* LD/ST */
1357 COSTS_N_INSNS (1), /* load. */
1358 COSTS_N_INSNS (1), /* load_sign_extend. */
1359 COSTS_N_INSNS (3), /* ldrd. */
1360 COSTS_N_INSNS (1), /* ldm_1st. */
1361 1, /* ldm_regs_per_insn_1st. */
1362 2, /* ldm_regs_per_insn_subsequent. */
1363 COSTS_N_INSNS (2), /* loadf. */
1364 COSTS_N_INSNS (2), /* loadd. */
1365 COSTS_N_INSNS (1), /* load_unaligned. */
1366 COSTS_N_INSNS (1), /* store. */
1367 COSTS_N_INSNS (3), /* strd. */
1368 COSTS_N_INSNS (1), /* stm_1st. */
1369 1, /* stm_regs_per_insn_1st. */
1370 2, /* stm_regs_per_insn_subsequent. */
1371 COSTS_N_INSNS (2), /* storef. */
1372 COSTS_N_INSNS (2), /* stored. */
1373 COSTS_N_INSNS (1), /* store_unaligned. */
1374 COSTS_N_INSNS (1), /* loadv. */
1375 COSTS_N_INSNS (1) /* storev. */
1378 /* FP SFmode */
1380 COSTS_N_INSNS (15), /* div. */
1381 COSTS_N_INSNS (3), /* mult. */
1382 COSTS_N_INSNS (7), /* mult_addsub. */
1383 COSTS_N_INSNS (7), /* fma. */
1384 COSTS_N_INSNS (3), /* addsub. */
1385 COSTS_N_INSNS (3), /* fpconst. */
1386 COSTS_N_INSNS (3), /* neg. */
1387 COSTS_N_INSNS (3), /* compare. */
1388 COSTS_N_INSNS (3), /* widen. */
1389 COSTS_N_INSNS (3), /* narrow. */
1390 COSTS_N_INSNS (3), /* toint. */
1391 COSTS_N_INSNS (3), /* fromint. */
1392 COSTS_N_INSNS (3) /* roundint. */
1394 /* FP DFmode */
1396 COSTS_N_INSNS (30), /* div. */
1397 COSTS_N_INSNS (6), /* mult. */
1398 COSTS_N_INSNS (10), /* mult_addsub. */
1399 COSTS_N_INSNS (7), /* fma. */
1400 COSTS_N_INSNS (3), /* addsub. */
1401 COSTS_N_INSNS (3), /* fpconst. */
1402 COSTS_N_INSNS (3), /* neg. */
1403 COSTS_N_INSNS (3), /* compare. */
1404 COSTS_N_INSNS (3), /* widen. */
1405 COSTS_N_INSNS (3), /* narrow. */
1406 COSTS_N_INSNS (3), /* toint. */
1407 COSTS_N_INSNS (3), /* fromint. */
1408 COSTS_N_INSNS (3) /* roundint. */
1411 /* Vector */
1413 COSTS_N_INSNS (1) /* alu. */
1417 const struct cpu_cost_table cortexa12_extra_costs =
1419 /* ALU */
1421 0, /* arith. */
1422 0, /* logical. */
1423 0, /* shift. */
1424 COSTS_N_INSNS (1), /* shift_reg. */
1425 COSTS_N_INSNS (1), /* arith_shift. */
1426 COSTS_N_INSNS (1), /* arith_shift_reg. */
1427 COSTS_N_INSNS (1), /* log_shift. */
1428 COSTS_N_INSNS (1), /* log_shift_reg. */
1429 0, /* extend. */
1430 COSTS_N_INSNS (1), /* extend_arith. */
1431 0, /* bfi. */
1432 COSTS_N_INSNS (1), /* bfx. */
1433 COSTS_N_INSNS (1), /* clz. */
1434 COSTS_N_INSNS (1), /* rev. */
1435 0, /* non_exec. */
1436 true /* non_exec_costs_exec. */
1438 /* MULT SImode */
1441 COSTS_N_INSNS (2), /* simple. */
1442 COSTS_N_INSNS (3), /* flag_setting. */
1443 COSTS_N_INSNS (2), /* extend. */
1444 COSTS_N_INSNS (3), /* add. */
1445 COSTS_N_INSNS (2), /* extend_add. */
1446 COSTS_N_INSNS (18) /* idiv. */
1448 /* MULT DImode */
1450 0, /* simple (N/A). */
1451 0, /* flag_setting (N/A). */
1452 COSTS_N_INSNS (3), /* extend. */
1453 0, /* add (N/A). */
1454 COSTS_N_INSNS (3), /* extend_add. */
1455 0 /* idiv (N/A). */
1458 /* LD/ST */
1460 COSTS_N_INSNS (3), /* load. */
1461 COSTS_N_INSNS (3), /* load_sign_extend. */
1462 COSTS_N_INSNS (3), /* ldrd. */
1463 COSTS_N_INSNS (3), /* ldm_1st. */
1464 1, /* ldm_regs_per_insn_1st. */
1465 2, /* ldm_regs_per_insn_subsequent. */
1466 COSTS_N_INSNS (3), /* loadf. */
1467 COSTS_N_INSNS (3), /* loadd. */
1468 0, /* load_unaligned. */
1469 0, /* store. */
1470 0, /* strd. */
1471 0, /* stm_1st. */
1472 1, /* stm_regs_per_insn_1st. */
1473 2, /* stm_regs_per_insn_subsequent. */
1474 COSTS_N_INSNS (2), /* storef. */
1475 COSTS_N_INSNS (2), /* stored. */
1476 0, /* store_unaligned. */
1477 COSTS_N_INSNS (1), /* loadv. */
1478 COSTS_N_INSNS (1) /* storev. */
1481 /* FP SFmode */
1483 COSTS_N_INSNS (17), /* div. */
1484 COSTS_N_INSNS (4), /* mult. */
1485 COSTS_N_INSNS (8), /* mult_addsub. */
1486 COSTS_N_INSNS (8), /* fma. */
1487 COSTS_N_INSNS (4), /* addsub. */
1488 COSTS_N_INSNS (2), /* fpconst. */
1489 COSTS_N_INSNS (2), /* neg. */
1490 COSTS_N_INSNS (2), /* compare. */
1491 COSTS_N_INSNS (4), /* widen. */
1492 COSTS_N_INSNS (4), /* narrow. */
1493 COSTS_N_INSNS (4), /* toint. */
1494 COSTS_N_INSNS (4), /* fromint. */
1495 COSTS_N_INSNS (4) /* roundint. */
1497 /* FP DFmode */
1499 COSTS_N_INSNS (31), /* div. */
1500 COSTS_N_INSNS (4), /* mult. */
1501 COSTS_N_INSNS (8), /* mult_addsub. */
1502 COSTS_N_INSNS (8), /* fma. */
1503 COSTS_N_INSNS (4), /* addsub. */
1504 COSTS_N_INSNS (2), /* fpconst. */
1505 COSTS_N_INSNS (2), /* neg. */
1506 COSTS_N_INSNS (2), /* compare. */
1507 COSTS_N_INSNS (4), /* widen. */
1508 COSTS_N_INSNS (4), /* narrow. */
1509 COSTS_N_INSNS (4), /* toint. */
1510 COSTS_N_INSNS (4), /* fromint. */
1511 COSTS_N_INSNS (4) /* roundint. */
1514 /* Vector */
1516 COSTS_N_INSNS (1) /* alu. */
1520 const struct cpu_cost_table cortexa15_extra_costs =
1522 /* ALU */
1524 0, /* arith. */
1525 0, /* logical. */
1526 0, /* shift. */
1527 0, /* shift_reg. */
1528 COSTS_N_INSNS (1), /* arith_shift. */
1529 COSTS_N_INSNS (1), /* arith_shift_reg. */
1530 COSTS_N_INSNS (1), /* log_shift. */
1531 COSTS_N_INSNS (1), /* log_shift_reg. */
1532 0, /* extend. */
1533 COSTS_N_INSNS (1), /* extend_arith. */
1534 COSTS_N_INSNS (1), /* bfi. */
1535 0, /* bfx. */
1536 0, /* clz. */
1537 0, /* rev. */
1538 0, /* non_exec. */
1539 true /* non_exec_costs_exec. */
1541 /* MULT SImode */
1544 COSTS_N_INSNS (2), /* simple. */
1545 COSTS_N_INSNS (3), /* flag_setting. */
1546 COSTS_N_INSNS (2), /* extend. */
1547 COSTS_N_INSNS (2), /* add. */
1548 COSTS_N_INSNS (2), /* extend_add. */
1549 COSTS_N_INSNS (18) /* idiv. */
1551 /* MULT DImode */
1553 0, /* simple (N/A). */
1554 0, /* flag_setting (N/A). */
1555 COSTS_N_INSNS (3), /* extend. */
1556 0, /* add (N/A). */
1557 COSTS_N_INSNS (3), /* extend_add. */
1558 0 /* idiv (N/A). */
1561 /* LD/ST */
1563 COSTS_N_INSNS (3), /* load. */
1564 COSTS_N_INSNS (3), /* load_sign_extend. */
1565 COSTS_N_INSNS (3), /* ldrd. */
1566 COSTS_N_INSNS (4), /* ldm_1st. */
1567 1, /* ldm_regs_per_insn_1st. */
1568 2, /* ldm_regs_per_insn_subsequent. */
1569 COSTS_N_INSNS (4), /* loadf. */
1570 COSTS_N_INSNS (4), /* loadd. */
1571 0, /* load_unaligned. */
1572 0, /* store. */
1573 0, /* strd. */
1574 COSTS_N_INSNS (1), /* stm_1st. */
1575 1, /* stm_regs_per_insn_1st. */
1576 2, /* stm_regs_per_insn_subsequent. */
1577 0, /* storef. */
1578 0, /* stored. */
1579 0, /* store_unaligned. */
1580 COSTS_N_INSNS (1), /* loadv. */
1581 COSTS_N_INSNS (1) /* storev. */
1584 /* FP SFmode */
1586 COSTS_N_INSNS (17), /* div. */
1587 COSTS_N_INSNS (4), /* mult. */
1588 COSTS_N_INSNS (8), /* mult_addsub. */
1589 COSTS_N_INSNS (8), /* fma. */
1590 COSTS_N_INSNS (4), /* addsub. */
1591 COSTS_N_INSNS (2), /* fpconst. */
1592 COSTS_N_INSNS (2), /* neg. */
1593 COSTS_N_INSNS (5), /* compare. */
1594 COSTS_N_INSNS (4), /* widen. */
1595 COSTS_N_INSNS (4), /* narrow. */
1596 COSTS_N_INSNS (4), /* toint. */
1597 COSTS_N_INSNS (4), /* fromint. */
1598 COSTS_N_INSNS (4) /* roundint. */
1600 /* FP DFmode */
1602 COSTS_N_INSNS (31), /* div. */
1603 COSTS_N_INSNS (4), /* mult. */
1604 COSTS_N_INSNS (8), /* mult_addsub. */
1605 COSTS_N_INSNS (8), /* fma. */
1606 COSTS_N_INSNS (4), /* addsub. */
1607 COSTS_N_INSNS (2), /* fpconst. */
1608 COSTS_N_INSNS (2), /* neg. */
1609 COSTS_N_INSNS (2), /* compare. */
1610 COSTS_N_INSNS (4), /* widen. */
1611 COSTS_N_INSNS (4), /* narrow. */
1612 COSTS_N_INSNS (4), /* toint. */
1613 COSTS_N_INSNS (4), /* fromint. */
1614 COSTS_N_INSNS (4) /* roundint. */
1617 /* Vector */
1619 COSTS_N_INSNS (1) /* alu. */
1623 const struct cpu_cost_table v7m_extra_costs =
1625 /* ALU */
1627 0, /* arith. */
1628 0, /* logical. */
1629 0, /* shift. */
1630 0, /* shift_reg. */
1631 0, /* arith_shift. */
1632 COSTS_N_INSNS (1), /* arith_shift_reg. */
1633 0, /* log_shift. */
1634 COSTS_N_INSNS (1), /* log_shift_reg. */
1635 0, /* extend. */
1636 COSTS_N_INSNS (1), /* extend_arith. */
1637 0, /* bfi. */
1638 0, /* bfx. */
1639 0, /* clz. */
1640 0, /* rev. */
1641 COSTS_N_INSNS (1), /* non_exec. */
1642 false /* non_exec_costs_exec. */
1645 /* MULT SImode */
1647 COSTS_N_INSNS (1), /* simple. */
1648 COSTS_N_INSNS (1), /* flag_setting. */
1649 COSTS_N_INSNS (2), /* extend. */
1650 COSTS_N_INSNS (1), /* add. */
1651 COSTS_N_INSNS (3), /* extend_add. */
1652 COSTS_N_INSNS (8) /* idiv. */
1654 /* MULT DImode */
1656 0, /* simple (N/A). */
1657 0, /* flag_setting (N/A). */
1658 COSTS_N_INSNS (2), /* extend. */
1659 0, /* add (N/A). */
1660 COSTS_N_INSNS (3), /* extend_add. */
1661 0 /* idiv (N/A). */
1664 /* LD/ST */
1666 COSTS_N_INSNS (2), /* load. */
1667 0, /* load_sign_extend. */
1668 COSTS_N_INSNS (3), /* ldrd. */
1669 COSTS_N_INSNS (2), /* ldm_1st. */
1670 1, /* ldm_regs_per_insn_1st. */
1671 1, /* ldm_regs_per_insn_subsequent. */
1672 COSTS_N_INSNS (2), /* loadf. */
1673 COSTS_N_INSNS (3), /* loadd. */
1674 COSTS_N_INSNS (1), /* load_unaligned. */
1675 COSTS_N_INSNS (2), /* store. */
1676 COSTS_N_INSNS (3), /* strd. */
1677 COSTS_N_INSNS (2), /* stm_1st. */
1678 1, /* stm_regs_per_insn_1st. */
1679 1, /* stm_regs_per_insn_subsequent. */
1680 COSTS_N_INSNS (2), /* storef. */
1681 COSTS_N_INSNS (3), /* stored. */
1682 COSTS_N_INSNS (1), /* store_unaligned. */
1683 COSTS_N_INSNS (1), /* loadv. */
1684 COSTS_N_INSNS (1) /* storev. */
1687 /* FP SFmode */
1689 COSTS_N_INSNS (7), /* div. */
1690 COSTS_N_INSNS (2), /* mult. */
1691 COSTS_N_INSNS (5), /* mult_addsub. */
1692 COSTS_N_INSNS (3), /* fma. */
1693 COSTS_N_INSNS (1), /* addsub. */
1694 0, /* fpconst. */
1695 0, /* neg. */
1696 0, /* compare. */
1697 0, /* widen. */
1698 0, /* narrow. */
1699 0, /* toint. */
1700 0, /* fromint. */
1701 0 /* roundint. */
1703 /* FP DFmode */
1705 COSTS_N_INSNS (15), /* div. */
1706 COSTS_N_INSNS (5), /* mult. */
1707 COSTS_N_INSNS (7), /* mult_addsub. */
1708 COSTS_N_INSNS (7), /* fma. */
1709 COSTS_N_INSNS (3), /* addsub. */
1710 0, /* fpconst. */
1711 0, /* neg. */
1712 0, /* compare. */
1713 0, /* widen. */
1714 0, /* narrow. */
1715 0, /* toint. */
1716 0, /* fromint. */
1717 0 /* roundint. */
1720 /* Vector */
1722 COSTS_N_INSNS (1) /* alu. */
1726 const struct tune_params arm_slowmul_tune =
1728 &generic_extra_costs, /* Insn extra costs. */
1729 NULL, /* Sched adj cost. */
1730 arm_default_branch_cost,
1731 &arm_default_vec_cost,
1732 3, /* Constant limit. */
1733 5, /* Max cond insns. */
1734 8, /* Memset max inline. */
1735 1, /* Issue rate. */
1736 ARM_PREFETCH_NOT_BENEFICIAL,
1737 tune_params::PREF_CONST_POOL_TRUE,
1738 tune_params::PREF_LDRD_FALSE,
1739 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1740 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1741 tune_params::DISPARAGE_FLAGS_NEITHER,
1742 tune_params::PREF_NEON_64_FALSE,
1743 tune_params::PREF_NEON_STRINGOPS_FALSE,
1744 tune_params::FUSE_NOTHING,
1745 tune_params::SCHED_AUTOPREF_OFF
1748 const struct tune_params arm_fastmul_tune =
1750 &generic_extra_costs, /* Insn extra costs. */
1751 NULL, /* Sched adj cost. */
1752 arm_default_branch_cost,
1753 &arm_default_vec_cost,
1754 1, /* Constant limit. */
1755 5, /* Max cond insns. */
1756 8, /* Memset max inline. */
1757 1, /* Issue rate. */
1758 ARM_PREFETCH_NOT_BENEFICIAL,
1759 tune_params::PREF_CONST_POOL_TRUE,
1760 tune_params::PREF_LDRD_FALSE,
1761 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1762 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1763 tune_params::DISPARAGE_FLAGS_NEITHER,
1764 tune_params::PREF_NEON_64_FALSE,
1765 tune_params::PREF_NEON_STRINGOPS_FALSE,
1766 tune_params::FUSE_NOTHING,
1767 tune_params::SCHED_AUTOPREF_OFF
1770 /* StrongARM has early execution of branches, so a sequence that is worth
1771 skipping is shorter. Set max_insns_skipped to a lower value. */
1773 const struct tune_params arm_strongarm_tune =
1775 &generic_extra_costs, /* Insn extra costs. */
1776 NULL, /* Sched adj cost. */
1777 arm_default_branch_cost,
1778 &arm_default_vec_cost,
1779 1, /* Constant limit. */
1780 3, /* Max cond insns. */
1781 8, /* Memset max inline. */
1782 1, /* Issue rate. */
1783 ARM_PREFETCH_NOT_BENEFICIAL,
1784 tune_params::PREF_CONST_POOL_TRUE,
1785 tune_params::PREF_LDRD_FALSE,
1786 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1787 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1788 tune_params::DISPARAGE_FLAGS_NEITHER,
1789 tune_params::PREF_NEON_64_FALSE,
1790 tune_params::PREF_NEON_STRINGOPS_FALSE,
1791 tune_params::FUSE_NOTHING,
1792 tune_params::SCHED_AUTOPREF_OFF
1795 const struct tune_params arm_xscale_tune =
1797 &generic_extra_costs, /* Insn extra costs. */
1798 xscale_sched_adjust_cost,
1799 arm_default_branch_cost,
1800 &arm_default_vec_cost,
1801 2, /* Constant limit. */
1802 3, /* Max cond insns. */
1803 8, /* Memset max inline. */
1804 1, /* Issue rate. */
1805 ARM_PREFETCH_NOT_BENEFICIAL,
1806 tune_params::PREF_CONST_POOL_TRUE,
1807 tune_params::PREF_LDRD_FALSE,
1808 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1809 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1810 tune_params::DISPARAGE_FLAGS_NEITHER,
1811 tune_params::PREF_NEON_64_FALSE,
1812 tune_params::PREF_NEON_STRINGOPS_FALSE,
1813 tune_params::FUSE_NOTHING,
1814 tune_params::SCHED_AUTOPREF_OFF
1817 const struct tune_params arm_9e_tune =
1819 &generic_extra_costs, /* Insn extra costs. */
1820 NULL, /* Sched adj cost. */
1821 arm_default_branch_cost,
1822 &arm_default_vec_cost,
1823 1, /* Constant limit. */
1824 5, /* Max cond insns. */
1825 8, /* Memset max inline. */
1826 1, /* Issue rate. */
1827 ARM_PREFETCH_NOT_BENEFICIAL,
1828 tune_params::PREF_CONST_POOL_TRUE,
1829 tune_params::PREF_LDRD_FALSE,
1830 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1831 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1832 tune_params::DISPARAGE_FLAGS_NEITHER,
1833 tune_params::PREF_NEON_64_FALSE,
1834 tune_params::PREF_NEON_STRINGOPS_FALSE,
1835 tune_params::FUSE_NOTHING,
1836 tune_params::SCHED_AUTOPREF_OFF
1839 const struct tune_params arm_marvell_pj4_tune =
1841 &generic_extra_costs, /* Insn extra costs. */
1842 NULL, /* Sched adj cost. */
1843 arm_default_branch_cost,
1844 &arm_default_vec_cost,
1845 1, /* Constant limit. */
1846 5, /* Max cond insns. */
1847 8, /* Memset max inline. */
1848 2, /* Issue rate. */
1849 ARM_PREFETCH_NOT_BENEFICIAL,
1850 tune_params::PREF_CONST_POOL_TRUE,
1851 tune_params::PREF_LDRD_FALSE,
1852 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1853 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1854 tune_params::DISPARAGE_FLAGS_NEITHER,
1855 tune_params::PREF_NEON_64_FALSE,
1856 tune_params::PREF_NEON_STRINGOPS_FALSE,
1857 tune_params::FUSE_NOTHING,
1858 tune_params::SCHED_AUTOPREF_OFF
1861 const struct tune_params arm_v6t2_tune =
1863 &generic_extra_costs, /* Insn extra costs. */
1864 NULL, /* Sched adj cost. */
1865 arm_default_branch_cost,
1866 &arm_default_vec_cost,
1867 1, /* Constant limit. */
1868 5, /* Max cond insns. */
1869 8, /* Memset max inline. */
1870 1, /* Issue rate. */
1871 ARM_PREFETCH_NOT_BENEFICIAL,
1872 tune_params::PREF_CONST_POOL_FALSE,
1873 tune_params::PREF_LDRD_FALSE,
1874 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1875 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1876 tune_params::DISPARAGE_FLAGS_NEITHER,
1877 tune_params::PREF_NEON_64_FALSE,
1878 tune_params::PREF_NEON_STRINGOPS_FALSE,
1879 tune_params::FUSE_NOTHING,
1880 tune_params::SCHED_AUTOPREF_OFF
1884 /* Generic Cortex tuning. Use more specific tunings if appropriate. */
1885 const struct tune_params arm_cortex_tune =
1887 &generic_extra_costs,
1888 NULL, /* Sched adj cost. */
1889 arm_default_branch_cost,
1890 &arm_default_vec_cost,
1891 1, /* Constant limit. */
1892 5, /* Max cond insns. */
1893 8, /* Memset max inline. */
1894 2, /* Issue rate. */
1895 ARM_PREFETCH_NOT_BENEFICIAL,
1896 tune_params::PREF_CONST_POOL_FALSE,
1897 tune_params::PREF_LDRD_FALSE,
1898 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1899 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1900 tune_params::DISPARAGE_FLAGS_NEITHER,
1901 tune_params::PREF_NEON_64_FALSE,
1902 tune_params::PREF_NEON_STRINGOPS_FALSE,
1903 tune_params::FUSE_NOTHING,
1904 tune_params::SCHED_AUTOPREF_OFF
1907 const struct tune_params arm_cortex_a8_tune =
1909 &cortexa8_extra_costs,
1910 NULL, /* Sched adj cost. */
1911 arm_default_branch_cost,
1912 &arm_default_vec_cost,
1913 1, /* Constant limit. */
1914 5, /* Max cond insns. */
1915 8, /* Memset max inline. */
1916 2, /* Issue rate. */
1917 ARM_PREFETCH_NOT_BENEFICIAL,
1918 tune_params::PREF_CONST_POOL_FALSE,
1919 tune_params::PREF_LDRD_FALSE,
1920 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1921 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1922 tune_params::DISPARAGE_FLAGS_NEITHER,
1923 tune_params::PREF_NEON_64_FALSE,
1924 tune_params::PREF_NEON_STRINGOPS_TRUE,
1925 tune_params::FUSE_NOTHING,
1926 tune_params::SCHED_AUTOPREF_OFF
1929 const struct tune_params arm_cortex_a7_tune =
1931 &cortexa7_extra_costs,
1932 NULL, /* Sched adj cost. */
1933 arm_default_branch_cost,
1934 &arm_default_vec_cost,
1935 1, /* Constant limit. */
1936 5, /* Max cond insns. */
1937 8, /* Memset max inline. */
1938 2, /* Issue rate. */
1939 ARM_PREFETCH_NOT_BENEFICIAL,
1940 tune_params::PREF_CONST_POOL_FALSE,
1941 tune_params::PREF_LDRD_FALSE,
1942 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1943 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1944 tune_params::DISPARAGE_FLAGS_NEITHER,
1945 tune_params::PREF_NEON_64_FALSE,
1946 tune_params::PREF_NEON_STRINGOPS_TRUE,
1947 tune_params::FUSE_NOTHING,
1948 tune_params::SCHED_AUTOPREF_OFF
1951 const struct tune_params arm_cortex_a15_tune =
1953 &cortexa15_extra_costs,
1954 NULL, /* Sched adj cost. */
1955 arm_default_branch_cost,
1956 &arm_default_vec_cost,
1957 1, /* Constant limit. */
1958 2, /* Max cond insns. */
1959 8, /* Memset max inline. */
1960 3, /* Issue rate. */
1961 ARM_PREFETCH_NOT_BENEFICIAL,
1962 tune_params::PREF_CONST_POOL_FALSE,
1963 tune_params::PREF_LDRD_TRUE,
1964 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1965 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1966 tune_params::DISPARAGE_FLAGS_ALL,
1967 tune_params::PREF_NEON_64_FALSE,
1968 tune_params::PREF_NEON_STRINGOPS_TRUE,
1969 tune_params::FUSE_NOTHING,
1970 tune_params::SCHED_AUTOPREF_FULL
1973 const struct tune_params arm_cortex_a35_tune =
1975 &cortexa53_extra_costs,
1976 NULL, /* Sched adj cost. */
1977 arm_default_branch_cost,
1978 &arm_default_vec_cost,
1979 1, /* Constant limit. */
1980 5, /* Max cond insns. */
1981 8, /* Memset max inline. */
1982 1, /* Issue rate. */
1983 ARM_PREFETCH_NOT_BENEFICIAL,
1984 tune_params::PREF_CONST_POOL_FALSE,
1985 tune_params::PREF_LDRD_FALSE,
1986 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1987 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1988 tune_params::DISPARAGE_FLAGS_NEITHER,
1989 tune_params::PREF_NEON_64_FALSE,
1990 tune_params::PREF_NEON_STRINGOPS_TRUE,
1991 FUSE_OPS (tune_params::FUSE_MOVW_MOVT),
1992 tune_params::SCHED_AUTOPREF_OFF
1995 const struct tune_params arm_cortex_a53_tune =
1997 &cortexa53_extra_costs,
1998 NULL, /* Sched adj cost. */
1999 arm_default_branch_cost,
2000 &arm_default_vec_cost,
2001 1, /* Constant limit. */
2002 5, /* Max cond insns. */
2003 8, /* Memset max inline. */
2004 2, /* Issue rate. */
2005 ARM_PREFETCH_NOT_BENEFICIAL,
2006 tune_params::PREF_CONST_POOL_FALSE,
2007 tune_params::PREF_LDRD_FALSE,
2008 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2009 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2010 tune_params::DISPARAGE_FLAGS_NEITHER,
2011 tune_params::PREF_NEON_64_FALSE,
2012 tune_params::PREF_NEON_STRINGOPS_TRUE,
2013 FUSE_OPS (tune_params::FUSE_MOVW_MOVT | tune_params::FUSE_AES_AESMC),
2014 tune_params::SCHED_AUTOPREF_OFF
2017 const struct tune_params arm_cortex_a57_tune =
2019 &cortexa57_extra_costs,
2020 NULL, /* Sched adj cost. */
2021 arm_default_branch_cost,
2022 &arm_default_vec_cost,
2023 1, /* Constant limit. */
2024 2, /* Max cond insns. */
2025 8, /* Memset max inline. */
2026 3, /* Issue rate. */
2027 ARM_PREFETCH_NOT_BENEFICIAL,
2028 tune_params::PREF_CONST_POOL_FALSE,
2029 tune_params::PREF_LDRD_TRUE,
2030 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2031 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2032 tune_params::DISPARAGE_FLAGS_ALL,
2033 tune_params::PREF_NEON_64_FALSE,
2034 tune_params::PREF_NEON_STRINGOPS_TRUE,
2035 FUSE_OPS (tune_params::FUSE_MOVW_MOVT | tune_params::FUSE_AES_AESMC),
2036 tune_params::SCHED_AUTOPREF_FULL
2039 const struct tune_params arm_exynosm1_tune =
2041 &exynosm1_extra_costs,
2042 NULL, /* Sched adj cost. */
2043 arm_default_branch_cost,
2044 &arm_default_vec_cost,
2045 1, /* Constant limit. */
2046 2, /* Max cond insns. */
2047 8, /* Memset max inline. */
2048 3, /* Issue rate. */
2049 ARM_PREFETCH_NOT_BENEFICIAL,
2050 tune_params::PREF_CONST_POOL_FALSE,
2051 tune_params::PREF_LDRD_TRUE,
2052 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* Thumb. */
2053 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* ARM. */
2054 tune_params::DISPARAGE_FLAGS_ALL,
2055 tune_params::PREF_NEON_64_FALSE,
2056 tune_params::PREF_NEON_STRINGOPS_TRUE,
2057 tune_params::FUSE_NOTHING,
2058 tune_params::SCHED_AUTOPREF_OFF
2061 const struct tune_params arm_xgene1_tune =
2063 &xgene1_extra_costs,
2064 NULL, /* Sched adj cost. */
2065 arm_default_branch_cost,
2066 &arm_default_vec_cost,
2067 1, /* Constant limit. */
2068 2, /* Max cond insns. */
2069 32, /* Memset max inline. */
2070 4, /* Issue rate. */
2071 ARM_PREFETCH_NOT_BENEFICIAL,
2072 tune_params::PREF_CONST_POOL_FALSE,
2073 tune_params::PREF_LDRD_TRUE,
2074 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2075 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2076 tune_params::DISPARAGE_FLAGS_ALL,
2077 tune_params::PREF_NEON_64_FALSE,
2078 tune_params::PREF_NEON_STRINGOPS_FALSE,
2079 tune_params::FUSE_NOTHING,
2080 tune_params::SCHED_AUTOPREF_OFF
2083 const struct tune_params arm_qdf24xx_tune =
2085 &qdf24xx_extra_costs,
2086 NULL, /* Scheduler cost adjustment. */
2087 arm_default_branch_cost,
2088 &arm_default_vec_cost, /* Vectorizer costs. */
2089 1, /* Constant limit. */
2090 2, /* Max cond insns. */
2091 8, /* Memset max inline. */
2092 4, /* Issue rate. */
2093 ARM_PREFETCH_BENEFICIAL (0, -1, 64),
2094 tune_params::PREF_CONST_POOL_FALSE,
2095 tune_params::PREF_LDRD_TRUE,
2096 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2097 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2098 tune_params::DISPARAGE_FLAGS_ALL,
2099 tune_params::PREF_NEON_64_FALSE,
2100 tune_params::PREF_NEON_STRINGOPS_TRUE,
2101 FUSE_OPS (tune_params::FUSE_MOVW_MOVT),
2102 tune_params::SCHED_AUTOPREF_FULL
2105 /* Branches can be dual-issued on Cortex-A5, so conditional execution is
2106 less appealing. Set max_insns_skipped to a low value. */
2108 const struct tune_params arm_cortex_a5_tune =
2110 &cortexa5_extra_costs,
2111 NULL, /* Sched adj cost. */
2112 arm_cortex_a5_branch_cost,
2113 &arm_default_vec_cost,
2114 1, /* Constant limit. */
2115 1, /* Max cond insns. */
2116 8, /* Memset max inline. */
2117 2, /* Issue rate. */
2118 ARM_PREFETCH_NOT_BENEFICIAL,
2119 tune_params::PREF_CONST_POOL_FALSE,
2120 tune_params::PREF_LDRD_FALSE,
2121 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* Thumb. */
2122 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* ARM. */
2123 tune_params::DISPARAGE_FLAGS_NEITHER,
2124 tune_params::PREF_NEON_64_FALSE,
2125 tune_params::PREF_NEON_STRINGOPS_TRUE,
2126 tune_params::FUSE_NOTHING,
2127 tune_params::SCHED_AUTOPREF_OFF
2130 const struct tune_params arm_cortex_a9_tune =
2132 &cortexa9_extra_costs,
2133 cortex_a9_sched_adjust_cost,
2134 arm_default_branch_cost,
2135 &arm_default_vec_cost,
2136 1, /* Constant limit. */
2137 5, /* Max cond insns. */
2138 8, /* Memset max inline. */
2139 2, /* Issue rate. */
2140 ARM_PREFETCH_BENEFICIAL(4,32,32),
2141 tune_params::PREF_CONST_POOL_FALSE,
2142 tune_params::PREF_LDRD_FALSE,
2143 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2144 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2145 tune_params::DISPARAGE_FLAGS_NEITHER,
2146 tune_params::PREF_NEON_64_FALSE,
2147 tune_params::PREF_NEON_STRINGOPS_FALSE,
2148 tune_params::FUSE_NOTHING,
2149 tune_params::SCHED_AUTOPREF_OFF
2152 const struct tune_params arm_cortex_a12_tune =
2154 &cortexa12_extra_costs,
2155 NULL, /* Sched adj cost. */
2156 arm_default_branch_cost,
2157 &arm_default_vec_cost, /* Vectorizer costs. */
2158 1, /* Constant limit. */
2159 2, /* Max cond insns. */
2160 8, /* Memset max inline. */
2161 2, /* Issue rate. */
2162 ARM_PREFETCH_NOT_BENEFICIAL,
2163 tune_params::PREF_CONST_POOL_FALSE,
2164 tune_params::PREF_LDRD_TRUE,
2165 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2166 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2167 tune_params::DISPARAGE_FLAGS_ALL,
2168 tune_params::PREF_NEON_64_FALSE,
2169 tune_params::PREF_NEON_STRINGOPS_TRUE,
2170 FUSE_OPS (tune_params::FUSE_MOVW_MOVT),
2171 tune_params::SCHED_AUTOPREF_OFF
2174 const struct tune_params arm_cortex_a73_tune =
2176 &cortexa57_extra_costs,
2177 NULL, /* Sched adj cost. */
2178 arm_default_branch_cost,
2179 &arm_default_vec_cost, /* Vectorizer costs. */
2180 1, /* Constant limit. */
2181 2, /* Max cond insns. */
2182 8, /* Memset max inline. */
2183 2, /* Issue rate. */
2184 ARM_PREFETCH_NOT_BENEFICIAL,
2185 tune_params::PREF_CONST_POOL_FALSE,
2186 tune_params::PREF_LDRD_TRUE,
2187 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2188 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2189 tune_params::DISPARAGE_FLAGS_ALL,
2190 tune_params::PREF_NEON_64_FALSE,
2191 tune_params::PREF_NEON_STRINGOPS_TRUE,
2192 FUSE_OPS (tune_params::FUSE_AES_AESMC | tune_params::FUSE_MOVW_MOVT),
2193 tune_params::SCHED_AUTOPREF_FULL
2196 /* armv7m tuning. On Cortex-M4 cores for example, MOVW/MOVT take a single
2197 cycle to execute each. An LDR from the constant pool also takes two cycles
2198 to execute, but mildly increases pipelining opportunity (consecutive
2199 loads/stores can be pipelined together, saving one cycle), and may also
2200 improve icache utilisation. Hence we prefer the constant pool for such
2201 processors. */
2203 const struct tune_params arm_v7m_tune =
2205 &v7m_extra_costs,
2206 NULL, /* Sched adj cost. */
2207 arm_cortex_m_branch_cost,
2208 &arm_default_vec_cost,
2209 1, /* Constant limit. */
2210 2, /* Max cond insns. */
2211 8, /* Memset max inline. */
2212 1, /* Issue rate. */
2213 ARM_PREFETCH_NOT_BENEFICIAL,
2214 tune_params::PREF_CONST_POOL_TRUE,
2215 tune_params::PREF_LDRD_FALSE,
2216 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* Thumb. */
2217 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* ARM. */
2218 tune_params::DISPARAGE_FLAGS_NEITHER,
2219 tune_params::PREF_NEON_64_FALSE,
2220 tune_params::PREF_NEON_STRINGOPS_FALSE,
2221 tune_params::FUSE_NOTHING,
2222 tune_params::SCHED_AUTOPREF_OFF
2225 /* Cortex-M7 tuning. */
2227 const struct tune_params arm_cortex_m7_tune =
2229 &v7m_extra_costs,
2230 NULL, /* Sched adj cost. */
2231 arm_cortex_m7_branch_cost,
2232 &arm_default_vec_cost,
2233 0, /* Constant limit. */
2234 1, /* Max cond insns. */
2235 8, /* Memset max inline. */
2236 2, /* Issue rate. */
2237 ARM_PREFETCH_NOT_BENEFICIAL,
2238 tune_params::PREF_CONST_POOL_TRUE,
2239 tune_params::PREF_LDRD_FALSE,
2240 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2241 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2242 tune_params::DISPARAGE_FLAGS_NEITHER,
2243 tune_params::PREF_NEON_64_FALSE,
2244 tune_params::PREF_NEON_STRINGOPS_FALSE,
2245 tune_params::FUSE_NOTHING,
2246 tune_params::SCHED_AUTOPREF_OFF
2249 /* The arm_v6m_tune is duplicated from arm_cortex_tune, rather than
2250 arm_v6t2_tune. It is used for cortex-m0, cortex-m1, cortex-m0plus and
2251 cortex-m23. */
2252 const struct tune_params arm_v6m_tune =
2254 &generic_extra_costs, /* Insn extra costs. */
2255 NULL, /* Sched adj cost. */
2256 arm_default_branch_cost,
2257 &arm_default_vec_cost, /* Vectorizer costs. */
2258 1, /* Constant limit. */
2259 5, /* Max cond insns. */
2260 8, /* Memset max inline. */
2261 1, /* Issue rate. */
2262 ARM_PREFETCH_NOT_BENEFICIAL,
2263 tune_params::PREF_CONST_POOL_FALSE,
2264 tune_params::PREF_LDRD_FALSE,
2265 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* Thumb. */
2266 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* ARM. */
2267 tune_params::DISPARAGE_FLAGS_NEITHER,
2268 tune_params::PREF_NEON_64_FALSE,
2269 tune_params::PREF_NEON_STRINGOPS_FALSE,
2270 tune_params::FUSE_NOTHING,
2271 tune_params::SCHED_AUTOPREF_OFF
2274 const struct tune_params arm_fa726te_tune =
2276 &generic_extra_costs, /* Insn extra costs. */
2277 fa726te_sched_adjust_cost,
2278 arm_default_branch_cost,
2279 &arm_default_vec_cost,
2280 1, /* Constant limit. */
2281 5, /* Max cond insns. */
2282 8, /* Memset max inline. */
2283 2, /* Issue rate. */
2284 ARM_PREFETCH_NOT_BENEFICIAL,
2285 tune_params::PREF_CONST_POOL_TRUE,
2286 tune_params::PREF_LDRD_FALSE,
2287 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2288 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2289 tune_params::DISPARAGE_FLAGS_NEITHER,
2290 tune_params::PREF_NEON_64_FALSE,
2291 tune_params::PREF_NEON_STRINGOPS_FALSE,
2292 tune_params::FUSE_NOTHING,
2293 tune_params::SCHED_AUTOPREF_OFF
2296 /* Auto-generated CPU, FPU and architecture tables. */
2297 #include "arm-cpu-data.h"
2299 /* The name of the preprocessor macro to define for this architecture. PROFILE
2300 is replaced by the architecture name (eg. 8A) in arm_option_override () and
2301 is thus chosen to be big enough to hold the longest architecture name. */
2303 char arm_arch_name[] = "__ARM_ARCH_PROFILE__";
2305 /* Supported TLS relocations. */
2307 enum tls_reloc {
2308 TLS_GD32,
2309 TLS_LDM32,
2310 TLS_LDO32,
2311 TLS_IE32,
2312 TLS_LE32,
2313 TLS_DESCSEQ /* GNU scheme */
2316 /* The maximum number of insns to be used when loading a constant. */
2317 inline static int
2318 arm_constant_limit (bool size_p)
2320 return size_p ? 1 : current_tune->constant_limit;
2323 /* Emit an insn that's a simple single-set. Both the operands must be known
2324 to be valid. */
2325 inline static rtx_insn *
2326 emit_set_insn (rtx x, rtx y)
2328 return emit_insn (gen_rtx_SET (x, y));
2331 /* Return the number of bits set in VALUE. */
2332 static unsigned
2333 bit_count (unsigned long value)
2335 unsigned long count = 0;
2337 while (value)
2339 count++;
2340 value &= value - 1; /* Clear the least-significant set bit. */
2343 return count;
2346 /* Return the number of bits set in BMAP. */
2347 static unsigned
2348 bitmap_popcount (const sbitmap bmap)
2350 unsigned int count = 0;
2351 unsigned int n = 0;
2352 sbitmap_iterator sbi;
2354 EXECUTE_IF_SET_IN_BITMAP (bmap, 0, n, sbi)
2355 count++;
2356 return count;
2359 typedef struct
2361 machine_mode mode;
2362 const char *name;
2363 } arm_fixed_mode_set;
2365 /* A small helper for setting fixed-point library libfuncs. */
2367 static void
2368 arm_set_fixed_optab_libfunc (optab optable, machine_mode mode,
2369 const char *funcname, const char *modename,
2370 int num_suffix)
2372 char buffer[50];
2374 if (num_suffix == 0)
2375 sprintf (buffer, "__gnu_%s%s", funcname, modename);
2376 else
2377 sprintf (buffer, "__gnu_%s%s%d", funcname, modename, num_suffix);
2379 set_optab_libfunc (optable, mode, buffer);
2382 static void
2383 arm_set_fixed_conv_libfunc (convert_optab optable, machine_mode to,
2384 machine_mode from, const char *funcname,
2385 const char *toname, const char *fromname)
2387 char buffer[50];
2388 const char *maybe_suffix_2 = "";
2390 /* Follow the logic for selecting a "2" suffix in fixed-bit.h. */
2391 if (ALL_FIXED_POINT_MODE_P (from) && ALL_FIXED_POINT_MODE_P (to)
2392 && UNSIGNED_FIXED_POINT_MODE_P (from) == UNSIGNED_FIXED_POINT_MODE_P (to)
2393 && ALL_FRACT_MODE_P (from) == ALL_FRACT_MODE_P (to))
2394 maybe_suffix_2 = "2";
2396 sprintf (buffer, "__gnu_%s%s%s%s", funcname, fromname, toname,
2397 maybe_suffix_2);
2399 set_conv_libfunc (optable, to, from, buffer);
2402 /* Set up library functions unique to ARM. */
2404 static void
2405 arm_init_libfuncs (void)
2407 /* For Linux, we have access to kernel support for atomic operations. */
2408 if (arm_abi == ARM_ABI_AAPCS_LINUX)
2409 init_sync_libfuncs (MAX_SYNC_LIBFUNC_SIZE);
2411 /* There are no special library functions unless we are using the
2412 ARM BPABI. */
2413 if (!TARGET_BPABI)
2414 return;
2416 /* The functions below are described in Section 4 of the "Run-Time
2417 ABI for the ARM architecture", Version 1.0. */
2419 /* Double-precision floating-point arithmetic. Table 2. */
2420 set_optab_libfunc (add_optab, DFmode, "__aeabi_dadd");
2421 set_optab_libfunc (sdiv_optab, DFmode, "__aeabi_ddiv");
2422 set_optab_libfunc (smul_optab, DFmode, "__aeabi_dmul");
2423 set_optab_libfunc (neg_optab, DFmode, "__aeabi_dneg");
2424 set_optab_libfunc (sub_optab, DFmode, "__aeabi_dsub");
2426 /* Double-precision comparisons. Table 3. */
2427 set_optab_libfunc (eq_optab, DFmode, "__aeabi_dcmpeq");
2428 set_optab_libfunc (ne_optab, DFmode, NULL);
2429 set_optab_libfunc (lt_optab, DFmode, "__aeabi_dcmplt");
2430 set_optab_libfunc (le_optab, DFmode, "__aeabi_dcmple");
2431 set_optab_libfunc (ge_optab, DFmode, "__aeabi_dcmpge");
2432 set_optab_libfunc (gt_optab, DFmode, "__aeabi_dcmpgt");
2433 set_optab_libfunc (unord_optab, DFmode, "__aeabi_dcmpun");
2435 /* Single-precision floating-point arithmetic. Table 4. */
2436 set_optab_libfunc (add_optab, SFmode, "__aeabi_fadd");
2437 set_optab_libfunc (sdiv_optab, SFmode, "__aeabi_fdiv");
2438 set_optab_libfunc (smul_optab, SFmode, "__aeabi_fmul");
2439 set_optab_libfunc (neg_optab, SFmode, "__aeabi_fneg");
2440 set_optab_libfunc (sub_optab, SFmode, "__aeabi_fsub");
2442 /* Single-precision comparisons. Table 5. */
2443 set_optab_libfunc (eq_optab, SFmode, "__aeabi_fcmpeq");
2444 set_optab_libfunc (ne_optab, SFmode, NULL);
2445 set_optab_libfunc (lt_optab, SFmode, "__aeabi_fcmplt");
2446 set_optab_libfunc (le_optab, SFmode, "__aeabi_fcmple");
2447 set_optab_libfunc (ge_optab, SFmode, "__aeabi_fcmpge");
2448 set_optab_libfunc (gt_optab, SFmode, "__aeabi_fcmpgt");
2449 set_optab_libfunc (unord_optab, SFmode, "__aeabi_fcmpun");
2451 /* Floating-point to integer conversions. Table 6. */
2452 set_conv_libfunc (sfix_optab, SImode, DFmode, "__aeabi_d2iz");
2453 set_conv_libfunc (ufix_optab, SImode, DFmode, "__aeabi_d2uiz");
2454 set_conv_libfunc (sfix_optab, DImode, DFmode, "__aeabi_d2lz");
2455 set_conv_libfunc (ufix_optab, DImode, DFmode, "__aeabi_d2ulz");
2456 set_conv_libfunc (sfix_optab, SImode, SFmode, "__aeabi_f2iz");
2457 set_conv_libfunc (ufix_optab, SImode, SFmode, "__aeabi_f2uiz");
2458 set_conv_libfunc (sfix_optab, DImode, SFmode, "__aeabi_f2lz");
2459 set_conv_libfunc (ufix_optab, DImode, SFmode, "__aeabi_f2ulz");
2461 /* Conversions between floating types. Table 7. */
2462 set_conv_libfunc (trunc_optab, SFmode, DFmode, "__aeabi_d2f");
2463 set_conv_libfunc (sext_optab, DFmode, SFmode, "__aeabi_f2d");
2465 /* Integer to floating-point conversions. Table 8. */
2466 set_conv_libfunc (sfloat_optab, DFmode, SImode, "__aeabi_i2d");
2467 set_conv_libfunc (ufloat_optab, DFmode, SImode, "__aeabi_ui2d");
2468 set_conv_libfunc (sfloat_optab, DFmode, DImode, "__aeabi_l2d");
2469 set_conv_libfunc (ufloat_optab, DFmode, DImode, "__aeabi_ul2d");
2470 set_conv_libfunc (sfloat_optab, SFmode, SImode, "__aeabi_i2f");
2471 set_conv_libfunc (ufloat_optab, SFmode, SImode, "__aeabi_ui2f");
2472 set_conv_libfunc (sfloat_optab, SFmode, DImode, "__aeabi_l2f");
2473 set_conv_libfunc (ufloat_optab, SFmode, DImode, "__aeabi_ul2f");
2475 /* Long long. Table 9. */
2476 set_optab_libfunc (smul_optab, DImode, "__aeabi_lmul");
2477 set_optab_libfunc (sdivmod_optab, DImode, "__aeabi_ldivmod");
2478 set_optab_libfunc (udivmod_optab, DImode, "__aeabi_uldivmod");
2479 set_optab_libfunc (ashl_optab, DImode, "__aeabi_llsl");
2480 set_optab_libfunc (lshr_optab, DImode, "__aeabi_llsr");
2481 set_optab_libfunc (ashr_optab, DImode, "__aeabi_lasr");
2482 set_optab_libfunc (cmp_optab, DImode, "__aeabi_lcmp");
2483 set_optab_libfunc (ucmp_optab, DImode, "__aeabi_ulcmp");
2485 /* Integer (32/32->32) division. \S 4.3.1. */
2486 set_optab_libfunc (sdivmod_optab, SImode, "__aeabi_idivmod");
2487 set_optab_libfunc (udivmod_optab, SImode, "__aeabi_uidivmod");
2489 /* The divmod functions are designed so that they can be used for
2490 plain division, even though they return both the quotient and the
2491 remainder. The quotient is returned in the usual location (i.e.,
2492 r0 for SImode, {r0, r1} for DImode), just as would be expected
2493 for an ordinary division routine. Because the AAPCS calling
2494 conventions specify that all of { r0, r1, r2, r3 } are
2495 callee-saved registers, there is no need to tell the compiler
2496 explicitly that those registers are clobbered by these
2497 routines. */
2498 set_optab_libfunc (sdiv_optab, DImode, "__aeabi_ldivmod");
2499 set_optab_libfunc (udiv_optab, DImode, "__aeabi_uldivmod");
2501 /* For SImode division the ABI provides div-without-mod routines,
2502 which are faster. */
2503 set_optab_libfunc (sdiv_optab, SImode, "__aeabi_idiv");
2504 set_optab_libfunc (udiv_optab, SImode, "__aeabi_uidiv");
2506 /* We don't have mod libcalls. Fortunately gcc knows how to use the
2507 divmod libcalls instead. */
2508 set_optab_libfunc (smod_optab, DImode, NULL);
2509 set_optab_libfunc (umod_optab, DImode, NULL);
2510 set_optab_libfunc (smod_optab, SImode, NULL);
2511 set_optab_libfunc (umod_optab, SImode, NULL);
2513 /* Half-precision float operations. The compiler handles all operations
2514 with NULL libfuncs by converting the SFmode. */
2515 switch (arm_fp16_format)
2517 case ARM_FP16_FORMAT_IEEE:
2518 case ARM_FP16_FORMAT_ALTERNATIVE:
2520 /* Conversions. */
2521 set_conv_libfunc (trunc_optab, HFmode, SFmode,
2522 (arm_fp16_format == ARM_FP16_FORMAT_IEEE
2523 ? "__gnu_f2h_ieee"
2524 : "__gnu_f2h_alternative"));
2525 set_conv_libfunc (sext_optab, SFmode, HFmode,
2526 (arm_fp16_format == ARM_FP16_FORMAT_IEEE
2527 ? "__gnu_h2f_ieee"
2528 : "__gnu_h2f_alternative"));
2530 set_conv_libfunc (trunc_optab, HFmode, DFmode,
2531 (arm_fp16_format == ARM_FP16_FORMAT_IEEE
2532 ? "__gnu_d2h_ieee"
2533 : "__gnu_d2h_alternative"));
2535 /* Arithmetic. */
2536 set_optab_libfunc (add_optab, HFmode, NULL);
2537 set_optab_libfunc (sdiv_optab, HFmode, NULL);
2538 set_optab_libfunc (smul_optab, HFmode, NULL);
2539 set_optab_libfunc (neg_optab, HFmode, NULL);
2540 set_optab_libfunc (sub_optab, HFmode, NULL);
2542 /* Comparisons. */
2543 set_optab_libfunc (eq_optab, HFmode, NULL);
2544 set_optab_libfunc (ne_optab, HFmode, NULL);
2545 set_optab_libfunc (lt_optab, HFmode, NULL);
2546 set_optab_libfunc (le_optab, HFmode, NULL);
2547 set_optab_libfunc (ge_optab, HFmode, NULL);
2548 set_optab_libfunc (gt_optab, HFmode, NULL);
2549 set_optab_libfunc (unord_optab, HFmode, NULL);
2550 break;
2552 default:
2553 break;
2556 /* Use names prefixed with __gnu_ for fixed-point helper functions. */
2558 const arm_fixed_mode_set fixed_arith_modes[] =
2560 { QQmode, "qq" },
2561 { UQQmode, "uqq" },
2562 { HQmode, "hq" },
2563 { UHQmode, "uhq" },
2564 { SQmode, "sq" },
2565 { USQmode, "usq" },
2566 { DQmode, "dq" },
2567 { UDQmode, "udq" },
2568 { TQmode, "tq" },
2569 { UTQmode, "utq" },
2570 { HAmode, "ha" },
2571 { UHAmode, "uha" },
2572 { SAmode, "sa" },
2573 { USAmode, "usa" },
2574 { DAmode, "da" },
2575 { UDAmode, "uda" },
2576 { TAmode, "ta" },
2577 { UTAmode, "uta" }
2579 const arm_fixed_mode_set fixed_conv_modes[] =
2581 { QQmode, "qq" },
2582 { UQQmode, "uqq" },
2583 { HQmode, "hq" },
2584 { UHQmode, "uhq" },
2585 { SQmode, "sq" },
2586 { USQmode, "usq" },
2587 { DQmode, "dq" },
2588 { UDQmode, "udq" },
2589 { TQmode, "tq" },
2590 { UTQmode, "utq" },
2591 { HAmode, "ha" },
2592 { UHAmode, "uha" },
2593 { SAmode, "sa" },
2594 { USAmode, "usa" },
2595 { DAmode, "da" },
2596 { UDAmode, "uda" },
2597 { TAmode, "ta" },
2598 { UTAmode, "uta" },
2599 { QImode, "qi" },
2600 { HImode, "hi" },
2601 { SImode, "si" },
2602 { DImode, "di" },
2603 { TImode, "ti" },
2604 { SFmode, "sf" },
2605 { DFmode, "df" }
2607 unsigned int i, j;
2609 for (i = 0; i < ARRAY_SIZE (fixed_arith_modes); i++)
2611 arm_set_fixed_optab_libfunc (add_optab, fixed_arith_modes[i].mode,
2612 "add", fixed_arith_modes[i].name, 3);
2613 arm_set_fixed_optab_libfunc (ssadd_optab, fixed_arith_modes[i].mode,
2614 "ssadd", fixed_arith_modes[i].name, 3);
2615 arm_set_fixed_optab_libfunc (usadd_optab, fixed_arith_modes[i].mode,
2616 "usadd", fixed_arith_modes[i].name, 3);
2617 arm_set_fixed_optab_libfunc (sub_optab, fixed_arith_modes[i].mode,
2618 "sub", fixed_arith_modes[i].name, 3);
2619 arm_set_fixed_optab_libfunc (sssub_optab, fixed_arith_modes[i].mode,
2620 "sssub", fixed_arith_modes[i].name, 3);
2621 arm_set_fixed_optab_libfunc (ussub_optab, fixed_arith_modes[i].mode,
2622 "ussub", fixed_arith_modes[i].name, 3);
2623 arm_set_fixed_optab_libfunc (smul_optab, fixed_arith_modes[i].mode,
2624 "mul", fixed_arith_modes[i].name, 3);
2625 arm_set_fixed_optab_libfunc (ssmul_optab, fixed_arith_modes[i].mode,
2626 "ssmul", fixed_arith_modes[i].name, 3);
2627 arm_set_fixed_optab_libfunc (usmul_optab, fixed_arith_modes[i].mode,
2628 "usmul", fixed_arith_modes[i].name, 3);
2629 arm_set_fixed_optab_libfunc (sdiv_optab, fixed_arith_modes[i].mode,
2630 "div", fixed_arith_modes[i].name, 3);
2631 arm_set_fixed_optab_libfunc (udiv_optab, fixed_arith_modes[i].mode,
2632 "udiv", fixed_arith_modes[i].name, 3);
2633 arm_set_fixed_optab_libfunc (ssdiv_optab, fixed_arith_modes[i].mode,
2634 "ssdiv", fixed_arith_modes[i].name, 3);
2635 arm_set_fixed_optab_libfunc (usdiv_optab, fixed_arith_modes[i].mode,
2636 "usdiv", fixed_arith_modes[i].name, 3);
2637 arm_set_fixed_optab_libfunc (neg_optab, fixed_arith_modes[i].mode,
2638 "neg", fixed_arith_modes[i].name, 2);
2639 arm_set_fixed_optab_libfunc (ssneg_optab, fixed_arith_modes[i].mode,
2640 "ssneg", fixed_arith_modes[i].name, 2);
2641 arm_set_fixed_optab_libfunc (usneg_optab, fixed_arith_modes[i].mode,
2642 "usneg", fixed_arith_modes[i].name, 2);
2643 arm_set_fixed_optab_libfunc (ashl_optab, fixed_arith_modes[i].mode,
2644 "ashl", fixed_arith_modes[i].name, 3);
2645 arm_set_fixed_optab_libfunc (ashr_optab, fixed_arith_modes[i].mode,
2646 "ashr", fixed_arith_modes[i].name, 3);
2647 arm_set_fixed_optab_libfunc (lshr_optab, fixed_arith_modes[i].mode,
2648 "lshr", fixed_arith_modes[i].name, 3);
2649 arm_set_fixed_optab_libfunc (ssashl_optab, fixed_arith_modes[i].mode,
2650 "ssashl", fixed_arith_modes[i].name, 3);
2651 arm_set_fixed_optab_libfunc (usashl_optab, fixed_arith_modes[i].mode,
2652 "usashl", fixed_arith_modes[i].name, 3);
2653 arm_set_fixed_optab_libfunc (cmp_optab, fixed_arith_modes[i].mode,
2654 "cmp", fixed_arith_modes[i].name, 2);
2657 for (i = 0; i < ARRAY_SIZE (fixed_conv_modes); i++)
2658 for (j = 0; j < ARRAY_SIZE (fixed_conv_modes); j++)
2660 if (i == j
2661 || (!ALL_FIXED_POINT_MODE_P (fixed_conv_modes[i].mode)
2662 && !ALL_FIXED_POINT_MODE_P (fixed_conv_modes[j].mode)))
2663 continue;
2665 arm_set_fixed_conv_libfunc (fract_optab, fixed_conv_modes[i].mode,
2666 fixed_conv_modes[j].mode, "fract",
2667 fixed_conv_modes[i].name,
2668 fixed_conv_modes[j].name);
2669 arm_set_fixed_conv_libfunc (satfract_optab,
2670 fixed_conv_modes[i].mode,
2671 fixed_conv_modes[j].mode, "satfract",
2672 fixed_conv_modes[i].name,
2673 fixed_conv_modes[j].name);
2674 arm_set_fixed_conv_libfunc (fractuns_optab,
2675 fixed_conv_modes[i].mode,
2676 fixed_conv_modes[j].mode, "fractuns",
2677 fixed_conv_modes[i].name,
2678 fixed_conv_modes[j].name);
2679 arm_set_fixed_conv_libfunc (satfractuns_optab,
2680 fixed_conv_modes[i].mode,
2681 fixed_conv_modes[j].mode, "satfractuns",
2682 fixed_conv_modes[i].name,
2683 fixed_conv_modes[j].name);
2687 if (TARGET_AAPCS_BASED)
2688 synchronize_libfunc = init_one_libfunc ("__sync_synchronize");
2691 /* On AAPCS systems, this is the "struct __va_list". */
2692 static GTY(()) tree va_list_type;
2694 /* Return the type to use as __builtin_va_list. */
2695 static tree
2696 arm_build_builtin_va_list (void)
2698 tree va_list_name;
2699 tree ap_field;
2701 if (!TARGET_AAPCS_BASED)
2702 return std_build_builtin_va_list ();
2704 /* AAPCS \S 7.1.4 requires that va_list be a typedef for a type
2705 defined as:
2707 struct __va_list
2709 void *__ap;
2712 The C Library ABI further reinforces this definition in \S
2713 4.1.
2715 We must follow this definition exactly. The structure tag
2716 name is visible in C++ mangled names, and thus forms a part
2717 of the ABI. The field name may be used by people who
2718 #include <stdarg.h>. */
2719 /* Create the type. */
2720 va_list_type = lang_hooks.types.make_type (RECORD_TYPE);
2721 /* Give it the required name. */
2722 va_list_name = build_decl (BUILTINS_LOCATION,
2723 TYPE_DECL,
2724 get_identifier ("__va_list"),
2725 va_list_type);
2726 DECL_ARTIFICIAL (va_list_name) = 1;
2727 TYPE_NAME (va_list_type) = va_list_name;
2728 TYPE_STUB_DECL (va_list_type) = va_list_name;
2729 /* Create the __ap field. */
2730 ap_field = build_decl (BUILTINS_LOCATION,
2731 FIELD_DECL,
2732 get_identifier ("__ap"),
2733 ptr_type_node);
2734 DECL_ARTIFICIAL (ap_field) = 1;
2735 DECL_FIELD_CONTEXT (ap_field) = va_list_type;
2736 TYPE_FIELDS (va_list_type) = ap_field;
2737 /* Compute its layout. */
2738 layout_type (va_list_type);
2740 return va_list_type;
2743 /* Return an expression of type "void *" pointing to the next
2744 available argument in a variable-argument list. VALIST is the
2745 user-level va_list object, of type __builtin_va_list. */
2746 static tree
2747 arm_extract_valist_ptr (tree valist)
2749 if (TREE_TYPE (valist) == error_mark_node)
2750 return error_mark_node;
2752 /* On an AAPCS target, the pointer is stored within "struct
2753 va_list". */
2754 if (TARGET_AAPCS_BASED)
2756 tree ap_field = TYPE_FIELDS (TREE_TYPE (valist));
2757 valist = build3 (COMPONENT_REF, TREE_TYPE (ap_field),
2758 valist, ap_field, NULL_TREE);
2761 return valist;
2764 /* Implement TARGET_EXPAND_BUILTIN_VA_START. */
2765 static void
2766 arm_expand_builtin_va_start (tree valist, rtx nextarg)
2768 valist = arm_extract_valist_ptr (valist);
2769 std_expand_builtin_va_start (valist, nextarg);
2772 /* Implement TARGET_GIMPLIFY_VA_ARG_EXPR. */
2773 static tree
2774 arm_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
2775 gimple_seq *post_p)
2777 valist = arm_extract_valist_ptr (valist);
2778 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
2781 /* Check any incompatible options that the user has specified. */
2782 static void
2783 arm_option_check_internal (struct gcc_options *opts)
2785 int flags = opts->x_target_flags;
2787 /* iWMMXt and NEON are incompatible. */
2788 if (TARGET_IWMMXT
2789 && bitmap_bit_p (arm_active_target.isa, isa_bit_neon))
2790 error ("iWMMXt and NEON are incompatible");
2792 /* Make sure that the processor choice does not conflict with any of the
2793 other command line choices. */
2794 if (TARGET_ARM_P (flags)
2795 && !bitmap_bit_p (arm_active_target.isa, isa_bit_notm))
2796 error ("target CPU does not support ARM mode");
2798 /* TARGET_BACKTRACE cannot be used here as crtl->is_leaf is not set yet. */
2799 if ((TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME) && TARGET_ARM_P (flags))
2800 warning (0, "enabling backtrace support is only meaningful when compiling for the Thumb");
2802 if (TARGET_ARM_P (flags) && TARGET_CALLEE_INTERWORKING)
2803 warning (0, "enabling callee interworking support is only meaningful when compiling for the Thumb");
2805 /* If this target is normally configured to use APCS frames, warn if they
2806 are turned off and debugging is turned on. */
2807 if (TARGET_ARM_P (flags)
2808 && write_symbols != NO_DEBUG
2809 && !TARGET_APCS_FRAME
2810 && (TARGET_DEFAULT & MASK_APCS_FRAME))
2811 warning (0, "-g with -mno-apcs-frame may not give sensible debugging");
2813 /* iWMMXt unsupported under Thumb mode. */
2814 if (TARGET_THUMB_P (flags) && TARGET_IWMMXT)
2815 error ("iWMMXt unsupported under Thumb mode");
2817 if (TARGET_HARD_TP && TARGET_THUMB1_P (flags))
2818 error ("can not use -mtp=cp15 with 16-bit Thumb");
2820 if (TARGET_THUMB_P (flags) && TARGET_VXWORKS_RTP && flag_pic)
2822 error ("RTP PIC is incompatible with Thumb");
2823 flag_pic = 0;
2826 /* We only support -mslow-flash-data on armv7-m targets. */
2827 if (target_slow_flash_data
2828 && ((!(arm_arch7 && !arm_arch_notm) && !arm_arch7em)
2829 || (TARGET_THUMB1_P (flags) || flag_pic || TARGET_NEON)))
2830 error ("-mslow-flash-data only supports non-pic code on armv7-m targets");
2832 /* We only support pure-code on Thumb-2 M-profile targets. */
2833 if (target_pure_code
2834 && (!arm_arch_thumb2 || arm_arch_notm || flag_pic || TARGET_NEON))
2835 error ("-mpure-code only supports non-pic code on armv7-m targets");
2839 /* Recompute the global settings depending on target attribute options. */
2841 static void
2842 arm_option_params_internal (void)
2844 /* If we are not using the default (ARM mode) section anchor offset
2845 ranges, then set the correct ranges now. */
2846 if (TARGET_THUMB1)
2848 /* Thumb-1 LDR instructions cannot have negative offsets.
2849 Permissible positive offset ranges are 5-bit (for byte loads),
2850 6-bit (for halfword loads), or 7-bit (for word loads).
2851 Empirical results suggest a 7-bit anchor range gives the best
2852 overall code size. */
2853 targetm.min_anchor_offset = 0;
2854 targetm.max_anchor_offset = 127;
2856 else if (TARGET_THUMB2)
2858 /* The minimum is set such that the total size of the block
2859 for a particular anchor is 248 + 1 + 4095 bytes, which is
2860 divisible by eight, ensuring natural spacing of anchors. */
2861 targetm.min_anchor_offset = -248;
2862 targetm.max_anchor_offset = 4095;
2864 else
2866 targetm.min_anchor_offset = TARGET_MIN_ANCHOR_OFFSET;
2867 targetm.max_anchor_offset = TARGET_MAX_ANCHOR_OFFSET;
2870 if (optimize_size)
2872 /* If optimizing for size, bump the number of instructions that we
2873 are prepared to conditionally execute (even on a StrongARM). */
2874 max_insns_skipped = 6;
2876 /* For THUMB2, we limit the conditional sequence to one IT block. */
2877 if (TARGET_THUMB2)
2878 max_insns_skipped = arm_restrict_it ? 1 : 4;
2880 else
2881 /* When -mrestrict-it is in use tone down the if-conversion. */
2882 max_insns_skipped = (TARGET_THUMB2 && arm_restrict_it)
2883 ? 1 : current_tune->max_insns_skipped;
2886 /* True if -mflip-thumb should next add an attribute for the default
2887 mode, false if it should next add an attribute for the opposite mode. */
2888 static GTY(()) bool thumb_flipper;
2890 /* Options after initial target override. */
2891 static GTY(()) tree init_optimize;
2893 static void
2894 arm_override_options_after_change_1 (struct gcc_options *opts)
2896 if (opts->x_align_functions <= 0)
2897 opts->x_align_functions = TARGET_THUMB_P (opts->x_target_flags)
2898 && opts->x_optimize_size ? 2 : 4;
2901 /* Implement targetm.override_options_after_change. */
2903 static void
2904 arm_override_options_after_change (void)
2906 arm_configure_build_target (&arm_active_target,
2907 TREE_TARGET_OPTION (target_option_default_node),
2908 &global_options_set, false);
2910 arm_override_options_after_change_1 (&global_options);
2913 static void
2914 arm_option_restore (struct gcc_options *, struct cl_target_option *ptr)
2916 arm_configure_build_target (&arm_active_target, ptr, &global_options_set,
2917 false);
2920 /* Reset options between modes that the user has specified. */
2921 static void
2922 arm_option_override_internal (struct gcc_options *opts,
2923 struct gcc_options *opts_set)
2925 arm_override_options_after_change_1 (opts);
2927 if (TARGET_INTERWORK && !bitmap_bit_p (arm_active_target.isa, isa_bit_thumb))
2929 /* The default is to enable interworking, so this warning message would
2930 be confusing to users who have just compiled with, eg, -march=armv3. */
2931 /* warning (0, "ignoring -minterwork because target CPU does not support THUMB"); */
2932 opts->x_target_flags &= ~MASK_INTERWORK;
2935 if (TARGET_THUMB_P (opts->x_target_flags)
2936 && !bitmap_bit_p (arm_active_target.isa, isa_bit_thumb))
2938 warning (0, "target CPU does not support THUMB instructions");
2939 opts->x_target_flags &= ~MASK_THUMB;
2942 if (TARGET_APCS_FRAME && TARGET_THUMB_P (opts->x_target_flags))
2944 /* warning (0, "ignoring -mapcs-frame because -mthumb was used"); */
2945 opts->x_target_flags &= ~MASK_APCS_FRAME;
2948 /* Callee super interworking implies thumb interworking. Adding
2949 this to the flags here simplifies the logic elsewhere. */
2950 if (TARGET_THUMB_P (opts->x_target_flags) && TARGET_CALLEE_INTERWORKING)
2951 opts->x_target_flags |= MASK_INTERWORK;
2953 /* need to remember initial values so combinaisons of options like
2954 -mflip-thumb -mthumb -fno-schedule-insns work for any attribute. */
2955 cl_optimization *to = TREE_OPTIMIZATION (init_optimize);
2957 if (! opts_set->x_arm_restrict_it)
2958 opts->x_arm_restrict_it = arm_arch8;
2960 /* ARM execution state and M profile don't have [restrict] IT. */
2961 if (!TARGET_THUMB2_P (opts->x_target_flags) || !arm_arch_notm)
2962 opts->x_arm_restrict_it = 0;
2964 /* Enable -munaligned-access by default for
2965 - all ARMv6 architecture-based processors when compiling for a 32-bit ISA
2966 i.e. Thumb2 and ARM state only.
2967 - ARMv7-A, ARMv7-R, and ARMv7-M architecture-based processors.
2968 - ARMv8 architecture-base processors.
2970 Disable -munaligned-access by default for
2971 - all pre-ARMv6 architecture-based processors
2972 - ARMv6-M architecture-based processors
2973 - ARMv8-M Baseline processors. */
2975 if (! opts_set->x_unaligned_access)
2977 opts->x_unaligned_access = (TARGET_32BIT_P (opts->x_target_flags)
2978 && arm_arch6 && (arm_arch_notm || arm_arch7));
2980 else if (opts->x_unaligned_access == 1
2981 && !(arm_arch6 && (arm_arch_notm || arm_arch7)))
2983 warning (0, "target CPU does not support unaligned accesses");
2984 opts->x_unaligned_access = 0;
2987 /* Don't warn since it's on by default in -O2. */
2988 if (TARGET_THUMB1_P (opts->x_target_flags))
2989 opts->x_flag_schedule_insns = 0;
2990 else
2991 opts->x_flag_schedule_insns = to->x_flag_schedule_insns;
2993 /* Disable shrink-wrap when optimizing function for size, since it tends to
2994 generate additional returns. */
2995 if (optimize_function_for_size_p (cfun)
2996 && TARGET_THUMB2_P (opts->x_target_flags))
2997 opts->x_flag_shrink_wrap = false;
2998 else
2999 opts->x_flag_shrink_wrap = to->x_flag_shrink_wrap;
3001 /* In Thumb1 mode, we emit the epilogue in RTL, but the last insn
3002 - epilogue_insns - does not accurately model the corresponding insns
3003 emitted in the asm file. In particular, see the comment in thumb_exit
3004 'Find out how many of the (return) argument registers we can corrupt'.
3005 As a consequence, the epilogue may clobber registers without fipa-ra
3006 finding out about it. Therefore, disable fipa-ra in Thumb1 mode.
3007 TODO: Accurately model clobbers for epilogue_insns and reenable
3008 fipa-ra. */
3009 if (TARGET_THUMB1_P (opts->x_target_flags))
3010 opts->x_flag_ipa_ra = 0;
3011 else
3012 opts->x_flag_ipa_ra = to->x_flag_ipa_ra;
3014 /* Thumb2 inline assembly code should always use unified syntax.
3015 This will apply to ARM and Thumb1 eventually. */
3016 opts->x_inline_asm_unified = TARGET_THUMB2_P (opts->x_target_flags);
3018 #ifdef SUBTARGET_OVERRIDE_INTERNAL_OPTIONS
3019 SUBTARGET_OVERRIDE_INTERNAL_OPTIONS;
3020 #endif
3023 /* Convert a static initializer array of feature bits to sbitmap
3024 representation. */
3025 static void
3026 arm_initialize_isa (sbitmap isa, const enum isa_feature *isa_bits)
3028 bitmap_clear (isa);
3029 while (*isa_bits != isa_nobit)
3030 bitmap_set_bit (isa, *(isa_bits++));
3033 static sbitmap isa_all_fpubits;
3034 static sbitmap isa_quirkbits;
3036 /* Configure a build target TARGET from the user-specified options OPTS and
3037 OPTS_SET. If WARN_COMPATIBLE, emit a diagnostic if both the CPU and
3038 architecture have been specified, but the two are not identical. */
3039 void
3040 arm_configure_build_target (struct arm_build_target *target,
3041 struct cl_target_option *opts,
3042 struct gcc_options *opts_set,
3043 bool warn_compatible)
3045 const struct processors *arm_selected_tune = NULL;
3046 const struct processors *arm_selected_arch = NULL;
3047 const struct processors *arm_selected_cpu = NULL;
3048 const struct arm_fpu_desc *arm_selected_fpu = NULL;
3050 bitmap_clear (target->isa);
3051 target->core_name = NULL;
3052 target->arch_name = NULL;
3054 if (opts_set->x_arm_arch_option)
3055 arm_selected_arch = &all_architectures[opts->x_arm_arch_option];
3057 if (opts_set->x_arm_cpu_option)
3059 arm_selected_cpu = &all_cores[(int) opts->x_arm_cpu_option];
3060 arm_selected_tune = &all_cores[(int) opts->x_arm_cpu_option];
3063 if (opts_set->x_arm_tune_option)
3064 arm_selected_tune = &all_cores[(int) opts->x_arm_tune_option];
3066 if (arm_selected_arch)
3068 arm_initialize_isa (target->isa, arm_selected_arch->isa_bits);
3070 if (arm_selected_cpu)
3072 auto_sbitmap cpu_isa (isa_num_bits);
3074 arm_initialize_isa (cpu_isa, arm_selected_cpu->isa_bits);
3075 bitmap_xor (cpu_isa, cpu_isa, target->isa);
3076 /* Ignore any bits that are quirk bits. */
3077 bitmap_and_compl (cpu_isa, cpu_isa, isa_quirkbits);
3078 /* Ignore (for now) any bits that might be set by -mfpu. */
3079 bitmap_and_compl (cpu_isa, cpu_isa, isa_all_fpubits);
3081 if (!bitmap_empty_p (cpu_isa))
3083 if (warn_compatible)
3084 warning (0, "switch -mcpu=%s conflicts with -march=%s switch",
3085 arm_selected_cpu->name, arm_selected_arch->name);
3086 /* -march wins for code generation.
3087 -mcpu wins for default tuning. */
3088 if (!arm_selected_tune)
3089 arm_selected_tune = arm_selected_cpu;
3091 arm_selected_cpu = arm_selected_arch;
3093 else
3095 /* Architecture and CPU are essentially the same.
3096 Prefer the CPU setting. */
3097 arm_selected_arch = NULL;
3100 target->core_name = arm_selected_cpu->name;
3102 else
3104 /* Pick a CPU based on the architecture. */
3105 arm_selected_cpu = arm_selected_arch;
3106 target->arch_name = arm_selected_arch->name;
3107 /* Note: target->core_name is left unset in this path. */
3110 else if (arm_selected_cpu)
3112 target->core_name = arm_selected_cpu->name;
3113 arm_initialize_isa (target->isa, arm_selected_cpu->isa_bits);
3115 /* If the user did not specify a processor, choose one for them. */
3116 else
3118 const struct processors * sel;
3119 auto_sbitmap sought_isa (isa_num_bits);
3120 bitmap_clear (sought_isa);
3121 auto_sbitmap default_isa (isa_num_bits);
3123 arm_selected_cpu = &all_cores[TARGET_CPU_DEFAULT];
3124 gcc_assert (arm_selected_cpu->name);
3126 /* RWE: All of the selection logic below (to the end of this
3127 'if' clause) looks somewhat suspect. It appears to be mostly
3128 there to support forcing thumb support when the default CPU
3129 does not have thumb (somewhat dubious in terms of what the
3130 user might be expecting). I think it should be removed once
3131 support for the pre-thumb era cores is removed. */
3132 sel = arm_selected_cpu;
3133 arm_initialize_isa (default_isa, sel->isa_bits);
3135 /* Now check to see if the user has specified any command line
3136 switches that require certain abilities from the cpu. */
3138 if (TARGET_INTERWORK || TARGET_THUMB)
3140 bitmap_set_bit (sought_isa, isa_bit_thumb);
3141 bitmap_set_bit (sought_isa, isa_bit_mode32);
3143 /* There are no ARM processors that support both APCS-26 and
3144 interworking. Therefore we forcibly remove MODE26 from
3145 from the isa features here (if it was set), so that the
3146 search below will always be able to find a compatible
3147 processor. */
3148 bitmap_clear_bit (default_isa, isa_bit_mode26);
3151 /* If there are such requirements and the default CPU does not
3152 satisfy them, we need to run over the complete list of
3153 cores looking for one that is satisfactory. */
3154 if (!bitmap_empty_p (sought_isa)
3155 && !bitmap_subset_p (sought_isa, default_isa))
3157 auto_sbitmap candidate_isa (isa_num_bits);
3158 /* We're only interested in a CPU with at least the
3159 capabilities of the default CPU and the required
3160 additional features. */
3161 bitmap_ior (default_isa, default_isa, sought_isa);
3163 /* Try to locate a CPU type that supports all of the abilities
3164 of the default CPU, plus the extra abilities requested by
3165 the user. */
3166 for (sel = all_cores; sel->name != NULL; sel++)
3168 arm_initialize_isa (candidate_isa, sel->isa_bits);
3169 /* An exact match? */
3170 if (bitmap_equal_p (default_isa, candidate_isa))
3171 break;
3174 if (sel->name == NULL)
3176 unsigned current_bit_count = isa_num_bits;
3177 const struct processors * best_fit = NULL;
3179 /* Ideally we would like to issue an error message here
3180 saying that it was not possible to find a CPU compatible
3181 with the default CPU, but which also supports the command
3182 line options specified by the programmer, and so they
3183 ought to use the -mcpu=<name> command line option to
3184 override the default CPU type.
3186 If we cannot find a CPU that has exactly the
3187 characteristics of the default CPU and the given
3188 command line options we scan the array again looking
3189 for a best match. The best match must have at least
3190 the capabilities of the perfect match. */
3191 for (sel = all_cores; sel->name != NULL; sel++)
3193 arm_initialize_isa (candidate_isa, sel->isa_bits);
3195 if (bitmap_subset_p (default_isa, candidate_isa))
3197 unsigned count;
3199 bitmap_and_compl (candidate_isa, candidate_isa,
3200 default_isa);
3201 count = bitmap_popcount (candidate_isa);
3203 if (count < current_bit_count)
3205 best_fit = sel;
3206 current_bit_count = count;
3210 gcc_assert (best_fit);
3211 sel = best_fit;
3214 arm_selected_cpu = sel;
3217 /* Now we know the CPU, we can finally initialize the target
3218 structure. */
3219 target->core_name = arm_selected_cpu->name;
3220 arm_initialize_isa (target->isa, arm_selected_cpu->isa_bits);
3223 gcc_assert (arm_selected_cpu);
3225 if (opts->x_arm_fpu_index != TARGET_FPU_auto)
3227 arm_selected_fpu = &all_fpus[opts->x_arm_fpu_index];
3228 auto_sbitmap fpu_bits (isa_num_bits);
3230 arm_initialize_isa (fpu_bits, arm_selected_fpu->isa_bits);
3231 bitmap_and_compl (target->isa, target->isa, isa_all_fpubits);
3232 bitmap_ior (target->isa, target->isa, fpu_bits);
3234 else if (target->core_name == NULL)
3235 /* To support this we need to be able to parse FPU feature options
3236 from the architecture string. */
3237 sorry ("-mfpu=auto not currently supported without an explicit CPU.");
3239 /* The selected cpu may be an architecture, so lookup tuning by core ID. */
3240 if (!arm_selected_tune)
3241 arm_selected_tune = &all_cores[arm_selected_cpu->core];
3243 /* Finish initializing the target structure. */
3244 target->arch_pp_name = arm_selected_cpu->arch;
3245 target->base_arch = arm_selected_cpu->base_arch;
3246 target->arch_core = arm_selected_cpu->core;
3248 target->tune_flags = arm_selected_tune->tune_flags;
3249 target->tune = arm_selected_tune->tune;
3250 target->tune_core = arm_selected_tune->core;
3253 /* Fix up any incompatible options that the user has specified. */
3254 static void
3255 arm_option_override (void)
3257 static const enum isa_feature fpu_bitlist[] = { ISA_ALL_FPU, isa_nobit };
3258 static const enum isa_feature quirk_bitlist[] = { ISA_ALL_QUIRKS, isa_nobit};
3260 isa_quirkbits = sbitmap_alloc (isa_num_bits);
3261 arm_initialize_isa (isa_quirkbits, quirk_bitlist);
3263 isa_all_fpubits = sbitmap_alloc (isa_num_bits);
3264 arm_initialize_isa (isa_all_fpubits, fpu_bitlist);
3266 arm_active_target.isa = sbitmap_alloc (isa_num_bits);
3268 if (!global_options_set.x_arm_fpu_index)
3270 const char *target_fpu_name;
3271 bool ok;
3272 int fpu_index;
3274 #ifdef FPUTYPE_DEFAULT
3275 target_fpu_name = FPUTYPE_DEFAULT;
3276 #else
3277 target_fpu_name = "vfp";
3278 #endif
3280 ok = opt_enum_arg_to_value (OPT_mfpu_, target_fpu_name, &fpu_index,
3281 CL_TARGET);
3282 gcc_assert (ok);
3283 arm_fpu_index = (enum fpu_type) fpu_index;
3286 /* Create the default target_options structure. We need this early
3287 to configure the overall build target. */
3288 target_option_default_node = target_option_current_node
3289 = build_target_option_node (&global_options);
3291 arm_configure_build_target (&arm_active_target,
3292 TREE_TARGET_OPTION (target_option_default_node),
3293 &global_options_set, true);
3295 #ifdef SUBTARGET_OVERRIDE_OPTIONS
3296 SUBTARGET_OVERRIDE_OPTIONS;
3297 #endif
3299 sprintf (arm_arch_name, "__ARM_ARCH_%s__", arm_active_target.arch_pp_name);
3300 arm_base_arch = arm_active_target.base_arch;
3302 arm_tune = arm_active_target.tune_core;
3303 tune_flags = arm_active_target.tune_flags;
3304 current_tune = arm_active_target.tune;
3306 /* TBD: Dwarf info for apcs frame is not handled yet. */
3307 if (TARGET_APCS_FRAME)
3308 flag_shrink_wrap = false;
3310 /* BPABI targets use linker tricks to allow interworking on cores
3311 without thumb support. */
3312 if (TARGET_INTERWORK
3313 && !TARGET_BPABI
3314 && !bitmap_bit_p (arm_active_target.isa, isa_bit_thumb))
3316 warning (0, "target CPU does not support interworking" );
3317 target_flags &= ~MASK_INTERWORK;
3320 if (TARGET_APCS_STACK && !TARGET_APCS_FRAME)
3322 warning (0, "-mapcs-stack-check incompatible with -mno-apcs-frame");
3323 target_flags |= MASK_APCS_FRAME;
3326 if (TARGET_POKE_FUNCTION_NAME)
3327 target_flags |= MASK_APCS_FRAME;
3329 if (TARGET_APCS_REENT && flag_pic)
3330 error ("-fpic and -mapcs-reent are incompatible");
3332 if (TARGET_APCS_REENT)
3333 warning (0, "APCS reentrant code not supported. Ignored");
3335 /* Initialize boolean versions of the architectural flags, for use
3336 in the arm.md file. */
3337 arm_arch3m = bitmap_bit_p (arm_active_target.isa, isa_bit_ARMv3m);
3338 arm_arch4 = bitmap_bit_p (arm_active_target.isa, isa_bit_ARMv4);
3339 arm_arch4t = arm_arch4 && bitmap_bit_p (arm_active_target.isa, isa_bit_thumb);
3340 arm_arch5 = bitmap_bit_p (arm_active_target.isa, isa_bit_ARMv5);
3341 arm_arch5e = bitmap_bit_p (arm_active_target.isa, isa_bit_ARMv5e);
3342 arm_arch5te = arm_arch5e
3343 && bitmap_bit_p (arm_active_target.isa, isa_bit_thumb);
3344 arm_arch6 = bitmap_bit_p (arm_active_target.isa, isa_bit_ARMv6);
3345 arm_arch6k = bitmap_bit_p (arm_active_target.isa, isa_bit_ARMv6k);
3346 arm_arch_notm = bitmap_bit_p (arm_active_target.isa, isa_bit_notm);
3347 arm_arch6m = arm_arch6 && !arm_arch_notm;
3348 arm_arch7 = bitmap_bit_p (arm_active_target.isa, isa_bit_ARMv7);
3349 arm_arch7em = bitmap_bit_p (arm_active_target.isa, isa_bit_ARMv7em);
3350 arm_arch8 = bitmap_bit_p (arm_active_target.isa, isa_bit_ARMv8);
3351 arm_arch8_1 = bitmap_bit_p (arm_active_target.isa, isa_bit_ARMv8_1);
3352 arm_arch8_2 = bitmap_bit_p (arm_active_target.isa, isa_bit_ARMv8_2);
3353 arm_arch_thumb1 = bitmap_bit_p (arm_active_target.isa, isa_bit_thumb);
3354 arm_arch_thumb2 = bitmap_bit_p (arm_active_target.isa, isa_bit_thumb2);
3355 arm_arch_xscale = bitmap_bit_p (arm_active_target.isa, isa_bit_xscale);
3356 arm_arch_iwmmxt = bitmap_bit_p (arm_active_target.isa, isa_bit_iwmmxt);
3357 arm_arch_iwmmxt2 = bitmap_bit_p (arm_active_target.isa, isa_bit_iwmmxt2);
3358 arm_arch_thumb_hwdiv = bitmap_bit_p (arm_active_target.isa, isa_bit_tdiv);
3359 arm_arch_arm_hwdiv = bitmap_bit_p (arm_active_target.isa, isa_bit_adiv);
3360 arm_arch_crc = bitmap_bit_p (arm_active_target.isa, isa_bit_crc32);
3361 arm_arch_cmse = bitmap_bit_p (arm_active_target.isa, isa_bit_cmse);
3362 arm_fp16_inst = bitmap_bit_p (arm_active_target.isa, isa_bit_fp16);
3363 arm_arch7ve
3364 = (arm_arch6k && arm_arch7 && arm_arch_thumb_hwdiv && arm_arch_arm_hwdiv);
3365 if (arm_fp16_inst)
3367 if (arm_fp16_format == ARM_FP16_FORMAT_ALTERNATIVE)
3368 error ("selected fp16 options are incompatible.");
3369 arm_fp16_format = ARM_FP16_FORMAT_IEEE;
3373 /* Set up some tuning parameters. */
3374 arm_ld_sched = (tune_flags & TF_LDSCHED) != 0;
3375 arm_tune_strongarm = (tune_flags & TF_STRONG) != 0;
3376 arm_tune_wbuf = (tune_flags & TF_WBUF) != 0;
3377 arm_tune_xscale = (tune_flags & TF_XSCALE) != 0;
3378 arm_tune_cortex_a9 = (arm_tune == TARGET_CPU_cortexa9) != 0;
3379 arm_m_profile_small_mul = (tune_flags & TF_SMALLMUL) != 0;
3381 /* And finally, set up some quirks. */
3382 arm_arch_no_volatile_ce
3383 = bitmap_bit_p (arm_active_target.isa, isa_quirk_no_volatile_ce);
3384 arm_arch6kz
3385 = arm_arch6k && bitmap_bit_p (arm_active_target.isa, isa_quirk_ARMv6kz);
3387 /* V5 code we generate is completely interworking capable, so we turn off
3388 TARGET_INTERWORK here to avoid many tests later on. */
3390 /* XXX However, we must pass the right pre-processor defines to CPP
3391 or GLD can get confused. This is a hack. */
3392 if (TARGET_INTERWORK)
3393 arm_cpp_interwork = 1;
3395 if (arm_arch5)
3396 target_flags &= ~MASK_INTERWORK;
3398 if (TARGET_IWMMXT && !ARM_DOUBLEWORD_ALIGN)
3399 error ("iwmmxt requires an AAPCS compatible ABI for proper operation");
3401 if (TARGET_IWMMXT_ABI && !TARGET_IWMMXT)
3402 error ("iwmmxt abi requires an iwmmxt capable cpu");
3404 /* If soft-float is specified then don't use FPU. */
3405 if (TARGET_SOFT_FLOAT)
3406 arm_fpu_attr = FPU_NONE;
3407 else
3408 arm_fpu_attr = FPU_VFP;
3410 if (TARGET_AAPCS_BASED)
3412 if (TARGET_CALLER_INTERWORKING)
3413 error ("AAPCS does not support -mcaller-super-interworking");
3414 else
3415 if (TARGET_CALLEE_INTERWORKING)
3416 error ("AAPCS does not support -mcallee-super-interworking");
3419 /* __fp16 support currently assumes the core has ldrh. */
3420 if (!arm_arch4 && arm_fp16_format != ARM_FP16_FORMAT_NONE)
3421 sorry ("__fp16 and no ldrh");
3423 if (TARGET_AAPCS_BASED)
3425 if (arm_abi == ARM_ABI_IWMMXT)
3426 arm_pcs_default = ARM_PCS_AAPCS_IWMMXT;
3427 else if (arm_float_abi == ARM_FLOAT_ABI_HARD
3428 && TARGET_HARD_FLOAT)
3430 arm_pcs_default = ARM_PCS_AAPCS_VFP;
3431 if (!bitmap_bit_p (arm_active_target.isa, isa_bit_VFPv2))
3432 error ("-mfloat-abi=hard: selected processor lacks an FPU");
3434 else
3435 arm_pcs_default = ARM_PCS_AAPCS;
3437 else
3439 if (arm_float_abi == ARM_FLOAT_ABI_HARD)
3440 sorry ("-mfloat-abi=hard and VFP");
3442 if (arm_abi == ARM_ABI_APCS)
3443 arm_pcs_default = ARM_PCS_APCS;
3444 else
3445 arm_pcs_default = ARM_PCS_ATPCS;
3448 /* For arm2/3 there is no need to do any scheduling if we are doing
3449 software floating-point. */
3450 if (TARGET_SOFT_FLOAT && (tune_flags & TF_NO_MODE32))
3451 flag_schedule_insns = flag_schedule_insns_after_reload = 0;
3453 /* Use the cp15 method if it is available. */
3454 if (target_thread_pointer == TP_AUTO)
3456 if (arm_arch6k && !TARGET_THUMB1)
3457 target_thread_pointer = TP_CP15;
3458 else
3459 target_thread_pointer = TP_SOFT;
3462 /* Override the default structure alignment for AAPCS ABI. */
3463 if (!global_options_set.x_arm_structure_size_boundary)
3465 if (TARGET_AAPCS_BASED)
3466 arm_structure_size_boundary = 8;
3468 else
3470 if (arm_structure_size_boundary != 8
3471 && arm_structure_size_boundary != 32
3472 && !(ARM_DOUBLEWORD_ALIGN && arm_structure_size_boundary == 64))
3474 if (ARM_DOUBLEWORD_ALIGN)
3475 warning (0,
3476 "structure size boundary can only be set to 8, 32 or 64");
3477 else
3478 warning (0, "structure size boundary can only be set to 8 or 32");
3479 arm_structure_size_boundary
3480 = (TARGET_AAPCS_BASED ? 8 : DEFAULT_STRUCTURE_SIZE_BOUNDARY);
3484 if (TARGET_VXWORKS_RTP)
3486 if (!global_options_set.x_arm_pic_data_is_text_relative)
3487 arm_pic_data_is_text_relative = 0;
3489 else if (flag_pic
3490 && !arm_pic_data_is_text_relative
3491 && !(global_options_set.x_target_flags & MASK_SINGLE_PIC_BASE))
3492 /* When text & data segments don't have a fixed displacement, the
3493 intended use is with a single, read only, pic base register.
3494 Unless the user explicitly requested not to do that, set
3495 it. */
3496 target_flags |= MASK_SINGLE_PIC_BASE;
3498 /* If stack checking is disabled, we can use r10 as the PIC register,
3499 which keeps r9 available. The EABI specifies r9 as the PIC register. */
3500 if (flag_pic && TARGET_SINGLE_PIC_BASE)
3502 if (TARGET_VXWORKS_RTP)
3503 warning (0, "RTP PIC is incompatible with -msingle-pic-base");
3504 arm_pic_register = (TARGET_APCS_STACK || TARGET_AAPCS_BASED) ? 9 : 10;
3507 if (flag_pic && TARGET_VXWORKS_RTP)
3508 arm_pic_register = 9;
3510 if (arm_pic_register_string != NULL)
3512 int pic_register = decode_reg_name (arm_pic_register_string);
3514 if (!flag_pic)
3515 warning (0, "-mpic-register= is useless without -fpic");
3517 /* Prevent the user from choosing an obviously stupid PIC register. */
3518 else if (pic_register < 0 || call_used_regs[pic_register]
3519 || pic_register == HARD_FRAME_POINTER_REGNUM
3520 || pic_register == STACK_POINTER_REGNUM
3521 || pic_register >= PC_REGNUM
3522 || (TARGET_VXWORKS_RTP
3523 && (unsigned int) pic_register != arm_pic_register))
3524 error ("unable to use '%s' for PIC register", arm_pic_register_string);
3525 else
3526 arm_pic_register = pic_register;
3529 /* Enable -mfix-cortex-m3-ldrd by default for Cortex-M3 cores. */
3530 if (fix_cm3_ldrd == 2)
3532 if (bitmap_bit_p (arm_active_target.isa, isa_quirk_cm3_ldrd))
3533 fix_cm3_ldrd = 1;
3534 else
3535 fix_cm3_ldrd = 0;
3538 /* Hot/Cold partitioning is not currently supported, since we can't
3539 handle literal pool placement in that case. */
3540 if (flag_reorder_blocks_and_partition)
3542 inform (input_location,
3543 "-freorder-blocks-and-partition not supported on this architecture");
3544 flag_reorder_blocks_and_partition = 0;
3545 flag_reorder_blocks = 1;
3548 if (flag_pic)
3549 /* Hoisting PIC address calculations more aggressively provides a small,
3550 but measurable, size reduction for PIC code. Therefore, we decrease
3551 the bar for unrestricted expression hoisting to the cost of PIC address
3552 calculation, which is 2 instructions. */
3553 maybe_set_param_value (PARAM_GCSE_UNRESTRICTED_COST, 2,
3554 global_options.x_param_values,
3555 global_options_set.x_param_values);
3557 /* ARM EABI defaults to strict volatile bitfields. */
3558 if (TARGET_AAPCS_BASED && flag_strict_volatile_bitfields < 0
3559 && abi_version_at_least(2))
3560 flag_strict_volatile_bitfields = 1;
3562 /* Enable sw prefetching at -O3 for CPUS that have prefetch, and we
3563 have deemed it beneficial (signified by setting
3564 prefetch.num_slots to 1 or more). */
3565 if (flag_prefetch_loop_arrays < 0
3566 && HAVE_prefetch
3567 && optimize >= 3
3568 && current_tune->prefetch.num_slots > 0)
3569 flag_prefetch_loop_arrays = 1;
3571 /* Set up parameters to be used in prefetching algorithm. Do not
3572 override the defaults unless we are tuning for a core we have
3573 researched values for. */
3574 if (current_tune->prefetch.num_slots > 0)
3575 maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES,
3576 current_tune->prefetch.num_slots,
3577 global_options.x_param_values,
3578 global_options_set.x_param_values);
3579 if (current_tune->prefetch.l1_cache_line_size >= 0)
3580 maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE,
3581 current_tune->prefetch.l1_cache_line_size,
3582 global_options.x_param_values,
3583 global_options_set.x_param_values);
3584 if (current_tune->prefetch.l1_cache_size >= 0)
3585 maybe_set_param_value (PARAM_L1_CACHE_SIZE,
3586 current_tune->prefetch.l1_cache_size,
3587 global_options.x_param_values,
3588 global_options_set.x_param_values);
3590 /* Use Neon to perform 64-bits operations rather than core
3591 registers. */
3592 prefer_neon_for_64bits = current_tune->prefer_neon_for_64bits;
3593 if (use_neon_for_64bits == 1)
3594 prefer_neon_for_64bits = true;
3596 /* Use the alternative scheduling-pressure algorithm by default. */
3597 maybe_set_param_value (PARAM_SCHED_PRESSURE_ALGORITHM, SCHED_PRESSURE_MODEL,
3598 global_options.x_param_values,
3599 global_options_set.x_param_values);
3601 /* Look through ready list and all of queue for instructions
3602 relevant for L2 auto-prefetcher. */
3603 int param_sched_autopref_queue_depth;
3605 switch (current_tune->sched_autopref)
3607 case tune_params::SCHED_AUTOPREF_OFF:
3608 param_sched_autopref_queue_depth = -1;
3609 break;
3611 case tune_params::SCHED_AUTOPREF_RANK:
3612 param_sched_autopref_queue_depth = 0;
3613 break;
3615 case tune_params::SCHED_AUTOPREF_FULL:
3616 param_sched_autopref_queue_depth = max_insn_queue_index + 1;
3617 break;
3619 default:
3620 gcc_unreachable ();
3623 maybe_set_param_value (PARAM_SCHED_AUTOPREF_QUEUE_DEPTH,
3624 param_sched_autopref_queue_depth,
3625 global_options.x_param_values,
3626 global_options_set.x_param_values);
3628 /* Currently, for slow flash data, we just disable literal pools. We also
3629 disable it for pure-code. */
3630 if (target_slow_flash_data || target_pure_code)
3631 arm_disable_literal_pool = true;
3633 if (use_cmse && !arm_arch_cmse)
3634 error ("target CPU does not support ARMv8-M Security Extensions");
3636 /* Disable scheduling fusion by default if it's not armv7 processor
3637 or doesn't prefer ldrd/strd. */
3638 if (flag_schedule_fusion == 2
3639 && (!arm_arch7 || !current_tune->prefer_ldrd_strd))
3640 flag_schedule_fusion = 0;
3642 /* Need to remember initial options before they are overriden. */
3643 init_optimize = build_optimization_node (&global_options);
3645 arm_option_override_internal (&global_options, &global_options_set);
3646 arm_option_check_internal (&global_options);
3647 arm_option_params_internal ();
3649 /* Resynchronize the saved target options. */
3650 cl_target_option_save (TREE_TARGET_OPTION (target_option_default_node),
3651 &global_options);
3652 /* Register global variables with the garbage collector. */
3653 arm_add_gc_roots ();
3655 /* Init initial mode for testing. */
3656 thumb_flipper = TARGET_THUMB;
3659 static void
3660 arm_add_gc_roots (void)
3662 gcc_obstack_init(&minipool_obstack);
3663 minipool_startobj = (char *) obstack_alloc (&minipool_obstack, 0);
3666 /* A table of known ARM exception types.
3667 For use with the interrupt function attribute. */
3669 typedef struct
3671 const char *const arg;
3672 const unsigned long return_value;
3674 isr_attribute_arg;
3676 static const isr_attribute_arg isr_attribute_args [] =
3678 { "IRQ", ARM_FT_ISR },
3679 { "irq", ARM_FT_ISR },
3680 { "FIQ", ARM_FT_FIQ },
3681 { "fiq", ARM_FT_FIQ },
3682 { "ABORT", ARM_FT_ISR },
3683 { "abort", ARM_FT_ISR },
3684 { "ABORT", ARM_FT_ISR },
3685 { "abort", ARM_FT_ISR },
3686 { "UNDEF", ARM_FT_EXCEPTION },
3687 { "undef", ARM_FT_EXCEPTION },
3688 { "SWI", ARM_FT_EXCEPTION },
3689 { "swi", ARM_FT_EXCEPTION },
3690 { NULL, ARM_FT_NORMAL }
3693 /* Returns the (interrupt) function type of the current
3694 function, or ARM_FT_UNKNOWN if the type cannot be determined. */
3696 static unsigned long
3697 arm_isr_value (tree argument)
3699 const isr_attribute_arg * ptr;
3700 const char * arg;
3702 if (!arm_arch_notm)
3703 return ARM_FT_NORMAL | ARM_FT_STACKALIGN;
3705 /* No argument - default to IRQ. */
3706 if (argument == NULL_TREE)
3707 return ARM_FT_ISR;
3709 /* Get the value of the argument. */
3710 if (TREE_VALUE (argument) == NULL_TREE
3711 || TREE_CODE (TREE_VALUE (argument)) != STRING_CST)
3712 return ARM_FT_UNKNOWN;
3714 arg = TREE_STRING_POINTER (TREE_VALUE (argument));
3716 /* Check it against the list of known arguments. */
3717 for (ptr = isr_attribute_args; ptr->arg != NULL; ptr++)
3718 if (streq (arg, ptr->arg))
3719 return ptr->return_value;
3721 /* An unrecognized interrupt type. */
3722 return ARM_FT_UNKNOWN;
3725 /* Computes the type of the current function. */
3727 static unsigned long
3728 arm_compute_func_type (void)
3730 unsigned long type = ARM_FT_UNKNOWN;
3731 tree a;
3732 tree attr;
3734 gcc_assert (TREE_CODE (current_function_decl) == FUNCTION_DECL);
3736 /* Decide if the current function is volatile. Such functions
3737 never return, and many memory cycles can be saved by not storing
3738 register values that will never be needed again. This optimization
3739 was added to speed up context switching in a kernel application. */
3740 if (optimize > 0
3741 && (TREE_NOTHROW (current_function_decl)
3742 || !(flag_unwind_tables
3743 || (flag_exceptions
3744 && arm_except_unwind_info (&global_options) != UI_SJLJ)))
3745 && TREE_THIS_VOLATILE (current_function_decl))
3746 type |= ARM_FT_VOLATILE;
3748 if (cfun->static_chain_decl != NULL)
3749 type |= ARM_FT_NESTED;
3751 attr = DECL_ATTRIBUTES (current_function_decl);
3753 a = lookup_attribute ("naked", attr);
3754 if (a != NULL_TREE)
3755 type |= ARM_FT_NAKED;
3757 a = lookup_attribute ("isr", attr);
3758 if (a == NULL_TREE)
3759 a = lookup_attribute ("interrupt", attr);
3761 if (a == NULL_TREE)
3762 type |= TARGET_INTERWORK ? ARM_FT_INTERWORKED : ARM_FT_NORMAL;
3763 else
3764 type |= arm_isr_value (TREE_VALUE (a));
3766 if (lookup_attribute ("cmse_nonsecure_entry", attr))
3767 type |= ARM_FT_CMSE_ENTRY;
3769 return type;
3772 /* Returns the type of the current function. */
3774 unsigned long
3775 arm_current_func_type (void)
3777 if (ARM_FUNC_TYPE (cfun->machine->func_type) == ARM_FT_UNKNOWN)
3778 cfun->machine->func_type = arm_compute_func_type ();
3780 return cfun->machine->func_type;
3783 bool
3784 arm_allocate_stack_slots_for_args (void)
3786 /* Naked functions should not allocate stack slots for arguments. */
3787 return !IS_NAKED (arm_current_func_type ());
3790 static bool
3791 arm_warn_func_return (tree decl)
3793 /* Naked functions are implemented entirely in assembly, including the
3794 return sequence, so suppress warnings about this. */
3795 return lookup_attribute ("naked", DECL_ATTRIBUTES (decl)) == NULL_TREE;
3799 /* Output assembler code for a block containing the constant parts
3800 of a trampoline, leaving space for the variable parts.
3802 On the ARM, (if r8 is the static chain regnum, and remembering that
3803 referencing pc adds an offset of 8) the trampoline looks like:
3804 ldr r8, [pc, #0]
3805 ldr pc, [pc]
3806 .word static chain value
3807 .word function's address
3808 XXX FIXME: When the trampoline returns, r8 will be clobbered. */
3810 static void
3811 arm_asm_trampoline_template (FILE *f)
3813 fprintf (f, "\t.syntax unified\n");
3815 if (TARGET_ARM)
3817 fprintf (f, "\t.arm\n");
3818 asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", STATIC_CHAIN_REGNUM, PC_REGNUM);
3819 asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", PC_REGNUM, PC_REGNUM);
3821 else if (TARGET_THUMB2)
3823 fprintf (f, "\t.thumb\n");
3824 /* The Thumb-2 trampoline is similar to the arm implementation.
3825 Unlike 16-bit Thumb, we enter the stub in thumb mode. */
3826 asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n",
3827 STATIC_CHAIN_REGNUM, PC_REGNUM);
3828 asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n", PC_REGNUM, PC_REGNUM);
3830 else
3832 ASM_OUTPUT_ALIGN (f, 2);
3833 fprintf (f, "\t.code\t16\n");
3834 fprintf (f, ".Ltrampoline_start:\n");
3835 asm_fprintf (f, "\tpush\t{r0, r1}\n");
3836 asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
3837 asm_fprintf (f, "\tmov\t%r, r0\n", STATIC_CHAIN_REGNUM);
3838 asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
3839 asm_fprintf (f, "\tstr\tr0, [%r, #4]\n", SP_REGNUM);
3840 asm_fprintf (f, "\tpop\t{r0, %r}\n", PC_REGNUM);
3842 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
3843 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
3846 /* Emit RTL insns to initialize the variable parts of a trampoline. */
3848 static void
3849 arm_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
3851 rtx fnaddr, mem, a_tramp;
3853 emit_block_move (m_tramp, assemble_trampoline_template (),
3854 GEN_INT (TRAMPOLINE_SIZE), BLOCK_OP_NORMAL);
3856 mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 8 : 12);
3857 emit_move_insn (mem, chain_value);
3859 mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 12 : 16);
3860 fnaddr = XEXP (DECL_RTL (fndecl), 0);
3861 emit_move_insn (mem, fnaddr);
3863 a_tramp = XEXP (m_tramp, 0);
3864 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__clear_cache"),
3865 LCT_NORMAL, VOIDmode, 2, a_tramp, Pmode,
3866 plus_constant (Pmode, a_tramp, TRAMPOLINE_SIZE), Pmode);
3869 /* Thumb trampolines should be entered in thumb mode, so set
3870 the bottom bit of the address. */
3872 static rtx
3873 arm_trampoline_adjust_address (rtx addr)
3875 if (TARGET_THUMB)
3876 addr = expand_simple_binop (Pmode, IOR, addr, const1_rtx,
3877 NULL, 0, OPTAB_LIB_WIDEN);
3878 return addr;
3881 /* Return 1 if it is possible to return using a single instruction.
3882 If SIBLING is non-null, this is a test for a return before a sibling
3883 call. SIBLING is the call insn, so we can examine its register usage. */
3886 use_return_insn (int iscond, rtx sibling)
3888 int regno;
3889 unsigned int func_type;
3890 unsigned long saved_int_regs;
3891 unsigned HOST_WIDE_INT stack_adjust;
3892 arm_stack_offsets *offsets;
3894 /* Never use a return instruction before reload has run. */
3895 if (!reload_completed)
3896 return 0;
3898 func_type = arm_current_func_type ();
3900 /* Naked, volatile and stack alignment functions need special
3901 consideration. */
3902 if (func_type & (ARM_FT_VOLATILE | ARM_FT_NAKED | ARM_FT_STACKALIGN))
3903 return 0;
3905 /* So do interrupt functions that use the frame pointer and Thumb
3906 interrupt functions. */
3907 if (IS_INTERRUPT (func_type) && (frame_pointer_needed || TARGET_THUMB))
3908 return 0;
3910 if (TARGET_LDRD && current_tune->prefer_ldrd_strd
3911 && !optimize_function_for_size_p (cfun))
3912 return 0;
3914 offsets = arm_get_frame_offsets ();
3915 stack_adjust = offsets->outgoing_args - offsets->saved_regs;
3917 /* As do variadic functions. */
3918 if (crtl->args.pretend_args_size
3919 || cfun->machine->uses_anonymous_args
3920 /* Or if the function calls __builtin_eh_return () */
3921 || crtl->calls_eh_return
3922 /* Or if the function calls alloca */
3923 || cfun->calls_alloca
3924 /* Or if there is a stack adjustment. However, if the stack pointer
3925 is saved on the stack, we can use a pre-incrementing stack load. */
3926 || !(stack_adjust == 0 || (TARGET_APCS_FRAME && frame_pointer_needed
3927 && stack_adjust == 4))
3928 /* Or if the static chain register was saved above the frame, under the
3929 assumption that the stack pointer isn't saved on the stack. */
3930 || (!(TARGET_APCS_FRAME && frame_pointer_needed)
3931 && arm_compute_static_chain_stack_bytes() != 0))
3932 return 0;
3934 saved_int_regs = offsets->saved_regs_mask;
3936 /* Unfortunately, the insn
3938 ldmib sp, {..., sp, ...}
3940 triggers a bug on most SA-110 based devices, such that the stack
3941 pointer won't be correctly restored if the instruction takes a
3942 page fault. We work around this problem by popping r3 along with
3943 the other registers, since that is never slower than executing
3944 another instruction.
3946 We test for !arm_arch5 here, because code for any architecture
3947 less than this could potentially be run on one of the buggy
3948 chips. */
3949 if (stack_adjust == 4 && !arm_arch5 && TARGET_ARM)
3951 /* Validate that r3 is a call-clobbered register (always true in
3952 the default abi) ... */
3953 if (!call_used_regs[3])
3954 return 0;
3956 /* ... that it isn't being used for a return value ... */
3957 if (arm_size_return_regs () >= (4 * UNITS_PER_WORD))
3958 return 0;
3960 /* ... or for a tail-call argument ... */
3961 if (sibling)
3963 gcc_assert (CALL_P (sibling));
3965 if (find_regno_fusage (sibling, USE, 3))
3966 return 0;
3969 /* ... and that there are no call-saved registers in r0-r2
3970 (always true in the default ABI). */
3971 if (saved_int_regs & 0x7)
3972 return 0;
3975 /* Can't be done if interworking with Thumb, and any registers have been
3976 stacked. */
3977 if (TARGET_INTERWORK && saved_int_regs != 0 && !IS_INTERRUPT(func_type))
3978 return 0;
3980 /* On StrongARM, conditional returns are expensive if they aren't
3981 taken and multiple registers have been stacked. */
3982 if (iscond && arm_tune_strongarm)
3984 /* Conditional return when just the LR is stored is a simple
3985 conditional-load instruction, that's not expensive. */
3986 if (saved_int_regs != 0 && saved_int_regs != (1 << LR_REGNUM))
3987 return 0;
3989 if (flag_pic
3990 && arm_pic_register != INVALID_REGNUM
3991 && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM))
3992 return 0;
3995 /* ARMv8-M nonsecure entry function need to use bxns to return and thus need
3996 several instructions if anything needs to be popped. */
3997 if (saved_int_regs && IS_CMSE_ENTRY (func_type))
3998 return 0;
4000 /* If there are saved registers but the LR isn't saved, then we need
4001 two instructions for the return. */
4002 if (saved_int_regs && !(saved_int_regs & (1 << LR_REGNUM)))
4003 return 0;
4005 /* Can't be done if any of the VFP regs are pushed,
4006 since this also requires an insn. */
4007 if (TARGET_HARD_FLOAT)
4008 for (regno = FIRST_VFP_REGNUM; regno <= LAST_VFP_REGNUM; regno++)
4009 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
4010 return 0;
4012 if (TARGET_REALLY_IWMMXT)
4013 for (regno = FIRST_IWMMXT_REGNUM; regno <= LAST_IWMMXT_REGNUM; regno++)
4014 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
4015 return 0;
4017 return 1;
4020 /* Return TRUE if we should try to use a simple_return insn, i.e. perform
4021 shrink-wrapping if possible. This is the case if we need to emit a
4022 prologue, which we can test by looking at the offsets. */
4023 bool
4024 use_simple_return_p (void)
4026 arm_stack_offsets *offsets;
4028 offsets = arm_get_frame_offsets ();
4029 return offsets->outgoing_args != 0;
4032 /* Return TRUE if int I is a valid immediate ARM constant. */
4035 const_ok_for_arm (HOST_WIDE_INT i)
4037 int lowbit;
4039 /* For machines with >32 bit HOST_WIDE_INT, the bits above bit 31 must
4040 be all zero, or all one. */
4041 if ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff) != 0
4042 && ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff)
4043 != ((~(unsigned HOST_WIDE_INT) 0)
4044 & ~(unsigned HOST_WIDE_INT) 0xffffffff)))
4045 return FALSE;
4047 i &= (unsigned HOST_WIDE_INT) 0xffffffff;
4049 /* Fast return for 0 and small values. We must do this for zero, since
4050 the code below can't handle that one case. */
4051 if ((i & ~(unsigned HOST_WIDE_INT) 0xff) == 0)
4052 return TRUE;
4054 /* Get the number of trailing zeros. */
4055 lowbit = ffs((int) i) - 1;
4057 /* Only even shifts are allowed in ARM mode so round down to the
4058 nearest even number. */
4059 if (TARGET_ARM)
4060 lowbit &= ~1;
4062 if ((i & ~(((unsigned HOST_WIDE_INT) 0xff) << lowbit)) == 0)
4063 return TRUE;
4065 if (TARGET_ARM)
4067 /* Allow rotated constants in ARM mode. */
4068 if (lowbit <= 4
4069 && ((i & ~0xc000003f) == 0
4070 || (i & ~0xf000000f) == 0
4071 || (i & ~0xfc000003) == 0))
4072 return TRUE;
4074 else
4076 HOST_WIDE_INT v;
4078 /* Allow repeated patterns 0x00XY00XY or 0xXYXYXYXY. */
4079 v = i & 0xff;
4080 v |= v << 16;
4081 if (i == v || i == (v | (v << 8)))
4082 return TRUE;
4084 /* Allow repeated pattern 0xXY00XY00. */
4085 v = i & 0xff00;
4086 v |= v << 16;
4087 if (i == v)
4088 return TRUE;
4091 return FALSE;
4094 /* Return true if I is a valid constant for the operation CODE. */
4096 const_ok_for_op (HOST_WIDE_INT i, enum rtx_code code)
4098 if (const_ok_for_arm (i))
4099 return 1;
4101 switch (code)
4103 case SET:
4104 /* See if we can use movw. */
4105 if (TARGET_HAVE_MOVT && (i & 0xffff0000) == 0)
4106 return 1;
4107 else
4108 /* Otherwise, try mvn. */
4109 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
4111 case PLUS:
4112 /* See if we can use addw or subw. */
4113 if (TARGET_THUMB2
4114 && ((i & 0xfffff000) == 0
4115 || ((-i) & 0xfffff000) == 0))
4116 return 1;
4117 /* Fall through. */
4118 case COMPARE:
4119 case EQ:
4120 case NE:
4121 case GT:
4122 case LE:
4123 case LT:
4124 case GE:
4125 case GEU:
4126 case LTU:
4127 case GTU:
4128 case LEU:
4129 case UNORDERED:
4130 case ORDERED:
4131 case UNEQ:
4132 case UNGE:
4133 case UNLT:
4134 case UNGT:
4135 case UNLE:
4136 return const_ok_for_arm (ARM_SIGN_EXTEND (-i));
4138 case MINUS: /* Should only occur with (MINUS I reg) => rsb */
4139 case XOR:
4140 return 0;
4142 case IOR:
4143 if (TARGET_THUMB2)
4144 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
4145 return 0;
4147 case AND:
4148 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
4150 default:
4151 gcc_unreachable ();
4155 /* Return true if I is a valid di mode constant for the operation CODE. */
4157 const_ok_for_dimode_op (HOST_WIDE_INT i, enum rtx_code code)
4159 HOST_WIDE_INT hi_val = (i >> 32) & 0xFFFFFFFF;
4160 HOST_WIDE_INT lo_val = i & 0xFFFFFFFF;
4161 rtx hi = GEN_INT (hi_val);
4162 rtx lo = GEN_INT (lo_val);
4164 if (TARGET_THUMB1)
4165 return 0;
4167 switch (code)
4169 case AND:
4170 case IOR:
4171 case XOR:
4172 return (const_ok_for_op (hi_val, code) || hi_val == 0xFFFFFFFF)
4173 && (const_ok_for_op (lo_val, code) || lo_val == 0xFFFFFFFF);
4174 case PLUS:
4175 return arm_not_operand (hi, SImode) && arm_add_operand (lo, SImode);
4177 default:
4178 return 0;
4182 /* Emit a sequence of insns to handle a large constant.
4183 CODE is the code of the operation required, it can be any of SET, PLUS,
4184 IOR, AND, XOR, MINUS;
4185 MODE is the mode in which the operation is being performed;
4186 VAL is the integer to operate on;
4187 SOURCE is the other operand (a register, or a null-pointer for SET);
4188 SUBTARGETS means it is safe to create scratch registers if that will
4189 either produce a simpler sequence, or we will want to cse the values.
4190 Return value is the number of insns emitted. */
4192 /* ??? Tweak this for thumb2. */
4194 arm_split_constant (enum rtx_code code, machine_mode mode, rtx insn,
4195 HOST_WIDE_INT val, rtx target, rtx source, int subtargets)
4197 rtx cond;
4199 if (insn && GET_CODE (PATTERN (insn)) == COND_EXEC)
4200 cond = COND_EXEC_TEST (PATTERN (insn));
4201 else
4202 cond = NULL_RTX;
4204 if (subtargets || code == SET
4205 || (REG_P (target) && REG_P (source)
4206 && REGNO (target) != REGNO (source)))
4208 /* After arm_reorg has been called, we can't fix up expensive
4209 constants by pushing them into memory so we must synthesize
4210 them in-line, regardless of the cost. This is only likely to
4211 be more costly on chips that have load delay slots and we are
4212 compiling without running the scheduler (so no splitting
4213 occurred before the final instruction emission).
4215 Ref: gcc -O1 -mcpu=strongarm gcc.c-torture/compile/980506-2.c
4217 if (!cfun->machine->after_arm_reorg
4218 && !cond
4219 && (arm_gen_constant (code, mode, NULL_RTX, val, target, source,
4220 1, 0)
4221 > (arm_constant_limit (optimize_function_for_size_p (cfun))
4222 + (code != SET))))
4224 if (code == SET)
4226 /* Currently SET is the only monadic value for CODE, all
4227 the rest are diadic. */
4228 if (TARGET_USE_MOVT)
4229 arm_emit_movpair (target, GEN_INT (val));
4230 else
4231 emit_set_insn (target, GEN_INT (val));
4233 return 1;
4235 else
4237 rtx temp = subtargets ? gen_reg_rtx (mode) : target;
4239 if (TARGET_USE_MOVT)
4240 arm_emit_movpair (temp, GEN_INT (val));
4241 else
4242 emit_set_insn (temp, GEN_INT (val));
4244 /* For MINUS, the value is subtracted from, since we never
4245 have subtraction of a constant. */
4246 if (code == MINUS)
4247 emit_set_insn (target, gen_rtx_MINUS (mode, temp, source));
4248 else
4249 emit_set_insn (target,
4250 gen_rtx_fmt_ee (code, mode, source, temp));
4251 return 2;
4256 return arm_gen_constant (code, mode, cond, val, target, source, subtargets,
4260 /* Return a sequence of integers, in RETURN_SEQUENCE that fit into
4261 ARM/THUMB2 immediates, and add up to VAL.
4262 Thr function return value gives the number of insns required. */
4263 static int
4264 optimal_immediate_sequence (enum rtx_code code, unsigned HOST_WIDE_INT val,
4265 struct four_ints *return_sequence)
4267 int best_consecutive_zeros = 0;
4268 int i;
4269 int best_start = 0;
4270 int insns1, insns2;
4271 struct four_ints tmp_sequence;
4273 /* If we aren't targeting ARM, the best place to start is always at
4274 the bottom, otherwise look more closely. */
4275 if (TARGET_ARM)
4277 for (i = 0; i < 32; i += 2)
4279 int consecutive_zeros = 0;
4281 if (!(val & (3 << i)))
4283 while ((i < 32) && !(val & (3 << i)))
4285 consecutive_zeros += 2;
4286 i += 2;
4288 if (consecutive_zeros > best_consecutive_zeros)
4290 best_consecutive_zeros = consecutive_zeros;
4291 best_start = i - consecutive_zeros;
4293 i -= 2;
4298 /* So long as it won't require any more insns to do so, it's
4299 desirable to emit a small constant (in bits 0...9) in the last
4300 insn. This way there is more chance that it can be combined with
4301 a later addressing insn to form a pre-indexed load or store
4302 operation. Consider:
4304 *((volatile int *)0xe0000100) = 1;
4305 *((volatile int *)0xe0000110) = 2;
4307 We want this to wind up as:
4309 mov rA, #0xe0000000
4310 mov rB, #1
4311 str rB, [rA, #0x100]
4312 mov rB, #2
4313 str rB, [rA, #0x110]
4315 rather than having to synthesize both large constants from scratch.
4317 Therefore, we calculate how many insns would be required to emit
4318 the constant starting from `best_start', and also starting from
4319 zero (i.e. with bit 31 first to be output). If `best_start' doesn't
4320 yield a shorter sequence, we may as well use zero. */
4321 insns1 = optimal_immediate_sequence_1 (code, val, return_sequence, best_start);
4322 if (best_start != 0
4323 && ((HOST_WIDE_INT_1U << best_start) < val))
4325 insns2 = optimal_immediate_sequence_1 (code, val, &tmp_sequence, 0);
4326 if (insns2 <= insns1)
4328 *return_sequence = tmp_sequence;
4329 insns1 = insns2;
4333 return insns1;
4336 /* As for optimal_immediate_sequence, but starting at bit-position I. */
4337 static int
4338 optimal_immediate_sequence_1 (enum rtx_code code, unsigned HOST_WIDE_INT val,
4339 struct four_ints *return_sequence, int i)
4341 int remainder = val & 0xffffffff;
4342 int insns = 0;
4344 /* Try and find a way of doing the job in either two or three
4345 instructions.
4347 In ARM mode we can use 8-bit constants, rotated to any 2-bit aligned
4348 location. We start at position I. This may be the MSB, or
4349 optimial_immediate_sequence may have positioned it at the largest block
4350 of zeros that are aligned on a 2-bit boundary. We then fill up the temps,
4351 wrapping around to the top of the word when we drop off the bottom.
4352 In the worst case this code should produce no more than four insns.
4354 In Thumb2 mode, we can use 32/16-bit replicated constants, and 8-bit
4355 constants, shifted to any arbitrary location. We should always start
4356 at the MSB. */
4359 int end;
4360 unsigned int b1, b2, b3, b4;
4361 unsigned HOST_WIDE_INT result;
4362 int loc;
4364 gcc_assert (insns < 4);
4366 if (i <= 0)
4367 i += 32;
4369 /* First, find the next normal 12/8-bit shifted/rotated immediate. */
4370 if (remainder & ((TARGET_ARM ? (3 << (i - 2)) : (1 << (i - 1)))))
4372 loc = i;
4373 if (i <= 12 && TARGET_THUMB2 && code == PLUS)
4374 /* We can use addw/subw for the last 12 bits. */
4375 result = remainder;
4376 else
4378 /* Use an 8-bit shifted/rotated immediate. */
4379 end = i - 8;
4380 if (end < 0)
4381 end += 32;
4382 result = remainder & ((0x0ff << end)
4383 | ((i < end) ? (0xff >> (32 - end))
4384 : 0));
4385 i -= 8;
4388 else
4390 /* Arm allows rotates by a multiple of two. Thumb-2 allows
4391 arbitrary shifts. */
4392 i -= TARGET_ARM ? 2 : 1;
4393 continue;
4396 /* Next, see if we can do a better job with a thumb2 replicated
4397 constant.
4399 We do it this way around to catch the cases like 0x01F001E0 where
4400 two 8-bit immediates would work, but a replicated constant would
4401 make it worse.
4403 TODO: 16-bit constants that don't clear all the bits, but still win.
4404 TODO: Arithmetic splitting for set/add/sub, rather than bitwise. */
4405 if (TARGET_THUMB2)
4407 b1 = (remainder & 0xff000000) >> 24;
4408 b2 = (remainder & 0x00ff0000) >> 16;
4409 b3 = (remainder & 0x0000ff00) >> 8;
4410 b4 = remainder & 0xff;
4412 if (loc > 24)
4414 /* The 8-bit immediate already found clears b1 (and maybe b2),
4415 but must leave b3 and b4 alone. */
4417 /* First try to find a 32-bit replicated constant that clears
4418 almost everything. We can assume that we can't do it in one,
4419 or else we wouldn't be here. */
4420 unsigned int tmp = b1 & b2 & b3 & b4;
4421 unsigned int tmp2 = tmp + (tmp << 8) + (tmp << 16)
4422 + (tmp << 24);
4423 unsigned int matching_bytes = (tmp == b1) + (tmp == b2)
4424 + (tmp == b3) + (tmp == b4);
4425 if (tmp
4426 && (matching_bytes >= 3
4427 || (matching_bytes == 2
4428 && const_ok_for_op (remainder & ~tmp2, code))))
4430 /* At least 3 of the bytes match, and the fourth has at
4431 least as many bits set, or two of the bytes match
4432 and it will only require one more insn to finish. */
4433 result = tmp2;
4434 i = tmp != b1 ? 32
4435 : tmp != b2 ? 24
4436 : tmp != b3 ? 16
4437 : 8;
4440 /* Second, try to find a 16-bit replicated constant that can
4441 leave three of the bytes clear. If b2 or b4 is already
4442 zero, then we can. If the 8-bit from above would not
4443 clear b2 anyway, then we still win. */
4444 else if (b1 == b3 && (!b2 || !b4
4445 || (remainder & 0x00ff0000 & ~result)))
4447 result = remainder & 0xff00ff00;
4448 i = 24;
4451 else if (loc > 16)
4453 /* The 8-bit immediate already found clears b2 (and maybe b3)
4454 and we don't get here unless b1 is alredy clear, but it will
4455 leave b4 unchanged. */
4457 /* If we can clear b2 and b4 at once, then we win, since the
4458 8-bits couldn't possibly reach that far. */
4459 if (b2 == b4)
4461 result = remainder & 0x00ff00ff;
4462 i = 16;
4467 return_sequence->i[insns++] = result;
4468 remainder &= ~result;
4470 if (code == SET || code == MINUS)
4471 code = PLUS;
4473 while (remainder);
4475 return insns;
4478 /* Emit an instruction with the indicated PATTERN. If COND is
4479 non-NULL, conditionalize the execution of the instruction on COND
4480 being true. */
4482 static void
4483 emit_constant_insn (rtx cond, rtx pattern)
4485 if (cond)
4486 pattern = gen_rtx_COND_EXEC (VOIDmode, copy_rtx (cond), pattern);
4487 emit_insn (pattern);
4490 /* As above, but extra parameter GENERATE which, if clear, suppresses
4491 RTL generation. */
4493 static int
4494 arm_gen_constant (enum rtx_code code, machine_mode mode, rtx cond,
4495 unsigned HOST_WIDE_INT val, rtx target, rtx source,
4496 int subtargets, int generate)
4498 int can_invert = 0;
4499 int can_negate = 0;
4500 int final_invert = 0;
4501 int i;
4502 int set_sign_bit_copies = 0;
4503 int clear_sign_bit_copies = 0;
4504 int clear_zero_bit_copies = 0;
4505 int set_zero_bit_copies = 0;
4506 int insns = 0, neg_insns, inv_insns;
4507 unsigned HOST_WIDE_INT temp1, temp2;
4508 unsigned HOST_WIDE_INT remainder = val & 0xffffffff;
4509 struct four_ints *immediates;
4510 struct four_ints pos_immediates, neg_immediates, inv_immediates;
4512 /* Find out which operations are safe for a given CODE. Also do a quick
4513 check for degenerate cases; these can occur when DImode operations
4514 are split. */
4515 switch (code)
4517 case SET:
4518 can_invert = 1;
4519 break;
4521 case PLUS:
4522 can_negate = 1;
4523 break;
4525 case IOR:
4526 if (remainder == 0xffffffff)
4528 if (generate)
4529 emit_constant_insn (cond,
4530 gen_rtx_SET (target,
4531 GEN_INT (ARM_SIGN_EXTEND (val))));
4532 return 1;
4535 if (remainder == 0)
4537 if (reload_completed && rtx_equal_p (target, source))
4538 return 0;
4540 if (generate)
4541 emit_constant_insn (cond, gen_rtx_SET (target, source));
4542 return 1;
4544 break;
4546 case AND:
4547 if (remainder == 0)
4549 if (generate)
4550 emit_constant_insn (cond, gen_rtx_SET (target, const0_rtx));
4551 return 1;
4553 if (remainder == 0xffffffff)
4555 if (reload_completed && rtx_equal_p (target, source))
4556 return 0;
4557 if (generate)
4558 emit_constant_insn (cond, gen_rtx_SET (target, source));
4559 return 1;
4561 can_invert = 1;
4562 break;
4564 case XOR:
4565 if (remainder == 0)
4567 if (reload_completed && rtx_equal_p (target, source))
4568 return 0;
4569 if (generate)
4570 emit_constant_insn (cond, gen_rtx_SET (target, source));
4571 return 1;
4574 if (remainder == 0xffffffff)
4576 if (generate)
4577 emit_constant_insn (cond,
4578 gen_rtx_SET (target,
4579 gen_rtx_NOT (mode, source)));
4580 return 1;
4582 final_invert = 1;
4583 break;
4585 case MINUS:
4586 /* We treat MINUS as (val - source), since (source - val) is always
4587 passed as (source + (-val)). */
4588 if (remainder == 0)
4590 if (generate)
4591 emit_constant_insn (cond,
4592 gen_rtx_SET (target,
4593 gen_rtx_NEG (mode, source)));
4594 return 1;
4596 if (const_ok_for_arm (val))
4598 if (generate)
4599 emit_constant_insn (cond,
4600 gen_rtx_SET (target,
4601 gen_rtx_MINUS (mode, GEN_INT (val),
4602 source)));
4603 return 1;
4606 break;
4608 default:
4609 gcc_unreachable ();
4612 /* If we can do it in one insn get out quickly. */
4613 if (const_ok_for_op (val, code))
4615 if (generate)
4616 emit_constant_insn (cond,
4617 gen_rtx_SET (target,
4618 (source
4619 ? gen_rtx_fmt_ee (code, mode, source,
4620 GEN_INT (val))
4621 : GEN_INT (val))));
4622 return 1;
4625 /* On targets with UXTH/UBFX, we can deal with AND (2^N)-1 in a single
4626 insn. */
4627 if (code == AND && (i = exact_log2 (remainder + 1)) > 0
4628 && (arm_arch_thumb2 || (i == 16 && arm_arch6 && mode == SImode)))
4630 if (generate)
4632 if (mode == SImode && i == 16)
4633 /* Use UXTH in preference to UBFX, since on Thumb2 it's a
4634 smaller insn. */
4635 emit_constant_insn (cond,
4636 gen_zero_extendhisi2
4637 (target, gen_lowpart (HImode, source)));
4638 else
4639 /* Extz only supports SImode, but we can coerce the operands
4640 into that mode. */
4641 emit_constant_insn (cond,
4642 gen_extzv_t2 (gen_lowpart (SImode, target),
4643 gen_lowpart (SImode, source),
4644 GEN_INT (i), const0_rtx));
4647 return 1;
4650 /* Calculate a few attributes that may be useful for specific
4651 optimizations. */
4652 /* Count number of leading zeros. */
4653 for (i = 31; i >= 0; i--)
4655 if ((remainder & (1 << i)) == 0)
4656 clear_sign_bit_copies++;
4657 else
4658 break;
4661 /* Count number of leading 1's. */
4662 for (i = 31; i >= 0; i--)
4664 if ((remainder & (1 << i)) != 0)
4665 set_sign_bit_copies++;
4666 else
4667 break;
4670 /* Count number of trailing zero's. */
4671 for (i = 0; i <= 31; i++)
4673 if ((remainder & (1 << i)) == 0)
4674 clear_zero_bit_copies++;
4675 else
4676 break;
4679 /* Count number of trailing 1's. */
4680 for (i = 0; i <= 31; i++)
4682 if ((remainder & (1 << i)) != 0)
4683 set_zero_bit_copies++;
4684 else
4685 break;
4688 switch (code)
4690 case SET:
4691 /* See if we can do this by sign_extending a constant that is known
4692 to be negative. This is a good, way of doing it, since the shift
4693 may well merge into a subsequent insn. */
4694 if (set_sign_bit_copies > 1)
4696 if (const_ok_for_arm
4697 (temp1 = ARM_SIGN_EXTEND (remainder
4698 << (set_sign_bit_copies - 1))))
4700 if (generate)
4702 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4703 emit_constant_insn (cond,
4704 gen_rtx_SET (new_src, GEN_INT (temp1)));
4705 emit_constant_insn (cond,
4706 gen_ashrsi3 (target, new_src,
4707 GEN_INT (set_sign_bit_copies - 1)));
4709 return 2;
4711 /* For an inverted constant, we will need to set the low bits,
4712 these will be shifted out of harm's way. */
4713 temp1 |= (1 << (set_sign_bit_copies - 1)) - 1;
4714 if (const_ok_for_arm (~temp1))
4716 if (generate)
4718 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4719 emit_constant_insn (cond,
4720 gen_rtx_SET (new_src, GEN_INT (temp1)));
4721 emit_constant_insn (cond,
4722 gen_ashrsi3 (target, new_src,
4723 GEN_INT (set_sign_bit_copies - 1)));
4725 return 2;
4729 /* See if we can calculate the value as the difference between two
4730 valid immediates. */
4731 if (clear_sign_bit_copies + clear_zero_bit_copies <= 16)
4733 int topshift = clear_sign_bit_copies & ~1;
4735 temp1 = ARM_SIGN_EXTEND ((remainder + (0x00800000 >> topshift))
4736 & (0xff000000 >> topshift));
4738 /* If temp1 is zero, then that means the 9 most significant
4739 bits of remainder were 1 and we've caused it to overflow.
4740 When topshift is 0 we don't need to do anything since we
4741 can borrow from 'bit 32'. */
4742 if (temp1 == 0 && topshift != 0)
4743 temp1 = 0x80000000 >> (topshift - 1);
4745 temp2 = ARM_SIGN_EXTEND (temp1 - remainder);
4747 if (const_ok_for_arm (temp2))
4749 if (generate)
4751 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4752 emit_constant_insn (cond,
4753 gen_rtx_SET (new_src, GEN_INT (temp1)));
4754 emit_constant_insn (cond,
4755 gen_addsi3 (target, new_src,
4756 GEN_INT (-temp2)));
4759 return 2;
4763 /* See if we can generate this by setting the bottom (or the top)
4764 16 bits, and then shifting these into the other half of the
4765 word. We only look for the simplest cases, to do more would cost
4766 too much. Be careful, however, not to generate this when the
4767 alternative would take fewer insns. */
4768 if (val & 0xffff0000)
4770 temp1 = remainder & 0xffff0000;
4771 temp2 = remainder & 0x0000ffff;
4773 /* Overlaps outside this range are best done using other methods. */
4774 for (i = 9; i < 24; i++)
4776 if ((((temp2 | (temp2 << i)) & 0xffffffff) == remainder)
4777 && !const_ok_for_arm (temp2))
4779 rtx new_src = (subtargets
4780 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
4781 : target);
4782 insns = arm_gen_constant (code, mode, cond, temp2, new_src,
4783 source, subtargets, generate);
4784 source = new_src;
4785 if (generate)
4786 emit_constant_insn
4787 (cond,
4788 gen_rtx_SET
4789 (target,
4790 gen_rtx_IOR (mode,
4791 gen_rtx_ASHIFT (mode, source,
4792 GEN_INT (i)),
4793 source)));
4794 return insns + 1;
4798 /* Don't duplicate cases already considered. */
4799 for (i = 17; i < 24; i++)
4801 if (((temp1 | (temp1 >> i)) == remainder)
4802 && !const_ok_for_arm (temp1))
4804 rtx new_src = (subtargets
4805 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
4806 : target);
4807 insns = arm_gen_constant (code, mode, cond, temp1, new_src,
4808 source, subtargets, generate);
4809 source = new_src;
4810 if (generate)
4811 emit_constant_insn
4812 (cond,
4813 gen_rtx_SET (target,
4814 gen_rtx_IOR
4815 (mode,
4816 gen_rtx_LSHIFTRT (mode, source,
4817 GEN_INT (i)),
4818 source)));
4819 return insns + 1;
4823 break;
4825 case IOR:
4826 case XOR:
4827 /* If we have IOR or XOR, and the constant can be loaded in a
4828 single instruction, and we can find a temporary to put it in,
4829 then this can be done in two instructions instead of 3-4. */
4830 if (subtargets
4831 /* TARGET can't be NULL if SUBTARGETS is 0 */
4832 || (reload_completed && !reg_mentioned_p (target, source)))
4834 if (const_ok_for_arm (ARM_SIGN_EXTEND (~val)))
4836 if (generate)
4838 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
4840 emit_constant_insn (cond,
4841 gen_rtx_SET (sub, GEN_INT (val)));
4842 emit_constant_insn (cond,
4843 gen_rtx_SET (target,
4844 gen_rtx_fmt_ee (code, mode,
4845 source, sub)));
4847 return 2;
4851 if (code == XOR)
4852 break;
4854 /* Convert.
4855 x = y | constant ( which is composed of set_sign_bit_copies of leading 1s
4856 and the remainder 0s for e.g. 0xfff00000)
4857 x = ~(~(y ashift set_sign_bit_copies) lshiftrt set_sign_bit_copies)
4859 This can be done in 2 instructions by using shifts with mov or mvn.
4860 e.g. for
4861 x = x | 0xfff00000;
4862 we generate.
4863 mvn r0, r0, asl #12
4864 mvn r0, r0, lsr #12 */
4865 if (set_sign_bit_copies > 8
4866 && (val & (HOST_WIDE_INT_M1U << (32 - set_sign_bit_copies))) == val)
4868 if (generate)
4870 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
4871 rtx shift = GEN_INT (set_sign_bit_copies);
4873 emit_constant_insn
4874 (cond,
4875 gen_rtx_SET (sub,
4876 gen_rtx_NOT (mode,
4877 gen_rtx_ASHIFT (mode,
4878 source,
4879 shift))));
4880 emit_constant_insn
4881 (cond,
4882 gen_rtx_SET (target,
4883 gen_rtx_NOT (mode,
4884 gen_rtx_LSHIFTRT (mode, sub,
4885 shift))));
4887 return 2;
4890 /* Convert
4891 x = y | constant (which has set_zero_bit_copies number of trailing ones).
4893 x = ~((~y lshiftrt set_zero_bit_copies) ashift set_zero_bit_copies).
4895 For eg. r0 = r0 | 0xfff
4896 mvn r0, r0, lsr #12
4897 mvn r0, r0, asl #12
4900 if (set_zero_bit_copies > 8
4901 && (remainder & ((1 << set_zero_bit_copies) - 1)) == remainder)
4903 if (generate)
4905 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
4906 rtx shift = GEN_INT (set_zero_bit_copies);
4908 emit_constant_insn
4909 (cond,
4910 gen_rtx_SET (sub,
4911 gen_rtx_NOT (mode,
4912 gen_rtx_LSHIFTRT (mode,
4913 source,
4914 shift))));
4915 emit_constant_insn
4916 (cond,
4917 gen_rtx_SET (target,
4918 gen_rtx_NOT (mode,
4919 gen_rtx_ASHIFT (mode, sub,
4920 shift))));
4922 return 2;
4925 /* This will never be reached for Thumb2 because orn is a valid
4926 instruction. This is for Thumb1 and the ARM 32 bit cases.
4928 x = y | constant (such that ~constant is a valid constant)
4929 Transform this to
4930 x = ~(~y & ~constant).
4932 if (const_ok_for_arm (temp1 = ARM_SIGN_EXTEND (~val)))
4934 if (generate)
4936 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
4937 emit_constant_insn (cond,
4938 gen_rtx_SET (sub,
4939 gen_rtx_NOT (mode, source)));
4940 source = sub;
4941 if (subtargets)
4942 sub = gen_reg_rtx (mode);
4943 emit_constant_insn (cond,
4944 gen_rtx_SET (sub,
4945 gen_rtx_AND (mode, source,
4946 GEN_INT (temp1))));
4947 emit_constant_insn (cond,
4948 gen_rtx_SET (target,
4949 gen_rtx_NOT (mode, sub)));
4951 return 3;
4953 break;
4955 case AND:
4956 /* See if two shifts will do 2 or more insn's worth of work. */
4957 if (clear_sign_bit_copies >= 16 && clear_sign_bit_copies < 24)
4959 HOST_WIDE_INT shift_mask = ((0xffffffff
4960 << (32 - clear_sign_bit_copies))
4961 & 0xffffffff);
4963 if ((remainder | shift_mask) != 0xffffffff)
4965 HOST_WIDE_INT new_val
4966 = ARM_SIGN_EXTEND (remainder | shift_mask);
4968 if (generate)
4970 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4971 insns = arm_gen_constant (AND, SImode, cond, new_val,
4972 new_src, source, subtargets, 1);
4973 source = new_src;
4975 else
4977 rtx targ = subtargets ? NULL_RTX : target;
4978 insns = arm_gen_constant (AND, mode, cond, new_val,
4979 targ, source, subtargets, 0);
4983 if (generate)
4985 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4986 rtx shift = GEN_INT (clear_sign_bit_copies);
4988 emit_insn (gen_ashlsi3 (new_src, source, shift));
4989 emit_insn (gen_lshrsi3 (target, new_src, shift));
4992 return insns + 2;
4995 if (clear_zero_bit_copies >= 16 && clear_zero_bit_copies < 24)
4997 HOST_WIDE_INT shift_mask = (1 << clear_zero_bit_copies) - 1;
4999 if ((remainder | shift_mask) != 0xffffffff)
5001 HOST_WIDE_INT new_val
5002 = ARM_SIGN_EXTEND (remainder | shift_mask);
5003 if (generate)
5005 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
5007 insns = arm_gen_constant (AND, mode, cond, new_val,
5008 new_src, source, subtargets, 1);
5009 source = new_src;
5011 else
5013 rtx targ = subtargets ? NULL_RTX : target;
5015 insns = arm_gen_constant (AND, mode, cond, new_val,
5016 targ, source, subtargets, 0);
5020 if (generate)
5022 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
5023 rtx shift = GEN_INT (clear_zero_bit_copies);
5025 emit_insn (gen_lshrsi3 (new_src, source, shift));
5026 emit_insn (gen_ashlsi3 (target, new_src, shift));
5029 return insns + 2;
5032 break;
5034 default:
5035 break;
5038 /* Calculate what the instruction sequences would be if we generated it
5039 normally, negated, or inverted. */
5040 if (code == AND)
5041 /* AND cannot be split into multiple insns, so invert and use BIC. */
5042 insns = 99;
5043 else
5044 insns = optimal_immediate_sequence (code, remainder, &pos_immediates);
5046 if (can_negate)
5047 neg_insns = optimal_immediate_sequence (code, (-remainder) & 0xffffffff,
5048 &neg_immediates);
5049 else
5050 neg_insns = 99;
5052 if (can_invert || final_invert)
5053 inv_insns = optimal_immediate_sequence (code, remainder ^ 0xffffffff,
5054 &inv_immediates);
5055 else
5056 inv_insns = 99;
5058 immediates = &pos_immediates;
5060 /* Is the negated immediate sequence more efficient? */
5061 if (neg_insns < insns && neg_insns <= inv_insns)
5063 insns = neg_insns;
5064 immediates = &neg_immediates;
5066 else
5067 can_negate = 0;
5069 /* Is the inverted immediate sequence more efficient?
5070 We must allow for an extra NOT instruction for XOR operations, although
5071 there is some chance that the final 'mvn' will get optimized later. */
5072 if ((inv_insns + 1) < insns || (!final_invert && inv_insns < insns))
5074 insns = inv_insns;
5075 immediates = &inv_immediates;
5077 else
5079 can_invert = 0;
5080 final_invert = 0;
5083 /* Now output the chosen sequence as instructions. */
5084 if (generate)
5086 for (i = 0; i < insns; i++)
5088 rtx new_src, temp1_rtx;
5090 temp1 = immediates->i[i];
5092 if (code == SET || code == MINUS)
5093 new_src = (subtargets ? gen_reg_rtx (mode) : target);
5094 else if ((final_invert || i < (insns - 1)) && subtargets)
5095 new_src = gen_reg_rtx (mode);
5096 else
5097 new_src = target;
5099 if (can_invert)
5100 temp1 = ~temp1;
5101 else if (can_negate)
5102 temp1 = -temp1;
5104 temp1 = trunc_int_for_mode (temp1, mode);
5105 temp1_rtx = GEN_INT (temp1);
5107 if (code == SET)
5109 else if (code == MINUS)
5110 temp1_rtx = gen_rtx_MINUS (mode, temp1_rtx, source);
5111 else
5112 temp1_rtx = gen_rtx_fmt_ee (code, mode, source, temp1_rtx);
5114 emit_constant_insn (cond, gen_rtx_SET (new_src, temp1_rtx));
5115 source = new_src;
5117 if (code == SET)
5119 can_negate = can_invert;
5120 can_invert = 0;
5121 code = PLUS;
5123 else if (code == MINUS)
5124 code = PLUS;
5128 if (final_invert)
5130 if (generate)
5131 emit_constant_insn (cond, gen_rtx_SET (target,
5132 gen_rtx_NOT (mode, source)));
5133 insns++;
5136 return insns;
5139 /* Canonicalize a comparison so that we are more likely to recognize it.
5140 This can be done for a few constant compares, where we can make the
5141 immediate value easier to load. */
5143 static void
5144 arm_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
5145 bool op0_preserve_value)
5147 machine_mode mode;
5148 unsigned HOST_WIDE_INT i, maxval;
5150 mode = GET_MODE (*op0);
5151 if (mode == VOIDmode)
5152 mode = GET_MODE (*op1);
5154 maxval = (HOST_WIDE_INT_1U << (GET_MODE_BITSIZE (mode) - 1)) - 1;
5156 /* For DImode, we have GE/LT/GEU/LTU comparisons. In ARM mode
5157 we can also use cmp/cmpeq for GTU/LEU. GT/LE must be either
5158 reversed or (for constant OP1) adjusted to GE/LT. Similarly
5159 for GTU/LEU in Thumb mode. */
5160 if (mode == DImode)
5163 if (*code == GT || *code == LE
5164 || (!TARGET_ARM && (*code == GTU || *code == LEU)))
5166 /* Missing comparison. First try to use an available
5167 comparison. */
5168 if (CONST_INT_P (*op1))
5170 i = INTVAL (*op1);
5171 switch (*code)
5173 case GT:
5174 case LE:
5175 if (i != maxval
5176 && arm_const_double_by_immediates (GEN_INT (i + 1)))
5178 *op1 = GEN_INT (i + 1);
5179 *code = *code == GT ? GE : LT;
5180 return;
5182 break;
5183 case GTU:
5184 case LEU:
5185 if (i != ~((unsigned HOST_WIDE_INT) 0)
5186 && arm_const_double_by_immediates (GEN_INT (i + 1)))
5188 *op1 = GEN_INT (i + 1);
5189 *code = *code == GTU ? GEU : LTU;
5190 return;
5192 break;
5193 default:
5194 gcc_unreachable ();
5198 /* If that did not work, reverse the condition. */
5199 if (!op0_preserve_value)
5201 std::swap (*op0, *op1);
5202 *code = (int)swap_condition ((enum rtx_code)*code);
5205 return;
5208 /* If *op0 is (zero_extend:SI (subreg:QI (reg:SI) 0)) and comparing
5209 with const0_rtx, change it to (and:SI (reg:SI) (const_int 255)),
5210 to facilitate possible combining with a cmp into 'ands'. */
5211 if (mode == SImode
5212 && GET_CODE (*op0) == ZERO_EXTEND
5213 && GET_CODE (XEXP (*op0, 0)) == SUBREG
5214 && GET_MODE (XEXP (*op0, 0)) == QImode
5215 && GET_MODE (SUBREG_REG (XEXP (*op0, 0))) == SImode
5216 && subreg_lowpart_p (XEXP (*op0, 0))
5217 && *op1 == const0_rtx)
5218 *op0 = gen_rtx_AND (SImode, SUBREG_REG (XEXP (*op0, 0)),
5219 GEN_INT (255));
5221 /* Comparisons smaller than DImode. Only adjust comparisons against
5222 an out-of-range constant. */
5223 if (!CONST_INT_P (*op1)
5224 || const_ok_for_arm (INTVAL (*op1))
5225 || const_ok_for_arm (- INTVAL (*op1)))
5226 return;
5228 i = INTVAL (*op1);
5230 switch (*code)
5232 case EQ:
5233 case NE:
5234 return;
5236 case GT:
5237 case LE:
5238 if (i != maxval
5239 && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
5241 *op1 = GEN_INT (ARM_SIGN_EXTEND (i + 1));
5242 *code = *code == GT ? GE : LT;
5243 return;
5245 break;
5247 case GE:
5248 case LT:
5249 if (i != ~maxval
5250 && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
5252 *op1 = GEN_INT (i - 1);
5253 *code = *code == GE ? GT : LE;
5254 return;
5256 break;
5258 case GTU:
5259 case LEU:
5260 if (i != ~((unsigned HOST_WIDE_INT) 0)
5261 && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
5263 *op1 = GEN_INT (ARM_SIGN_EXTEND (i + 1));
5264 *code = *code == GTU ? GEU : LTU;
5265 return;
5267 break;
5269 case GEU:
5270 case LTU:
5271 if (i != 0
5272 && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
5274 *op1 = GEN_INT (i - 1);
5275 *code = *code == GEU ? GTU : LEU;
5276 return;
5278 break;
5280 default:
5281 gcc_unreachable ();
5286 /* Define how to find the value returned by a function. */
5288 static rtx
5289 arm_function_value(const_tree type, const_tree func,
5290 bool outgoing ATTRIBUTE_UNUSED)
5292 machine_mode mode;
5293 int unsignedp ATTRIBUTE_UNUSED;
5294 rtx r ATTRIBUTE_UNUSED;
5296 mode = TYPE_MODE (type);
5298 if (TARGET_AAPCS_BASED)
5299 return aapcs_allocate_return_reg (mode, type, func);
5301 /* Promote integer types. */
5302 if (INTEGRAL_TYPE_P (type))
5303 mode = arm_promote_function_mode (type, mode, &unsignedp, func, 1);
5305 /* Promotes small structs returned in a register to full-word size
5306 for big-endian AAPCS. */
5307 if (arm_return_in_msb (type))
5309 HOST_WIDE_INT size = int_size_in_bytes (type);
5310 if (size % UNITS_PER_WORD != 0)
5312 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
5313 mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
5317 return arm_libcall_value_1 (mode);
5320 /* libcall hashtable helpers. */
5322 struct libcall_hasher : nofree_ptr_hash <const rtx_def>
5324 static inline hashval_t hash (const rtx_def *);
5325 static inline bool equal (const rtx_def *, const rtx_def *);
5326 static inline void remove (rtx_def *);
5329 inline bool
5330 libcall_hasher::equal (const rtx_def *p1, const rtx_def *p2)
5332 return rtx_equal_p (p1, p2);
5335 inline hashval_t
5336 libcall_hasher::hash (const rtx_def *p1)
5338 return hash_rtx (p1, VOIDmode, NULL, NULL, FALSE);
5341 typedef hash_table<libcall_hasher> libcall_table_type;
5343 static void
5344 add_libcall (libcall_table_type *htab, rtx libcall)
5346 *htab->find_slot (libcall, INSERT) = libcall;
5349 static bool
5350 arm_libcall_uses_aapcs_base (const_rtx libcall)
5352 static bool init_done = false;
5353 static libcall_table_type *libcall_htab = NULL;
5355 if (!init_done)
5357 init_done = true;
5359 libcall_htab = new libcall_table_type (31);
5360 add_libcall (libcall_htab,
5361 convert_optab_libfunc (sfloat_optab, SFmode, SImode));
5362 add_libcall (libcall_htab,
5363 convert_optab_libfunc (sfloat_optab, DFmode, SImode));
5364 add_libcall (libcall_htab,
5365 convert_optab_libfunc (sfloat_optab, SFmode, DImode));
5366 add_libcall (libcall_htab,
5367 convert_optab_libfunc (sfloat_optab, DFmode, DImode));
5369 add_libcall (libcall_htab,
5370 convert_optab_libfunc (ufloat_optab, SFmode, SImode));
5371 add_libcall (libcall_htab,
5372 convert_optab_libfunc (ufloat_optab, DFmode, SImode));
5373 add_libcall (libcall_htab,
5374 convert_optab_libfunc (ufloat_optab, SFmode, DImode));
5375 add_libcall (libcall_htab,
5376 convert_optab_libfunc (ufloat_optab, DFmode, DImode));
5378 add_libcall (libcall_htab,
5379 convert_optab_libfunc (sext_optab, SFmode, HFmode));
5380 add_libcall (libcall_htab,
5381 convert_optab_libfunc (trunc_optab, HFmode, SFmode));
5382 add_libcall (libcall_htab,
5383 convert_optab_libfunc (sfix_optab, SImode, DFmode));
5384 add_libcall (libcall_htab,
5385 convert_optab_libfunc (ufix_optab, SImode, DFmode));
5386 add_libcall (libcall_htab,
5387 convert_optab_libfunc (sfix_optab, DImode, DFmode));
5388 add_libcall (libcall_htab,
5389 convert_optab_libfunc (ufix_optab, DImode, DFmode));
5390 add_libcall (libcall_htab,
5391 convert_optab_libfunc (sfix_optab, DImode, SFmode));
5392 add_libcall (libcall_htab,
5393 convert_optab_libfunc (ufix_optab, DImode, SFmode));
5395 /* Values from double-precision helper functions are returned in core
5396 registers if the selected core only supports single-precision
5397 arithmetic, even if we are using the hard-float ABI. The same is
5398 true for single-precision helpers, but we will never be using the
5399 hard-float ABI on a CPU which doesn't support single-precision
5400 operations in hardware. */
5401 add_libcall (libcall_htab, optab_libfunc (add_optab, DFmode));
5402 add_libcall (libcall_htab, optab_libfunc (sdiv_optab, DFmode));
5403 add_libcall (libcall_htab, optab_libfunc (smul_optab, DFmode));
5404 add_libcall (libcall_htab, optab_libfunc (neg_optab, DFmode));
5405 add_libcall (libcall_htab, optab_libfunc (sub_optab, DFmode));
5406 add_libcall (libcall_htab, optab_libfunc (eq_optab, DFmode));
5407 add_libcall (libcall_htab, optab_libfunc (lt_optab, DFmode));
5408 add_libcall (libcall_htab, optab_libfunc (le_optab, DFmode));
5409 add_libcall (libcall_htab, optab_libfunc (ge_optab, DFmode));
5410 add_libcall (libcall_htab, optab_libfunc (gt_optab, DFmode));
5411 add_libcall (libcall_htab, optab_libfunc (unord_optab, DFmode));
5412 add_libcall (libcall_htab, convert_optab_libfunc (sext_optab, DFmode,
5413 SFmode));
5414 add_libcall (libcall_htab, convert_optab_libfunc (trunc_optab, SFmode,
5415 DFmode));
5416 add_libcall (libcall_htab,
5417 convert_optab_libfunc (trunc_optab, HFmode, DFmode));
5420 return libcall && libcall_htab->find (libcall) != NULL;
5423 static rtx
5424 arm_libcall_value_1 (machine_mode mode)
5426 if (TARGET_AAPCS_BASED)
5427 return aapcs_libcall_value (mode);
5428 else if (TARGET_IWMMXT_ABI
5429 && arm_vector_mode_supported_p (mode))
5430 return gen_rtx_REG (mode, FIRST_IWMMXT_REGNUM);
5431 else
5432 return gen_rtx_REG (mode, ARG_REGISTER (1));
5435 /* Define how to find the value returned by a library function
5436 assuming the value has mode MODE. */
5438 static rtx
5439 arm_libcall_value (machine_mode mode, const_rtx libcall)
5441 if (TARGET_AAPCS_BASED && arm_pcs_default != ARM_PCS_AAPCS
5442 && GET_MODE_CLASS (mode) == MODE_FLOAT)
5444 /* The following libcalls return their result in integer registers,
5445 even though they return a floating point value. */
5446 if (arm_libcall_uses_aapcs_base (libcall))
5447 return gen_rtx_REG (mode, ARG_REGISTER(1));
5451 return arm_libcall_value_1 (mode);
5454 /* Implement TARGET_FUNCTION_VALUE_REGNO_P. */
5456 static bool
5457 arm_function_value_regno_p (const unsigned int regno)
5459 if (regno == ARG_REGISTER (1)
5460 || (TARGET_32BIT
5461 && TARGET_AAPCS_BASED
5462 && TARGET_HARD_FLOAT
5463 && regno == FIRST_VFP_REGNUM)
5464 || (TARGET_IWMMXT_ABI
5465 && regno == FIRST_IWMMXT_REGNUM))
5466 return true;
5468 return false;
5471 /* Determine the amount of memory needed to store the possible return
5472 registers of an untyped call. */
5474 arm_apply_result_size (void)
5476 int size = 16;
5478 if (TARGET_32BIT)
5480 if (TARGET_HARD_FLOAT_ABI)
5481 size += 32;
5482 if (TARGET_IWMMXT_ABI)
5483 size += 8;
5486 return size;
5489 /* Decide whether TYPE should be returned in memory (true)
5490 or in a register (false). FNTYPE is the type of the function making
5491 the call. */
5492 static bool
5493 arm_return_in_memory (const_tree type, const_tree fntype)
5495 HOST_WIDE_INT size;
5497 size = int_size_in_bytes (type); /* Negative if not fixed size. */
5499 if (TARGET_AAPCS_BASED)
5501 /* Simple, non-aggregate types (ie not including vectors and
5502 complex) are always returned in a register (or registers).
5503 We don't care about which register here, so we can short-cut
5504 some of the detail. */
5505 if (!AGGREGATE_TYPE_P (type)
5506 && TREE_CODE (type) != VECTOR_TYPE
5507 && TREE_CODE (type) != COMPLEX_TYPE)
5508 return false;
5510 /* Any return value that is no larger than one word can be
5511 returned in r0. */
5512 if (((unsigned HOST_WIDE_INT) size) <= UNITS_PER_WORD)
5513 return false;
5515 /* Check any available co-processors to see if they accept the
5516 type as a register candidate (VFP, for example, can return
5517 some aggregates in consecutive registers). These aren't
5518 available if the call is variadic. */
5519 if (aapcs_select_return_coproc (type, fntype) >= 0)
5520 return false;
5522 /* Vector values should be returned using ARM registers, not
5523 memory (unless they're over 16 bytes, which will break since
5524 we only have four call-clobbered registers to play with). */
5525 if (TREE_CODE (type) == VECTOR_TYPE)
5526 return (size < 0 || size > (4 * UNITS_PER_WORD));
5528 /* The rest go in memory. */
5529 return true;
5532 if (TREE_CODE (type) == VECTOR_TYPE)
5533 return (size < 0 || size > (4 * UNITS_PER_WORD));
5535 if (!AGGREGATE_TYPE_P (type) &&
5536 (TREE_CODE (type) != VECTOR_TYPE))
5537 /* All simple types are returned in registers. */
5538 return false;
5540 if (arm_abi != ARM_ABI_APCS)
5542 /* ATPCS and later return aggregate types in memory only if they are
5543 larger than a word (or are variable size). */
5544 return (size < 0 || size > UNITS_PER_WORD);
5547 /* For the arm-wince targets we choose to be compatible with Microsoft's
5548 ARM and Thumb compilers, which always return aggregates in memory. */
5549 #ifndef ARM_WINCE
5550 /* All structures/unions bigger than one word are returned in memory.
5551 Also catch the case where int_size_in_bytes returns -1. In this case
5552 the aggregate is either huge or of variable size, and in either case
5553 we will want to return it via memory and not in a register. */
5554 if (size < 0 || size > UNITS_PER_WORD)
5555 return true;
5557 if (TREE_CODE (type) == RECORD_TYPE)
5559 tree field;
5561 /* For a struct the APCS says that we only return in a register
5562 if the type is 'integer like' and every addressable element
5563 has an offset of zero. For practical purposes this means
5564 that the structure can have at most one non bit-field element
5565 and that this element must be the first one in the structure. */
5567 /* Find the first field, ignoring non FIELD_DECL things which will
5568 have been created by C++. */
5569 for (field = TYPE_FIELDS (type);
5570 field && TREE_CODE (field) != FIELD_DECL;
5571 field = DECL_CHAIN (field))
5572 continue;
5574 if (field == NULL)
5575 return false; /* An empty structure. Allowed by an extension to ANSI C. */
5577 /* Check that the first field is valid for returning in a register. */
5579 /* ... Floats are not allowed */
5580 if (FLOAT_TYPE_P (TREE_TYPE (field)))
5581 return true;
5583 /* ... Aggregates that are not themselves valid for returning in
5584 a register are not allowed. */
5585 if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
5586 return true;
5588 /* Now check the remaining fields, if any. Only bitfields are allowed,
5589 since they are not addressable. */
5590 for (field = DECL_CHAIN (field);
5591 field;
5592 field = DECL_CHAIN (field))
5594 if (TREE_CODE (field) != FIELD_DECL)
5595 continue;
5597 if (!DECL_BIT_FIELD_TYPE (field))
5598 return true;
5601 return false;
5604 if (TREE_CODE (type) == UNION_TYPE)
5606 tree field;
5608 /* Unions can be returned in registers if every element is
5609 integral, or can be returned in an integer register. */
5610 for (field = TYPE_FIELDS (type);
5611 field;
5612 field = DECL_CHAIN (field))
5614 if (TREE_CODE (field) != FIELD_DECL)
5615 continue;
5617 if (FLOAT_TYPE_P (TREE_TYPE (field)))
5618 return true;
5620 if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
5621 return true;
5624 return false;
5626 #endif /* not ARM_WINCE */
5628 /* Return all other types in memory. */
5629 return true;
5632 const struct pcs_attribute_arg
5634 const char *arg;
5635 enum arm_pcs value;
5636 } pcs_attribute_args[] =
5638 {"aapcs", ARM_PCS_AAPCS},
5639 {"aapcs-vfp", ARM_PCS_AAPCS_VFP},
5640 #if 0
5641 /* We could recognize these, but changes would be needed elsewhere
5642 * to implement them. */
5643 {"aapcs-iwmmxt", ARM_PCS_AAPCS_IWMMXT},
5644 {"atpcs", ARM_PCS_ATPCS},
5645 {"apcs", ARM_PCS_APCS},
5646 #endif
5647 {NULL, ARM_PCS_UNKNOWN}
5650 static enum arm_pcs
5651 arm_pcs_from_attribute (tree attr)
5653 const struct pcs_attribute_arg *ptr;
5654 const char *arg;
5656 /* Get the value of the argument. */
5657 if (TREE_VALUE (attr) == NULL_TREE
5658 || TREE_CODE (TREE_VALUE (attr)) != STRING_CST)
5659 return ARM_PCS_UNKNOWN;
5661 arg = TREE_STRING_POINTER (TREE_VALUE (attr));
5663 /* Check it against the list of known arguments. */
5664 for (ptr = pcs_attribute_args; ptr->arg != NULL; ptr++)
5665 if (streq (arg, ptr->arg))
5666 return ptr->value;
5668 /* An unrecognized interrupt type. */
5669 return ARM_PCS_UNKNOWN;
5672 /* Get the PCS variant to use for this call. TYPE is the function's type
5673 specification, DECL is the specific declartion. DECL may be null if
5674 the call could be indirect or if this is a library call. */
5675 static enum arm_pcs
5676 arm_get_pcs_model (const_tree type, const_tree decl)
5678 bool user_convention = false;
5679 enum arm_pcs user_pcs = arm_pcs_default;
5680 tree attr;
5682 gcc_assert (type);
5684 attr = lookup_attribute ("pcs", TYPE_ATTRIBUTES (type));
5685 if (attr)
5687 user_pcs = arm_pcs_from_attribute (TREE_VALUE (attr));
5688 user_convention = true;
5691 if (TARGET_AAPCS_BASED)
5693 /* Detect varargs functions. These always use the base rules
5694 (no argument is ever a candidate for a co-processor
5695 register). */
5696 bool base_rules = stdarg_p (type);
5698 if (user_convention)
5700 if (user_pcs > ARM_PCS_AAPCS_LOCAL)
5701 sorry ("non-AAPCS derived PCS variant");
5702 else if (base_rules && user_pcs != ARM_PCS_AAPCS)
5703 error ("variadic functions must use the base AAPCS variant");
5706 if (base_rules)
5707 return ARM_PCS_AAPCS;
5708 else if (user_convention)
5709 return user_pcs;
5710 else if (decl && flag_unit_at_a_time)
5712 /* Local functions never leak outside this compilation unit,
5713 so we are free to use whatever conventions are
5714 appropriate. */
5715 /* FIXME: remove CONST_CAST_TREE when cgraph is constified. */
5716 cgraph_local_info *i = cgraph_node::local_info (CONST_CAST_TREE(decl));
5717 if (i && i->local)
5718 return ARM_PCS_AAPCS_LOCAL;
5721 else if (user_convention && user_pcs != arm_pcs_default)
5722 sorry ("PCS variant");
5724 /* For everything else we use the target's default. */
5725 return arm_pcs_default;
5729 static void
5730 aapcs_vfp_cum_init (CUMULATIVE_ARGS *pcum ATTRIBUTE_UNUSED,
5731 const_tree fntype ATTRIBUTE_UNUSED,
5732 rtx libcall ATTRIBUTE_UNUSED,
5733 const_tree fndecl ATTRIBUTE_UNUSED)
5735 /* Record the unallocated VFP registers. */
5736 pcum->aapcs_vfp_regs_free = (1 << NUM_VFP_ARG_REGS) - 1;
5737 pcum->aapcs_vfp_reg_alloc = 0;
5740 /* Walk down the type tree of TYPE counting consecutive base elements.
5741 If *MODEP is VOIDmode, then set it to the first valid floating point
5742 type. If a non-floating point type is found, or if a floating point
5743 type that doesn't match a non-VOIDmode *MODEP is found, then return -1,
5744 otherwise return the count in the sub-tree. */
5745 static int
5746 aapcs_vfp_sub_candidate (const_tree type, machine_mode *modep)
5748 machine_mode mode;
5749 HOST_WIDE_INT size;
5751 switch (TREE_CODE (type))
5753 case REAL_TYPE:
5754 mode = TYPE_MODE (type);
5755 if (mode != DFmode && mode != SFmode && mode != HFmode)
5756 return -1;
5758 if (*modep == VOIDmode)
5759 *modep = mode;
5761 if (*modep == mode)
5762 return 1;
5764 break;
5766 case COMPLEX_TYPE:
5767 mode = TYPE_MODE (TREE_TYPE (type));
5768 if (mode != DFmode && mode != SFmode)
5769 return -1;
5771 if (*modep == VOIDmode)
5772 *modep = mode;
5774 if (*modep == mode)
5775 return 2;
5777 break;
5779 case VECTOR_TYPE:
5780 /* Use V2SImode and V4SImode as representatives of all 64-bit
5781 and 128-bit vector types, whether or not those modes are
5782 supported with the present options. */
5783 size = int_size_in_bytes (type);
5784 switch (size)
5786 case 8:
5787 mode = V2SImode;
5788 break;
5789 case 16:
5790 mode = V4SImode;
5791 break;
5792 default:
5793 return -1;
5796 if (*modep == VOIDmode)
5797 *modep = mode;
5799 /* Vector modes are considered to be opaque: two vectors are
5800 equivalent for the purposes of being homogeneous aggregates
5801 if they are the same size. */
5802 if (*modep == mode)
5803 return 1;
5805 break;
5807 case ARRAY_TYPE:
5809 int count;
5810 tree index = TYPE_DOMAIN (type);
5812 /* Can't handle incomplete types nor sizes that are not
5813 fixed. */
5814 if (!COMPLETE_TYPE_P (type)
5815 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
5816 return -1;
5818 count = aapcs_vfp_sub_candidate (TREE_TYPE (type), modep);
5819 if (count == -1
5820 || !index
5821 || !TYPE_MAX_VALUE (index)
5822 || !tree_fits_uhwi_p (TYPE_MAX_VALUE (index))
5823 || !TYPE_MIN_VALUE (index)
5824 || !tree_fits_uhwi_p (TYPE_MIN_VALUE (index))
5825 || count < 0)
5826 return -1;
5828 count *= (1 + tree_to_uhwi (TYPE_MAX_VALUE (index))
5829 - tree_to_uhwi (TYPE_MIN_VALUE (index)));
5831 /* There must be no padding. */
5832 if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
5833 return -1;
5835 return count;
5838 case RECORD_TYPE:
5840 int count = 0;
5841 int sub_count;
5842 tree field;
5844 /* Can't handle incomplete types nor sizes that are not
5845 fixed. */
5846 if (!COMPLETE_TYPE_P (type)
5847 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
5848 return -1;
5850 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
5852 if (TREE_CODE (field) != FIELD_DECL)
5853 continue;
5855 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
5856 if (sub_count < 0)
5857 return -1;
5858 count += sub_count;
5861 /* There must be no padding. */
5862 if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
5863 return -1;
5865 return count;
5868 case UNION_TYPE:
5869 case QUAL_UNION_TYPE:
5871 /* These aren't very interesting except in a degenerate case. */
5872 int count = 0;
5873 int sub_count;
5874 tree field;
5876 /* Can't handle incomplete types nor sizes that are not
5877 fixed. */
5878 if (!COMPLETE_TYPE_P (type)
5879 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
5880 return -1;
5882 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
5884 if (TREE_CODE (field) != FIELD_DECL)
5885 continue;
5887 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
5888 if (sub_count < 0)
5889 return -1;
5890 count = count > sub_count ? count : sub_count;
5893 /* There must be no padding. */
5894 if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
5895 return -1;
5897 return count;
5900 default:
5901 break;
5904 return -1;
5907 /* Return true if PCS_VARIANT should use VFP registers. */
5908 static bool
5909 use_vfp_abi (enum arm_pcs pcs_variant, bool is_double)
5911 if (pcs_variant == ARM_PCS_AAPCS_VFP)
5913 static bool seen_thumb1_vfp = false;
5915 if (TARGET_THUMB1 && !seen_thumb1_vfp)
5917 sorry ("Thumb-1 hard-float VFP ABI");
5918 /* sorry() is not immediately fatal, so only display this once. */
5919 seen_thumb1_vfp = true;
5922 return true;
5925 if (pcs_variant != ARM_PCS_AAPCS_LOCAL)
5926 return false;
5928 return (TARGET_32BIT && TARGET_HARD_FLOAT &&
5929 (TARGET_VFP_DOUBLE || !is_double));
5932 /* Return true if an argument whose type is TYPE, or mode is MODE, is
5933 suitable for passing or returning in VFP registers for the PCS
5934 variant selected. If it is, then *BASE_MODE is updated to contain
5935 a machine mode describing each element of the argument's type and
5936 *COUNT to hold the number of such elements. */
5937 static bool
5938 aapcs_vfp_is_call_or_return_candidate (enum arm_pcs pcs_variant,
5939 machine_mode mode, const_tree type,
5940 machine_mode *base_mode, int *count)
5942 machine_mode new_mode = VOIDmode;
5944 /* If we have the type information, prefer that to working things
5945 out from the mode. */
5946 if (type)
5948 int ag_count = aapcs_vfp_sub_candidate (type, &new_mode);
5950 if (ag_count > 0 && ag_count <= 4)
5951 *count = ag_count;
5952 else
5953 return false;
5955 else if (GET_MODE_CLASS (mode) == MODE_FLOAT
5956 || GET_MODE_CLASS (mode) == MODE_VECTOR_INT
5957 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
5959 *count = 1;
5960 new_mode = mode;
5962 else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
5964 *count = 2;
5965 new_mode = (mode == DCmode ? DFmode : SFmode);
5967 else
5968 return false;
5971 if (!use_vfp_abi (pcs_variant, ARM_NUM_REGS (new_mode) > 1))
5972 return false;
5974 *base_mode = new_mode;
5975 return true;
5978 static bool
5979 aapcs_vfp_is_return_candidate (enum arm_pcs pcs_variant,
5980 machine_mode mode, const_tree type)
5982 int count ATTRIBUTE_UNUSED;
5983 machine_mode ag_mode ATTRIBUTE_UNUSED;
5985 if (!use_vfp_abi (pcs_variant, false))
5986 return false;
5987 return aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type,
5988 &ag_mode, &count);
5991 static bool
5992 aapcs_vfp_is_call_candidate (CUMULATIVE_ARGS *pcum, machine_mode mode,
5993 const_tree type)
5995 if (!use_vfp_abi (pcum->pcs_variant, false))
5996 return false;
5998 return aapcs_vfp_is_call_or_return_candidate (pcum->pcs_variant, mode, type,
5999 &pcum->aapcs_vfp_rmode,
6000 &pcum->aapcs_vfp_rcount);
6003 /* Implement the allocate field in aapcs_cp_arg_layout. See the comment there
6004 for the behaviour of this function. */
6006 static bool
6007 aapcs_vfp_allocate (CUMULATIVE_ARGS *pcum, machine_mode mode,
6008 const_tree type ATTRIBUTE_UNUSED)
6010 int rmode_size
6011 = MAX (GET_MODE_SIZE (pcum->aapcs_vfp_rmode), GET_MODE_SIZE (SFmode));
6012 int shift = rmode_size / GET_MODE_SIZE (SFmode);
6013 unsigned mask = (1 << (shift * pcum->aapcs_vfp_rcount)) - 1;
6014 int regno;
6016 for (regno = 0; regno < NUM_VFP_ARG_REGS; regno += shift)
6017 if (((pcum->aapcs_vfp_regs_free >> regno) & mask) == mask)
6019 pcum->aapcs_vfp_reg_alloc = mask << regno;
6020 if (mode == BLKmode
6021 || (mode == TImode && ! TARGET_NEON)
6022 || ! arm_hard_regno_mode_ok (FIRST_VFP_REGNUM + regno, mode))
6024 int i;
6025 int rcount = pcum->aapcs_vfp_rcount;
6026 int rshift = shift;
6027 machine_mode rmode = pcum->aapcs_vfp_rmode;
6028 rtx par;
6029 if (!TARGET_NEON)
6031 /* Avoid using unsupported vector modes. */
6032 if (rmode == V2SImode)
6033 rmode = DImode;
6034 else if (rmode == V4SImode)
6036 rmode = DImode;
6037 rcount *= 2;
6038 rshift /= 2;
6041 par = gen_rtx_PARALLEL (mode, rtvec_alloc (rcount));
6042 for (i = 0; i < rcount; i++)
6044 rtx tmp = gen_rtx_REG (rmode,
6045 FIRST_VFP_REGNUM + regno + i * rshift);
6046 tmp = gen_rtx_EXPR_LIST
6047 (VOIDmode, tmp,
6048 GEN_INT (i * GET_MODE_SIZE (rmode)));
6049 XVECEXP (par, 0, i) = tmp;
6052 pcum->aapcs_reg = par;
6054 else
6055 pcum->aapcs_reg = gen_rtx_REG (mode, FIRST_VFP_REGNUM + regno);
6056 return true;
6058 return false;
6061 /* Implement the allocate_return_reg field in aapcs_cp_arg_layout. See the
6062 comment there for the behaviour of this function. */
6064 static rtx
6065 aapcs_vfp_allocate_return_reg (enum arm_pcs pcs_variant ATTRIBUTE_UNUSED,
6066 machine_mode mode,
6067 const_tree type ATTRIBUTE_UNUSED)
6069 if (!use_vfp_abi (pcs_variant, false))
6070 return NULL;
6072 if (mode == BLKmode
6073 || (GET_MODE_CLASS (mode) == MODE_INT
6074 && GET_MODE_SIZE (mode) >= GET_MODE_SIZE (TImode)
6075 && !TARGET_NEON))
6077 int count;
6078 machine_mode ag_mode;
6079 int i;
6080 rtx par;
6081 int shift;
6083 aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type,
6084 &ag_mode, &count);
6086 if (!TARGET_NEON)
6088 if (ag_mode == V2SImode)
6089 ag_mode = DImode;
6090 else if (ag_mode == V4SImode)
6092 ag_mode = DImode;
6093 count *= 2;
6096 shift = GET_MODE_SIZE(ag_mode) / GET_MODE_SIZE(SFmode);
6097 par = gen_rtx_PARALLEL (mode, rtvec_alloc (count));
6098 for (i = 0; i < count; i++)
6100 rtx tmp = gen_rtx_REG (ag_mode, FIRST_VFP_REGNUM + i * shift);
6101 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
6102 GEN_INT (i * GET_MODE_SIZE (ag_mode)));
6103 XVECEXP (par, 0, i) = tmp;
6106 return par;
6109 return gen_rtx_REG (mode, FIRST_VFP_REGNUM);
6112 static void
6113 aapcs_vfp_advance (CUMULATIVE_ARGS *pcum ATTRIBUTE_UNUSED,
6114 machine_mode mode ATTRIBUTE_UNUSED,
6115 const_tree type ATTRIBUTE_UNUSED)
6117 pcum->aapcs_vfp_regs_free &= ~pcum->aapcs_vfp_reg_alloc;
6118 pcum->aapcs_vfp_reg_alloc = 0;
6119 return;
6122 #define AAPCS_CP(X) \
6124 aapcs_ ## X ## _cum_init, \
6125 aapcs_ ## X ## _is_call_candidate, \
6126 aapcs_ ## X ## _allocate, \
6127 aapcs_ ## X ## _is_return_candidate, \
6128 aapcs_ ## X ## _allocate_return_reg, \
6129 aapcs_ ## X ## _advance \
6132 /* Table of co-processors that can be used to pass arguments in
6133 registers. Idealy no arugment should be a candidate for more than
6134 one co-processor table entry, but the table is processed in order
6135 and stops after the first match. If that entry then fails to put
6136 the argument into a co-processor register, the argument will go on
6137 the stack. */
6138 static struct
6140 /* Initialize co-processor related state in CUMULATIVE_ARGS structure. */
6141 void (*cum_init) (CUMULATIVE_ARGS *, const_tree, rtx, const_tree);
6143 /* Return true if an argument of mode MODE (or type TYPE if MODE is
6144 BLKmode) is a candidate for this co-processor's registers; this
6145 function should ignore any position-dependent state in
6146 CUMULATIVE_ARGS and only use call-type dependent information. */
6147 bool (*is_call_candidate) (CUMULATIVE_ARGS *, machine_mode, const_tree);
6149 /* Return true if the argument does get a co-processor register; it
6150 should set aapcs_reg to an RTX of the register allocated as is
6151 required for a return from FUNCTION_ARG. */
6152 bool (*allocate) (CUMULATIVE_ARGS *, machine_mode, const_tree);
6154 /* Return true if a result of mode MODE (or type TYPE if MODE is BLKmode) can
6155 be returned in this co-processor's registers. */
6156 bool (*is_return_candidate) (enum arm_pcs, machine_mode, const_tree);
6158 /* Allocate and return an RTX element to hold the return type of a call. This
6159 routine must not fail and will only be called if is_return_candidate
6160 returned true with the same parameters. */
6161 rtx (*allocate_return_reg) (enum arm_pcs, machine_mode, const_tree);
6163 /* Finish processing this argument and prepare to start processing
6164 the next one. */
6165 void (*advance) (CUMULATIVE_ARGS *, machine_mode, const_tree);
6166 } aapcs_cp_arg_layout[ARM_NUM_COPROC_SLOTS] =
6168 AAPCS_CP(vfp)
6171 #undef AAPCS_CP
6173 static int
6174 aapcs_select_call_coproc (CUMULATIVE_ARGS *pcum, machine_mode mode,
6175 const_tree type)
6177 int i;
6179 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
6180 if (aapcs_cp_arg_layout[i].is_call_candidate (pcum, mode, type))
6181 return i;
6183 return -1;
6186 static int
6187 aapcs_select_return_coproc (const_tree type, const_tree fntype)
6189 /* We aren't passed a decl, so we can't check that a call is local.
6190 However, it isn't clear that that would be a win anyway, since it
6191 might limit some tail-calling opportunities. */
6192 enum arm_pcs pcs_variant;
6194 if (fntype)
6196 const_tree fndecl = NULL_TREE;
6198 if (TREE_CODE (fntype) == FUNCTION_DECL)
6200 fndecl = fntype;
6201 fntype = TREE_TYPE (fntype);
6204 pcs_variant = arm_get_pcs_model (fntype, fndecl);
6206 else
6207 pcs_variant = arm_pcs_default;
6209 if (pcs_variant != ARM_PCS_AAPCS)
6211 int i;
6213 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
6214 if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant,
6215 TYPE_MODE (type),
6216 type))
6217 return i;
6219 return -1;
6222 static rtx
6223 aapcs_allocate_return_reg (machine_mode mode, const_tree type,
6224 const_tree fntype)
6226 /* We aren't passed a decl, so we can't check that a call is local.
6227 However, it isn't clear that that would be a win anyway, since it
6228 might limit some tail-calling opportunities. */
6229 enum arm_pcs pcs_variant;
6230 int unsignedp ATTRIBUTE_UNUSED;
6232 if (fntype)
6234 const_tree fndecl = NULL_TREE;
6236 if (TREE_CODE (fntype) == FUNCTION_DECL)
6238 fndecl = fntype;
6239 fntype = TREE_TYPE (fntype);
6242 pcs_variant = arm_get_pcs_model (fntype, fndecl);
6244 else
6245 pcs_variant = arm_pcs_default;
6247 /* Promote integer types. */
6248 if (type && INTEGRAL_TYPE_P (type))
6249 mode = arm_promote_function_mode (type, mode, &unsignedp, fntype, 1);
6251 if (pcs_variant != ARM_PCS_AAPCS)
6253 int i;
6255 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
6256 if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant, mode,
6257 type))
6258 return aapcs_cp_arg_layout[i].allocate_return_reg (pcs_variant,
6259 mode, type);
6262 /* Promotes small structs returned in a register to full-word size
6263 for big-endian AAPCS. */
6264 if (type && arm_return_in_msb (type))
6266 HOST_WIDE_INT size = int_size_in_bytes (type);
6267 if (size % UNITS_PER_WORD != 0)
6269 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
6270 mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
6274 return gen_rtx_REG (mode, R0_REGNUM);
6277 static rtx
6278 aapcs_libcall_value (machine_mode mode)
6280 if (BYTES_BIG_ENDIAN && ALL_FIXED_POINT_MODE_P (mode)
6281 && GET_MODE_SIZE (mode) <= 4)
6282 mode = SImode;
6284 return aapcs_allocate_return_reg (mode, NULL_TREE, NULL_TREE);
6287 /* Lay out a function argument using the AAPCS rules. The rule
6288 numbers referred to here are those in the AAPCS. */
6289 static void
6290 aapcs_layout_arg (CUMULATIVE_ARGS *pcum, machine_mode mode,
6291 const_tree type, bool named)
6293 int nregs, nregs2;
6294 int ncrn;
6296 /* We only need to do this once per argument. */
6297 if (pcum->aapcs_arg_processed)
6298 return;
6300 pcum->aapcs_arg_processed = true;
6302 /* Special case: if named is false then we are handling an incoming
6303 anonymous argument which is on the stack. */
6304 if (!named)
6305 return;
6307 /* Is this a potential co-processor register candidate? */
6308 if (pcum->pcs_variant != ARM_PCS_AAPCS)
6310 int slot = aapcs_select_call_coproc (pcum, mode, type);
6311 pcum->aapcs_cprc_slot = slot;
6313 /* We don't have to apply any of the rules from part B of the
6314 preparation phase, these are handled elsewhere in the
6315 compiler. */
6317 if (slot >= 0)
6319 /* A Co-processor register candidate goes either in its own
6320 class of registers or on the stack. */
6321 if (!pcum->aapcs_cprc_failed[slot])
6323 /* C1.cp - Try to allocate the argument to co-processor
6324 registers. */
6325 if (aapcs_cp_arg_layout[slot].allocate (pcum, mode, type))
6326 return;
6328 /* C2.cp - Put the argument on the stack and note that we
6329 can't assign any more candidates in this slot. We also
6330 need to note that we have allocated stack space, so that
6331 we won't later try to split a non-cprc candidate between
6332 core registers and the stack. */
6333 pcum->aapcs_cprc_failed[slot] = true;
6334 pcum->can_split = false;
6337 /* We didn't get a register, so this argument goes on the
6338 stack. */
6339 gcc_assert (pcum->can_split == false);
6340 return;
6344 /* C3 - For double-word aligned arguments, round the NCRN up to the
6345 next even number. */
6346 ncrn = pcum->aapcs_ncrn;
6347 if ((ncrn & 1) && arm_needs_doubleword_align (mode, type))
6348 ncrn++;
6350 nregs = ARM_NUM_REGS2(mode, type);
6352 /* Sigh, this test should really assert that nregs > 0, but a GCC
6353 extension allows empty structs and then gives them empty size; it
6354 then allows such a structure to be passed by value. For some of
6355 the code below we have to pretend that such an argument has
6356 non-zero size so that we 'locate' it correctly either in
6357 registers or on the stack. */
6358 gcc_assert (nregs >= 0);
6360 nregs2 = nregs ? nregs : 1;
6362 /* C4 - Argument fits entirely in core registers. */
6363 if (ncrn + nregs2 <= NUM_ARG_REGS)
6365 pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
6366 pcum->aapcs_next_ncrn = ncrn + nregs;
6367 return;
6370 /* C5 - Some core registers left and there are no arguments already
6371 on the stack: split this argument between the remaining core
6372 registers and the stack. */
6373 if (ncrn < NUM_ARG_REGS && pcum->can_split)
6375 pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
6376 pcum->aapcs_next_ncrn = NUM_ARG_REGS;
6377 pcum->aapcs_partial = (NUM_ARG_REGS - ncrn) * UNITS_PER_WORD;
6378 return;
6381 /* C6 - NCRN is set to 4. */
6382 pcum->aapcs_next_ncrn = NUM_ARG_REGS;
6384 /* C7,C8 - arugment goes on the stack. We have nothing to do here. */
6385 return;
6388 /* Initialize a variable CUM of type CUMULATIVE_ARGS
6389 for a call to a function whose data type is FNTYPE.
6390 For a library call, FNTYPE is NULL. */
6391 void
6392 arm_init_cumulative_args (CUMULATIVE_ARGS *pcum, tree fntype,
6393 rtx libname,
6394 tree fndecl ATTRIBUTE_UNUSED)
6396 /* Long call handling. */
6397 if (fntype)
6398 pcum->pcs_variant = arm_get_pcs_model (fntype, fndecl);
6399 else
6400 pcum->pcs_variant = arm_pcs_default;
6402 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
6404 if (arm_libcall_uses_aapcs_base (libname))
6405 pcum->pcs_variant = ARM_PCS_AAPCS;
6407 pcum->aapcs_ncrn = pcum->aapcs_next_ncrn = 0;
6408 pcum->aapcs_reg = NULL_RTX;
6409 pcum->aapcs_partial = 0;
6410 pcum->aapcs_arg_processed = false;
6411 pcum->aapcs_cprc_slot = -1;
6412 pcum->can_split = true;
6414 if (pcum->pcs_variant != ARM_PCS_AAPCS)
6416 int i;
6418 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
6420 pcum->aapcs_cprc_failed[i] = false;
6421 aapcs_cp_arg_layout[i].cum_init (pcum, fntype, libname, fndecl);
6424 return;
6427 /* Legacy ABIs */
6429 /* On the ARM, the offset starts at 0. */
6430 pcum->nregs = 0;
6431 pcum->iwmmxt_nregs = 0;
6432 pcum->can_split = true;
6434 /* Varargs vectors are treated the same as long long.
6435 named_count avoids having to change the way arm handles 'named' */
6436 pcum->named_count = 0;
6437 pcum->nargs = 0;
6439 if (TARGET_REALLY_IWMMXT && fntype)
6441 tree fn_arg;
6443 for (fn_arg = TYPE_ARG_TYPES (fntype);
6444 fn_arg;
6445 fn_arg = TREE_CHAIN (fn_arg))
6446 pcum->named_count += 1;
6448 if (! pcum->named_count)
6449 pcum->named_count = INT_MAX;
6453 /* Return true if mode/type need doubleword alignment. */
6454 static bool
6455 arm_needs_doubleword_align (machine_mode mode, const_tree type)
6457 if (!type)
6458 return PARM_BOUNDARY < GET_MODE_ALIGNMENT (mode);
6460 /* Scalar and vector types: Use natural alignment, i.e. of base type. */
6461 if (!AGGREGATE_TYPE_P (type))
6462 return TYPE_ALIGN (TYPE_MAIN_VARIANT (type)) > PARM_BOUNDARY;
6464 /* Array types: Use member alignment of element type. */
6465 if (TREE_CODE (type) == ARRAY_TYPE)
6466 return TYPE_ALIGN (TREE_TYPE (type)) > PARM_BOUNDARY;
6468 /* Record/aggregate types: Use greatest member alignment of any member. */
6469 for (tree field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6470 if (DECL_ALIGN (field) > PARM_BOUNDARY)
6471 return true;
6473 return false;
6477 /* Determine where to put an argument to a function.
6478 Value is zero to push the argument on the stack,
6479 or a hard register in which to store the argument.
6481 MODE is the argument's machine mode.
6482 TYPE is the data type of the argument (as a tree).
6483 This is null for libcalls where that information may
6484 not be available.
6485 CUM is a variable of type CUMULATIVE_ARGS which gives info about
6486 the preceding args and about the function being called.
6487 NAMED is nonzero if this argument is a named parameter
6488 (otherwise it is an extra parameter matching an ellipsis).
6490 On the ARM, normally the first 16 bytes are passed in registers r0-r3; all
6491 other arguments are passed on the stack. If (NAMED == 0) (which happens
6492 only in assign_parms, since TARGET_SETUP_INCOMING_VARARGS is
6493 defined), say it is passed in the stack (function_prologue will
6494 indeed make it pass in the stack if necessary). */
6496 static rtx
6497 arm_function_arg (cumulative_args_t pcum_v, machine_mode mode,
6498 const_tree type, bool named)
6500 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
6501 int nregs;
6503 /* Handle the special case quickly. Pick an arbitrary value for op2 of
6504 a call insn (op3 of a call_value insn). */
6505 if (mode == VOIDmode)
6506 return const0_rtx;
6508 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
6510 aapcs_layout_arg (pcum, mode, type, named);
6511 return pcum->aapcs_reg;
6514 /* Varargs vectors are treated the same as long long.
6515 named_count avoids having to change the way arm handles 'named' */
6516 if (TARGET_IWMMXT_ABI
6517 && arm_vector_mode_supported_p (mode)
6518 && pcum->named_count > pcum->nargs + 1)
6520 if (pcum->iwmmxt_nregs <= 9)
6521 return gen_rtx_REG (mode, pcum->iwmmxt_nregs + FIRST_IWMMXT_REGNUM);
6522 else
6524 pcum->can_split = false;
6525 return NULL_RTX;
6529 /* Put doubleword aligned quantities in even register pairs. */
6530 if (pcum->nregs & 1
6531 && ARM_DOUBLEWORD_ALIGN
6532 && arm_needs_doubleword_align (mode, type))
6533 pcum->nregs++;
6535 /* Only allow splitting an arg between regs and memory if all preceding
6536 args were allocated to regs. For args passed by reference we only count
6537 the reference pointer. */
6538 if (pcum->can_split)
6539 nregs = 1;
6540 else
6541 nregs = ARM_NUM_REGS2 (mode, type);
6543 if (!named || pcum->nregs + nregs > NUM_ARG_REGS)
6544 return NULL_RTX;
6546 return gen_rtx_REG (mode, pcum->nregs);
6549 static unsigned int
6550 arm_function_arg_boundary (machine_mode mode, const_tree type)
6552 return (ARM_DOUBLEWORD_ALIGN && arm_needs_doubleword_align (mode, type)
6553 ? DOUBLEWORD_ALIGNMENT
6554 : PARM_BOUNDARY);
6557 static int
6558 arm_arg_partial_bytes (cumulative_args_t pcum_v, machine_mode mode,
6559 tree type, bool named)
6561 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
6562 int nregs = pcum->nregs;
6564 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
6566 aapcs_layout_arg (pcum, mode, type, named);
6567 return pcum->aapcs_partial;
6570 if (TARGET_IWMMXT_ABI && arm_vector_mode_supported_p (mode))
6571 return 0;
6573 if (NUM_ARG_REGS > nregs
6574 && (NUM_ARG_REGS < nregs + ARM_NUM_REGS2 (mode, type))
6575 && pcum->can_split)
6576 return (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
6578 return 0;
6581 /* Update the data in PCUM to advance over an argument
6582 of mode MODE and data type TYPE.
6583 (TYPE is null for libcalls where that information may not be available.) */
6585 static void
6586 arm_function_arg_advance (cumulative_args_t pcum_v, machine_mode mode,
6587 const_tree type, bool named)
6589 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
6591 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
6593 aapcs_layout_arg (pcum, mode, type, named);
6595 if (pcum->aapcs_cprc_slot >= 0)
6597 aapcs_cp_arg_layout[pcum->aapcs_cprc_slot].advance (pcum, mode,
6598 type);
6599 pcum->aapcs_cprc_slot = -1;
6602 /* Generic stuff. */
6603 pcum->aapcs_arg_processed = false;
6604 pcum->aapcs_ncrn = pcum->aapcs_next_ncrn;
6605 pcum->aapcs_reg = NULL_RTX;
6606 pcum->aapcs_partial = 0;
6608 else
6610 pcum->nargs += 1;
6611 if (arm_vector_mode_supported_p (mode)
6612 && pcum->named_count > pcum->nargs
6613 && TARGET_IWMMXT_ABI)
6614 pcum->iwmmxt_nregs += 1;
6615 else
6616 pcum->nregs += ARM_NUM_REGS2 (mode, type);
6620 /* Variable sized types are passed by reference. This is a GCC
6621 extension to the ARM ABI. */
6623 static bool
6624 arm_pass_by_reference (cumulative_args_t cum ATTRIBUTE_UNUSED,
6625 machine_mode mode ATTRIBUTE_UNUSED,
6626 const_tree type, bool named ATTRIBUTE_UNUSED)
6628 return type && TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST;
6631 /* Encode the current state of the #pragma [no_]long_calls. */
6632 typedef enum
6634 OFF, /* No #pragma [no_]long_calls is in effect. */
6635 LONG, /* #pragma long_calls is in effect. */
6636 SHORT /* #pragma no_long_calls is in effect. */
6637 } arm_pragma_enum;
6639 static arm_pragma_enum arm_pragma_long_calls = OFF;
6641 void
6642 arm_pr_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
6644 arm_pragma_long_calls = LONG;
6647 void
6648 arm_pr_no_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
6650 arm_pragma_long_calls = SHORT;
6653 void
6654 arm_pr_long_calls_off (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
6656 arm_pragma_long_calls = OFF;
6659 /* Handle an attribute requiring a FUNCTION_DECL;
6660 arguments as in struct attribute_spec.handler. */
6661 static tree
6662 arm_handle_fndecl_attribute (tree *node, tree name, tree args ATTRIBUTE_UNUSED,
6663 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
6665 if (TREE_CODE (*node) != FUNCTION_DECL)
6667 warning (OPT_Wattributes, "%qE attribute only applies to functions",
6668 name);
6669 *no_add_attrs = true;
6672 return NULL_TREE;
6675 /* Handle an "interrupt" or "isr" attribute;
6676 arguments as in struct attribute_spec.handler. */
6677 static tree
6678 arm_handle_isr_attribute (tree *node, tree name, tree args, int flags,
6679 bool *no_add_attrs)
6681 if (DECL_P (*node))
6683 if (TREE_CODE (*node) != FUNCTION_DECL)
6685 warning (OPT_Wattributes, "%qE attribute only applies to functions",
6686 name);
6687 *no_add_attrs = true;
6689 /* FIXME: the argument if any is checked for type attributes;
6690 should it be checked for decl ones? */
6692 else
6694 if (TREE_CODE (*node) == FUNCTION_TYPE
6695 || TREE_CODE (*node) == METHOD_TYPE)
6697 if (arm_isr_value (args) == ARM_FT_UNKNOWN)
6699 warning (OPT_Wattributes, "%qE attribute ignored",
6700 name);
6701 *no_add_attrs = true;
6704 else if (TREE_CODE (*node) == POINTER_TYPE
6705 && (TREE_CODE (TREE_TYPE (*node)) == FUNCTION_TYPE
6706 || TREE_CODE (TREE_TYPE (*node)) == METHOD_TYPE)
6707 && arm_isr_value (args) != ARM_FT_UNKNOWN)
6709 *node = build_variant_type_copy (*node);
6710 TREE_TYPE (*node) = build_type_attribute_variant
6711 (TREE_TYPE (*node),
6712 tree_cons (name, args, TYPE_ATTRIBUTES (TREE_TYPE (*node))));
6713 *no_add_attrs = true;
6715 else
6717 /* Possibly pass this attribute on from the type to a decl. */
6718 if (flags & ((int) ATTR_FLAG_DECL_NEXT
6719 | (int) ATTR_FLAG_FUNCTION_NEXT
6720 | (int) ATTR_FLAG_ARRAY_NEXT))
6722 *no_add_attrs = true;
6723 return tree_cons (name, args, NULL_TREE);
6725 else
6727 warning (OPT_Wattributes, "%qE attribute ignored",
6728 name);
6733 return NULL_TREE;
6736 /* Handle a "pcs" attribute; arguments as in struct
6737 attribute_spec.handler. */
6738 static tree
6739 arm_handle_pcs_attribute (tree *node ATTRIBUTE_UNUSED, tree name, tree args,
6740 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
6742 if (arm_pcs_from_attribute (args) == ARM_PCS_UNKNOWN)
6744 warning (OPT_Wattributes, "%qE attribute ignored", name);
6745 *no_add_attrs = true;
6747 return NULL_TREE;
6750 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
6751 /* Handle the "notshared" attribute. This attribute is another way of
6752 requesting hidden visibility. ARM's compiler supports
6753 "__declspec(notshared)"; we support the same thing via an
6754 attribute. */
6756 static tree
6757 arm_handle_notshared_attribute (tree *node,
6758 tree name ATTRIBUTE_UNUSED,
6759 tree args ATTRIBUTE_UNUSED,
6760 int flags ATTRIBUTE_UNUSED,
6761 bool *no_add_attrs)
6763 tree decl = TYPE_NAME (*node);
6765 if (decl)
6767 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
6768 DECL_VISIBILITY_SPECIFIED (decl) = 1;
6769 *no_add_attrs = false;
6771 return NULL_TREE;
6773 #endif
6775 /* This function returns true if a function with declaration FNDECL and type
6776 FNTYPE uses the stack to pass arguments or return variables and false
6777 otherwise. This is used for functions with the attributes
6778 'cmse_nonsecure_call' or 'cmse_nonsecure_entry' and this function will issue
6779 diagnostic messages if the stack is used. NAME is the name of the attribute
6780 used. */
6782 static bool
6783 cmse_func_args_or_return_in_stack (tree fndecl, tree name, tree fntype)
6785 function_args_iterator args_iter;
6786 CUMULATIVE_ARGS args_so_far_v;
6787 cumulative_args_t args_so_far;
6788 bool first_param = true;
6789 tree arg_type, prev_arg_type = NULL_TREE, ret_type;
6791 /* Error out if any argument is passed on the stack. */
6792 arm_init_cumulative_args (&args_so_far_v, fntype, NULL_RTX, fndecl);
6793 args_so_far = pack_cumulative_args (&args_so_far_v);
6794 FOREACH_FUNCTION_ARGS (fntype, arg_type, args_iter)
6796 rtx arg_rtx;
6797 machine_mode arg_mode = TYPE_MODE (arg_type);
6799 prev_arg_type = arg_type;
6800 if (VOID_TYPE_P (arg_type))
6801 continue;
6803 if (!first_param)
6804 arm_function_arg_advance (args_so_far, arg_mode, arg_type, true);
6805 arg_rtx = arm_function_arg (args_so_far, arg_mode, arg_type, true);
6806 if (!arg_rtx
6807 || arm_arg_partial_bytes (args_so_far, arg_mode, arg_type, true))
6809 error ("%qE attribute not available to functions with arguments "
6810 "passed on the stack", name);
6811 return true;
6813 first_param = false;
6816 /* Error out for variadic functions since we cannot control how many
6817 arguments will be passed and thus stack could be used. stdarg_p () is not
6818 used for the checking to avoid browsing arguments twice. */
6819 if (prev_arg_type != NULL_TREE && !VOID_TYPE_P (prev_arg_type))
6821 error ("%qE attribute not available to functions with variable number "
6822 "of arguments", name);
6823 return true;
6826 /* Error out if return value is passed on the stack. */
6827 ret_type = TREE_TYPE (fntype);
6828 if (arm_return_in_memory (ret_type, fntype))
6830 error ("%qE attribute not available to functions that return value on "
6831 "the stack", name);
6832 return true;
6834 return false;
6837 /* Called upon detection of the use of the cmse_nonsecure_entry attribute, this
6838 function will check whether the attribute is allowed here and will add the
6839 attribute to the function declaration tree or otherwise issue a warning. */
6841 static tree
6842 arm_handle_cmse_nonsecure_entry (tree *node, tree name,
6843 tree /* args */,
6844 int /* flags */,
6845 bool *no_add_attrs)
6847 tree fndecl;
6849 if (!use_cmse)
6851 *no_add_attrs = true;
6852 warning (OPT_Wattributes, "%qE attribute ignored without -mcmse option.",
6853 name);
6854 return NULL_TREE;
6857 /* Ignore attribute for function types. */
6858 if (TREE_CODE (*node) != FUNCTION_DECL)
6860 warning (OPT_Wattributes, "%qE attribute only applies to functions",
6861 name);
6862 *no_add_attrs = true;
6863 return NULL_TREE;
6866 fndecl = *node;
6868 /* Warn for static linkage functions. */
6869 if (!TREE_PUBLIC (fndecl))
6871 warning (OPT_Wattributes, "%qE attribute has no effect on functions "
6872 "with static linkage", name);
6873 *no_add_attrs = true;
6874 return NULL_TREE;
6877 *no_add_attrs |= cmse_func_args_or_return_in_stack (fndecl, name,
6878 TREE_TYPE (fndecl));
6879 return NULL_TREE;
6883 /* Called upon detection of the use of the cmse_nonsecure_call attribute, this
6884 function will check whether the attribute is allowed here and will add the
6885 attribute to the function type tree or otherwise issue a diagnostic. The
6886 reason we check this at declaration time is to only allow the use of the
6887 attribute with declarations of function pointers and not function
6888 declarations. This function checks NODE is of the expected type and issues
6889 diagnostics otherwise using NAME. If it is not of the expected type
6890 *NO_ADD_ATTRS will be set to true. */
6892 static tree
6893 arm_handle_cmse_nonsecure_call (tree *node, tree name,
6894 tree /* args */,
6895 int /* flags */,
6896 bool *no_add_attrs)
6898 tree decl = NULL_TREE, fntype = NULL_TREE;
6899 tree type;
6901 if (!use_cmse)
6903 *no_add_attrs = true;
6904 warning (OPT_Wattributes, "%qE attribute ignored without -mcmse option.",
6905 name);
6906 return NULL_TREE;
6909 if (TREE_CODE (*node) == VAR_DECL || TREE_CODE (*node) == TYPE_DECL)
6911 decl = *node;
6912 fntype = TREE_TYPE (decl);
6915 while (fntype != NULL_TREE && TREE_CODE (fntype) == POINTER_TYPE)
6916 fntype = TREE_TYPE (fntype);
6918 if (!decl || TREE_CODE (fntype) != FUNCTION_TYPE)
6920 warning (OPT_Wattributes, "%qE attribute only applies to base type of a "
6921 "function pointer", name);
6922 *no_add_attrs = true;
6923 return NULL_TREE;
6926 *no_add_attrs |= cmse_func_args_or_return_in_stack (NULL, name, fntype);
6928 if (*no_add_attrs)
6929 return NULL_TREE;
6931 /* Prevent trees being shared among function types with and without
6932 cmse_nonsecure_call attribute. */
6933 type = TREE_TYPE (decl);
6935 type = build_distinct_type_copy (type);
6936 TREE_TYPE (decl) = type;
6937 fntype = type;
6939 while (TREE_CODE (fntype) != FUNCTION_TYPE)
6941 type = fntype;
6942 fntype = TREE_TYPE (fntype);
6943 fntype = build_distinct_type_copy (fntype);
6944 TREE_TYPE (type) = fntype;
6947 /* Construct a type attribute and add it to the function type. */
6948 tree attrs = tree_cons (get_identifier ("cmse_nonsecure_call"), NULL_TREE,
6949 TYPE_ATTRIBUTES (fntype));
6950 TYPE_ATTRIBUTES (fntype) = attrs;
6951 return NULL_TREE;
6954 /* Return 0 if the attributes for two types are incompatible, 1 if they
6955 are compatible, and 2 if they are nearly compatible (which causes a
6956 warning to be generated). */
6957 static int
6958 arm_comp_type_attributes (const_tree type1, const_tree type2)
6960 int l1, l2, s1, s2;
6962 /* Check for mismatch of non-default calling convention. */
6963 if (TREE_CODE (type1) != FUNCTION_TYPE)
6964 return 1;
6966 /* Check for mismatched call attributes. */
6967 l1 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type1)) != NULL;
6968 l2 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type2)) != NULL;
6969 s1 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type1)) != NULL;
6970 s2 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type2)) != NULL;
6972 /* Only bother to check if an attribute is defined. */
6973 if (l1 | l2 | s1 | s2)
6975 /* If one type has an attribute, the other must have the same attribute. */
6976 if ((l1 != l2) || (s1 != s2))
6977 return 0;
6979 /* Disallow mixed attributes. */
6980 if ((l1 & s2) || (l2 & s1))
6981 return 0;
6984 /* Check for mismatched ISR attribute. */
6985 l1 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type1)) != NULL;
6986 if (! l1)
6987 l1 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type1)) != NULL;
6988 l2 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type2)) != NULL;
6989 if (! l2)
6990 l1 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type2)) != NULL;
6991 if (l1 != l2)
6992 return 0;
6994 l1 = lookup_attribute ("cmse_nonsecure_call",
6995 TYPE_ATTRIBUTES (type1)) != NULL;
6996 l2 = lookup_attribute ("cmse_nonsecure_call",
6997 TYPE_ATTRIBUTES (type2)) != NULL;
6999 if (l1 != l2)
7000 return 0;
7002 return 1;
7005 /* Assigns default attributes to newly defined type. This is used to
7006 set short_call/long_call attributes for function types of
7007 functions defined inside corresponding #pragma scopes. */
7008 static void
7009 arm_set_default_type_attributes (tree type)
7011 /* Add __attribute__ ((long_call)) to all functions, when
7012 inside #pragma long_calls or __attribute__ ((short_call)),
7013 when inside #pragma no_long_calls. */
7014 if (TREE_CODE (type) == FUNCTION_TYPE || TREE_CODE (type) == METHOD_TYPE)
7016 tree type_attr_list, attr_name;
7017 type_attr_list = TYPE_ATTRIBUTES (type);
7019 if (arm_pragma_long_calls == LONG)
7020 attr_name = get_identifier ("long_call");
7021 else if (arm_pragma_long_calls == SHORT)
7022 attr_name = get_identifier ("short_call");
7023 else
7024 return;
7026 type_attr_list = tree_cons (attr_name, NULL_TREE, type_attr_list);
7027 TYPE_ATTRIBUTES (type) = type_attr_list;
7031 /* Return true if DECL is known to be linked into section SECTION. */
7033 static bool
7034 arm_function_in_section_p (tree decl, section *section)
7036 /* We can only be certain about the prevailing symbol definition. */
7037 if (!decl_binds_to_current_def_p (decl))
7038 return false;
7040 /* If DECL_SECTION_NAME is set, assume it is trustworthy. */
7041 if (!DECL_SECTION_NAME (decl))
7043 /* Make sure that we will not create a unique section for DECL. */
7044 if (flag_function_sections || DECL_COMDAT_GROUP (decl))
7045 return false;
7048 return function_section (decl) == section;
7051 /* Return nonzero if a 32-bit "long_call" should be generated for
7052 a call from the current function to DECL. We generate a long_call
7053 if the function:
7055 a. has an __attribute__((long call))
7056 or b. is within the scope of a #pragma long_calls
7057 or c. the -mlong-calls command line switch has been specified
7059 However we do not generate a long call if the function:
7061 d. has an __attribute__ ((short_call))
7062 or e. is inside the scope of a #pragma no_long_calls
7063 or f. is defined in the same section as the current function. */
7065 bool
7066 arm_is_long_call_p (tree decl)
7068 tree attrs;
7070 if (!decl)
7071 return TARGET_LONG_CALLS;
7073 attrs = TYPE_ATTRIBUTES (TREE_TYPE (decl));
7074 if (lookup_attribute ("short_call", attrs))
7075 return false;
7077 /* For "f", be conservative, and only cater for cases in which the
7078 whole of the current function is placed in the same section. */
7079 if (!flag_reorder_blocks_and_partition
7080 && TREE_CODE (decl) == FUNCTION_DECL
7081 && arm_function_in_section_p (decl, current_function_section ()))
7082 return false;
7084 if (lookup_attribute ("long_call", attrs))
7085 return true;
7087 return TARGET_LONG_CALLS;
7090 /* Return nonzero if it is ok to make a tail-call to DECL. */
7091 static bool
7092 arm_function_ok_for_sibcall (tree decl, tree exp)
7094 unsigned long func_type;
7096 if (cfun->machine->sibcall_blocked)
7097 return false;
7099 /* Never tailcall something if we are generating code for Thumb-1. */
7100 if (TARGET_THUMB1)
7101 return false;
7103 /* The PIC register is live on entry to VxWorks PLT entries, so we
7104 must make the call before restoring the PIC register. */
7105 if (TARGET_VXWORKS_RTP && flag_pic && decl && !targetm.binds_local_p (decl))
7106 return false;
7108 /* If we are interworking and the function is not declared static
7109 then we can't tail-call it unless we know that it exists in this
7110 compilation unit (since it might be a Thumb routine). */
7111 if (TARGET_INTERWORK && decl && TREE_PUBLIC (decl)
7112 && !TREE_ASM_WRITTEN (decl))
7113 return false;
7115 func_type = arm_current_func_type ();
7116 /* Never tailcall from an ISR routine - it needs a special exit sequence. */
7117 if (IS_INTERRUPT (func_type))
7118 return false;
7120 /* ARMv8-M non-secure entry functions need to return with bxns which is only
7121 generated for entry functions themselves. */
7122 if (IS_CMSE_ENTRY (arm_current_func_type ()))
7123 return false;
7125 /* We do not allow ARMv8-M non-secure calls to be turned into sibling calls,
7126 this would complicate matters for later code generation. */
7127 if (TREE_CODE (exp) == CALL_EXPR)
7129 tree fntype = TREE_TYPE (TREE_TYPE (CALL_EXPR_FN (exp)));
7130 if (lookup_attribute ("cmse_nonsecure_call", TYPE_ATTRIBUTES (fntype)))
7131 return false;
7134 if (!VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
7136 /* Check that the return value locations are the same. For
7137 example that we aren't returning a value from the sibling in
7138 a VFP register but then need to transfer it to a core
7139 register. */
7140 rtx a, b;
7141 tree decl_or_type = decl;
7143 /* If it is an indirect function pointer, get the function type. */
7144 if (!decl)
7145 decl_or_type = TREE_TYPE (TREE_TYPE (CALL_EXPR_FN (exp)));
7147 a = arm_function_value (TREE_TYPE (exp), decl_or_type, false);
7148 b = arm_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
7149 cfun->decl, false);
7150 if (!rtx_equal_p (a, b))
7151 return false;
7154 /* Never tailcall if function may be called with a misaligned SP. */
7155 if (IS_STACKALIGN (func_type))
7156 return false;
7158 /* The AAPCS says that, on bare-metal, calls to unresolved weak
7159 references should become a NOP. Don't convert such calls into
7160 sibling calls. */
7161 if (TARGET_AAPCS_BASED
7162 && arm_abi == ARM_ABI_AAPCS
7163 && decl
7164 && DECL_WEAK (decl))
7165 return false;
7167 /* Everything else is ok. */
7168 return true;
7172 /* Addressing mode support functions. */
7174 /* Return nonzero if X is a legitimate immediate operand when compiling
7175 for PIC. We know that X satisfies CONSTANT_P and flag_pic is true. */
7177 legitimate_pic_operand_p (rtx x)
7179 if (GET_CODE (x) == SYMBOL_REF
7180 || (GET_CODE (x) == CONST
7181 && GET_CODE (XEXP (x, 0)) == PLUS
7182 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF))
7183 return 0;
7185 return 1;
7188 /* Record that the current function needs a PIC register. Initialize
7189 cfun->machine->pic_reg if we have not already done so. */
7191 static void
7192 require_pic_register (void)
7194 /* A lot of the logic here is made obscure by the fact that this
7195 routine gets called as part of the rtx cost estimation process.
7196 We don't want those calls to affect any assumptions about the real
7197 function; and further, we can't call entry_of_function() until we
7198 start the real expansion process. */
7199 if (!crtl->uses_pic_offset_table)
7201 gcc_assert (can_create_pseudo_p ());
7202 if (arm_pic_register != INVALID_REGNUM
7203 && !(TARGET_THUMB1 && arm_pic_register > LAST_LO_REGNUM))
7205 if (!cfun->machine->pic_reg)
7206 cfun->machine->pic_reg = gen_rtx_REG (Pmode, arm_pic_register);
7208 /* Play games to avoid marking the function as needing pic
7209 if we are being called as part of the cost-estimation
7210 process. */
7211 if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
7212 crtl->uses_pic_offset_table = 1;
7214 else
7216 rtx_insn *seq, *insn;
7218 if (!cfun->machine->pic_reg)
7219 cfun->machine->pic_reg = gen_reg_rtx (Pmode);
7221 /* Play games to avoid marking the function as needing pic
7222 if we are being called as part of the cost-estimation
7223 process. */
7224 if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
7226 crtl->uses_pic_offset_table = 1;
7227 start_sequence ();
7229 if (TARGET_THUMB1 && arm_pic_register != INVALID_REGNUM
7230 && arm_pic_register > LAST_LO_REGNUM)
7231 emit_move_insn (cfun->machine->pic_reg,
7232 gen_rtx_REG (Pmode, arm_pic_register));
7233 else
7234 arm_load_pic_register (0UL);
7236 seq = get_insns ();
7237 end_sequence ();
7239 for (insn = seq; insn; insn = NEXT_INSN (insn))
7240 if (INSN_P (insn))
7241 INSN_LOCATION (insn) = prologue_location;
7243 /* We can be called during expansion of PHI nodes, where
7244 we can't yet emit instructions directly in the final
7245 insn stream. Queue the insns on the entry edge, they will
7246 be committed after everything else is expanded. */
7247 insert_insn_on_edge (seq,
7248 single_succ_edge (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
7255 legitimize_pic_address (rtx orig, machine_mode mode, rtx reg)
7257 if (GET_CODE (orig) == SYMBOL_REF
7258 || GET_CODE (orig) == LABEL_REF)
7260 if (reg == 0)
7262 gcc_assert (can_create_pseudo_p ());
7263 reg = gen_reg_rtx (Pmode);
7266 /* VxWorks does not impose a fixed gap between segments; the run-time
7267 gap can be different from the object-file gap. We therefore can't
7268 use GOTOFF unless we are absolutely sure that the symbol is in the
7269 same segment as the GOT. Unfortunately, the flexibility of linker
7270 scripts means that we can't be sure of that in general, so assume
7271 that GOTOFF is never valid on VxWorks. */
7272 /* References to weak symbols cannot be resolved locally: they
7273 may be overridden by a non-weak definition at link time. */
7274 rtx_insn *insn;
7275 if ((GET_CODE (orig) == LABEL_REF
7276 || (GET_CODE (orig) == SYMBOL_REF
7277 && SYMBOL_REF_LOCAL_P (orig)
7278 && (SYMBOL_REF_DECL (orig)
7279 ? !DECL_WEAK (SYMBOL_REF_DECL (orig)) : 1)))
7280 && NEED_GOT_RELOC
7281 && arm_pic_data_is_text_relative)
7282 insn = arm_pic_static_addr (orig, reg);
7283 else
7285 rtx pat;
7286 rtx mem;
7288 /* If this function doesn't have a pic register, create one now. */
7289 require_pic_register ();
7291 pat = gen_calculate_pic_address (reg, cfun->machine->pic_reg, orig);
7293 /* Make the MEM as close to a constant as possible. */
7294 mem = SET_SRC (pat);
7295 gcc_assert (MEM_P (mem) && !MEM_VOLATILE_P (mem));
7296 MEM_READONLY_P (mem) = 1;
7297 MEM_NOTRAP_P (mem) = 1;
7299 insn = emit_insn (pat);
7302 /* Put a REG_EQUAL note on this insn, so that it can be optimized
7303 by loop. */
7304 set_unique_reg_note (insn, REG_EQUAL, orig);
7306 return reg;
7308 else if (GET_CODE (orig) == CONST)
7310 rtx base, offset;
7312 if (GET_CODE (XEXP (orig, 0)) == PLUS
7313 && XEXP (XEXP (orig, 0), 0) == cfun->machine->pic_reg)
7314 return orig;
7316 /* Handle the case where we have: const (UNSPEC_TLS). */
7317 if (GET_CODE (XEXP (orig, 0)) == UNSPEC
7318 && XINT (XEXP (orig, 0), 1) == UNSPEC_TLS)
7319 return orig;
7321 /* Handle the case where we have:
7322 const (plus (UNSPEC_TLS) (ADDEND)). The ADDEND must be a
7323 CONST_INT. */
7324 if (GET_CODE (XEXP (orig, 0)) == PLUS
7325 && GET_CODE (XEXP (XEXP (orig, 0), 0)) == UNSPEC
7326 && XINT (XEXP (XEXP (orig, 0), 0), 1) == UNSPEC_TLS)
7328 gcc_assert (CONST_INT_P (XEXP (XEXP (orig, 0), 1)));
7329 return orig;
7332 if (reg == 0)
7334 gcc_assert (can_create_pseudo_p ());
7335 reg = gen_reg_rtx (Pmode);
7338 gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);
7340 base = legitimize_pic_address (XEXP (XEXP (orig, 0), 0), Pmode, reg);
7341 offset = legitimize_pic_address (XEXP (XEXP (orig, 0), 1), Pmode,
7342 base == reg ? 0 : reg);
7344 if (CONST_INT_P (offset))
7346 /* The base register doesn't really matter, we only want to
7347 test the index for the appropriate mode. */
7348 if (!arm_legitimate_index_p (mode, offset, SET, 0))
7350 gcc_assert (can_create_pseudo_p ());
7351 offset = force_reg (Pmode, offset);
7354 if (CONST_INT_P (offset))
7355 return plus_constant (Pmode, base, INTVAL (offset));
7358 if (GET_MODE_SIZE (mode) > 4
7359 && (GET_MODE_CLASS (mode) == MODE_INT
7360 || TARGET_SOFT_FLOAT))
7362 emit_insn (gen_addsi3 (reg, base, offset));
7363 return reg;
7366 return gen_rtx_PLUS (Pmode, base, offset);
7369 return orig;
7373 /* Find a spare register to use during the prolog of a function. */
7375 static int
7376 thumb_find_work_register (unsigned long pushed_regs_mask)
7378 int reg;
7380 /* Check the argument registers first as these are call-used. The
7381 register allocation order means that sometimes r3 might be used
7382 but earlier argument registers might not, so check them all. */
7383 for (reg = LAST_ARG_REGNUM; reg >= 0; reg --)
7384 if (!df_regs_ever_live_p (reg))
7385 return reg;
7387 /* Before going on to check the call-saved registers we can try a couple
7388 more ways of deducing that r3 is available. The first is when we are
7389 pushing anonymous arguments onto the stack and we have less than 4
7390 registers worth of fixed arguments(*). In this case r3 will be part of
7391 the variable argument list and so we can be sure that it will be
7392 pushed right at the start of the function. Hence it will be available
7393 for the rest of the prologue.
7394 (*): ie crtl->args.pretend_args_size is greater than 0. */
7395 if (cfun->machine->uses_anonymous_args
7396 && crtl->args.pretend_args_size > 0)
7397 return LAST_ARG_REGNUM;
7399 /* The other case is when we have fixed arguments but less than 4 registers
7400 worth. In this case r3 might be used in the body of the function, but
7401 it is not being used to convey an argument into the function. In theory
7402 we could just check crtl->args.size to see how many bytes are
7403 being passed in argument registers, but it seems that it is unreliable.
7404 Sometimes it will have the value 0 when in fact arguments are being
7405 passed. (See testcase execute/20021111-1.c for an example). So we also
7406 check the args_info.nregs field as well. The problem with this field is
7407 that it makes no allowances for arguments that are passed to the
7408 function but which are not used. Hence we could miss an opportunity
7409 when a function has an unused argument in r3. But it is better to be
7410 safe than to be sorry. */
7411 if (! cfun->machine->uses_anonymous_args
7412 && crtl->args.size >= 0
7413 && crtl->args.size <= (LAST_ARG_REGNUM * UNITS_PER_WORD)
7414 && (TARGET_AAPCS_BASED
7415 ? crtl->args.info.aapcs_ncrn < 4
7416 : crtl->args.info.nregs < 4))
7417 return LAST_ARG_REGNUM;
7419 /* Otherwise look for a call-saved register that is going to be pushed. */
7420 for (reg = LAST_LO_REGNUM; reg > LAST_ARG_REGNUM; reg --)
7421 if (pushed_regs_mask & (1 << reg))
7422 return reg;
7424 if (TARGET_THUMB2)
7426 /* Thumb-2 can use high regs. */
7427 for (reg = FIRST_HI_REGNUM; reg < 15; reg ++)
7428 if (pushed_regs_mask & (1 << reg))
7429 return reg;
7431 /* Something went wrong - thumb_compute_save_reg_mask()
7432 should have arranged for a suitable register to be pushed. */
7433 gcc_unreachable ();
7436 static GTY(()) int pic_labelno;
7438 /* Generate code to load the PIC register. In thumb mode SCRATCH is a
7439 low register. */
7441 void
7442 arm_load_pic_register (unsigned long saved_regs ATTRIBUTE_UNUSED)
7444 rtx l1, labelno, pic_tmp, pic_rtx, pic_reg;
7446 if (crtl->uses_pic_offset_table == 0 || TARGET_SINGLE_PIC_BASE)
7447 return;
7449 gcc_assert (flag_pic);
7451 pic_reg = cfun->machine->pic_reg;
7452 if (TARGET_VXWORKS_RTP)
7454 pic_rtx = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE);
7455 pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
7456 emit_insn (gen_pic_load_addr_32bit (pic_reg, pic_rtx));
7458 emit_insn (gen_rtx_SET (pic_reg, gen_rtx_MEM (Pmode, pic_reg)));
7460 pic_tmp = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
7461 emit_insn (gen_pic_offset_arm (pic_reg, pic_reg, pic_tmp));
7463 else
7465 /* We use an UNSPEC rather than a LABEL_REF because this label
7466 never appears in the code stream. */
7468 labelno = GEN_INT (pic_labelno++);
7469 l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
7470 l1 = gen_rtx_CONST (VOIDmode, l1);
7472 /* On the ARM the PC register contains 'dot + 8' at the time of the
7473 addition, on the Thumb it is 'dot + 4'. */
7474 pic_rtx = plus_constant (Pmode, l1, TARGET_ARM ? 8 : 4);
7475 pic_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, pic_rtx),
7476 UNSPEC_GOTSYM_OFF);
7477 pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
7479 if (TARGET_32BIT)
7481 emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
7483 else /* TARGET_THUMB1 */
7485 if (arm_pic_register != INVALID_REGNUM
7486 && REGNO (pic_reg) > LAST_LO_REGNUM)
7488 /* We will have pushed the pic register, so we should always be
7489 able to find a work register. */
7490 pic_tmp = gen_rtx_REG (SImode,
7491 thumb_find_work_register (saved_regs));
7492 emit_insn (gen_pic_load_addr_thumb1 (pic_tmp, pic_rtx));
7493 emit_insn (gen_movsi (pic_offset_table_rtx, pic_tmp));
7494 emit_insn (gen_pic_add_dot_plus_four (pic_reg, pic_reg, labelno));
7496 else if (arm_pic_register != INVALID_REGNUM
7497 && arm_pic_register > LAST_LO_REGNUM
7498 && REGNO (pic_reg) <= LAST_LO_REGNUM)
7500 emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
7501 emit_move_insn (gen_rtx_REG (Pmode, arm_pic_register), pic_reg);
7502 emit_use (gen_rtx_REG (Pmode, arm_pic_register));
7504 else
7505 emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
7509 /* Need to emit this whether or not we obey regdecls,
7510 since setjmp/longjmp can cause life info to screw up. */
7511 emit_use (pic_reg);
7514 /* Generate code to load the address of a static var when flag_pic is set. */
7515 static rtx_insn *
7516 arm_pic_static_addr (rtx orig, rtx reg)
7518 rtx l1, labelno, offset_rtx;
7520 gcc_assert (flag_pic);
7522 /* We use an UNSPEC rather than a LABEL_REF because this label
7523 never appears in the code stream. */
7524 labelno = GEN_INT (pic_labelno++);
7525 l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
7526 l1 = gen_rtx_CONST (VOIDmode, l1);
7528 /* On the ARM the PC register contains 'dot + 8' at the time of the
7529 addition, on the Thumb it is 'dot + 4'. */
7530 offset_rtx = plus_constant (Pmode, l1, TARGET_ARM ? 8 : 4);
7531 offset_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, orig, offset_rtx),
7532 UNSPEC_SYMBOL_OFFSET);
7533 offset_rtx = gen_rtx_CONST (Pmode, offset_rtx);
7535 return emit_insn (gen_pic_load_addr_unified (reg, offset_rtx, labelno));
7538 /* Return nonzero if X is valid as an ARM state addressing register. */
7539 static int
7540 arm_address_register_rtx_p (rtx x, int strict_p)
7542 int regno;
7544 if (!REG_P (x))
7545 return 0;
7547 regno = REGNO (x);
7549 if (strict_p)
7550 return ARM_REGNO_OK_FOR_BASE_P (regno);
7552 return (regno <= LAST_ARM_REGNUM
7553 || regno >= FIRST_PSEUDO_REGISTER
7554 || regno == FRAME_POINTER_REGNUM
7555 || regno == ARG_POINTER_REGNUM);
7558 /* Return TRUE if this rtx is the difference of a symbol and a label,
7559 and will reduce to a PC-relative relocation in the object file.
7560 Expressions like this can be left alone when generating PIC, rather
7561 than forced through the GOT. */
7562 static int
7563 pcrel_constant_p (rtx x)
7565 if (GET_CODE (x) == MINUS)
7566 return symbol_mentioned_p (XEXP (x, 0)) && label_mentioned_p (XEXP (x, 1));
7568 return FALSE;
7571 /* Return true if X will surely end up in an index register after next
7572 splitting pass. */
7573 static bool
7574 will_be_in_index_register (const_rtx x)
7576 /* arm.md: calculate_pic_address will split this into a register. */
7577 return GET_CODE (x) == UNSPEC && (XINT (x, 1) == UNSPEC_PIC_SYM);
7580 /* Return nonzero if X is a valid ARM state address operand. */
7582 arm_legitimate_address_outer_p (machine_mode mode, rtx x, RTX_CODE outer,
7583 int strict_p)
7585 bool use_ldrd;
7586 enum rtx_code code = GET_CODE (x);
7588 if (arm_address_register_rtx_p (x, strict_p))
7589 return 1;
7591 use_ldrd = (TARGET_LDRD
7592 && (mode == DImode || mode == DFmode));
7594 if (code == POST_INC || code == PRE_DEC
7595 || ((code == PRE_INC || code == POST_DEC)
7596 && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
7597 return arm_address_register_rtx_p (XEXP (x, 0), strict_p);
7599 else if ((code == POST_MODIFY || code == PRE_MODIFY)
7600 && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
7601 && GET_CODE (XEXP (x, 1)) == PLUS
7602 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
7604 rtx addend = XEXP (XEXP (x, 1), 1);
7606 /* Don't allow ldrd post increment by register because it's hard
7607 to fixup invalid register choices. */
7608 if (use_ldrd
7609 && GET_CODE (x) == POST_MODIFY
7610 && REG_P (addend))
7611 return 0;
7613 return ((use_ldrd || GET_MODE_SIZE (mode) <= 4)
7614 && arm_legitimate_index_p (mode, addend, outer, strict_p));
7617 /* After reload constants split into minipools will have addresses
7618 from a LABEL_REF. */
7619 else if (reload_completed
7620 && (code == LABEL_REF
7621 || (code == CONST
7622 && GET_CODE (XEXP (x, 0)) == PLUS
7623 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
7624 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
7625 return 1;
7627 else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))
7628 return 0;
7630 else if (code == PLUS)
7632 rtx xop0 = XEXP (x, 0);
7633 rtx xop1 = XEXP (x, 1);
7635 return ((arm_address_register_rtx_p (xop0, strict_p)
7636 && ((CONST_INT_P (xop1)
7637 && arm_legitimate_index_p (mode, xop1, outer, strict_p))
7638 || (!strict_p && will_be_in_index_register (xop1))))
7639 || (arm_address_register_rtx_p (xop1, strict_p)
7640 && arm_legitimate_index_p (mode, xop0, outer, strict_p)));
7643 #if 0
7644 /* Reload currently can't handle MINUS, so disable this for now */
7645 else if (GET_CODE (x) == MINUS)
7647 rtx xop0 = XEXP (x, 0);
7648 rtx xop1 = XEXP (x, 1);
7650 return (arm_address_register_rtx_p (xop0, strict_p)
7651 && arm_legitimate_index_p (mode, xop1, outer, strict_p));
7653 #endif
7655 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
7656 && code == SYMBOL_REF
7657 && CONSTANT_POOL_ADDRESS_P (x)
7658 && ! (flag_pic
7659 && symbol_mentioned_p (get_pool_constant (x))
7660 && ! pcrel_constant_p (get_pool_constant (x))))
7661 return 1;
7663 return 0;
7666 /* Return nonzero if X is a valid Thumb-2 address operand. */
7667 static int
7668 thumb2_legitimate_address_p (machine_mode mode, rtx x, int strict_p)
7670 bool use_ldrd;
7671 enum rtx_code code = GET_CODE (x);
7673 if (arm_address_register_rtx_p (x, strict_p))
7674 return 1;
7676 use_ldrd = (TARGET_LDRD
7677 && (mode == DImode || mode == DFmode));
7679 if (code == POST_INC || code == PRE_DEC
7680 || ((code == PRE_INC || code == POST_DEC)
7681 && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
7682 return arm_address_register_rtx_p (XEXP (x, 0), strict_p);
7684 else if ((code == POST_MODIFY || code == PRE_MODIFY)
7685 && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
7686 && GET_CODE (XEXP (x, 1)) == PLUS
7687 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
7689 /* Thumb-2 only has autoincrement by constant. */
7690 rtx addend = XEXP (XEXP (x, 1), 1);
7691 HOST_WIDE_INT offset;
7693 if (!CONST_INT_P (addend))
7694 return 0;
7696 offset = INTVAL(addend);
7697 if (GET_MODE_SIZE (mode) <= 4)
7698 return (offset > -256 && offset < 256);
7700 return (use_ldrd && offset > -1024 && offset < 1024
7701 && (offset & 3) == 0);
7704 /* After reload constants split into minipools will have addresses
7705 from a LABEL_REF. */
7706 else if (reload_completed
7707 && (code == LABEL_REF
7708 || (code == CONST
7709 && GET_CODE (XEXP (x, 0)) == PLUS
7710 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
7711 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
7712 return 1;
7714 else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))
7715 return 0;
7717 else if (code == PLUS)
7719 rtx xop0 = XEXP (x, 0);
7720 rtx xop1 = XEXP (x, 1);
7722 return ((arm_address_register_rtx_p (xop0, strict_p)
7723 && (thumb2_legitimate_index_p (mode, xop1, strict_p)
7724 || (!strict_p && will_be_in_index_register (xop1))))
7725 || (arm_address_register_rtx_p (xop1, strict_p)
7726 && thumb2_legitimate_index_p (mode, xop0, strict_p)));
7729 /* Normally we can assign constant values to target registers without
7730 the help of constant pool. But there are cases we have to use constant
7731 pool like:
7732 1) assign a label to register.
7733 2) sign-extend a 8bit value to 32bit and then assign to register.
7735 Constant pool access in format:
7736 (set (reg r0) (mem (symbol_ref (".LC0"))))
7737 will cause the use of literal pool (later in function arm_reorg).
7738 So here we mark such format as an invalid format, then the compiler
7739 will adjust it into:
7740 (set (reg r0) (symbol_ref (".LC0")))
7741 (set (reg r0) (mem (reg r0))).
7742 No extra register is required, and (mem (reg r0)) won't cause the use
7743 of literal pools. */
7744 else if (arm_disable_literal_pool && code == SYMBOL_REF
7745 && CONSTANT_POOL_ADDRESS_P (x))
7746 return 0;
7748 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
7749 && code == SYMBOL_REF
7750 && CONSTANT_POOL_ADDRESS_P (x)
7751 && ! (flag_pic
7752 && symbol_mentioned_p (get_pool_constant (x))
7753 && ! pcrel_constant_p (get_pool_constant (x))))
7754 return 1;
7756 return 0;
7759 /* Return nonzero if INDEX is valid for an address index operand in
7760 ARM state. */
7761 static int
7762 arm_legitimate_index_p (machine_mode mode, rtx index, RTX_CODE outer,
7763 int strict_p)
7765 HOST_WIDE_INT range;
7766 enum rtx_code code = GET_CODE (index);
7768 /* Standard coprocessor addressing modes. */
7769 if (TARGET_HARD_FLOAT
7770 && (mode == SFmode || mode == DFmode))
7771 return (code == CONST_INT && INTVAL (index) < 1024
7772 && INTVAL (index) > -1024
7773 && (INTVAL (index) & 3) == 0);
7775 /* For quad modes, we restrict the constant offset to be slightly less
7776 than what the instruction format permits. We do this because for
7777 quad mode moves, we will actually decompose them into two separate
7778 double-mode reads or writes. INDEX must therefore be a valid
7779 (double-mode) offset and so should INDEX+8. */
7780 if (TARGET_NEON && VALID_NEON_QREG_MODE (mode))
7781 return (code == CONST_INT
7782 && INTVAL (index) < 1016
7783 && INTVAL (index) > -1024
7784 && (INTVAL (index) & 3) == 0);
7786 /* We have no such constraint on double mode offsets, so we permit the
7787 full range of the instruction format. */
7788 if (TARGET_NEON && VALID_NEON_DREG_MODE (mode))
7789 return (code == CONST_INT
7790 && INTVAL (index) < 1024
7791 && INTVAL (index) > -1024
7792 && (INTVAL (index) & 3) == 0);
7794 if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
7795 return (code == CONST_INT
7796 && INTVAL (index) < 1024
7797 && INTVAL (index) > -1024
7798 && (INTVAL (index) & 3) == 0);
7800 if (arm_address_register_rtx_p (index, strict_p)
7801 && (GET_MODE_SIZE (mode) <= 4))
7802 return 1;
7804 if (mode == DImode || mode == DFmode)
7806 if (code == CONST_INT)
7808 HOST_WIDE_INT val = INTVAL (index);
7810 if (TARGET_LDRD)
7811 return val > -256 && val < 256;
7812 else
7813 return val > -4096 && val < 4092;
7816 return TARGET_LDRD && arm_address_register_rtx_p (index, strict_p);
7819 if (GET_MODE_SIZE (mode) <= 4
7820 && ! (arm_arch4
7821 && (mode == HImode
7822 || mode == HFmode
7823 || (mode == QImode && outer == SIGN_EXTEND))))
7825 if (code == MULT)
7827 rtx xiop0 = XEXP (index, 0);
7828 rtx xiop1 = XEXP (index, 1);
7830 return ((arm_address_register_rtx_p (xiop0, strict_p)
7831 && power_of_two_operand (xiop1, SImode))
7832 || (arm_address_register_rtx_p (xiop1, strict_p)
7833 && power_of_two_operand (xiop0, SImode)));
7835 else if (code == LSHIFTRT || code == ASHIFTRT
7836 || code == ASHIFT || code == ROTATERT)
7838 rtx op = XEXP (index, 1);
7840 return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
7841 && CONST_INT_P (op)
7842 && INTVAL (op) > 0
7843 && INTVAL (op) <= 31);
7847 /* For ARM v4 we may be doing a sign-extend operation during the
7848 load. */
7849 if (arm_arch4)
7851 if (mode == HImode
7852 || mode == HFmode
7853 || (outer == SIGN_EXTEND && mode == QImode))
7854 range = 256;
7855 else
7856 range = 4096;
7858 else
7859 range = (mode == HImode || mode == HFmode) ? 4095 : 4096;
7861 return (code == CONST_INT
7862 && INTVAL (index) < range
7863 && INTVAL (index) > -range);
7866 /* Return true if OP is a valid index scaling factor for Thumb-2 address
7867 index operand. i.e. 1, 2, 4 or 8. */
7868 static bool
7869 thumb2_index_mul_operand (rtx op)
7871 HOST_WIDE_INT val;
7873 if (!CONST_INT_P (op))
7874 return false;
7876 val = INTVAL(op);
7877 return (val == 1 || val == 2 || val == 4 || val == 8);
7880 /* Return nonzero if INDEX is a valid Thumb-2 address index operand. */
7881 static int
7882 thumb2_legitimate_index_p (machine_mode mode, rtx index, int strict_p)
7884 enum rtx_code code = GET_CODE (index);
7886 /* ??? Combine arm and thumb2 coprocessor addressing modes. */
7887 /* Standard coprocessor addressing modes. */
7888 if (TARGET_HARD_FLOAT
7889 && (mode == SFmode || mode == DFmode))
7890 return (code == CONST_INT && INTVAL (index) < 1024
7891 /* Thumb-2 allows only > -256 index range for it's core register
7892 load/stores. Since we allow SF/DF in core registers, we have
7893 to use the intersection between -256~4096 (core) and -1024~1024
7894 (coprocessor). */
7895 && INTVAL (index) > -256
7896 && (INTVAL (index) & 3) == 0);
7898 if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
7900 /* For DImode assume values will usually live in core regs
7901 and only allow LDRD addressing modes. */
7902 if (!TARGET_LDRD || mode != DImode)
7903 return (code == CONST_INT
7904 && INTVAL (index) < 1024
7905 && INTVAL (index) > -1024
7906 && (INTVAL (index) & 3) == 0);
7909 /* For quad modes, we restrict the constant offset to be slightly less
7910 than what the instruction format permits. We do this because for
7911 quad mode moves, we will actually decompose them into two separate
7912 double-mode reads or writes. INDEX must therefore be a valid
7913 (double-mode) offset and so should INDEX+8. */
7914 if (TARGET_NEON && VALID_NEON_QREG_MODE (mode))
7915 return (code == CONST_INT
7916 && INTVAL (index) < 1016
7917 && INTVAL (index) > -1024
7918 && (INTVAL (index) & 3) == 0);
7920 /* We have no such constraint on double mode offsets, so we permit the
7921 full range of the instruction format. */
7922 if (TARGET_NEON && VALID_NEON_DREG_MODE (mode))
7923 return (code == CONST_INT
7924 && INTVAL (index) < 1024
7925 && INTVAL (index) > -1024
7926 && (INTVAL (index) & 3) == 0);
7928 if (arm_address_register_rtx_p (index, strict_p)
7929 && (GET_MODE_SIZE (mode) <= 4))
7930 return 1;
7932 if (mode == DImode || mode == DFmode)
7934 if (code == CONST_INT)
7936 HOST_WIDE_INT val = INTVAL (index);
7937 /* ??? Can we assume ldrd for thumb2? */
7938 /* Thumb-2 ldrd only has reg+const addressing modes. */
7939 /* ldrd supports offsets of +-1020.
7940 However the ldr fallback does not. */
7941 return val > -256 && val < 256 && (val & 3) == 0;
7943 else
7944 return 0;
7947 if (code == MULT)
7949 rtx xiop0 = XEXP (index, 0);
7950 rtx xiop1 = XEXP (index, 1);
7952 return ((arm_address_register_rtx_p (xiop0, strict_p)
7953 && thumb2_index_mul_operand (xiop1))
7954 || (arm_address_register_rtx_p (xiop1, strict_p)
7955 && thumb2_index_mul_operand (xiop0)));
7957 else if (code == ASHIFT)
7959 rtx op = XEXP (index, 1);
7961 return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
7962 && CONST_INT_P (op)
7963 && INTVAL (op) > 0
7964 && INTVAL (op) <= 3);
7967 return (code == CONST_INT
7968 && INTVAL (index) < 4096
7969 && INTVAL (index) > -256);
7972 /* Return nonzero if X is valid as a 16-bit Thumb state base register. */
7973 static int
7974 thumb1_base_register_rtx_p (rtx x, machine_mode mode, int strict_p)
7976 int regno;
7978 if (!REG_P (x))
7979 return 0;
7981 regno = REGNO (x);
7983 if (strict_p)
7984 return THUMB1_REGNO_MODE_OK_FOR_BASE_P (regno, mode);
7986 return (regno <= LAST_LO_REGNUM
7987 || regno > LAST_VIRTUAL_REGISTER
7988 || regno == FRAME_POINTER_REGNUM
7989 || (GET_MODE_SIZE (mode) >= 4
7990 && (regno == STACK_POINTER_REGNUM
7991 || regno >= FIRST_PSEUDO_REGISTER
7992 || x == hard_frame_pointer_rtx
7993 || x == arg_pointer_rtx)));
7996 /* Return nonzero if x is a legitimate index register. This is the case
7997 for any base register that can access a QImode object. */
7998 inline static int
7999 thumb1_index_register_rtx_p (rtx x, int strict_p)
8001 return thumb1_base_register_rtx_p (x, QImode, strict_p);
8004 /* Return nonzero if x is a legitimate 16-bit Thumb-state address.
8006 The AP may be eliminated to either the SP or the FP, so we use the
8007 least common denominator, e.g. SImode, and offsets from 0 to 64.
8009 ??? Verify whether the above is the right approach.
8011 ??? Also, the FP may be eliminated to the SP, so perhaps that
8012 needs special handling also.
8014 ??? Look at how the mips16 port solves this problem. It probably uses
8015 better ways to solve some of these problems.
8017 Although it is not incorrect, we don't accept QImode and HImode
8018 addresses based on the frame pointer or arg pointer until the
8019 reload pass starts. This is so that eliminating such addresses
8020 into stack based ones won't produce impossible code. */
8022 thumb1_legitimate_address_p (machine_mode mode, rtx x, int strict_p)
8024 /* ??? Not clear if this is right. Experiment. */
8025 if (GET_MODE_SIZE (mode) < 4
8026 && !(reload_in_progress || reload_completed)
8027 && (reg_mentioned_p (frame_pointer_rtx, x)
8028 || reg_mentioned_p (arg_pointer_rtx, x)
8029 || reg_mentioned_p (virtual_incoming_args_rtx, x)
8030 || reg_mentioned_p (virtual_outgoing_args_rtx, x)
8031 || reg_mentioned_p (virtual_stack_dynamic_rtx, x)
8032 || reg_mentioned_p (virtual_stack_vars_rtx, x)))
8033 return 0;
8035 /* Accept any base register. SP only in SImode or larger. */
8036 else if (thumb1_base_register_rtx_p (x, mode, strict_p))
8037 return 1;
8039 /* This is PC relative data before arm_reorg runs. */
8040 else if (GET_MODE_SIZE (mode) >= 4 && CONSTANT_P (x)
8041 && GET_CODE (x) == SYMBOL_REF
8042 && CONSTANT_POOL_ADDRESS_P (x) && !flag_pic)
8043 return 1;
8045 /* This is PC relative data after arm_reorg runs. */
8046 else if ((GET_MODE_SIZE (mode) >= 4 || mode == HFmode)
8047 && reload_completed
8048 && (GET_CODE (x) == LABEL_REF
8049 || (GET_CODE (x) == CONST
8050 && GET_CODE (XEXP (x, 0)) == PLUS
8051 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
8052 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
8053 return 1;
8055 /* Post-inc indexing only supported for SImode and larger. */
8056 else if (GET_CODE (x) == POST_INC && GET_MODE_SIZE (mode) >= 4
8057 && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p))
8058 return 1;
8060 else if (GET_CODE (x) == PLUS)
8062 /* REG+REG address can be any two index registers. */
8063 /* We disallow FRAME+REG addressing since we know that FRAME
8064 will be replaced with STACK, and SP relative addressing only
8065 permits SP+OFFSET. */
8066 if (GET_MODE_SIZE (mode) <= 4
8067 && XEXP (x, 0) != frame_pointer_rtx
8068 && XEXP (x, 1) != frame_pointer_rtx
8069 && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
8070 && (thumb1_index_register_rtx_p (XEXP (x, 1), strict_p)
8071 || (!strict_p && will_be_in_index_register (XEXP (x, 1)))))
8072 return 1;
8074 /* REG+const has 5-7 bit offset for non-SP registers. */
8075 else if ((thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
8076 || XEXP (x, 0) == arg_pointer_rtx)
8077 && CONST_INT_P (XEXP (x, 1))
8078 && thumb_legitimate_offset_p (mode, INTVAL (XEXP (x, 1))))
8079 return 1;
8081 /* REG+const has 10-bit offset for SP, but only SImode and
8082 larger is supported. */
8083 /* ??? Should probably check for DI/DFmode overflow here
8084 just like GO_IF_LEGITIMATE_OFFSET does. */
8085 else if (REG_P (XEXP (x, 0))
8086 && REGNO (XEXP (x, 0)) == STACK_POINTER_REGNUM
8087 && GET_MODE_SIZE (mode) >= 4
8088 && CONST_INT_P (XEXP (x, 1))
8089 && INTVAL (XEXP (x, 1)) >= 0
8090 && INTVAL (XEXP (x, 1)) + GET_MODE_SIZE (mode) <= 1024
8091 && (INTVAL (XEXP (x, 1)) & 3) == 0)
8092 return 1;
8094 else if (REG_P (XEXP (x, 0))
8095 && (REGNO (XEXP (x, 0)) == FRAME_POINTER_REGNUM
8096 || REGNO (XEXP (x, 0)) == ARG_POINTER_REGNUM
8097 || (REGNO (XEXP (x, 0)) >= FIRST_VIRTUAL_REGISTER
8098 && REGNO (XEXP (x, 0))
8099 <= LAST_VIRTUAL_POINTER_REGISTER))
8100 && GET_MODE_SIZE (mode) >= 4
8101 && CONST_INT_P (XEXP (x, 1))
8102 && (INTVAL (XEXP (x, 1)) & 3) == 0)
8103 return 1;
8106 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
8107 && GET_MODE_SIZE (mode) == 4
8108 && GET_CODE (x) == SYMBOL_REF
8109 && CONSTANT_POOL_ADDRESS_P (x)
8110 && ! (flag_pic
8111 && symbol_mentioned_p (get_pool_constant (x))
8112 && ! pcrel_constant_p (get_pool_constant (x))))
8113 return 1;
8115 return 0;
8118 /* Return nonzero if VAL can be used as an offset in a Thumb-state address
8119 instruction of mode MODE. */
8121 thumb_legitimate_offset_p (machine_mode mode, HOST_WIDE_INT val)
8123 switch (GET_MODE_SIZE (mode))
8125 case 1:
8126 return val >= 0 && val < 32;
8128 case 2:
8129 return val >= 0 && val < 64 && (val & 1) == 0;
8131 default:
8132 return (val >= 0
8133 && (val + GET_MODE_SIZE (mode)) <= 128
8134 && (val & 3) == 0);
8138 bool
8139 arm_legitimate_address_p (machine_mode mode, rtx x, bool strict_p)
8141 if (TARGET_ARM)
8142 return arm_legitimate_address_outer_p (mode, x, SET, strict_p);
8143 else if (TARGET_THUMB2)
8144 return thumb2_legitimate_address_p (mode, x, strict_p);
8145 else /* if (TARGET_THUMB1) */
8146 return thumb1_legitimate_address_p (mode, x, strict_p);
8149 /* Worker function for TARGET_PREFERRED_RELOAD_CLASS.
8151 Given an rtx X being reloaded into a reg required to be
8152 in class CLASS, return the class of reg to actually use.
8153 In general this is just CLASS, but for the Thumb core registers and
8154 immediate constants we prefer a LO_REGS class or a subset. */
8156 static reg_class_t
8157 arm_preferred_reload_class (rtx x ATTRIBUTE_UNUSED, reg_class_t rclass)
8159 if (TARGET_32BIT)
8160 return rclass;
8161 else
8163 if (rclass == GENERAL_REGS)
8164 return LO_REGS;
8165 else
8166 return rclass;
8170 /* Build the SYMBOL_REF for __tls_get_addr. */
8172 static GTY(()) rtx tls_get_addr_libfunc;
8174 static rtx
8175 get_tls_get_addr (void)
8177 if (!tls_get_addr_libfunc)
8178 tls_get_addr_libfunc = init_one_libfunc ("__tls_get_addr");
8179 return tls_get_addr_libfunc;
8183 arm_load_tp (rtx target)
8185 if (!target)
8186 target = gen_reg_rtx (SImode);
8188 if (TARGET_HARD_TP)
8190 /* Can return in any reg. */
8191 emit_insn (gen_load_tp_hard (target));
8193 else
8195 /* Always returned in r0. Immediately copy the result into a pseudo,
8196 otherwise other uses of r0 (e.g. setting up function arguments) may
8197 clobber the value. */
8199 rtx tmp;
8201 emit_insn (gen_load_tp_soft ());
8203 tmp = gen_rtx_REG (SImode, R0_REGNUM);
8204 emit_move_insn (target, tmp);
8206 return target;
8209 static rtx
8210 load_tls_operand (rtx x, rtx reg)
8212 rtx tmp;
8214 if (reg == NULL_RTX)
8215 reg = gen_reg_rtx (SImode);
8217 tmp = gen_rtx_CONST (SImode, x);
8219 emit_move_insn (reg, tmp);
8221 return reg;
8224 static rtx_insn *
8225 arm_call_tls_get_addr (rtx x, rtx reg, rtx *valuep, int reloc)
8227 rtx label, labelno, sum;
8229 gcc_assert (reloc != TLS_DESCSEQ);
8230 start_sequence ();
8232 labelno = GEN_INT (pic_labelno++);
8233 label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
8234 label = gen_rtx_CONST (VOIDmode, label);
8236 sum = gen_rtx_UNSPEC (Pmode,
8237 gen_rtvec (4, x, GEN_INT (reloc), label,
8238 GEN_INT (TARGET_ARM ? 8 : 4)),
8239 UNSPEC_TLS);
8240 reg = load_tls_operand (sum, reg);
8242 if (TARGET_ARM)
8243 emit_insn (gen_pic_add_dot_plus_eight (reg, reg, labelno));
8244 else
8245 emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
8247 *valuep = emit_library_call_value (get_tls_get_addr (), NULL_RTX,
8248 LCT_PURE, /* LCT_CONST? */
8249 Pmode, 1, reg, Pmode);
8251 rtx_insn *insns = get_insns ();
8252 end_sequence ();
8254 return insns;
8257 static rtx
8258 arm_tls_descseq_addr (rtx x, rtx reg)
8260 rtx labelno = GEN_INT (pic_labelno++);
8261 rtx label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
8262 rtx sum = gen_rtx_UNSPEC (Pmode,
8263 gen_rtvec (4, x, GEN_INT (TLS_DESCSEQ),
8264 gen_rtx_CONST (VOIDmode, label),
8265 GEN_INT (!TARGET_ARM)),
8266 UNSPEC_TLS);
8267 rtx reg0 = load_tls_operand (sum, gen_rtx_REG (SImode, R0_REGNUM));
8269 emit_insn (gen_tlscall (x, labelno));
8270 if (!reg)
8271 reg = gen_reg_rtx (SImode);
8272 else
8273 gcc_assert (REGNO (reg) != R0_REGNUM);
8275 emit_move_insn (reg, reg0);
8277 return reg;
8281 legitimize_tls_address (rtx x, rtx reg)
8283 rtx dest, tp, label, labelno, sum, ret, eqv, addend;
8284 rtx_insn *insns;
8285 unsigned int model = SYMBOL_REF_TLS_MODEL (x);
8287 switch (model)
8289 case TLS_MODEL_GLOBAL_DYNAMIC:
8290 if (TARGET_GNU2_TLS)
8292 reg = arm_tls_descseq_addr (x, reg);
8294 tp = arm_load_tp (NULL_RTX);
8296 dest = gen_rtx_PLUS (Pmode, tp, reg);
8298 else
8300 /* Original scheme */
8301 insns = arm_call_tls_get_addr (x, reg, &ret, TLS_GD32);
8302 dest = gen_reg_rtx (Pmode);
8303 emit_libcall_block (insns, dest, ret, x);
8305 return dest;
8307 case TLS_MODEL_LOCAL_DYNAMIC:
8308 if (TARGET_GNU2_TLS)
8310 reg = arm_tls_descseq_addr (x, reg);
8312 tp = arm_load_tp (NULL_RTX);
8314 dest = gen_rtx_PLUS (Pmode, tp, reg);
8316 else
8318 insns = arm_call_tls_get_addr (x, reg, &ret, TLS_LDM32);
8320 /* Attach a unique REG_EQUIV, to allow the RTL optimizers to
8321 share the LDM result with other LD model accesses. */
8322 eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const1_rtx),
8323 UNSPEC_TLS);
8324 dest = gen_reg_rtx (Pmode);
8325 emit_libcall_block (insns, dest, ret, eqv);
8327 /* Load the addend. */
8328 addend = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, x,
8329 GEN_INT (TLS_LDO32)),
8330 UNSPEC_TLS);
8331 addend = force_reg (SImode, gen_rtx_CONST (SImode, addend));
8332 dest = gen_rtx_PLUS (Pmode, dest, addend);
8334 return dest;
8336 case TLS_MODEL_INITIAL_EXEC:
8337 labelno = GEN_INT (pic_labelno++);
8338 label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
8339 label = gen_rtx_CONST (VOIDmode, label);
8340 sum = gen_rtx_UNSPEC (Pmode,
8341 gen_rtvec (4, x, GEN_INT (TLS_IE32), label,
8342 GEN_INT (TARGET_ARM ? 8 : 4)),
8343 UNSPEC_TLS);
8344 reg = load_tls_operand (sum, reg);
8346 if (TARGET_ARM)
8347 emit_insn (gen_tls_load_dot_plus_eight (reg, reg, labelno));
8348 else if (TARGET_THUMB2)
8349 emit_insn (gen_tls_load_dot_plus_four (reg, NULL, reg, labelno));
8350 else
8352 emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
8353 emit_move_insn (reg, gen_const_mem (SImode, reg));
8356 tp = arm_load_tp (NULL_RTX);
8358 return gen_rtx_PLUS (Pmode, tp, reg);
8360 case TLS_MODEL_LOCAL_EXEC:
8361 tp = arm_load_tp (NULL_RTX);
8363 reg = gen_rtx_UNSPEC (Pmode,
8364 gen_rtvec (2, x, GEN_INT (TLS_LE32)),
8365 UNSPEC_TLS);
8366 reg = force_reg (SImode, gen_rtx_CONST (SImode, reg));
8368 return gen_rtx_PLUS (Pmode, tp, reg);
8370 default:
8371 abort ();
8375 /* Try machine-dependent ways of modifying an illegitimate address
8376 to be legitimate. If we find one, return the new, valid address. */
8378 arm_legitimize_address (rtx x, rtx orig_x, machine_mode mode)
8380 if (arm_tls_referenced_p (x))
8382 rtx addend = NULL;
8384 if (GET_CODE (x) == CONST && GET_CODE (XEXP (x, 0)) == PLUS)
8386 addend = XEXP (XEXP (x, 0), 1);
8387 x = XEXP (XEXP (x, 0), 0);
8390 if (GET_CODE (x) != SYMBOL_REF)
8391 return x;
8393 gcc_assert (SYMBOL_REF_TLS_MODEL (x) != 0);
8395 x = legitimize_tls_address (x, NULL_RTX);
8397 if (addend)
8399 x = gen_rtx_PLUS (SImode, x, addend);
8400 orig_x = x;
8402 else
8403 return x;
8406 if (!TARGET_ARM)
8408 /* TODO: legitimize_address for Thumb2. */
8409 if (TARGET_THUMB2)
8410 return x;
8411 return thumb_legitimize_address (x, orig_x, mode);
8414 if (GET_CODE (x) == PLUS)
8416 rtx xop0 = XEXP (x, 0);
8417 rtx xop1 = XEXP (x, 1);
8419 if (CONSTANT_P (xop0) && !symbol_mentioned_p (xop0))
8420 xop0 = force_reg (SImode, xop0);
8422 if (CONSTANT_P (xop1) && !CONST_INT_P (xop1)
8423 && !symbol_mentioned_p (xop1))
8424 xop1 = force_reg (SImode, xop1);
8426 if (ARM_BASE_REGISTER_RTX_P (xop0)
8427 && CONST_INT_P (xop1))
8429 HOST_WIDE_INT n, low_n;
8430 rtx base_reg, val;
8431 n = INTVAL (xop1);
8433 /* VFP addressing modes actually allow greater offsets, but for
8434 now we just stick with the lowest common denominator. */
8435 if (mode == DImode || mode == DFmode)
8437 low_n = n & 0x0f;
8438 n &= ~0x0f;
8439 if (low_n > 4)
8441 n += 16;
8442 low_n -= 16;
8445 else
8447 low_n = ((mode) == TImode ? 0
8448 : n >= 0 ? (n & 0xfff) : -((-n) & 0xfff));
8449 n -= low_n;
8452 base_reg = gen_reg_rtx (SImode);
8453 val = force_operand (plus_constant (Pmode, xop0, n), NULL_RTX);
8454 emit_move_insn (base_reg, val);
8455 x = plus_constant (Pmode, base_reg, low_n);
8457 else if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
8458 x = gen_rtx_PLUS (SImode, xop0, xop1);
8461 /* XXX We don't allow MINUS any more -- see comment in
8462 arm_legitimate_address_outer_p (). */
8463 else if (GET_CODE (x) == MINUS)
8465 rtx xop0 = XEXP (x, 0);
8466 rtx xop1 = XEXP (x, 1);
8468 if (CONSTANT_P (xop0))
8469 xop0 = force_reg (SImode, xop0);
8471 if (CONSTANT_P (xop1) && ! symbol_mentioned_p (xop1))
8472 xop1 = force_reg (SImode, xop1);
8474 if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
8475 x = gen_rtx_MINUS (SImode, xop0, xop1);
8478 /* Make sure to take full advantage of the pre-indexed addressing mode
8479 with absolute addresses which often allows for the base register to
8480 be factorized for multiple adjacent memory references, and it might
8481 even allows for the mini pool to be avoided entirely. */
8482 else if (CONST_INT_P (x) && optimize > 0)
8484 unsigned int bits;
8485 HOST_WIDE_INT mask, base, index;
8486 rtx base_reg;
8488 /* ldr and ldrb can use a 12-bit index, ldrsb and the rest can only
8489 use a 8-bit index. So let's use a 12-bit index for SImode only and
8490 hope that arm_gen_constant will enable ldrb to use more bits. */
8491 bits = (mode == SImode) ? 12 : 8;
8492 mask = (1 << bits) - 1;
8493 base = INTVAL (x) & ~mask;
8494 index = INTVAL (x) & mask;
8495 if (bit_count (base & 0xffffffff) > (32 - bits)/2)
8497 /* It'll most probably be more efficient to generate the base
8498 with more bits set and use a negative index instead. */
8499 base |= mask;
8500 index -= mask;
8502 base_reg = force_reg (SImode, GEN_INT (base));
8503 x = plus_constant (Pmode, base_reg, index);
8506 if (flag_pic)
8508 /* We need to find and carefully transform any SYMBOL and LABEL
8509 references; so go back to the original address expression. */
8510 rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX);
8512 if (new_x != orig_x)
8513 x = new_x;
8516 return x;
8520 /* Try machine-dependent ways of modifying an illegitimate Thumb address
8521 to be legitimate. If we find one, return the new, valid address. */
8523 thumb_legitimize_address (rtx x, rtx orig_x, machine_mode mode)
8525 if (GET_CODE (x) == PLUS
8526 && CONST_INT_P (XEXP (x, 1))
8527 && (INTVAL (XEXP (x, 1)) >= 32 * GET_MODE_SIZE (mode)
8528 || INTVAL (XEXP (x, 1)) < 0))
8530 rtx xop0 = XEXP (x, 0);
8531 rtx xop1 = XEXP (x, 1);
8532 HOST_WIDE_INT offset = INTVAL (xop1);
8534 /* Try and fold the offset into a biasing of the base register and
8535 then offsetting that. Don't do this when optimizing for space
8536 since it can cause too many CSEs. */
8537 if (optimize_size && offset >= 0
8538 && offset < 256 + 31 * GET_MODE_SIZE (mode))
8540 HOST_WIDE_INT delta;
8542 if (offset >= 256)
8543 delta = offset - (256 - GET_MODE_SIZE (mode));
8544 else if (offset < 32 * GET_MODE_SIZE (mode) + 8)
8545 delta = 31 * GET_MODE_SIZE (mode);
8546 else
8547 delta = offset & (~31 * GET_MODE_SIZE (mode));
8549 xop0 = force_operand (plus_constant (Pmode, xop0, offset - delta),
8550 NULL_RTX);
8551 x = plus_constant (Pmode, xop0, delta);
8553 else if (offset < 0 && offset > -256)
8554 /* Small negative offsets are best done with a subtract before the
8555 dereference, forcing these into a register normally takes two
8556 instructions. */
8557 x = force_operand (x, NULL_RTX);
8558 else
8560 /* For the remaining cases, force the constant into a register. */
8561 xop1 = force_reg (SImode, xop1);
8562 x = gen_rtx_PLUS (SImode, xop0, xop1);
8565 else if (GET_CODE (x) == PLUS
8566 && s_register_operand (XEXP (x, 1), SImode)
8567 && !s_register_operand (XEXP (x, 0), SImode))
8569 rtx xop0 = force_operand (XEXP (x, 0), NULL_RTX);
8571 x = gen_rtx_PLUS (SImode, xop0, XEXP (x, 1));
8574 if (flag_pic)
8576 /* We need to find and carefully transform any SYMBOL and LABEL
8577 references; so go back to the original address expression. */
8578 rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX);
8580 if (new_x != orig_x)
8581 x = new_x;
8584 return x;
8587 /* Return TRUE if X contains any TLS symbol references. */
8589 bool
8590 arm_tls_referenced_p (rtx x)
8592 if (! TARGET_HAVE_TLS)
8593 return false;
8595 subrtx_iterator::array_type array;
8596 FOR_EACH_SUBRTX (iter, array, x, ALL)
8598 const_rtx x = *iter;
8599 if (GET_CODE (x) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (x) != 0)
8600 return true;
8602 /* Don't recurse into UNSPEC_TLS looking for TLS symbols; these are
8603 TLS offsets, not real symbol references. */
8604 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
8605 iter.skip_subrtxes ();
8607 return false;
8610 /* Implement TARGET_LEGITIMATE_CONSTANT_P.
8612 On the ARM, allow any integer (invalid ones are removed later by insn
8613 patterns), nice doubles and symbol_refs which refer to the function's
8614 constant pool XXX.
8616 When generating pic allow anything. */
8618 static bool
8619 arm_legitimate_constant_p_1 (machine_mode, rtx x)
8621 return flag_pic || !label_mentioned_p (x);
8624 static bool
8625 thumb_legitimate_constant_p (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
8627 /* Splitters for TARGET_USE_MOVT call arm_emit_movpair which creates high
8628 RTX. These RTX must therefore be allowed for Thumb-1 so that when run
8629 for ARMv8-M Baseline or later the result is valid. */
8630 if (TARGET_HAVE_MOVT && GET_CODE (x) == HIGH)
8631 x = XEXP (x, 0);
8633 return (CONST_INT_P (x)
8634 || CONST_DOUBLE_P (x)
8635 || CONSTANT_ADDRESS_P (x)
8636 || flag_pic);
8639 static bool
8640 arm_legitimate_constant_p (machine_mode mode, rtx x)
8642 return (!arm_cannot_force_const_mem (mode, x)
8643 && (TARGET_32BIT
8644 ? arm_legitimate_constant_p_1 (mode, x)
8645 : thumb_legitimate_constant_p (mode, x)));
8648 /* Implement TARGET_CANNOT_FORCE_CONST_MEM. */
8650 static bool
8651 arm_cannot_force_const_mem (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
8653 rtx base, offset;
8655 if (ARM_OFFSETS_MUST_BE_WITHIN_SECTIONS_P)
8657 split_const (x, &base, &offset);
8658 if (GET_CODE (base) == SYMBOL_REF
8659 && !offset_within_block_p (base, INTVAL (offset)))
8660 return true;
8662 return arm_tls_referenced_p (x);
8665 #define REG_OR_SUBREG_REG(X) \
8666 (REG_P (X) \
8667 || (GET_CODE (X) == SUBREG && REG_P (SUBREG_REG (X))))
8669 #define REG_OR_SUBREG_RTX(X) \
8670 (REG_P (X) ? (X) : SUBREG_REG (X))
8672 static inline int
8673 thumb1_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
8675 machine_mode mode = GET_MODE (x);
8676 int total, words;
8678 switch (code)
8680 case ASHIFT:
8681 case ASHIFTRT:
8682 case LSHIFTRT:
8683 case ROTATERT:
8684 return (mode == SImode) ? COSTS_N_INSNS (1) : COSTS_N_INSNS (2);
8686 case PLUS:
8687 case MINUS:
8688 case COMPARE:
8689 case NEG:
8690 case NOT:
8691 return COSTS_N_INSNS (1);
8693 case MULT:
8694 if (CONST_INT_P (XEXP (x, 1)))
8696 int cycles = 0;
8697 unsigned HOST_WIDE_INT i = INTVAL (XEXP (x, 1));
8699 while (i)
8701 i >>= 2;
8702 cycles++;
8704 return COSTS_N_INSNS (2) + cycles;
8706 return COSTS_N_INSNS (1) + 16;
8708 case SET:
8709 /* A SET doesn't have a mode, so let's look at the SET_DEST to get
8710 the mode. */
8711 words = ARM_NUM_INTS (GET_MODE_SIZE (GET_MODE (SET_DEST (x))));
8712 return (COSTS_N_INSNS (words)
8713 + 4 * ((MEM_P (SET_SRC (x)))
8714 + MEM_P (SET_DEST (x))));
8716 case CONST_INT:
8717 if (outer == SET)
8719 if (UINTVAL (x) < 256
8720 /* 16-bit constant. */
8721 || (TARGET_HAVE_MOVT && !(INTVAL (x) & 0xffff0000)))
8722 return 0;
8723 if (thumb_shiftable_const (INTVAL (x)))
8724 return COSTS_N_INSNS (2);
8725 return COSTS_N_INSNS (3);
8727 else if ((outer == PLUS || outer == COMPARE)
8728 && INTVAL (x) < 256 && INTVAL (x) > -256)
8729 return 0;
8730 else if ((outer == IOR || outer == XOR || outer == AND)
8731 && INTVAL (x) < 256 && INTVAL (x) >= -256)
8732 return COSTS_N_INSNS (1);
8733 else if (outer == AND)
8735 int i;
8736 /* This duplicates the tests in the andsi3 expander. */
8737 for (i = 9; i <= 31; i++)
8738 if ((HOST_WIDE_INT_1 << i) - 1 == INTVAL (x)
8739 || (HOST_WIDE_INT_1 << i) - 1 == ~INTVAL (x))
8740 return COSTS_N_INSNS (2);
8742 else if (outer == ASHIFT || outer == ASHIFTRT
8743 || outer == LSHIFTRT)
8744 return 0;
8745 return COSTS_N_INSNS (2);
8747 case CONST:
8748 case CONST_DOUBLE:
8749 case LABEL_REF:
8750 case SYMBOL_REF:
8751 return COSTS_N_INSNS (3);
8753 case UDIV:
8754 case UMOD:
8755 case DIV:
8756 case MOD:
8757 return 100;
8759 case TRUNCATE:
8760 return 99;
8762 case AND:
8763 case XOR:
8764 case IOR:
8765 /* XXX guess. */
8766 return 8;
8768 case MEM:
8769 /* XXX another guess. */
8770 /* Memory costs quite a lot for the first word, but subsequent words
8771 load at the equivalent of a single insn each. */
8772 return (10 + 4 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
8773 + ((GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
8774 ? 4 : 0));
8776 case IF_THEN_ELSE:
8777 /* XXX a guess. */
8778 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
8779 return 14;
8780 return 2;
8782 case SIGN_EXTEND:
8783 case ZERO_EXTEND:
8784 total = mode == DImode ? COSTS_N_INSNS (1) : 0;
8785 total += thumb1_rtx_costs (XEXP (x, 0), GET_CODE (XEXP (x, 0)), code);
8787 if (mode == SImode)
8788 return total;
8790 if (arm_arch6)
8791 return total + COSTS_N_INSNS (1);
8793 /* Assume a two-shift sequence. Increase the cost slightly so
8794 we prefer actual shifts over an extend operation. */
8795 return total + 1 + COSTS_N_INSNS (2);
8797 default:
8798 return 99;
8802 /* Estimates the size cost of thumb1 instructions.
8803 For now most of the code is copied from thumb1_rtx_costs. We need more
8804 fine grain tuning when we have more related test cases. */
8805 static inline int
8806 thumb1_size_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
8808 machine_mode mode = GET_MODE (x);
8809 int words, cost;
8811 switch (code)
8813 case ASHIFT:
8814 case ASHIFTRT:
8815 case LSHIFTRT:
8816 case ROTATERT:
8817 return (mode == SImode) ? COSTS_N_INSNS (1) : COSTS_N_INSNS (2);
8819 case PLUS:
8820 case MINUS:
8821 /* Thumb-1 needs two instructions to fulfill shiftadd/shiftsub0/shiftsub1
8822 defined by RTL expansion, especially for the expansion of
8823 multiplication. */
8824 if ((GET_CODE (XEXP (x, 0)) == MULT
8825 && power_of_two_operand (XEXP (XEXP (x,0),1), SImode))
8826 || (GET_CODE (XEXP (x, 1)) == MULT
8827 && power_of_two_operand (XEXP (XEXP (x, 1), 1), SImode)))
8828 return COSTS_N_INSNS (2);
8829 /* Fall through. */
8830 case COMPARE:
8831 case NEG:
8832 case NOT:
8833 return COSTS_N_INSNS (1);
8835 case MULT:
8836 if (CONST_INT_P (XEXP (x, 1)))
8838 /* Thumb1 mul instruction can't operate on const. We must Load it
8839 into a register first. */
8840 int const_size = thumb1_size_rtx_costs (XEXP (x, 1), CONST_INT, SET);
8841 /* For the targets which have a very small and high-latency multiply
8842 unit, we prefer to synthesize the mult with up to 5 instructions,
8843 giving a good balance between size and performance. */
8844 if (arm_arch6m && arm_m_profile_small_mul)
8845 return COSTS_N_INSNS (5);
8846 else
8847 return COSTS_N_INSNS (1) + const_size;
8849 return COSTS_N_INSNS (1);
8851 case SET:
8852 /* A SET doesn't have a mode, so let's look at the SET_DEST to get
8853 the mode. */
8854 words = ARM_NUM_INTS (GET_MODE_SIZE (GET_MODE (SET_DEST (x))));
8855 cost = COSTS_N_INSNS (words);
8856 if (satisfies_constraint_J (SET_SRC (x))
8857 || satisfies_constraint_K (SET_SRC (x))
8858 /* Too big an immediate for a 2-byte mov, using MOVT. */
8859 || (CONST_INT_P (SET_SRC (x))
8860 && UINTVAL (SET_SRC (x)) >= 256
8861 && TARGET_HAVE_MOVT
8862 && satisfies_constraint_j (SET_SRC (x)))
8863 /* thumb1_movdi_insn. */
8864 || ((words > 1) && MEM_P (SET_SRC (x))))
8865 cost += COSTS_N_INSNS (1);
8866 return cost;
8868 case CONST_INT:
8869 if (outer == SET)
8871 if (UINTVAL (x) < 256)
8872 return COSTS_N_INSNS (1);
8873 /* movw is 4byte long. */
8874 if (TARGET_HAVE_MOVT && !(INTVAL (x) & 0xffff0000))
8875 return COSTS_N_INSNS (2);
8876 /* See split "TARGET_THUMB1 && satisfies_constraint_J". */
8877 if (INTVAL (x) >= -255 && INTVAL (x) <= -1)
8878 return COSTS_N_INSNS (2);
8879 /* See split "TARGET_THUMB1 && satisfies_constraint_K". */
8880 if (thumb_shiftable_const (INTVAL (x)))
8881 return COSTS_N_INSNS (2);
8882 return COSTS_N_INSNS (3);
8884 else if ((outer == PLUS || outer == COMPARE)
8885 && INTVAL (x) < 256 && INTVAL (x) > -256)
8886 return 0;
8887 else if ((outer == IOR || outer == XOR || outer == AND)
8888 && INTVAL (x) < 256 && INTVAL (x) >= -256)
8889 return COSTS_N_INSNS (1);
8890 else if (outer == AND)
8892 int i;
8893 /* This duplicates the tests in the andsi3 expander. */
8894 for (i = 9; i <= 31; i++)
8895 if ((HOST_WIDE_INT_1 << i) - 1 == INTVAL (x)
8896 || (HOST_WIDE_INT_1 << i) - 1 == ~INTVAL (x))
8897 return COSTS_N_INSNS (2);
8899 else if (outer == ASHIFT || outer == ASHIFTRT
8900 || outer == LSHIFTRT)
8901 return 0;
8902 return COSTS_N_INSNS (2);
8904 case CONST:
8905 case CONST_DOUBLE:
8906 case LABEL_REF:
8907 case SYMBOL_REF:
8908 return COSTS_N_INSNS (3);
8910 case UDIV:
8911 case UMOD:
8912 case DIV:
8913 case MOD:
8914 return 100;
8916 case TRUNCATE:
8917 return 99;
8919 case AND:
8920 case XOR:
8921 case IOR:
8922 return COSTS_N_INSNS (1);
8924 case MEM:
8925 return (COSTS_N_INSNS (1)
8926 + COSTS_N_INSNS (1)
8927 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
8928 + ((GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
8929 ? COSTS_N_INSNS (1) : 0));
8931 case IF_THEN_ELSE:
8932 /* XXX a guess. */
8933 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
8934 return 14;
8935 return 2;
8937 case ZERO_EXTEND:
8938 /* XXX still guessing. */
8939 switch (GET_MODE (XEXP (x, 0)))
8941 case QImode:
8942 return (1 + (mode == DImode ? 4 : 0)
8943 + (MEM_P (XEXP (x, 0)) ? 10 : 0));
8945 case HImode:
8946 return (4 + (mode == DImode ? 4 : 0)
8947 + (MEM_P (XEXP (x, 0)) ? 10 : 0));
8949 case SImode:
8950 return (1 + (MEM_P (XEXP (x, 0)) ? 10 : 0));
8952 default:
8953 return 99;
8956 default:
8957 return 99;
8961 /* Helper function for arm_rtx_costs. If the operand is a valid shift
8962 operand, then return the operand that is being shifted. If the shift
8963 is not by a constant, then set SHIFT_REG to point to the operand.
8964 Return NULL if OP is not a shifter operand. */
8965 static rtx
8966 shifter_op_p (rtx op, rtx *shift_reg)
8968 enum rtx_code code = GET_CODE (op);
8970 if (code == MULT && CONST_INT_P (XEXP (op, 1))
8971 && exact_log2 (INTVAL (XEXP (op, 1))) > 0)
8972 return XEXP (op, 0);
8973 else if (code == ROTATE && CONST_INT_P (XEXP (op, 1)))
8974 return XEXP (op, 0);
8975 else if (code == ROTATERT || code == ASHIFT || code == LSHIFTRT
8976 || code == ASHIFTRT)
8978 if (!CONST_INT_P (XEXP (op, 1)))
8979 *shift_reg = XEXP (op, 1);
8980 return XEXP (op, 0);
8983 return NULL;
8986 static bool
8987 arm_unspec_cost (rtx x, enum rtx_code /* outer_code */, bool speed_p, int *cost)
8989 const struct cpu_cost_table *extra_cost = current_tune->insn_extra_cost;
8990 rtx_code code = GET_CODE (x);
8991 gcc_assert (code == UNSPEC || code == UNSPEC_VOLATILE);
8993 switch (XINT (x, 1))
8995 case UNSPEC_UNALIGNED_LOAD:
8996 /* We can only do unaligned loads into the integer unit, and we can't
8997 use LDM or LDRD. */
8998 *cost = COSTS_N_INSNS (ARM_NUM_REGS (GET_MODE (x)));
8999 if (speed_p)
9000 *cost += (ARM_NUM_REGS (GET_MODE (x)) * extra_cost->ldst.load
9001 + extra_cost->ldst.load_unaligned);
9003 #ifdef NOT_YET
9004 *cost += arm_address_cost (XEXP (XVECEXP (x, 0, 0), 0), GET_MODE (x),
9005 ADDR_SPACE_GENERIC, speed_p);
9006 #endif
9007 return true;
9009 case UNSPEC_UNALIGNED_STORE:
9010 *cost = COSTS_N_INSNS (ARM_NUM_REGS (GET_MODE (x)));
9011 if (speed_p)
9012 *cost += (ARM_NUM_REGS (GET_MODE (x)) * extra_cost->ldst.store
9013 + extra_cost->ldst.store_unaligned);
9015 *cost += rtx_cost (XVECEXP (x, 0, 0), VOIDmode, UNSPEC, 0, speed_p);
9016 #ifdef NOT_YET
9017 *cost += arm_address_cost (XEXP (XVECEXP (x, 0, 0), 0), GET_MODE (x),
9018 ADDR_SPACE_GENERIC, speed_p);
9019 #endif
9020 return true;
9022 case UNSPEC_VRINTZ:
9023 case UNSPEC_VRINTP:
9024 case UNSPEC_VRINTM:
9025 case UNSPEC_VRINTR:
9026 case UNSPEC_VRINTX:
9027 case UNSPEC_VRINTA:
9028 if (speed_p)
9029 *cost += extra_cost->fp[GET_MODE (x) == DFmode].roundint;
9031 return true;
9032 default:
9033 *cost = COSTS_N_INSNS (2);
9034 break;
9036 return true;
9039 /* Cost of a libcall. We assume one insn per argument, an amount for the
9040 call (one insn for -Os) and then one for processing the result. */
9041 #define LIBCALL_COST(N) COSTS_N_INSNS (N + (speed_p ? 18 : 2))
9043 #define HANDLE_NARROW_SHIFT_ARITH(OP, IDX) \
9044 do \
9046 shift_op = shifter_op_p (XEXP (x, IDX), &shift_reg); \
9047 if (shift_op != NULL \
9048 && arm_rtx_shift_left_p (XEXP (x, IDX))) \
9050 if (shift_reg) \
9052 if (speed_p) \
9053 *cost += extra_cost->alu.arith_shift_reg; \
9054 *cost += rtx_cost (shift_reg, GET_MODE (shift_reg), \
9055 ASHIFT, 1, speed_p); \
9057 else if (speed_p) \
9058 *cost += extra_cost->alu.arith_shift; \
9060 *cost += (rtx_cost (shift_op, GET_MODE (shift_op), \
9061 ASHIFT, 0, speed_p) \
9062 + rtx_cost (XEXP (x, 1 - IDX), \
9063 GET_MODE (shift_op), \
9064 OP, 1, speed_p)); \
9065 return true; \
9068 while (0);
9070 /* RTX costs. Make an estimate of the cost of executing the operation
9071 X, which is contained with an operation with code OUTER_CODE.
9072 SPEED_P indicates whether the cost desired is the performance cost,
9073 or the size cost. The estimate is stored in COST and the return
9074 value is TRUE if the cost calculation is final, or FALSE if the
9075 caller should recurse through the operands of X to add additional
9076 costs.
9078 We currently make no attempt to model the size savings of Thumb-2
9079 16-bit instructions. At the normal points in compilation where
9080 this code is called we have no measure of whether the condition
9081 flags are live or not, and thus no realistic way to determine what
9082 the size will eventually be. */
9083 static bool
9084 arm_rtx_costs_internal (rtx x, enum rtx_code code, enum rtx_code outer_code,
9085 const struct cpu_cost_table *extra_cost,
9086 int *cost, bool speed_p)
9088 machine_mode mode = GET_MODE (x);
9090 *cost = COSTS_N_INSNS (1);
9092 if (TARGET_THUMB1)
9094 if (speed_p)
9095 *cost = thumb1_rtx_costs (x, code, outer_code);
9096 else
9097 *cost = thumb1_size_rtx_costs (x, code, outer_code);
9098 return true;
9101 switch (code)
9103 case SET:
9104 *cost = 0;
9105 /* SET RTXs don't have a mode so we get it from the destination. */
9106 mode = GET_MODE (SET_DEST (x));
9108 if (REG_P (SET_SRC (x))
9109 && REG_P (SET_DEST (x)))
9111 /* Assume that most copies can be done with a single insn,
9112 unless we don't have HW FP, in which case everything
9113 larger than word mode will require two insns. */
9114 *cost = COSTS_N_INSNS (((!TARGET_HARD_FLOAT
9115 && GET_MODE_SIZE (mode) > 4)
9116 || mode == DImode)
9117 ? 2 : 1);
9118 /* Conditional register moves can be encoded
9119 in 16 bits in Thumb mode. */
9120 if (!speed_p && TARGET_THUMB && outer_code == COND_EXEC)
9121 *cost >>= 1;
9123 return true;
9126 if (CONST_INT_P (SET_SRC (x)))
9128 /* Handle CONST_INT here, since the value doesn't have a mode
9129 and we would otherwise be unable to work out the true cost. */
9130 *cost = rtx_cost (SET_DEST (x), GET_MODE (SET_DEST (x)), SET,
9131 0, speed_p);
9132 outer_code = SET;
9133 /* Slightly lower the cost of setting a core reg to a constant.
9134 This helps break up chains and allows for better scheduling. */
9135 if (REG_P (SET_DEST (x))
9136 && REGNO (SET_DEST (x)) <= LR_REGNUM)
9137 *cost -= 1;
9138 x = SET_SRC (x);
9139 /* Immediate moves with an immediate in the range [0, 255] can be
9140 encoded in 16 bits in Thumb mode. */
9141 if (!speed_p && TARGET_THUMB && GET_MODE (x) == SImode
9142 && INTVAL (x) >= 0 && INTVAL (x) <=255)
9143 *cost >>= 1;
9144 goto const_int_cost;
9147 return false;
9149 case MEM:
9150 /* A memory access costs 1 insn if the mode is small, or the address is
9151 a single register, otherwise it costs one insn per word. */
9152 if (REG_P (XEXP (x, 0)))
9153 *cost = COSTS_N_INSNS (1);
9154 else if (flag_pic
9155 && GET_CODE (XEXP (x, 0)) == PLUS
9156 && will_be_in_index_register (XEXP (XEXP (x, 0), 1)))
9157 /* This will be split into two instructions.
9158 See arm.md:calculate_pic_address. */
9159 *cost = COSTS_N_INSNS (2);
9160 else
9161 *cost = COSTS_N_INSNS (ARM_NUM_REGS (mode));
9163 /* For speed optimizations, add the costs of the address and
9164 accessing memory. */
9165 if (speed_p)
9166 #ifdef NOT_YET
9167 *cost += (extra_cost->ldst.load
9168 + arm_address_cost (XEXP (x, 0), mode,
9169 ADDR_SPACE_GENERIC, speed_p));
9170 #else
9171 *cost += extra_cost->ldst.load;
9172 #endif
9173 return true;
9175 case PARALLEL:
9177 /* Calculations of LDM costs are complex. We assume an initial cost
9178 (ldm_1st) which will load the number of registers mentioned in
9179 ldm_regs_per_insn_1st registers; then each additional
9180 ldm_regs_per_insn_subsequent registers cost one more insn. The
9181 formula for N regs is thus:
9183 ldm_1st + COSTS_N_INSNS ((max (N - ldm_regs_per_insn_1st, 0)
9184 + ldm_regs_per_insn_subsequent - 1)
9185 / ldm_regs_per_insn_subsequent).
9187 Additional costs may also be added for addressing. A similar
9188 formula is used for STM. */
9190 bool is_ldm = load_multiple_operation (x, SImode);
9191 bool is_stm = store_multiple_operation (x, SImode);
9193 if (is_ldm || is_stm)
9195 if (speed_p)
9197 HOST_WIDE_INT nregs = XVECLEN (x, 0);
9198 HOST_WIDE_INT regs_per_insn_1st = is_ldm
9199 ? extra_cost->ldst.ldm_regs_per_insn_1st
9200 : extra_cost->ldst.stm_regs_per_insn_1st;
9201 HOST_WIDE_INT regs_per_insn_sub = is_ldm
9202 ? extra_cost->ldst.ldm_regs_per_insn_subsequent
9203 : extra_cost->ldst.stm_regs_per_insn_subsequent;
9205 *cost += regs_per_insn_1st
9206 + COSTS_N_INSNS (((MAX (nregs - regs_per_insn_1st, 0))
9207 + regs_per_insn_sub - 1)
9208 / regs_per_insn_sub);
9209 return true;
9213 return false;
9215 case DIV:
9216 case UDIV:
9217 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9218 && (mode == SFmode || !TARGET_VFP_SINGLE))
9219 *cost += COSTS_N_INSNS (speed_p
9220 ? extra_cost->fp[mode != SFmode].div : 0);
9221 else if (mode == SImode && TARGET_IDIV)
9222 *cost += COSTS_N_INSNS (speed_p ? extra_cost->mult[0].idiv : 0);
9223 else
9224 *cost = LIBCALL_COST (2);
9225 return false; /* All arguments must be in registers. */
9227 case MOD:
9228 /* MOD by a power of 2 can be expanded as:
9229 rsbs r1, r0, #0
9230 and r0, r0, #(n - 1)
9231 and r1, r1, #(n - 1)
9232 rsbpl r0, r1, #0. */
9233 if (CONST_INT_P (XEXP (x, 1))
9234 && exact_log2 (INTVAL (XEXP (x, 1))) > 0
9235 && mode == SImode)
9237 *cost += COSTS_N_INSNS (3);
9239 if (speed_p)
9240 *cost += 2 * extra_cost->alu.logical
9241 + extra_cost->alu.arith;
9242 return true;
9245 /* Fall-through. */
9246 case UMOD:
9247 *cost = LIBCALL_COST (2);
9248 return false; /* All arguments must be in registers. */
9250 case ROTATE:
9251 if (mode == SImode && REG_P (XEXP (x, 1)))
9253 *cost += (COSTS_N_INSNS (1)
9254 + rtx_cost (XEXP (x, 0), mode, code, 0, speed_p));
9255 if (speed_p)
9256 *cost += extra_cost->alu.shift_reg;
9257 return true;
9259 /* Fall through */
9260 case ROTATERT:
9261 case ASHIFT:
9262 case LSHIFTRT:
9263 case ASHIFTRT:
9264 if (mode == DImode && CONST_INT_P (XEXP (x, 1)))
9266 *cost += (COSTS_N_INSNS (2)
9267 + rtx_cost (XEXP (x, 0), mode, code, 0, speed_p));
9268 if (speed_p)
9269 *cost += 2 * extra_cost->alu.shift;
9270 return true;
9272 else if (mode == SImode)
9274 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
9275 /* Slightly disparage register shifts at -Os, but not by much. */
9276 if (!CONST_INT_P (XEXP (x, 1)))
9277 *cost += (speed_p ? extra_cost->alu.shift_reg : 1
9278 + rtx_cost (XEXP (x, 1), mode, code, 1, speed_p));
9279 return true;
9281 else if (GET_MODE_CLASS (mode) == MODE_INT
9282 && GET_MODE_SIZE (mode) < 4)
9284 if (code == ASHIFT)
9286 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
9287 /* Slightly disparage register shifts at -Os, but not by
9288 much. */
9289 if (!CONST_INT_P (XEXP (x, 1)))
9290 *cost += (speed_p ? extra_cost->alu.shift_reg : 1
9291 + rtx_cost (XEXP (x, 1), mode, code, 1, speed_p));
9293 else if (code == LSHIFTRT || code == ASHIFTRT)
9295 if (arm_arch_thumb2 && CONST_INT_P (XEXP (x, 1)))
9297 /* Can use SBFX/UBFX. */
9298 if (speed_p)
9299 *cost += extra_cost->alu.bfx;
9300 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
9302 else
9304 *cost += COSTS_N_INSNS (1);
9305 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
9306 if (speed_p)
9308 if (CONST_INT_P (XEXP (x, 1)))
9309 *cost += 2 * extra_cost->alu.shift;
9310 else
9311 *cost += (extra_cost->alu.shift
9312 + extra_cost->alu.shift_reg);
9314 else
9315 /* Slightly disparage register shifts. */
9316 *cost += !CONST_INT_P (XEXP (x, 1));
9319 else /* Rotates. */
9321 *cost = COSTS_N_INSNS (2 + !CONST_INT_P (XEXP (x, 1)));
9322 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
9323 if (speed_p)
9325 if (CONST_INT_P (XEXP (x, 1)))
9326 *cost += (2 * extra_cost->alu.shift
9327 + extra_cost->alu.log_shift);
9328 else
9329 *cost += (extra_cost->alu.shift
9330 + extra_cost->alu.shift_reg
9331 + extra_cost->alu.log_shift_reg);
9334 return true;
9337 *cost = LIBCALL_COST (2);
9338 return false;
9340 case BSWAP:
9341 if (arm_arch6)
9343 if (mode == SImode)
9345 if (speed_p)
9346 *cost += extra_cost->alu.rev;
9348 return false;
9351 else
9353 /* No rev instruction available. Look at arm_legacy_rev
9354 and thumb_legacy_rev for the form of RTL used then. */
9355 if (TARGET_THUMB)
9357 *cost += COSTS_N_INSNS (9);
9359 if (speed_p)
9361 *cost += 6 * extra_cost->alu.shift;
9362 *cost += 3 * extra_cost->alu.logical;
9365 else
9367 *cost += COSTS_N_INSNS (4);
9369 if (speed_p)
9371 *cost += 2 * extra_cost->alu.shift;
9372 *cost += extra_cost->alu.arith_shift;
9373 *cost += 2 * extra_cost->alu.logical;
9376 return true;
9378 return false;
9380 case MINUS:
9381 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9382 && (mode == SFmode || !TARGET_VFP_SINGLE))
9384 if (GET_CODE (XEXP (x, 0)) == MULT
9385 || GET_CODE (XEXP (x, 1)) == MULT)
9387 rtx mul_op0, mul_op1, sub_op;
9389 if (speed_p)
9390 *cost += extra_cost->fp[mode != SFmode].mult_addsub;
9392 if (GET_CODE (XEXP (x, 0)) == MULT)
9394 mul_op0 = XEXP (XEXP (x, 0), 0);
9395 mul_op1 = XEXP (XEXP (x, 0), 1);
9396 sub_op = XEXP (x, 1);
9398 else
9400 mul_op0 = XEXP (XEXP (x, 1), 0);
9401 mul_op1 = XEXP (XEXP (x, 1), 1);
9402 sub_op = XEXP (x, 0);
9405 /* The first operand of the multiply may be optionally
9406 negated. */
9407 if (GET_CODE (mul_op0) == NEG)
9408 mul_op0 = XEXP (mul_op0, 0);
9410 *cost += (rtx_cost (mul_op0, mode, code, 0, speed_p)
9411 + rtx_cost (mul_op1, mode, code, 0, speed_p)
9412 + rtx_cost (sub_op, mode, code, 0, speed_p));
9414 return true;
9417 if (speed_p)
9418 *cost += extra_cost->fp[mode != SFmode].addsub;
9419 return false;
9422 if (mode == SImode)
9424 rtx shift_by_reg = NULL;
9425 rtx shift_op;
9426 rtx non_shift_op;
9428 shift_op = shifter_op_p (XEXP (x, 0), &shift_by_reg);
9429 if (shift_op == NULL)
9431 shift_op = shifter_op_p (XEXP (x, 1), &shift_by_reg);
9432 non_shift_op = XEXP (x, 0);
9434 else
9435 non_shift_op = XEXP (x, 1);
9437 if (shift_op != NULL)
9439 if (shift_by_reg != NULL)
9441 if (speed_p)
9442 *cost += extra_cost->alu.arith_shift_reg;
9443 *cost += rtx_cost (shift_by_reg, mode, code, 0, speed_p);
9445 else if (speed_p)
9446 *cost += extra_cost->alu.arith_shift;
9448 *cost += rtx_cost (shift_op, mode, code, 0, speed_p);
9449 *cost += rtx_cost (non_shift_op, mode, code, 0, speed_p);
9450 return true;
9453 if (arm_arch_thumb2
9454 && GET_CODE (XEXP (x, 1)) == MULT)
9456 /* MLS. */
9457 if (speed_p)
9458 *cost += extra_cost->mult[0].add;
9459 *cost += rtx_cost (XEXP (x, 0), mode, MINUS, 0, speed_p);
9460 *cost += rtx_cost (XEXP (XEXP (x, 1), 0), mode, MULT, 0, speed_p);
9461 *cost += rtx_cost (XEXP (XEXP (x, 1), 1), mode, MULT, 1, speed_p);
9462 return true;
9465 if (CONST_INT_P (XEXP (x, 0)))
9467 int insns = arm_gen_constant (MINUS, SImode, NULL_RTX,
9468 INTVAL (XEXP (x, 0)), NULL_RTX,
9469 NULL_RTX, 1, 0);
9470 *cost = COSTS_N_INSNS (insns);
9471 if (speed_p)
9472 *cost += insns * extra_cost->alu.arith;
9473 *cost += rtx_cost (XEXP (x, 1), mode, code, 1, speed_p);
9474 return true;
9476 else if (speed_p)
9477 *cost += extra_cost->alu.arith;
9479 return false;
9482 if (GET_MODE_CLASS (mode) == MODE_INT
9483 && GET_MODE_SIZE (mode) < 4)
9485 rtx shift_op, shift_reg;
9486 shift_reg = NULL;
9488 /* We check both sides of the MINUS for shifter operands since,
9489 unlike PLUS, it's not commutative. */
9491 HANDLE_NARROW_SHIFT_ARITH (MINUS, 0)
9492 HANDLE_NARROW_SHIFT_ARITH (MINUS, 1)
9494 /* Slightly disparage, as we might need to widen the result. */
9495 *cost += 1;
9496 if (speed_p)
9497 *cost += extra_cost->alu.arith;
9499 if (CONST_INT_P (XEXP (x, 0)))
9501 *cost += rtx_cost (XEXP (x, 1), mode, code, 1, speed_p);
9502 return true;
9505 return false;
9508 if (mode == DImode)
9510 *cost += COSTS_N_INSNS (1);
9512 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND)
9514 rtx op1 = XEXP (x, 1);
9516 if (speed_p)
9517 *cost += 2 * extra_cost->alu.arith;
9519 if (GET_CODE (op1) == ZERO_EXTEND)
9520 *cost += rtx_cost (XEXP (op1, 0), VOIDmode, ZERO_EXTEND,
9521 0, speed_p);
9522 else
9523 *cost += rtx_cost (op1, mode, MINUS, 1, speed_p);
9524 *cost += rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode, ZERO_EXTEND,
9525 0, speed_p);
9526 return true;
9528 else if (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
9530 if (speed_p)
9531 *cost += extra_cost->alu.arith + extra_cost->alu.arith_shift;
9532 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode, SIGN_EXTEND,
9533 0, speed_p)
9534 + rtx_cost (XEXP (x, 1), mode, MINUS, 1, speed_p));
9535 return true;
9537 else if (GET_CODE (XEXP (x, 1)) == ZERO_EXTEND
9538 || GET_CODE (XEXP (x, 1)) == SIGN_EXTEND)
9540 if (speed_p)
9541 *cost += (extra_cost->alu.arith
9542 + (GET_CODE (XEXP (x, 1)) == ZERO_EXTEND
9543 ? extra_cost->alu.arith
9544 : extra_cost->alu.arith_shift));
9545 *cost += (rtx_cost (XEXP (x, 0), mode, MINUS, 0, speed_p)
9546 + rtx_cost (XEXP (XEXP (x, 1), 0), VOIDmode,
9547 GET_CODE (XEXP (x, 1)), 0, speed_p));
9548 return true;
9551 if (speed_p)
9552 *cost += 2 * extra_cost->alu.arith;
9553 return false;
9556 /* Vector mode? */
9558 *cost = LIBCALL_COST (2);
9559 return false;
9561 case PLUS:
9562 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9563 && (mode == SFmode || !TARGET_VFP_SINGLE))
9565 if (GET_CODE (XEXP (x, 0)) == MULT)
9567 rtx mul_op0, mul_op1, add_op;
9569 if (speed_p)
9570 *cost += extra_cost->fp[mode != SFmode].mult_addsub;
9572 mul_op0 = XEXP (XEXP (x, 0), 0);
9573 mul_op1 = XEXP (XEXP (x, 0), 1);
9574 add_op = XEXP (x, 1);
9576 *cost += (rtx_cost (mul_op0, mode, code, 0, speed_p)
9577 + rtx_cost (mul_op1, mode, code, 0, speed_p)
9578 + rtx_cost (add_op, mode, code, 0, speed_p));
9580 return true;
9583 if (speed_p)
9584 *cost += extra_cost->fp[mode != SFmode].addsub;
9585 return false;
9587 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
9589 *cost = LIBCALL_COST (2);
9590 return false;
9593 /* Narrow modes can be synthesized in SImode, but the range
9594 of useful sub-operations is limited. Check for shift operations
9595 on one of the operands. Only left shifts can be used in the
9596 narrow modes. */
9597 if (GET_MODE_CLASS (mode) == MODE_INT
9598 && GET_MODE_SIZE (mode) < 4)
9600 rtx shift_op, shift_reg;
9601 shift_reg = NULL;
9603 HANDLE_NARROW_SHIFT_ARITH (PLUS, 0)
9605 if (CONST_INT_P (XEXP (x, 1)))
9607 int insns = arm_gen_constant (PLUS, SImode, NULL_RTX,
9608 INTVAL (XEXP (x, 1)), NULL_RTX,
9609 NULL_RTX, 1, 0);
9610 *cost = COSTS_N_INSNS (insns);
9611 if (speed_p)
9612 *cost += insns * extra_cost->alu.arith;
9613 /* Slightly penalize a narrow operation as the result may
9614 need widening. */
9615 *cost += 1 + rtx_cost (XEXP (x, 0), mode, PLUS, 0, speed_p);
9616 return true;
9619 /* Slightly penalize a narrow operation as the result may
9620 need widening. */
9621 *cost += 1;
9622 if (speed_p)
9623 *cost += extra_cost->alu.arith;
9625 return false;
9628 if (mode == SImode)
9630 rtx shift_op, shift_reg;
9632 if (TARGET_INT_SIMD
9633 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
9634 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
9636 /* UXTA[BH] or SXTA[BH]. */
9637 if (speed_p)
9638 *cost += extra_cost->alu.extend_arith;
9639 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode, ZERO_EXTEND,
9640 0, speed_p)
9641 + rtx_cost (XEXP (x, 1), mode, PLUS, 0, speed_p));
9642 return true;
9645 shift_reg = NULL;
9646 shift_op = shifter_op_p (XEXP (x, 0), &shift_reg);
9647 if (shift_op != NULL)
9649 if (shift_reg)
9651 if (speed_p)
9652 *cost += extra_cost->alu.arith_shift_reg;
9653 *cost += rtx_cost (shift_reg, mode, ASHIFT, 1, speed_p);
9655 else if (speed_p)
9656 *cost += extra_cost->alu.arith_shift;
9658 *cost += (rtx_cost (shift_op, mode, ASHIFT, 0, speed_p)
9659 + rtx_cost (XEXP (x, 1), mode, PLUS, 1, speed_p));
9660 return true;
9662 if (GET_CODE (XEXP (x, 0)) == MULT)
9664 rtx mul_op = XEXP (x, 0);
9666 if (TARGET_DSP_MULTIPLY
9667 && ((GET_CODE (XEXP (mul_op, 0)) == SIGN_EXTEND
9668 && (GET_CODE (XEXP (mul_op, 1)) == SIGN_EXTEND
9669 || (GET_CODE (XEXP (mul_op, 1)) == ASHIFTRT
9670 && CONST_INT_P (XEXP (XEXP (mul_op, 1), 1))
9671 && INTVAL (XEXP (XEXP (mul_op, 1), 1)) == 16)))
9672 || (GET_CODE (XEXP (mul_op, 0)) == ASHIFTRT
9673 && CONST_INT_P (XEXP (XEXP (mul_op, 0), 1))
9674 && INTVAL (XEXP (XEXP (mul_op, 0), 1)) == 16
9675 && (GET_CODE (XEXP (mul_op, 1)) == SIGN_EXTEND
9676 || (GET_CODE (XEXP (mul_op, 1)) == ASHIFTRT
9677 && CONST_INT_P (XEXP (XEXP (mul_op, 1), 1))
9678 && (INTVAL (XEXP (XEXP (mul_op, 1), 1))
9679 == 16))))))
9681 /* SMLA[BT][BT]. */
9682 if (speed_p)
9683 *cost += extra_cost->mult[0].extend_add;
9684 *cost += (rtx_cost (XEXP (XEXP (mul_op, 0), 0), mode,
9685 SIGN_EXTEND, 0, speed_p)
9686 + rtx_cost (XEXP (XEXP (mul_op, 1), 0), mode,
9687 SIGN_EXTEND, 0, speed_p)
9688 + rtx_cost (XEXP (x, 1), mode, PLUS, 1, speed_p));
9689 return true;
9692 if (speed_p)
9693 *cost += extra_cost->mult[0].add;
9694 *cost += (rtx_cost (XEXP (mul_op, 0), mode, MULT, 0, speed_p)
9695 + rtx_cost (XEXP (mul_op, 1), mode, MULT, 1, speed_p)
9696 + rtx_cost (XEXP (x, 1), mode, PLUS, 1, speed_p));
9697 return true;
9699 if (CONST_INT_P (XEXP (x, 1)))
9701 int insns = arm_gen_constant (PLUS, SImode, NULL_RTX,
9702 INTVAL (XEXP (x, 1)), NULL_RTX,
9703 NULL_RTX, 1, 0);
9704 *cost = COSTS_N_INSNS (insns);
9705 if (speed_p)
9706 *cost += insns * extra_cost->alu.arith;
9707 *cost += rtx_cost (XEXP (x, 0), mode, PLUS, 0, speed_p);
9708 return true;
9710 else if (speed_p)
9711 *cost += extra_cost->alu.arith;
9713 return false;
9716 if (mode == DImode)
9718 if (arm_arch3m
9719 && GET_CODE (XEXP (x, 0)) == MULT
9720 && ((GET_CODE (XEXP (XEXP (x, 0), 0)) == ZERO_EXTEND
9721 && GET_CODE (XEXP (XEXP (x, 0), 1)) == ZERO_EXTEND)
9722 || (GET_CODE (XEXP (XEXP (x, 0), 0)) == SIGN_EXTEND
9723 && GET_CODE (XEXP (XEXP (x, 0), 1)) == SIGN_EXTEND)))
9725 if (speed_p)
9726 *cost += extra_cost->mult[1].extend_add;
9727 *cost += (rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0), mode,
9728 ZERO_EXTEND, 0, speed_p)
9729 + rtx_cost (XEXP (XEXP (XEXP (x, 0), 1), 0), mode,
9730 ZERO_EXTEND, 0, speed_p)
9731 + rtx_cost (XEXP (x, 1), mode, PLUS, 1, speed_p));
9732 return true;
9735 *cost += COSTS_N_INSNS (1);
9737 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
9738 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
9740 if (speed_p)
9741 *cost += (extra_cost->alu.arith
9742 + (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
9743 ? extra_cost->alu.arith
9744 : extra_cost->alu.arith_shift));
9746 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode, ZERO_EXTEND,
9747 0, speed_p)
9748 + rtx_cost (XEXP (x, 1), mode, PLUS, 1, speed_p));
9749 return true;
9752 if (speed_p)
9753 *cost += 2 * extra_cost->alu.arith;
9754 return false;
9757 /* Vector mode? */
9758 *cost = LIBCALL_COST (2);
9759 return false;
9760 case IOR:
9761 if (mode == SImode && arm_arch6 && aarch_rev16_p (x))
9763 if (speed_p)
9764 *cost += extra_cost->alu.rev;
9766 return true;
9768 /* Fall through. */
9769 case AND: case XOR:
9770 if (mode == SImode)
9772 enum rtx_code subcode = GET_CODE (XEXP (x, 0));
9773 rtx op0 = XEXP (x, 0);
9774 rtx shift_op, shift_reg;
9776 if (subcode == NOT
9777 && (code == AND
9778 || (code == IOR && TARGET_THUMB2)))
9779 op0 = XEXP (op0, 0);
9781 shift_reg = NULL;
9782 shift_op = shifter_op_p (op0, &shift_reg);
9783 if (shift_op != NULL)
9785 if (shift_reg)
9787 if (speed_p)
9788 *cost += extra_cost->alu.log_shift_reg;
9789 *cost += rtx_cost (shift_reg, mode, ASHIFT, 1, speed_p);
9791 else if (speed_p)
9792 *cost += extra_cost->alu.log_shift;
9794 *cost += (rtx_cost (shift_op, mode, ASHIFT, 0, speed_p)
9795 + rtx_cost (XEXP (x, 1), mode, code, 1, speed_p));
9796 return true;
9799 if (CONST_INT_P (XEXP (x, 1)))
9801 int insns = arm_gen_constant (code, SImode, NULL_RTX,
9802 INTVAL (XEXP (x, 1)), NULL_RTX,
9803 NULL_RTX, 1, 0);
9805 *cost = COSTS_N_INSNS (insns);
9806 if (speed_p)
9807 *cost += insns * extra_cost->alu.logical;
9808 *cost += rtx_cost (op0, mode, code, 0, speed_p);
9809 return true;
9812 if (speed_p)
9813 *cost += extra_cost->alu.logical;
9814 *cost += (rtx_cost (op0, mode, code, 0, speed_p)
9815 + rtx_cost (XEXP (x, 1), mode, code, 1, speed_p));
9816 return true;
9819 if (mode == DImode)
9821 rtx op0 = XEXP (x, 0);
9822 enum rtx_code subcode = GET_CODE (op0);
9824 *cost += COSTS_N_INSNS (1);
9826 if (subcode == NOT
9827 && (code == AND
9828 || (code == IOR && TARGET_THUMB2)))
9829 op0 = XEXP (op0, 0);
9831 if (GET_CODE (op0) == ZERO_EXTEND)
9833 if (speed_p)
9834 *cost += 2 * extra_cost->alu.logical;
9836 *cost += (rtx_cost (XEXP (op0, 0), VOIDmode, ZERO_EXTEND,
9837 0, speed_p)
9838 + rtx_cost (XEXP (x, 1), mode, code, 0, speed_p));
9839 return true;
9841 else if (GET_CODE (op0) == SIGN_EXTEND)
9843 if (speed_p)
9844 *cost += extra_cost->alu.logical + extra_cost->alu.log_shift;
9846 *cost += (rtx_cost (XEXP (op0, 0), VOIDmode, SIGN_EXTEND,
9847 0, speed_p)
9848 + rtx_cost (XEXP (x, 1), mode, code, 0, speed_p));
9849 return true;
9852 if (speed_p)
9853 *cost += 2 * extra_cost->alu.logical;
9855 return true;
9857 /* Vector mode? */
9859 *cost = LIBCALL_COST (2);
9860 return false;
9862 case MULT:
9863 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9864 && (mode == SFmode || !TARGET_VFP_SINGLE))
9866 rtx op0 = XEXP (x, 0);
9868 if (GET_CODE (op0) == NEG && !flag_rounding_math)
9869 op0 = XEXP (op0, 0);
9871 if (speed_p)
9872 *cost += extra_cost->fp[mode != SFmode].mult;
9874 *cost += (rtx_cost (op0, mode, MULT, 0, speed_p)
9875 + rtx_cost (XEXP (x, 1), mode, MULT, 1, speed_p));
9876 return true;
9878 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
9880 *cost = LIBCALL_COST (2);
9881 return false;
9884 if (mode == SImode)
9886 if (TARGET_DSP_MULTIPLY
9887 && ((GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
9888 && (GET_CODE (XEXP (x, 1)) == SIGN_EXTEND
9889 || (GET_CODE (XEXP (x, 1)) == ASHIFTRT
9890 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
9891 && INTVAL (XEXP (XEXP (x, 1), 1)) == 16)))
9892 || (GET_CODE (XEXP (x, 0)) == ASHIFTRT
9893 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
9894 && INTVAL (XEXP (XEXP (x, 0), 1)) == 16
9895 && (GET_CODE (XEXP (x, 1)) == SIGN_EXTEND
9896 || (GET_CODE (XEXP (x, 1)) == ASHIFTRT
9897 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
9898 && (INTVAL (XEXP (XEXP (x, 1), 1))
9899 == 16))))))
9901 /* SMUL[TB][TB]. */
9902 if (speed_p)
9903 *cost += extra_cost->mult[0].extend;
9904 *cost += rtx_cost (XEXP (XEXP (x, 0), 0), mode,
9905 SIGN_EXTEND, 0, speed_p);
9906 *cost += rtx_cost (XEXP (XEXP (x, 1), 0), mode,
9907 SIGN_EXTEND, 1, speed_p);
9908 return true;
9910 if (speed_p)
9911 *cost += extra_cost->mult[0].simple;
9912 return false;
9915 if (mode == DImode)
9917 if (arm_arch3m
9918 && ((GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
9919 && GET_CODE (XEXP (x, 1)) == ZERO_EXTEND)
9920 || (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
9921 && GET_CODE (XEXP (x, 1)) == SIGN_EXTEND)))
9923 if (speed_p)
9924 *cost += extra_cost->mult[1].extend;
9925 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode,
9926 ZERO_EXTEND, 0, speed_p)
9927 + rtx_cost (XEXP (XEXP (x, 1), 0), VOIDmode,
9928 ZERO_EXTEND, 0, speed_p));
9929 return true;
9932 *cost = LIBCALL_COST (2);
9933 return false;
9936 /* Vector mode? */
9937 *cost = LIBCALL_COST (2);
9938 return false;
9940 case NEG:
9941 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9942 && (mode == SFmode || !TARGET_VFP_SINGLE))
9944 if (GET_CODE (XEXP (x, 0)) == MULT)
9946 /* VNMUL. */
9947 *cost = rtx_cost (XEXP (x, 0), mode, NEG, 0, speed_p);
9948 return true;
9951 if (speed_p)
9952 *cost += extra_cost->fp[mode != SFmode].neg;
9954 return false;
9956 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
9958 *cost = LIBCALL_COST (1);
9959 return false;
9962 if (mode == SImode)
9964 if (GET_CODE (XEXP (x, 0)) == ABS)
9966 *cost += COSTS_N_INSNS (1);
9967 /* Assume the non-flag-changing variant. */
9968 if (speed_p)
9969 *cost += (extra_cost->alu.log_shift
9970 + extra_cost->alu.arith_shift);
9971 *cost += rtx_cost (XEXP (XEXP (x, 0), 0), mode, ABS, 0, speed_p);
9972 return true;
9975 if (GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMPARE
9976 || GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMM_COMPARE)
9978 *cost += COSTS_N_INSNS (1);
9979 /* No extra cost for MOV imm and MVN imm. */
9980 /* If the comparison op is using the flags, there's no further
9981 cost, otherwise we need to add the cost of the comparison. */
9982 if (!(REG_P (XEXP (XEXP (x, 0), 0))
9983 && REGNO (XEXP (XEXP (x, 0), 0)) == CC_REGNUM
9984 && XEXP (XEXP (x, 0), 1) == const0_rtx))
9986 mode = GET_MODE (XEXP (XEXP (x, 0), 0));
9987 *cost += (COSTS_N_INSNS (1)
9988 + rtx_cost (XEXP (XEXP (x, 0), 0), mode, COMPARE,
9989 0, speed_p)
9990 + rtx_cost (XEXP (XEXP (x, 0), 1), mode, COMPARE,
9991 1, speed_p));
9992 if (speed_p)
9993 *cost += extra_cost->alu.arith;
9995 return true;
9998 if (speed_p)
9999 *cost += extra_cost->alu.arith;
10000 return false;
10003 if (GET_MODE_CLASS (mode) == MODE_INT
10004 && GET_MODE_SIZE (mode) < 4)
10006 /* Slightly disparage, as we might need an extend operation. */
10007 *cost += 1;
10008 if (speed_p)
10009 *cost += extra_cost->alu.arith;
10010 return false;
10013 if (mode == DImode)
10015 *cost += COSTS_N_INSNS (1);
10016 if (speed_p)
10017 *cost += 2 * extra_cost->alu.arith;
10018 return false;
10021 /* Vector mode? */
10022 *cost = LIBCALL_COST (1);
10023 return false;
10025 case NOT:
10026 if (mode == SImode)
10028 rtx shift_op;
10029 rtx shift_reg = NULL;
10031 shift_op = shifter_op_p (XEXP (x, 0), &shift_reg);
10033 if (shift_op)
10035 if (shift_reg != NULL)
10037 if (speed_p)
10038 *cost += extra_cost->alu.log_shift_reg;
10039 *cost += rtx_cost (shift_reg, mode, ASHIFT, 1, speed_p);
10041 else if (speed_p)
10042 *cost += extra_cost->alu.log_shift;
10043 *cost += rtx_cost (shift_op, mode, ASHIFT, 0, speed_p);
10044 return true;
10047 if (speed_p)
10048 *cost += extra_cost->alu.logical;
10049 return false;
10051 if (mode == DImode)
10053 *cost += COSTS_N_INSNS (1);
10054 return false;
10057 /* Vector mode? */
10059 *cost += LIBCALL_COST (1);
10060 return false;
10062 case IF_THEN_ELSE:
10064 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
10066 *cost += COSTS_N_INSNS (3);
10067 return true;
10069 int op1cost = rtx_cost (XEXP (x, 1), mode, SET, 1, speed_p);
10070 int op2cost = rtx_cost (XEXP (x, 2), mode, SET, 1, speed_p);
10072 *cost = rtx_cost (XEXP (x, 0), mode, IF_THEN_ELSE, 0, speed_p);
10073 /* Assume that if one arm of the if_then_else is a register,
10074 that it will be tied with the result and eliminate the
10075 conditional insn. */
10076 if (REG_P (XEXP (x, 1)))
10077 *cost += op2cost;
10078 else if (REG_P (XEXP (x, 2)))
10079 *cost += op1cost;
10080 else
10082 if (speed_p)
10084 if (extra_cost->alu.non_exec_costs_exec)
10085 *cost += op1cost + op2cost + extra_cost->alu.non_exec;
10086 else
10087 *cost += MAX (op1cost, op2cost) + extra_cost->alu.non_exec;
10089 else
10090 *cost += op1cost + op2cost;
10093 return true;
10095 case COMPARE:
10096 if (cc_register (XEXP (x, 0), VOIDmode) && XEXP (x, 1) == const0_rtx)
10097 *cost = 0;
10098 else
10100 machine_mode op0mode;
10101 /* We'll mostly assume that the cost of a compare is the cost of the
10102 LHS. However, there are some notable exceptions. */
10104 /* Floating point compares are never done as side-effects. */
10105 op0mode = GET_MODE (XEXP (x, 0));
10106 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (op0mode) == MODE_FLOAT
10107 && (op0mode == SFmode || !TARGET_VFP_SINGLE))
10109 if (speed_p)
10110 *cost += extra_cost->fp[op0mode != SFmode].compare;
10112 if (XEXP (x, 1) == CONST0_RTX (op0mode))
10114 *cost += rtx_cost (XEXP (x, 0), op0mode, code, 0, speed_p);
10115 return true;
10118 return false;
10120 else if (GET_MODE_CLASS (op0mode) == MODE_FLOAT)
10122 *cost = LIBCALL_COST (2);
10123 return false;
10126 /* DImode compares normally take two insns. */
10127 if (op0mode == DImode)
10129 *cost += COSTS_N_INSNS (1);
10130 if (speed_p)
10131 *cost += 2 * extra_cost->alu.arith;
10132 return false;
10135 if (op0mode == SImode)
10137 rtx shift_op;
10138 rtx shift_reg;
10140 if (XEXP (x, 1) == const0_rtx
10141 && !(REG_P (XEXP (x, 0))
10142 || (GET_CODE (XEXP (x, 0)) == SUBREG
10143 && REG_P (SUBREG_REG (XEXP (x, 0))))))
10145 *cost = rtx_cost (XEXP (x, 0), op0mode, COMPARE, 0, speed_p);
10147 /* Multiply operations that set the flags are often
10148 significantly more expensive. */
10149 if (speed_p
10150 && GET_CODE (XEXP (x, 0)) == MULT
10151 && !power_of_two_operand (XEXP (XEXP (x, 0), 1), mode))
10152 *cost += extra_cost->mult[0].flag_setting;
10154 if (speed_p
10155 && GET_CODE (XEXP (x, 0)) == PLUS
10156 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
10157 && !power_of_two_operand (XEXP (XEXP (XEXP (x, 0),
10158 0), 1), mode))
10159 *cost += extra_cost->mult[0].flag_setting;
10160 return true;
10163 shift_reg = NULL;
10164 shift_op = shifter_op_p (XEXP (x, 0), &shift_reg);
10165 if (shift_op != NULL)
10167 if (shift_reg != NULL)
10169 *cost += rtx_cost (shift_reg, op0mode, ASHIFT,
10170 1, speed_p);
10171 if (speed_p)
10172 *cost += extra_cost->alu.arith_shift_reg;
10174 else if (speed_p)
10175 *cost += extra_cost->alu.arith_shift;
10176 *cost += rtx_cost (shift_op, op0mode, ASHIFT, 0, speed_p);
10177 *cost += rtx_cost (XEXP (x, 1), op0mode, COMPARE, 1, speed_p);
10178 return true;
10181 if (speed_p)
10182 *cost += extra_cost->alu.arith;
10183 if (CONST_INT_P (XEXP (x, 1))
10184 && const_ok_for_op (INTVAL (XEXP (x, 1)), COMPARE))
10186 *cost += rtx_cost (XEXP (x, 0), op0mode, COMPARE, 0, speed_p);
10187 return true;
10189 return false;
10192 /* Vector mode? */
10194 *cost = LIBCALL_COST (2);
10195 return false;
10197 return true;
10199 case EQ:
10200 case NE:
10201 case LT:
10202 case LE:
10203 case GT:
10204 case GE:
10205 case LTU:
10206 case LEU:
10207 case GEU:
10208 case GTU:
10209 case ORDERED:
10210 case UNORDERED:
10211 case UNEQ:
10212 case UNLE:
10213 case UNLT:
10214 case UNGE:
10215 case UNGT:
10216 case LTGT:
10217 if (outer_code == SET)
10219 /* Is it a store-flag operation? */
10220 if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM
10221 && XEXP (x, 1) == const0_rtx)
10223 /* Thumb also needs an IT insn. */
10224 *cost += COSTS_N_INSNS (TARGET_THUMB ? 2 : 1);
10225 return true;
10227 if (XEXP (x, 1) == const0_rtx)
10229 switch (code)
10231 case LT:
10232 /* LSR Rd, Rn, #31. */
10233 if (speed_p)
10234 *cost += extra_cost->alu.shift;
10235 break;
10237 case EQ:
10238 /* RSBS T1, Rn, #0
10239 ADC Rd, Rn, T1. */
10241 case NE:
10242 /* SUBS T1, Rn, #1
10243 SBC Rd, Rn, T1. */
10244 *cost += COSTS_N_INSNS (1);
10245 break;
10247 case LE:
10248 /* RSBS T1, Rn, Rn, LSR #31
10249 ADC Rd, Rn, T1. */
10250 *cost += COSTS_N_INSNS (1);
10251 if (speed_p)
10252 *cost += extra_cost->alu.arith_shift;
10253 break;
10255 case GT:
10256 /* RSB Rd, Rn, Rn, ASR #1
10257 LSR Rd, Rd, #31. */
10258 *cost += COSTS_N_INSNS (1);
10259 if (speed_p)
10260 *cost += (extra_cost->alu.arith_shift
10261 + extra_cost->alu.shift);
10262 break;
10264 case GE:
10265 /* ASR Rd, Rn, #31
10266 ADD Rd, Rn, #1. */
10267 *cost += COSTS_N_INSNS (1);
10268 if (speed_p)
10269 *cost += extra_cost->alu.shift;
10270 break;
10272 default:
10273 /* Remaining cases are either meaningless or would take
10274 three insns anyway. */
10275 *cost = COSTS_N_INSNS (3);
10276 break;
10278 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
10279 return true;
10281 else
10283 *cost += COSTS_N_INSNS (TARGET_THUMB ? 3 : 2);
10284 if (CONST_INT_P (XEXP (x, 1))
10285 && const_ok_for_op (INTVAL (XEXP (x, 1)), COMPARE))
10287 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
10288 return true;
10291 return false;
10294 /* Not directly inside a set. If it involves the condition code
10295 register it must be the condition for a branch, cond_exec or
10296 I_T_E operation. Since the comparison is performed elsewhere
10297 this is just the control part which has no additional
10298 cost. */
10299 else if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM
10300 && XEXP (x, 1) == const0_rtx)
10302 *cost = 0;
10303 return true;
10305 return false;
10307 case ABS:
10308 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10309 && (mode == SFmode || !TARGET_VFP_SINGLE))
10311 if (speed_p)
10312 *cost += extra_cost->fp[mode != SFmode].neg;
10314 return false;
10316 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
10318 *cost = LIBCALL_COST (1);
10319 return false;
10322 if (mode == SImode)
10324 if (speed_p)
10325 *cost += extra_cost->alu.log_shift + extra_cost->alu.arith_shift;
10326 return false;
10328 /* Vector mode? */
10329 *cost = LIBCALL_COST (1);
10330 return false;
10332 case SIGN_EXTEND:
10333 if ((arm_arch4 || GET_MODE (XEXP (x, 0)) == SImode)
10334 && MEM_P (XEXP (x, 0)))
10336 if (mode == DImode)
10337 *cost += COSTS_N_INSNS (1);
10339 if (!speed_p)
10340 return true;
10342 if (GET_MODE (XEXP (x, 0)) == SImode)
10343 *cost += extra_cost->ldst.load;
10344 else
10345 *cost += extra_cost->ldst.load_sign_extend;
10347 if (mode == DImode)
10348 *cost += extra_cost->alu.shift;
10350 return true;
10353 /* Widening from less than 32-bits requires an extend operation. */
10354 if (GET_MODE (XEXP (x, 0)) != SImode && arm_arch6)
10356 /* We have SXTB/SXTH. */
10357 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
10358 if (speed_p)
10359 *cost += extra_cost->alu.extend;
10361 else if (GET_MODE (XEXP (x, 0)) != SImode)
10363 /* Needs two shifts. */
10364 *cost += COSTS_N_INSNS (1);
10365 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
10366 if (speed_p)
10367 *cost += 2 * extra_cost->alu.shift;
10370 /* Widening beyond 32-bits requires one more insn. */
10371 if (mode == DImode)
10373 *cost += COSTS_N_INSNS (1);
10374 if (speed_p)
10375 *cost += extra_cost->alu.shift;
10378 return true;
10380 case ZERO_EXTEND:
10381 if ((arm_arch4
10382 || GET_MODE (XEXP (x, 0)) == SImode
10383 || GET_MODE (XEXP (x, 0)) == QImode)
10384 && MEM_P (XEXP (x, 0)))
10386 *cost = rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
10388 if (mode == DImode)
10389 *cost += COSTS_N_INSNS (1); /* No speed penalty. */
10391 return true;
10394 /* Widening from less than 32-bits requires an extend operation. */
10395 if (GET_MODE (XEXP (x, 0)) == QImode)
10397 /* UXTB can be a shorter instruction in Thumb2, but it might
10398 be slower than the AND Rd, Rn, #255 alternative. When
10399 optimizing for speed it should never be slower to use
10400 AND, and we don't really model 16-bit vs 32-bit insns
10401 here. */
10402 if (speed_p)
10403 *cost += extra_cost->alu.logical;
10405 else if (GET_MODE (XEXP (x, 0)) != SImode && arm_arch6)
10407 /* We have UXTB/UXTH. */
10408 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
10409 if (speed_p)
10410 *cost += extra_cost->alu.extend;
10412 else if (GET_MODE (XEXP (x, 0)) != SImode)
10414 /* Needs two shifts. It's marginally preferable to use
10415 shifts rather than two BIC instructions as the second
10416 shift may merge with a subsequent insn as a shifter
10417 op. */
10418 *cost = COSTS_N_INSNS (2);
10419 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
10420 if (speed_p)
10421 *cost += 2 * extra_cost->alu.shift;
10424 /* Widening beyond 32-bits requires one more insn. */
10425 if (mode == DImode)
10427 *cost += COSTS_N_INSNS (1); /* No speed penalty. */
10430 return true;
10432 case CONST_INT:
10433 *cost = 0;
10434 /* CONST_INT has no mode, so we cannot tell for sure how many
10435 insns are really going to be needed. The best we can do is
10436 look at the value passed. If it fits in SImode, then assume
10437 that's the mode it will be used for. Otherwise assume it
10438 will be used in DImode. */
10439 if (INTVAL (x) == trunc_int_for_mode (INTVAL (x), SImode))
10440 mode = SImode;
10441 else
10442 mode = DImode;
10444 /* Avoid blowing up in arm_gen_constant (). */
10445 if (!(outer_code == PLUS
10446 || outer_code == AND
10447 || outer_code == IOR
10448 || outer_code == XOR
10449 || outer_code == MINUS))
10450 outer_code = SET;
10452 const_int_cost:
10453 if (mode == SImode)
10455 *cost += COSTS_N_INSNS (arm_gen_constant (outer_code, SImode, NULL,
10456 INTVAL (x), NULL, NULL,
10457 0, 0));
10458 /* Extra costs? */
10460 else
10462 *cost += COSTS_N_INSNS (arm_gen_constant
10463 (outer_code, SImode, NULL,
10464 trunc_int_for_mode (INTVAL (x), SImode),
10465 NULL, NULL, 0, 0)
10466 + arm_gen_constant (outer_code, SImode, NULL,
10467 INTVAL (x) >> 32, NULL,
10468 NULL, 0, 0));
10469 /* Extra costs? */
10472 return true;
10474 case CONST:
10475 case LABEL_REF:
10476 case SYMBOL_REF:
10477 if (speed_p)
10479 if (arm_arch_thumb2 && !flag_pic)
10480 *cost += COSTS_N_INSNS (1);
10481 else
10482 *cost += extra_cost->ldst.load;
10484 else
10485 *cost += COSTS_N_INSNS (1);
10487 if (flag_pic)
10489 *cost += COSTS_N_INSNS (1);
10490 if (speed_p)
10491 *cost += extra_cost->alu.arith;
10494 return true;
10496 case CONST_FIXED:
10497 *cost = COSTS_N_INSNS (4);
10498 /* Fixme. */
10499 return true;
10501 case CONST_DOUBLE:
10502 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10503 && (mode == SFmode || !TARGET_VFP_SINGLE))
10505 if (vfp3_const_double_rtx (x))
10507 if (speed_p)
10508 *cost += extra_cost->fp[mode == DFmode].fpconst;
10509 return true;
10512 if (speed_p)
10514 if (mode == DFmode)
10515 *cost += extra_cost->ldst.loadd;
10516 else
10517 *cost += extra_cost->ldst.loadf;
10519 else
10520 *cost += COSTS_N_INSNS (1 + (mode == DFmode));
10522 return true;
10524 *cost = COSTS_N_INSNS (4);
10525 return true;
10527 case CONST_VECTOR:
10528 /* Fixme. */
10529 if (TARGET_NEON
10530 && TARGET_HARD_FLOAT
10531 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
10532 && neon_immediate_valid_for_move (x, mode, NULL, NULL))
10533 *cost = COSTS_N_INSNS (1);
10534 else
10535 *cost = COSTS_N_INSNS (4);
10536 return true;
10538 case HIGH:
10539 case LO_SUM:
10540 /* When optimizing for size, we prefer constant pool entries to
10541 MOVW/MOVT pairs, so bump the cost of these slightly. */
10542 if (!speed_p)
10543 *cost += 1;
10544 return true;
10546 case CLZ:
10547 if (speed_p)
10548 *cost += extra_cost->alu.clz;
10549 return false;
10551 case SMIN:
10552 if (XEXP (x, 1) == const0_rtx)
10554 if (speed_p)
10555 *cost += extra_cost->alu.log_shift;
10556 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
10557 return true;
10559 /* Fall through. */
10560 case SMAX:
10561 case UMIN:
10562 case UMAX:
10563 *cost += COSTS_N_INSNS (1);
10564 return false;
10566 case TRUNCATE:
10567 if (GET_CODE (XEXP (x, 0)) == ASHIFTRT
10568 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
10569 && INTVAL (XEXP (XEXP (x, 0), 1)) == 32
10570 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
10571 && ((GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == SIGN_EXTEND
10572 && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == SIGN_EXTEND)
10573 || (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == ZERO_EXTEND
10574 && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1))
10575 == ZERO_EXTEND))))
10577 if (speed_p)
10578 *cost += extra_cost->mult[1].extend;
10579 *cost += (rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0), VOIDmode,
10580 ZERO_EXTEND, 0, speed_p)
10581 + rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 1), VOIDmode,
10582 ZERO_EXTEND, 0, speed_p));
10583 return true;
10585 *cost = LIBCALL_COST (1);
10586 return false;
10588 case UNSPEC_VOLATILE:
10589 case UNSPEC:
10590 return arm_unspec_cost (x, outer_code, speed_p, cost);
10592 case PC:
10593 /* Reading the PC is like reading any other register. Writing it
10594 is more expensive, but we take that into account elsewhere. */
10595 *cost = 0;
10596 return true;
10598 case ZERO_EXTRACT:
10599 /* TODO: Simple zero_extract of bottom bits using AND. */
10600 /* Fall through. */
10601 case SIGN_EXTRACT:
10602 if (arm_arch6
10603 && mode == SImode
10604 && CONST_INT_P (XEXP (x, 1))
10605 && CONST_INT_P (XEXP (x, 2)))
10607 if (speed_p)
10608 *cost += extra_cost->alu.bfx;
10609 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
10610 return true;
10612 /* Without UBFX/SBFX, need to resort to shift operations. */
10613 *cost += COSTS_N_INSNS (1);
10614 if (speed_p)
10615 *cost += 2 * extra_cost->alu.shift;
10616 *cost += rtx_cost (XEXP (x, 0), mode, ASHIFT, 0, speed_p);
10617 return true;
10619 case FLOAT_EXTEND:
10620 if (TARGET_HARD_FLOAT)
10622 if (speed_p)
10623 *cost += extra_cost->fp[mode == DFmode].widen;
10624 if (!TARGET_FPU_ARMV8
10625 && GET_MODE (XEXP (x, 0)) == HFmode)
10627 /* Pre v8, widening HF->DF is a two-step process, first
10628 widening to SFmode. */
10629 *cost += COSTS_N_INSNS (1);
10630 if (speed_p)
10631 *cost += extra_cost->fp[0].widen;
10633 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
10634 return true;
10637 *cost = LIBCALL_COST (1);
10638 return false;
10640 case FLOAT_TRUNCATE:
10641 if (TARGET_HARD_FLOAT)
10643 if (speed_p)
10644 *cost += extra_cost->fp[mode == DFmode].narrow;
10645 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
10646 return true;
10647 /* Vector modes? */
10649 *cost = LIBCALL_COST (1);
10650 return false;
10652 case FMA:
10653 if (TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_FMA)
10655 rtx op0 = XEXP (x, 0);
10656 rtx op1 = XEXP (x, 1);
10657 rtx op2 = XEXP (x, 2);
10660 /* vfms or vfnma. */
10661 if (GET_CODE (op0) == NEG)
10662 op0 = XEXP (op0, 0);
10664 /* vfnms or vfnma. */
10665 if (GET_CODE (op2) == NEG)
10666 op2 = XEXP (op2, 0);
10668 *cost += rtx_cost (op0, mode, FMA, 0, speed_p);
10669 *cost += rtx_cost (op1, mode, FMA, 1, speed_p);
10670 *cost += rtx_cost (op2, mode, FMA, 2, speed_p);
10672 if (speed_p)
10673 *cost += extra_cost->fp[mode ==DFmode].fma;
10675 return true;
10678 *cost = LIBCALL_COST (3);
10679 return false;
10681 case FIX:
10682 case UNSIGNED_FIX:
10683 if (TARGET_HARD_FLOAT)
10685 /* The *combine_vcvtf2i reduces a vmul+vcvt into
10686 a vcvt fixed-point conversion. */
10687 if (code == FIX && mode == SImode
10688 && GET_CODE (XEXP (x, 0)) == FIX
10689 && GET_MODE (XEXP (x, 0)) == SFmode
10690 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
10691 && vfp3_const_double_for_bits (XEXP (XEXP (XEXP (x, 0), 0), 1))
10692 > 0)
10694 if (speed_p)
10695 *cost += extra_cost->fp[0].toint;
10697 *cost += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0), mode,
10698 code, 0, speed_p);
10699 return true;
10702 if (GET_MODE_CLASS (mode) == MODE_INT)
10704 mode = GET_MODE (XEXP (x, 0));
10705 if (speed_p)
10706 *cost += extra_cost->fp[mode == DFmode].toint;
10707 /* Strip of the 'cost' of rounding towards zero. */
10708 if (GET_CODE (XEXP (x, 0)) == FIX)
10709 *cost += rtx_cost (XEXP (XEXP (x, 0), 0), mode, code,
10710 0, speed_p);
10711 else
10712 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
10713 /* ??? Increase the cost to deal with transferring from
10714 FP -> CORE registers? */
10715 return true;
10717 else if (GET_MODE_CLASS (mode) == MODE_FLOAT
10718 && TARGET_FPU_ARMV8)
10720 if (speed_p)
10721 *cost += extra_cost->fp[mode == DFmode].roundint;
10722 return false;
10724 /* Vector costs? */
10726 *cost = LIBCALL_COST (1);
10727 return false;
10729 case FLOAT:
10730 case UNSIGNED_FLOAT:
10731 if (TARGET_HARD_FLOAT)
10733 /* ??? Increase the cost to deal with transferring from CORE
10734 -> FP registers? */
10735 if (speed_p)
10736 *cost += extra_cost->fp[mode == DFmode].fromint;
10737 return false;
10739 *cost = LIBCALL_COST (1);
10740 return false;
10742 case CALL:
10743 return true;
10745 case ASM_OPERANDS:
10747 /* Just a guess. Guess number of instructions in the asm
10748 plus one insn per input. Always a minimum of COSTS_N_INSNS (1)
10749 though (see PR60663). */
10750 int asm_length = MAX (1, asm_str_count (ASM_OPERANDS_TEMPLATE (x)));
10751 int num_operands = ASM_OPERANDS_INPUT_LENGTH (x);
10753 *cost = COSTS_N_INSNS (asm_length + num_operands);
10754 return true;
10756 default:
10757 if (mode != VOIDmode)
10758 *cost = COSTS_N_INSNS (ARM_NUM_REGS (mode));
10759 else
10760 *cost = COSTS_N_INSNS (4); /* Who knows? */
10761 return false;
10765 #undef HANDLE_NARROW_SHIFT_ARITH
10767 /* RTX costs entry point. */
10769 static bool
10770 arm_rtx_costs (rtx x, machine_mode mode ATTRIBUTE_UNUSED, int outer_code,
10771 int opno ATTRIBUTE_UNUSED, int *total, bool speed)
10773 bool result;
10774 int code = GET_CODE (x);
10775 gcc_assert (current_tune->insn_extra_cost);
10777 result = arm_rtx_costs_internal (x, (enum rtx_code) code,
10778 (enum rtx_code) outer_code,
10779 current_tune->insn_extra_cost,
10780 total, speed);
10782 if (dump_file && (dump_flags & TDF_DETAILS))
10784 print_rtl_single (dump_file, x);
10785 fprintf (dump_file, "\n%s cost: %d (%s)\n", speed ? "Hot" : "Cold",
10786 *total, result ? "final" : "partial");
10788 return result;
10791 /* All address computations that can be done are free, but rtx cost returns
10792 the same for practically all of them. So we weight the different types
10793 of address here in the order (most pref first):
10794 PRE/POST_INC/DEC, SHIFT or NON-INT sum, INT sum, REG, MEM or LABEL. */
10795 static inline int
10796 arm_arm_address_cost (rtx x)
10798 enum rtx_code c = GET_CODE (x);
10800 if (c == PRE_INC || c == PRE_DEC || c == POST_INC || c == POST_DEC)
10801 return 0;
10802 if (c == MEM || c == LABEL_REF || c == SYMBOL_REF)
10803 return 10;
10805 if (c == PLUS)
10807 if (CONST_INT_P (XEXP (x, 1)))
10808 return 2;
10810 if (ARITHMETIC_P (XEXP (x, 0)) || ARITHMETIC_P (XEXP (x, 1)))
10811 return 3;
10813 return 4;
10816 return 6;
10819 static inline int
10820 arm_thumb_address_cost (rtx x)
10822 enum rtx_code c = GET_CODE (x);
10824 if (c == REG)
10825 return 1;
10826 if (c == PLUS
10827 && REG_P (XEXP (x, 0))
10828 && CONST_INT_P (XEXP (x, 1)))
10829 return 1;
10831 return 2;
10834 static int
10835 arm_address_cost (rtx x, machine_mode mode ATTRIBUTE_UNUSED,
10836 addr_space_t as ATTRIBUTE_UNUSED, bool speed ATTRIBUTE_UNUSED)
10838 return TARGET_32BIT ? arm_arm_address_cost (x) : arm_thumb_address_cost (x);
10841 /* Adjust cost hook for XScale. */
10842 static bool
10843 xscale_sched_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep,
10844 int * cost)
10846 /* Some true dependencies can have a higher cost depending
10847 on precisely how certain input operands are used. */
10848 if (dep_type == 0
10849 && recog_memoized (insn) >= 0
10850 && recog_memoized (dep) >= 0)
10852 int shift_opnum = get_attr_shift (insn);
10853 enum attr_type attr_type = get_attr_type (dep);
10855 /* If nonzero, SHIFT_OPNUM contains the operand number of a shifted
10856 operand for INSN. If we have a shifted input operand and the
10857 instruction we depend on is another ALU instruction, then we may
10858 have to account for an additional stall. */
10859 if (shift_opnum != 0
10860 && (attr_type == TYPE_ALU_SHIFT_IMM
10861 || attr_type == TYPE_ALUS_SHIFT_IMM
10862 || attr_type == TYPE_LOGIC_SHIFT_IMM
10863 || attr_type == TYPE_LOGICS_SHIFT_IMM
10864 || attr_type == TYPE_ALU_SHIFT_REG
10865 || attr_type == TYPE_ALUS_SHIFT_REG
10866 || attr_type == TYPE_LOGIC_SHIFT_REG
10867 || attr_type == TYPE_LOGICS_SHIFT_REG
10868 || attr_type == TYPE_MOV_SHIFT
10869 || attr_type == TYPE_MVN_SHIFT
10870 || attr_type == TYPE_MOV_SHIFT_REG
10871 || attr_type == TYPE_MVN_SHIFT_REG))
10873 rtx shifted_operand;
10874 int opno;
10876 /* Get the shifted operand. */
10877 extract_insn (insn);
10878 shifted_operand = recog_data.operand[shift_opnum];
10880 /* Iterate over all the operands in DEP. If we write an operand
10881 that overlaps with SHIFTED_OPERAND, then we have increase the
10882 cost of this dependency. */
10883 extract_insn (dep);
10884 preprocess_constraints (dep);
10885 for (opno = 0; opno < recog_data.n_operands; opno++)
10887 /* We can ignore strict inputs. */
10888 if (recog_data.operand_type[opno] == OP_IN)
10889 continue;
10891 if (reg_overlap_mentioned_p (recog_data.operand[opno],
10892 shifted_operand))
10894 *cost = 2;
10895 return false;
10900 return true;
10903 /* Adjust cost hook for Cortex A9. */
10904 static bool
10905 cortex_a9_sched_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep,
10906 int * cost)
10908 switch (dep_type)
10910 case REG_DEP_ANTI:
10911 *cost = 0;
10912 return false;
10914 case REG_DEP_TRUE:
10915 case REG_DEP_OUTPUT:
10916 if (recog_memoized (insn) >= 0
10917 && recog_memoized (dep) >= 0)
10919 if (GET_CODE (PATTERN (insn)) == SET)
10921 if (GET_MODE_CLASS
10922 (GET_MODE (SET_DEST (PATTERN (insn)))) == MODE_FLOAT
10923 || GET_MODE_CLASS
10924 (GET_MODE (SET_SRC (PATTERN (insn)))) == MODE_FLOAT)
10926 enum attr_type attr_type_insn = get_attr_type (insn);
10927 enum attr_type attr_type_dep = get_attr_type (dep);
10929 /* By default all dependencies of the form
10930 s0 = s0 <op> s1
10931 s0 = s0 <op> s2
10932 have an extra latency of 1 cycle because
10933 of the input and output dependency in this
10934 case. However this gets modeled as an true
10935 dependency and hence all these checks. */
10936 if (REG_P (SET_DEST (PATTERN (insn)))
10937 && reg_set_p (SET_DEST (PATTERN (insn)), dep))
10939 /* FMACS is a special case where the dependent
10940 instruction can be issued 3 cycles before
10941 the normal latency in case of an output
10942 dependency. */
10943 if ((attr_type_insn == TYPE_FMACS
10944 || attr_type_insn == TYPE_FMACD)
10945 && (attr_type_dep == TYPE_FMACS
10946 || attr_type_dep == TYPE_FMACD))
10948 if (dep_type == REG_DEP_OUTPUT)
10949 *cost = insn_default_latency (dep) - 3;
10950 else
10951 *cost = insn_default_latency (dep);
10952 return false;
10954 else
10956 if (dep_type == REG_DEP_OUTPUT)
10957 *cost = insn_default_latency (dep) + 1;
10958 else
10959 *cost = insn_default_latency (dep);
10961 return false;
10966 break;
10968 default:
10969 gcc_unreachable ();
10972 return true;
10975 /* Adjust cost hook for FA726TE. */
10976 static bool
10977 fa726te_sched_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep,
10978 int * cost)
10980 /* For FA726TE, true dependency on CPSR (i.e. set cond followed by predicated)
10981 have penalty of 3. */
10982 if (dep_type == REG_DEP_TRUE
10983 && recog_memoized (insn) >= 0
10984 && recog_memoized (dep) >= 0
10985 && get_attr_conds (dep) == CONDS_SET)
10987 /* Use of carry (e.g. 64-bit arithmetic) in ALU: 3-cycle latency. */
10988 if (get_attr_conds (insn) == CONDS_USE
10989 && get_attr_type (insn) != TYPE_BRANCH)
10991 *cost = 3;
10992 return false;
10995 if (GET_CODE (PATTERN (insn)) == COND_EXEC
10996 || get_attr_conds (insn) == CONDS_USE)
10998 *cost = 0;
10999 return false;
11003 return true;
11006 /* Implement TARGET_REGISTER_MOVE_COST.
11008 Moves between VFP_REGS and GENERAL_REGS are a single insn, but
11009 it is typically more expensive than a single memory access. We set
11010 the cost to less than two memory accesses so that floating
11011 point to integer conversion does not go through memory. */
11014 arm_register_move_cost (machine_mode mode ATTRIBUTE_UNUSED,
11015 reg_class_t from, reg_class_t to)
11017 if (TARGET_32BIT)
11019 if ((IS_VFP_CLASS (from) && !IS_VFP_CLASS (to))
11020 || (!IS_VFP_CLASS (from) && IS_VFP_CLASS (to)))
11021 return 15;
11022 else if ((from == IWMMXT_REGS && to != IWMMXT_REGS)
11023 || (from != IWMMXT_REGS && to == IWMMXT_REGS))
11024 return 4;
11025 else if (from == IWMMXT_GR_REGS || to == IWMMXT_GR_REGS)
11026 return 20;
11027 else
11028 return 2;
11030 else
11032 if (from == HI_REGS || to == HI_REGS)
11033 return 4;
11034 else
11035 return 2;
11039 /* Implement TARGET_MEMORY_MOVE_COST. */
11042 arm_memory_move_cost (machine_mode mode, reg_class_t rclass,
11043 bool in ATTRIBUTE_UNUSED)
11045 if (TARGET_32BIT)
11046 return 10;
11047 else
11049 if (GET_MODE_SIZE (mode) < 4)
11050 return 8;
11051 else
11052 return ((2 * GET_MODE_SIZE (mode)) * (rclass == LO_REGS ? 1 : 2));
11056 /* Vectorizer cost model implementation. */
11058 /* Implement targetm.vectorize.builtin_vectorization_cost. */
11059 static int
11060 arm_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
11061 tree vectype,
11062 int misalign ATTRIBUTE_UNUSED)
11064 unsigned elements;
11066 switch (type_of_cost)
11068 case scalar_stmt:
11069 return current_tune->vec_costs->scalar_stmt_cost;
11071 case scalar_load:
11072 return current_tune->vec_costs->scalar_load_cost;
11074 case scalar_store:
11075 return current_tune->vec_costs->scalar_store_cost;
11077 case vector_stmt:
11078 return current_tune->vec_costs->vec_stmt_cost;
11080 case vector_load:
11081 return current_tune->vec_costs->vec_align_load_cost;
11083 case vector_store:
11084 return current_tune->vec_costs->vec_store_cost;
11086 case vec_to_scalar:
11087 return current_tune->vec_costs->vec_to_scalar_cost;
11089 case scalar_to_vec:
11090 return current_tune->vec_costs->scalar_to_vec_cost;
11092 case unaligned_load:
11093 return current_tune->vec_costs->vec_unalign_load_cost;
11095 case unaligned_store:
11096 return current_tune->vec_costs->vec_unalign_store_cost;
11098 case cond_branch_taken:
11099 return current_tune->vec_costs->cond_taken_branch_cost;
11101 case cond_branch_not_taken:
11102 return current_tune->vec_costs->cond_not_taken_branch_cost;
11104 case vec_perm:
11105 case vec_promote_demote:
11106 return current_tune->vec_costs->vec_stmt_cost;
11108 case vec_construct:
11109 elements = TYPE_VECTOR_SUBPARTS (vectype);
11110 return elements / 2 + 1;
11112 default:
11113 gcc_unreachable ();
11117 /* Implement targetm.vectorize.add_stmt_cost. */
11119 static unsigned
11120 arm_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
11121 struct _stmt_vec_info *stmt_info, int misalign,
11122 enum vect_cost_model_location where)
11124 unsigned *cost = (unsigned *) data;
11125 unsigned retval = 0;
11127 if (flag_vect_cost_model)
11129 tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
11130 int stmt_cost = arm_builtin_vectorization_cost (kind, vectype, misalign);
11132 /* Statements in an inner loop relative to the loop being
11133 vectorized are weighted more heavily. The value here is
11134 arbitrary and could potentially be improved with analysis. */
11135 if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info))
11136 count *= 50; /* FIXME. */
11138 retval = (unsigned) (count * stmt_cost);
11139 cost[where] += retval;
11142 return retval;
11145 /* Return true if and only if this insn can dual-issue only as older. */
11146 static bool
11147 cortexa7_older_only (rtx_insn *insn)
11149 if (recog_memoized (insn) < 0)
11150 return false;
11152 switch (get_attr_type (insn))
11154 case TYPE_ALU_DSP_REG:
11155 case TYPE_ALU_SREG:
11156 case TYPE_ALUS_SREG:
11157 case TYPE_LOGIC_REG:
11158 case TYPE_LOGICS_REG:
11159 case TYPE_ADC_REG:
11160 case TYPE_ADCS_REG:
11161 case TYPE_ADR:
11162 case TYPE_BFM:
11163 case TYPE_REV:
11164 case TYPE_MVN_REG:
11165 case TYPE_SHIFT_IMM:
11166 case TYPE_SHIFT_REG:
11167 case TYPE_LOAD_BYTE:
11168 case TYPE_LOAD1:
11169 case TYPE_STORE1:
11170 case TYPE_FFARITHS:
11171 case TYPE_FADDS:
11172 case TYPE_FFARITHD:
11173 case TYPE_FADDD:
11174 case TYPE_FMOV:
11175 case TYPE_F_CVT:
11176 case TYPE_FCMPS:
11177 case TYPE_FCMPD:
11178 case TYPE_FCONSTS:
11179 case TYPE_FCONSTD:
11180 case TYPE_FMULS:
11181 case TYPE_FMACS:
11182 case TYPE_FMULD:
11183 case TYPE_FMACD:
11184 case TYPE_FDIVS:
11185 case TYPE_FDIVD:
11186 case TYPE_F_MRC:
11187 case TYPE_F_MRRC:
11188 case TYPE_F_FLAG:
11189 case TYPE_F_LOADS:
11190 case TYPE_F_STORES:
11191 return true;
11192 default:
11193 return false;
11197 /* Return true if and only if this insn can dual-issue as younger. */
11198 static bool
11199 cortexa7_younger (FILE *file, int verbose, rtx_insn *insn)
11201 if (recog_memoized (insn) < 0)
11203 if (verbose > 5)
11204 fprintf (file, ";; not cortexa7_younger %d\n", INSN_UID (insn));
11205 return false;
11208 switch (get_attr_type (insn))
11210 case TYPE_ALU_IMM:
11211 case TYPE_ALUS_IMM:
11212 case TYPE_LOGIC_IMM:
11213 case TYPE_LOGICS_IMM:
11214 case TYPE_EXTEND:
11215 case TYPE_MVN_IMM:
11216 case TYPE_MOV_IMM:
11217 case TYPE_MOV_REG:
11218 case TYPE_MOV_SHIFT:
11219 case TYPE_MOV_SHIFT_REG:
11220 case TYPE_BRANCH:
11221 case TYPE_CALL:
11222 return true;
11223 default:
11224 return false;
11229 /* Look for an instruction that can dual issue only as an older
11230 instruction, and move it in front of any instructions that can
11231 dual-issue as younger, while preserving the relative order of all
11232 other instructions in the ready list. This is a hueuristic to help
11233 dual-issue in later cycles, by postponing issue of more flexible
11234 instructions. This heuristic may affect dual issue opportunities
11235 in the current cycle. */
11236 static void
11237 cortexa7_sched_reorder (FILE *file, int verbose, rtx_insn **ready,
11238 int *n_readyp, int clock)
11240 int i;
11241 int first_older_only = -1, first_younger = -1;
11243 if (verbose > 5)
11244 fprintf (file,
11245 ";; sched_reorder for cycle %d with %d insns in ready list\n",
11246 clock,
11247 *n_readyp);
11249 /* Traverse the ready list from the head (the instruction to issue
11250 first), and looking for the first instruction that can issue as
11251 younger and the first instruction that can dual-issue only as
11252 older. */
11253 for (i = *n_readyp - 1; i >= 0; i--)
11255 rtx_insn *insn = ready[i];
11256 if (cortexa7_older_only (insn))
11258 first_older_only = i;
11259 if (verbose > 5)
11260 fprintf (file, ";; reorder older found %d\n", INSN_UID (insn));
11261 break;
11263 else if (cortexa7_younger (file, verbose, insn) && first_younger == -1)
11264 first_younger = i;
11267 /* Nothing to reorder because either no younger insn found or insn
11268 that can dual-issue only as older appears before any insn that
11269 can dual-issue as younger. */
11270 if (first_younger == -1)
11272 if (verbose > 5)
11273 fprintf (file, ";; sched_reorder nothing to reorder as no younger\n");
11274 return;
11277 /* Nothing to reorder because no older-only insn in the ready list. */
11278 if (first_older_only == -1)
11280 if (verbose > 5)
11281 fprintf (file, ";; sched_reorder nothing to reorder as no older_only\n");
11282 return;
11285 /* Move first_older_only insn before first_younger. */
11286 if (verbose > 5)
11287 fprintf (file, ";; cortexa7_sched_reorder insn %d before %d\n",
11288 INSN_UID(ready [first_older_only]),
11289 INSN_UID(ready [first_younger]));
11290 rtx_insn *first_older_only_insn = ready [first_older_only];
11291 for (i = first_older_only; i < first_younger; i++)
11293 ready[i] = ready[i+1];
11296 ready[i] = first_older_only_insn;
11297 return;
11300 /* Implement TARGET_SCHED_REORDER. */
11301 static int
11302 arm_sched_reorder (FILE *file, int verbose, rtx_insn **ready, int *n_readyp,
11303 int clock)
11305 switch (arm_tune)
11307 case TARGET_CPU_cortexa7:
11308 cortexa7_sched_reorder (file, verbose, ready, n_readyp, clock);
11309 break;
11310 default:
11311 /* Do nothing for other cores. */
11312 break;
11315 return arm_issue_rate ();
11318 /* This function implements the target macro TARGET_SCHED_ADJUST_COST.
11319 It corrects the value of COST based on the relationship between
11320 INSN and DEP through the dependence LINK. It returns the new
11321 value. There is a per-core adjust_cost hook to adjust scheduler costs
11322 and the per-core hook can choose to completely override the generic
11323 adjust_cost function. Only put bits of code into arm_adjust_cost that
11324 are common across all cores. */
11325 static int
11326 arm_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep, int cost,
11327 unsigned int)
11329 rtx i_pat, d_pat;
11331 /* When generating Thumb-1 code, we want to place flag-setting operations
11332 close to a conditional branch which depends on them, so that we can
11333 omit the comparison. */
11334 if (TARGET_THUMB1
11335 && dep_type == 0
11336 && recog_memoized (insn) == CODE_FOR_cbranchsi4_insn
11337 && recog_memoized (dep) >= 0
11338 && get_attr_conds (dep) == CONDS_SET)
11339 return 0;
11341 if (current_tune->sched_adjust_cost != NULL)
11343 if (!current_tune->sched_adjust_cost (insn, dep_type, dep, &cost))
11344 return cost;
11347 /* XXX Is this strictly true? */
11348 if (dep_type == REG_DEP_ANTI
11349 || dep_type == REG_DEP_OUTPUT)
11350 return 0;
11352 /* Call insns don't incur a stall, even if they follow a load. */
11353 if (dep_type == 0
11354 && CALL_P (insn))
11355 return 1;
11357 if ((i_pat = single_set (insn)) != NULL
11358 && MEM_P (SET_SRC (i_pat))
11359 && (d_pat = single_set (dep)) != NULL
11360 && MEM_P (SET_DEST (d_pat)))
11362 rtx src_mem = XEXP (SET_SRC (i_pat), 0);
11363 /* This is a load after a store, there is no conflict if the load reads
11364 from a cached area. Assume that loads from the stack, and from the
11365 constant pool are cached, and that others will miss. This is a
11366 hack. */
11368 if ((GET_CODE (src_mem) == SYMBOL_REF
11369 && CONSTANT_POOL_ADDRESS_P (src_mem))
11370 || reg_mentioned_p (stack_pointer_rtx, src_mem)
11371 || reg_mentioned_p (frame_pointer_rtx, src_mem)
11372 || reg_mentioned_p (hard_frame_pointer_rtx, src_mem))
11373 return 1;
11376 return cost;
11380 arm_max_conditional_execute (void)
11382 return max_insns_skipped;
11385 static int
11386 arm_default_branch_cost (bool speed_p, bool predictable_p ATTRIBUTE_UNUSED)
11388 if (TARGET_32BIT)
11389 return (TARGET_THUMB2 && !speed_p) ? 1 : 4;
11390 else
11391 return (optimize > 0) ? 2 : 0;
11394 static int
11395 arm_cortex_a5_branch_cost (bool speed_p, bool predictable_p)
11397 return speed_p ? 0 : arm_default_branch_cost (speed_p, predictable_p);
11400 /* Thumb-2 branches are relatively cheap on Cortex-M processors ("1 + P cycles"
11401 on Cortex-M4, where P varies from 1 to 3 according to some criteria), since
11402 sequences of non-executed instructions in IT blocks probably take the same
11403 amount of time as executed instructions (and the IT instruction itself takes
11404 space in icache). This function was experimentally determined to give good
11405 results on a popular embedded benchmark. */
11407 static int
11408 arm_cortex_m_branch_cost (bool speed_p, bool predictable_p)
11410 return (TARGET_32BIT && speed_p) ? 1
11411 : arm_default_branch_cost (speed_p, predictable_p);
11414 static int
11415 arm_cortex_m7_branch_cost (bool speed_p, bool predictable_p)
11417 return speed_p ? 0 : arm_default_branch_cost (speed_p, predictable_p);
11420 static bool fp_consts_inited = false;
11422 static REAL_VALUE_TYPE value_fp0;
11424 static void
11425 init_fp_table (void)
11427 REAL_VALUE_TYPE r;
11429 r = REAL_VALUE_ATOF ("0", DFmode);
11430 value_fp0 = r;
11431 fp_consts_inited = true;
11434 /* Return TRUE if rtx X is a valid immediate FP constant. */
11436 arm_const_double_rtx (rtx x)
11438 const REAL_VALUE_TYPE *r;
11440 if (!fp_consts_inited)
11441 init_fp_table ();
11443 r = CONST_DOUBLE_REAL_VALUE (x);
11444 if (REAL_VALUE_MINUS_ZERO (*r))
11445 return 0;
11447 if (real_equal (r, &value_fp0))
11448 return 1;
11450 return 0;
11453 /* VFPv3 has a fairly wide range of representable immediates, formed from
11454 "quarter-precision" floating-point values. These can be evaluated using this
11455 formula (with ^ for exponentiation):
11457 -1^s * n * 2^-r
11459 Where 's' is a sign bit (0/1), 'n' and 'r' are integers such that
11460 16 <= n <= 31 and 0 <= r <= 7.
11462 These values are mapped onto an 8-bit integer ABCDEFGH s.t.
11464 - A (most-significant) is the sign bit.
11465 - BCD are the exponent (encoded as r XOR 3).
11466 - EFGH are the mantissa (encoded as n - 16).
11469 /* Return an integer index for a VFPv3 immediate operand X suitable for the
11470 fconst[sd] instruction, or -1 if X isn't suitable. */
11471 static int
11472 vfp3_const_double_index (rtx x)
11474 REAL_VALUE_TYPE r, m;
11475 int sign, exponent;
11476 unsigned HOST_WIDE_INT mantissa, mant_hi;
11477 unsigned HOST_WIDE_INT mask;
11478 int point_pos = 2 * HOST_BITS_PER_WIDE_INT - 1;
11479 bool fail;
11481 if (!TARGET_VFP3 || !CONST_DOUBLE_P (x))
11482 return -1;
11484 r = *CONST_DOUBLE_REAL_VALUE (x);
11486 /* We can't represent these things, so detect them first. */
11487 if (REAL_VALUE_ISINF (r) || REAL_VALUE_ISNAN (r) || REAL_VALUE_MINUS_ZERO (r))
11488 return -1;
11490 /* Extract sign, exponent and mantissa. */
11491 sign = REAL_VALUE_NEGATIVE (r) ? 1 : 0;
11492 r = real_value_abs (&r);
11493 exponent = REAL_EXP (&r);
11494 /* For the mantissa, we expand into two HOST_WIDE_INTS, apart from the
11495 highest (sign) bit, with a fixed binary point at bit point_pos.
11496 WARNING: If there's ever a VFP version which uses more than 2 * H_W_I - 1
11497 bits for the mantissa, this may fail (low bits would be lost). */
11498 real_ldexp (&m, &r, point_pos - exponent);
11499 wide_int w = real_to_integer (&m, &fail, HOST_BITS_PER_WIDE_INT * 2);
11500 mantissa = w.elt (0);
11501 mant_hi = w.elt (1);
11503 /* If there are bits set in the low part of the mantissa, we can't
11504 represent this value. */
11505 if (mantissa != 0)
11506 return -1;
11508 /* Now make it so that mantissa contains the most-significant bits, and move
11509 the point_pos to indicate that the least-significant bits have been
11510 discarded. */
11511 point_pos -= HOST_BITS_PER_WIDE_INT;
11512 mantissa = mant_hi;
11514 /* We can permit four significant bits of mantissa only, plus a high bit
11515 which is always 1. */
11516 mask = (HOST_WIDE_INT_1U << (point_pos - 5)) - 1;
11517 if ((mantissa & mask) != 0)
11518 return -1;
11520 /* Now we know the mantissa is in range, chop off the unneeded bits. */
11521 mantissa >>= point_pos - 5;
11523 /* The mantissa may be zero. Disallow that case. (It's possible to load the
11524 floating-point immediate zero with Neon using an integer-zero load, but
11525 that case is handled elsewhere.) */
11526 if (mantissa == 0)
11527 return -1;
11529 gcc_assert (mantissa >= 16 && mantissa <= 31);
11531 /* The value of 5 here would be 4 if GCC used IEEE754-like encoding (where
11532 normalized significands are in the range [1, 2). (Our mantissa is shifted
11533 left 4 places at this point relative to normalized IEEE754 values). GCC
11534 internally uses [0.5, 1) (see real.c), so the exponent returned from
11535 REAL_EXP must be altered. */
11536 exponent = 5 - exponent;
11538 if (exponent < 0 || exponent > 7)
11539 return -1;
11541 /* Sign, mantissa and exponent are now in the correct form to plug into the
11542 formula described in the comment above. */
11543 return (sign << 7) | ((exponent ^ 3) << 4) | (mantissa - 16);
11546 /* Return TRUE if rtx X is a valid immediate VFPv3 constant. */
11548 vfp3_const_double_rtx (rtx x)
11550 if (!TARGET_VFP3)
11551 return 0;
11553 return vfp3_const_double_index (x) != -1;
11556 /* Recognize immediates which can be used in various Neon instructions. Legal
11557 immediates are described by the following table (for VMVN variants, the
11558 bitwise inverse of the constant shown is recognized. In either case, VMOV
11559 is output and the correct instruction to use for a given constant is chosen
11560 by the assembler). The constant shown is replicated across all elements of
11561 the destination vector.
11563 insn elems variant constant (binary)
11564 ---- ----- ------- -----------------
11565 vmov i32 0 00000000 00000000 00000000 abcdefgh
11566 vmov i32 1 00000000 00000000 abcdefgh 00000000
11567 vmov i32 2 00000000 abcdefgh 00000000 00000000
11568 vmov i32 3 abcdefgh 00000000 00000000 00000000
11569 vmov i16 4 00000000 abcdefgh
11570 vmov i16 5 abcdefgh 00000000
11571 vmvn i32 6 00000000 00000000 00000000 abcdefgh
11572 vmvn i32 7 00000000 00000000 abcdefgh 00000000
11573 vmvn i32 8 00000000 abcdefgh 00000000 00000000
11574 vmvn i32 9 abcdefgh 00000000 00000000 00000000
11575 vmvn i16 10 00000000 abcdefgh
11576 vmvn i16 11 abcdefgh 00000000
11577 vmov i32 12 00000000 00000000 abcdefgh 11111111
11578 vmvn i32 13 00000000 00000000 abcdefgh 11111111
11579 vmov i32 14 00000000 abcdefgh 11111111 11111111
11580 vmvn i32 15 00000000 abcdefgh 11111111 11111111
11581 vmov i8 16 abcdefgh
11582 vmov i64 17 aaaaaaaa bbbbbbbb cccccccc dddddddd
11583 eeeeeeee ffffffff gggggggg hhhhhhhh
11584 vmov f32 18 aBbbbbbc defgh000 00000000 00000000
11585 vmov f32 19 00000000 00000000 00000000 00000000
11587 For case 18, B = !b. Representable values are exactly those accepted by
11588 vfp3_const_double_index, but are output as floating-point numbers rather
11589 than indices.
11591 For case 19, we will change it to vmov.i32 when assembling.
11593 Variants 0-5 (inclusive) may also be used as immediates for the second
11594 operand of VORR/VBIC instructions.
11596 The INVERSE argument causes the bitwise inverse of the given operand to be
11597 recognized instead (used for recognizing legal immediates for the VAND/VORN
11598 pseudo-instructions). If INVERSE is true, the value placed in *MODCONST is
11599 *not* inverted (i.e. the pseudo-instruction forms vand/vorn should still be
11600 output, rather than the real insns vbic/vorr).
11602 INVERSE makes no difference to the recognition of float vectors.
11604 The return value is the variant of immediate as shown in the above table, or
11605 -1 if the given value doesn't match any of the listed patterns.
11607 static int
11608 neon_valid_immediate (rtx op, machine_mode mode, int inverse,
11609 rtx *modconst, int *elementwidth)
11611 #define CHECK(STRIDE, ELSIZE, CLASS, TEST) \
11612 matches = 1; \
11613 for (i = 0; i < idx; i += (STRIDE)) \
11614 if (!(TEST)) \
11615 matches = 0; \
11616 if (matches) \
11618 immtype = (CLASS); \
11619 elsize = (ELSIZE); \
11620 break; \
11623 unsigned int i, elsize = 0, idx = 0, n_elts;
11624 unsigned int innersize;
11625 unsigned char bytes[16];
11626 int immtype = -1, matches;
11627 unsigned int invmask = inverse ? 0xff : 0;
11628 bool vector = GET_CODE (op) == CONST_VECTOR;
11630 if (vector)
11631 n_elts = CONST_VECTOR_NUNITS (op);
11632 else
11634 n_elts = 1;
11635 if (mode == VOIDmode)
11636 mode = DImode;
11639 innersize = GET_MODE_UNIT_SIZE (mode);
11641 /* Vectors of float constants. */
11642 if (GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
11644 rtx el0 = CONST_VECTOR_ELT (op, 0);
11646 if (!vfp3_const_double_rtx (el0) && el0 != CONST0_RTX (GET_MODE (el0)))
11647 return -1;
11649 /* FP16 vectors cannot be represented. */
11650 if (GET_MODE_INNER (mode) == HFmode)
11651 return -1;
11653 /* All elements in the vector must be the same. Note that 0.0 and -0.0
11654 are distinct in this context. */
11655 if (!const_vec_duplicate_p (op))
11656 return -1;
11658 if (modconst)
11659 *modconst = CONST_VECTOR_ELT (op, 0);
11661 if (elementwidth)
11662 *elementwidth = 0;
11664 if (el0 == CONST0_RTX (GET_MODE (el0)))
11665 return 19;
11666 else
11667 return 18;
11670 /* The tricks done in the code below apply for little-endian vector layout.
11671 For big-endian vectors only allow vectors of the form { a, a, a..., a }.
11672 FIXME: Implement logic for big-endian vectors. */
11673 if (BYTES_BIG_ENDIAN && vector && !const_vec_duplicate_p (op))
11674 return -1;
11676 /* Splat vector constant out into a byte vector. */
11677 for (i = 0; i < n_elts; i++)
11679 rtx el = vector ? CONST_VECTOR_ELT (op, i) : op;
11680 unsigned HOST_WIDE_INT elpart;
11682 gcc_assert (CONST_INT_P (el));
11683 elpart = INTVAL (el);
11685 for (unsigned int byte = 0; byte < innersize; byte++)
11687 bytes[idx++] = (elpart & 0xff) ^ invmask;
11688 elpart >>= BITS_PER_UNIT;
11692 /* Sanity check. */
11693 gcc_assert (idx == GET_MODE_SIZE (mode));
11697 CHECK (4, 32, 0, bytes[i] == bytes[0] && bytes[i + 1] == 0
11698 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
11700 CHECK (4, 32, 1, bytes[i] == 0 && bytes[i + 1] == bytes[1]
11701 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
11703 CHECK (4, 32, 2, bytes[i] == 0 && bytes[i + 1] == 0
11704 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);
11706 CHECK (4, 32, 3, bytes[i] == 0 && bytes[i + 1] == 0
11707 && bytes[i + 2] == 0 && bytes[i + 3] == bytes[3]);
11709 CHECK (2, 16, 4, bytes[i] == bytes[0] && bytes[i + 1] == 0);
11711 CHECK (2, 16, 5, bytes[i] == 0 && bytes[i + 1] == bytes[1]);
11713 CHECK (4, 32, 6, bytes[i] == bytes[0] && bytes[i + 1] == 0xff
11714 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
11716 CHECK (4, 32, 7, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
11717 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
11719 CHECK (4, 32, 8, bytes[i] == 0xff && bytes[i + 1] == 0xff
11720 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);
11722 CHECK (4, 32, 9, bytes[i] == 0xff && bytes[i + 1] == 0xff
11723 && bytes[i + 2] == 0xff && bytes[i + 3] == bytes[3]);
11725 CHECK (2, 16, 10, bytes[i] == bytes[0] && bytes[i + 1] == 0xff);
11727 CHECK (2, 16, 11, bytes[i] == 0xff && bytes[i + 1] == bytes[1]);
11729 CHECK (4, 32, 12, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
11730 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
11732 CHECK (4, 32, 13, bytes[i] == 0 && bytes[i + 1] == bytes[1]
11733 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
11735 CHECK (4, 32, 14, bytes[i] == 0xff && bytes[i + 1] == 0xff
11736 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);
11738 CHECK (4, 32, 15, bytes[i] == 0 && bytes[i + 1] == 0
11739 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);
11741 CHECK (1, 8, 16, bytes[i] == bytes[0]);
11743 CHECK (1, 64, 17, (bytes[i] == 0 || bytes[i] == 0xff)
11744 && bytes[i] == bytes[(i + 8) % idx]);
11746 while (0);
11748 if (immtype == -1)
11749 return -1;
11751 if (elementwidth)
11752 *elementwidth = elsize;
11754 if (modconst)
11756 unsigned HOST_WIDE_INT imm = 0;
11758 /* Un-invert bytes of recognized vector, if necessary. */
11759 if (invmask != 0)
11760 for (i = 0; i < idx; i++)
11761 bytes[i] ^= invmask;
11763 if (immtype == 17)
11765 /* FIXME: Broken on 32-bit H_W_I hosts. */
11766 gcc_assert (sizeof (HOST_WIDE_INT) == 8);
11768 for (i = 0; i < 8; i++)
11769 imm |= (unsigned HOST_WIDE_INT) (bytes[i] ? 0xff : 0)
11770 << (i * BITS_PER_UNIT);
11772 *modconst = GEN_INT (imm);
11774 else
11776 unsigned HOST_WIDE_INT imm = 0;
11778 for (i = 0; i < elsize / BITS_PER_UNIT; i++)
11779 imm |= (unsigned HOST_WIDE_INT) bytes[i] << (i * BITS_PER_UNIT);
11781 *modconst = GEN_INT (imm);
11785 return immtype;
11786 #undef CHECK
11789 /* Return TRUE if rtx X is legal for use as either a Neon VMOV (or, implicitly,
11790 VMVN) immediate. Write back width per element to *ELEMENTWIDTH (or zero for
11791 float elements), and a modified constant (whatever should be output for a
11792 VMOV) in *MODCONST. */
11795 neon_immediate_valid_for_move (rtx op, machine_mode mode,
11796 rtx *modconst, int *elementwidth)
11798 rtx tmpconst;
11799 int tmpwidth;
11800 int retval = neon_valid_immediate (op, mode, 0, &tmpconst, &tmpwidth);
11802 if (retval == -1)
11803 return 0;
11805 if (modconst)
11806 *modconst = tmpconst;
11808 if (elementwidth)
11809 *elementwidth = tmpwidth;
11811 return 1;
11814 /* Return TRUE if rtx X is legal for use in a VORR or VBIC instruction. If
11815 the immediate is valid, write a constant suitable for using as an operand
11816 to VORR/VBIC/VAND/VORN to *MODCONST and the corresponding element width to
11817 *ELEMENTWIDTH. See neon_valid_immediate for description of INVERSE. */
11820 neon_immediate_valid_for_logic (rtx op, machine_mode mode, int inverse,
11821 rtx *modconst, int *elementwidth)
11823 rtx tmpconst;
11824 int tmpwidth;
11825 int retval = neon_valid_immediate (op, mode, inverse, &tmpconst, &tmpwidth);
11827 if (retval < 0 || retval > 5)
11828 return 0;
11830 if (modconst)
11831 *modconst = tmpconst;
11833 if (elementwidth)
11834 *elementwidth = tmpwidth;
11836 return 1;
11839 /* Return TRUE if rtx OP is legal for use in a VSHR or VSHL instruction. If
11840 the immediate is valid, write a constant suitable for using as an operand
11841 to VSHR/VSHL to *MODCONST and the corresponding element width to
11842 *ELEMENTWIDTH. ISLEFTSHIFT is for determine left or right shift,
11843 because they have different limitations. */
11846 neon_immediate_valid_for_shift (rtx op, machine_mode mode,
11847 rtx *modconst, int *elementwidth,
11848 bool isleftshift)
11850 unsigned int innersize = GET_MODE_UNIT_SIZE (mode);
11851 unsigned int n_elts = CONST_VECTOR_NUNITS (op), i;
11852 unsigned HOST_WIDE_INT last_elt = 0;
11853 unsigned HOST_WIDE_INT maxshift;
11855 /* Split vector constant out into a byte vector. */
11856 for (i = 0; i < n_elts; i++)
11858 rtx el = CONST_VECTOR_ELT (op, i);
11859 unsigned HOST_WIDE_INT elpart;
11861 if (CONST_INT_P (el))
11862 elpart = INTVAL (el);
11863 else if (CONST_DOUBLE_P (el))
11864 return 0;
11865 else
11866 gcc_unreachable ();
11868 if (i != 0 && elpart != last_elt)
11869 return 0;
11871 last_elt = elpart;
11874 /* Shift less than element size. */
11875 maxshift = innersize * 8;
11877 if (isleftshift)
11879 /* Left shift immediate value can be from 0 to <size>-1. */
11880 if (last_elt >= maxshift)
11881 return 0;
11883 else
11885 /* Right shift immediate value can be from 1 to <size>. */
11886 if (last_elt == 0 || last_elt > maxshift)
11887 return 0;
11890 if (elementwidth)
11891 *elementwidth = innersize * 8;
11893 if (modconst)
11894 *modconst = CONST_VECTOR_ELT (op, 0);
11896 return 1;
11899 /* Return a string suitable for output of Neon immediate logic operation
11900 MNEM. */
11902 char *
11903 neon_output_logic_immediate (const char *mnem, rtx *op2, machine_mode mode,
11904 int inverse, int quad)
11906 int width, is_valid;
11907 static char templ[40];
11909 is_valid = neon_immediate_valid_for_logic (*op2, mode, inverse, op2, &width);
11911 gcc_assert (is_valid != 0);
11913 if (quad)
11914 sprintf (templ, "%s.i%d\t%%q0, %%2", mnem, width);
11915 else
11916 sprintf (templ, "%s.i%d\t%%P0, %%2", mnem, width);
11918 return templ;
11921 /* Return a string suitable for output of Neon immediate shift operation
11922 (VSHR or VSHL) MNEM. */
11924 char *
11925 neon_output_shift_immediate (const char *mnem, char sign, rtx *op2,
11926 machine_mode mode, int quad,
11927 bool isleftshift)
11929 int width, is_valid;
11930 static char templ[40];
11932 is_valid = neon_immediate_valid_for_shift (*op2, mode, op2, &width, isleftshift);
11933 gcc_assert (is_valid != 0);
11935 if (quad)
11936 sprintf (templ, "%s.%c%d\t%%q0, %%q1, %%2", mnem, sign, width);
11937 else
11938 sprintf (templ, "%s.%c%d\t%%P0, %%P1, %%2", mnem, sign, width);
11940 return templ;
11943 /* Output a sequence of pairwise operations to implement a reduction.
11944 NOTE: We do "too much work" here, because pairwise operations work on two
11945 registers-worth of operands in one go. Unfortunately we can't exploit those
11946 extra calculations to do the full operation in fewer steps, I don't think.
11947 Although all vector elements of the result but the first are ignored, we
11948 actually calculate the same result in each of the elements. An alternative
11949 such as initially loading a vector with zero to use as each of the second
11950 operands would use up an additional register and take an extra instruction,
11951 for no particular gain. */
11953 void
11954 neon_pairwise_reduce (rtx op0, rtx op1, machine_mode mode,
11955 rtx (*reduc) (rtx, rtx, rtx))
11957 unsigned int i, parts = GET_MODE_SIZE (mode) / GET_MODE_UNIT_SIZE (mode);
11958 rtx tmpsum = op1;
11960 for (i = parts / 2; i >= 1; i /= 2)
11962 rtx dest = (i == 1) ? op0 : gen_reg_rtx (mode);
11963 emit_insn (reduc (dest, tmpsum, tmpsum));
11964 tmpsum = dest;
11968 /* If VALS is a vector constant that can be loaded into a register
11969 using VDUP, generate instructions to do so and return an RTX to
11970 assign to the register. Otherwise return NULL_RTX. */
11972 static rtx
11973 neon_vdup_constant (rtx vals)
11975 machine_mode mode = GET_MODE (vals);
11976 machine_mode inner_mode = GET_MODE_INNER (mode);
11977 rtx x;
11979 if (GET_CODE (vals) != CONST_VECTOR || GET_MODE_SIZE (inner_mode) > 4)
11980 return NULL_RTX;
11982 if (!const_vec_duplicate_p (vals, &x))
11983 /* The elements are not all the same. We could handle repeating
11984 patterns of a mode larger than INNER_MODE here (e.g. int8x8_t
11985 {0, C, 0, C, 0, C, 0, C} which can be loaded using
11986 vdup.i16). */
11987 return NULL_RTX;
11989 /* We can load this constant by using VDUP and a constant in a
11990 single ARM register. This will be cheaper than a vector
11991 load. */
11993 x = copy_to_mode_reg (inner_mode, x);
11994 return gen_rtx_VEC_DUPLICATE (mode, x);
11997 /* Generate code to load VALS, which is a PARALLEL containing only
11998 constants (for vec_init) or CONST_VECTOR, efficiently into a
11999 register. Returns an RTX to copy into the register, or NULL_RTX
12000 for a PARALLEL that can not be converted into a CONST_VECTOR. */
12003 neon_make_constant (rtx vals)
12005 machine_mode mode = GET_MODE (vals);
12006 rtx target;
12007 rtx const_vec = NULL_RTX;
12008 int n_elts = GET_MODE_NUNITS (mode);
12009 int n_const = 0;
12010 int i;
12012 if (GET_CODE (vals) == CONST_VECTOR)
12013 const_vec = vals;
12014 else if (GET_CODE (vals) == PARALLEL)
12016 /* A CONST_VECTOR must contain only CONST_INTs and
12017 CONST_DOUBLEs, but CONSTANT_P allows more (e.g. SYMBOL_REF).
12018 Only store valid constants in a CONST_VECTOR. */
12019 for (i = 0; i < n_elts; ++i)
12021 rtx x = XVECEXP (vals, 0, i);
12022 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
12023 n_const++;
12025 if (n_const == n_elts)
12026 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
12028 else
12029 gcc_unreachable ();
12031 if (const_vec != NULL
12032 && neon_immediate_valid_for_move (const_vec, mode, NULL, NULL))
12033 /* Load using VMOV. On Cortex-A8 this takes one cycle. */
12034 return const_vec;
12035 else if ((target = neon_vdup_constant (vals)) != NULL_RTX)
12036 /* Loaded using VDUP. On Cortex-A8 the VDUP takes one NEON
12037 pipeline cycle; creating the constant takes one or two ARM
12038 pipeline cycles. */
12039 return target;
12040 else if (const_vec != NULL_RTX)
12041 /* Load from constant pool. On Cortex-A8 this takes two cycles
12042 (for either double or quad vectors). We can not take advantage
12043 of single-cycle VLD1 because we need a PC-relative addressing
12044 mode. */
12045 return const_vec;
12046 else
12047 /* A PARALLEL containing something not valid inside CONST_VECTOR.
12048 We can not construct an initializer. */
12049 return NULL_RTX;
12052 /* Initialize vector TARGET to VALS. */
12054 void
12055 neon_expand_vector_init (rtx target, rtx vals)
12057 machine_mode mode = GET_MODE (target);
12058 machine_mode inner_mode = GET_MODE_INNER (mode);
12059 int n_elts = GET_MODE_NUNITS (mode);
12060 int n_var = 0, one_var = -1;
12061 bool all_same = true;
12062 rtx x, mem;
12063 int i;
12065 for (i = 0; i < n_elts; ++i)
12067 x = XVECEXP (vals, 0, i);
12068 if (!CONSTANT_P (x))
12069 ++n_var, one_var = i;
12071 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
12072 all_same = false;
12075 if (n_var == 0)
12077 rtx constant = neon_make_constant (vals);
12078 if (constant != NULL_RTX)
12080 emit_move_insn (target, constant);
12081 return;
12085 /* Splat a single non-constant element if we can. */
12086 if (all_same && GET_MODE_SIZE (inner_mode) <= 4)
12088 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
12089 emit_insn (gen_rtx_SET (target, gen_rtx_VEC_DUPLICATE (mode, x)));
12090 return;
12093 /* One field is non-constant. Load constant then overwrite varying
12094 field. This is more efficient than using the stack. */
12095 if (n_var == 1)
12097 rtx copy = copy_rtx (vals);
12098 rtx index = GEN_INT (one_var);
12100 /* Load constant part of vector, substitute neighboring value for
12101 varying element. */
12102 XVECEXP (copy, 0, one_var) = XVECEXP (vals, 0, (one_var + 1) % n_elts);
12103 neon_expand_vector_init (target, copy);
12105 /* Insert variable. */
12106 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, one_var));
12107 switch (mode)
12109 case V8QImode:
12110 emit_insn (gen_neon_vset_lanev8qi (target, x, target, index));
12111 break;
12112 case V16QImode:
12113 emit_insn (gen_neon_vset_lanev16qi (target, x, target, index));
12114 break;
12115 case V4HImode:
12116 emit_insn (gen_neon_vset_lanev4hi (target, x, target, index));
12117 break;
12118 case V8HImode:
12119 emit_insn (gen_neon_vset_lanev8hi (target, x, target, index));
12120 break;
12121 case V2SImode:
12122 emit_insn (gen_neon_vset_lanev2si (target, x, target, index));
12123 break;
12124 case V4SImode:
12125 emit_insn (gen_neon_vset_lanev4si (target, x, target, index));
12126 break;
12127 case V2SFmode:
12128 emit_insn (gen_neon_vset_lanev2sf (target, x, target, index));
12129 break;
12130 case V4SFmode:
12131 emit_insn (gen_neon_vset_lanev4sf (target, x, target, index));
12132 break;
12133 case V2DImode:
12134 emit_insn (gen_neon_vset_lanev2di (target, x, target, index));
12135 break;
12136 default:
12137 gcc_unreachable ();
12139 return;
12142 /* Construct the vector in memory one field at a time
12143 and load the whole vector. */
12144 mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
12145 for (i = 0; i < n_elts; i++)
12146 emit_move_insn (adjust_address_nv (mem, inner_mode,
12147 i * GET_MODE_SIZE (inner_mode)),
12148 XVECEXP (vals, 0, i));
12149 emit_move_insn (target, mem);
12152 /* Ensure OPERAND lies between LOW (inclusive) and HIGH (exclusive). Raise
12153 ERR if it doesn't. EXP indicates the source location, which includes the
12154 inlining history for intrinsics. */
12156 static void
12157 bounds_check (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high,
12158 const_tree exp, const char *desc)
12160 HOST_WIDE_INT lane;
12162 gcc_assert (CONST_INT_P (operand));
12164 lane = INTVAL (operand);
12166 if (lane < low || lane >= high)
12168 if (exp)
12169 error ("%K%s %wd out of range %wd - %wd",
12170 exp, desc, lane, low, high - 1);
12171 else
12172 error ("%s %wd out of range %wd - %wd", desc, lane, low, high - 1);
12176 /* Bounds-check lanes. */
12178 void
12179 neon_lane_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high,
12180 const_tree exp)
12182 bounds_check (operand, low, high, exp, "lane");
12185 /* Bounds-check constants. */
12187 void
12188 arm_const_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
12190 bounds_check (operand, low, high, NULL_TREE, "constant");
12193 HOST_WIDE_INT
12194 neon_element_bits (machine_mode mode)
12196 return GET_MODE_UNIT_BITSIZE (mode);
12200 /* Predicates for `match_operand' and `match_operator'. */
12202 /* Return TRUE if OP is a valid coprocessor memory address pattern.
12203 WB is true if full writeback address modes are allowed and is false
12204 if limited writeback address modes (POST_INC and PRE_DEC) are
12205 allowed. */
12208 arm_coproc_mem_operand (rtx op, bool wb)
12210 rtx ind;
12212 /* Reject eliminable registers. */
12213 if (! (reload_in_progress || reload_completed || lra_in_progress)
12214 && ( reg_mentioned_p (frame_pointer_rtx, op)
12215 || reg_mentioned_p (arg_pointer_rtx, op)
12216 || reg_mentioned_p (virtual_incoming_args_rtx, op)
12217 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
12218 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
12219 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
12220 return FALSE;
12222 /* Constants are converted into offsets from labels. */
12223 if (!MEM_P (op))
12224 return FALSE;
12226 ind = XEXP (op, 0);
12228 if (reload_completed
12229 && (GET_CODE (ind) == LABEL_REF
12230 || (GET_CODE (ind) == CONST
12231 && GET_CODE (XEXP (ind, 0)) == PLUS
12232 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
12233 && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
12234 return TRUE;
12236 /* Match: (mem (reg)). */
12237 if (REG_P (ind))
12238 return arm_address_register_rtx_p (ind, 0);
12240 /* Autoincremment addressing modes. POST_INC and PRE_DEC are
12241 acceptable in any case (subject to verification by
12242 arm_address_register_rtx_p). We need WB to be true to accept
12243 PRE_INC and POST_DEC. */
12244 if (GET_CODE (ind) == POST_INC
12245 || GET_CODE (ind) == PRE_DEC
12246 || (wb
12247 && (GET_CODE (ind) == PRE_INC
12248 || GET_CODE (ind) == POST_DEC)))
12249 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
12251 if (wb
12252 && (GET_CODE (ind) == POST_MODIFY || GET_CODE (ind) == PRE_MODIFY)
12253 && arm_address_register_rtx_p (XEXP (ind, 0), 0)
12254 && GET_CODE (XEXP (ind, 1)) == PLUS
12255 && rtx_equal_p (XEXP (XEXP (ind, 1), 0), XEXP (ind, 0)))
12256 ind = XEXP (ind, 1);
12258 /* Match:
12259 (plus (reg)
12260 (const)). */
12261 if (GET_CODE (ind) == PLUS
12262 && REG_P (XEXP (ind, 0))
12263 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
12264 && CONST_INT_P (XEXP (ind, 1))
12265 && INTVAL (XEXP (ind, 1)) > -1024
12266 && INTVAL (XEXP (ind, 1)) < 1024
12267 && (INTVAL (XEXP (ind, 1)) & 3) == 0)
12268 return TRUE;
12270 return FALSE;
12273 /* Return TRUE if OP is a memory operand which we can load or store a vector
12274 to/from. TYPE is one of the following values:
12275 0 - Vector load/stor (vldr)
12276 1 - Core registers (ldm)
12277 2 - Element/structure loads (vld1)
12280 neon_vector_mem_operand (rtx op, int type, bool strict)
12282 rtx ind;
12284 /* Reject eliminable registers. */
12285 if (strict && ! (reload_in_progress || reload_completed)
12286 && (reg_mentioned_p (frame_pointer_rtx, op)
12287 || reg_mentioned_p (arg_pointer_rtx, op)
12288 || reg_mentioned_p (virtual_incoming_args_rtx, op)
12289 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
12290 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
12291 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
12292 return FALSE;
12294 /* Constants are converted into offsets from labels. */
12295 if (!MEM_P (op))
12296 return FALSE;
12298 ind = XEXP (op, 0);
12300 if (reload_completed
12301 && (GET_CODE (ind) == LABEL_REF
12302 || (GET_CODE (ind) == CONST
12303 && GET_CODE (XEXP (ind, 0)) == PLUS
12304 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
12305 && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
12306 return TRUE;
12308 /* Match: (mem (reg)). */
12309 if (REG_P (ind))
12310 return arm_address_register_rtx_p (ind, 0);
12312 /* Allow post-increment with Neon registers. */
12313 if ((type != 1 && GET_CODE (ind) == POST_INC)
12314 || (type == 0 && GET_CODE (ind) == PRE_DEC))
12315 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
12317 /* Allow post-increment by register for VLDn */
12318 if (type == 2 && GET_CODE (ind) == POST_MODIFY
12319 && GET_CODE (XEXP (ind, 1)) == PLUS
12320 && REG_P (XEXP (XEXP (ind, 1), 1)))
12321 return true;
12323 /* Match:
12324 (plus (reg)
12325 (const)). */
12326 if (type == 0
12327 && GET_CODE (ind) == PLUS
12328 && REG_P (XEXP (ind, 0))
12329 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
12330 && CONST_INT_P (XEXP (ind, 1))
12331 && INTVAL (XEXP (ind, 1)) > -1024
12332 /* For quad modes, we restrict the constant offset to be slightly less
12333 than what the instruction format permits. We have no such constraint
12334 on double mode offsets. (This must match arm_legitimate_index_p.) */
12335 && (INTVAL (XEXP (ind, 1))
12336 < (VALID_NEON_QREG_MODE (GET_MODE (op))? 1016 : 1024))
12337 && (INTVAL (XEXP (ind, 1)) & 3) == 0)
12338 return TRUE;
12340 return FALSE;
12343 /* Return TRUE if OP is a mem suitable for loading/storing a Neon struct
12344 type. */
12346 neon_struct_mem_operand (rtx op)
12348 rtx ind;
12350 /* Reject eliminable registers. */
12351 if (! (reload_in_progress || reload_completed)
12352 && ( reg_mentioned_p (frame_pointer_rtx, op)
12353 || reg_mentioned_p (arg_pointer_rtx, op)
12354 || reg_mentioned_p (virtual_incoming_args_rtx, op)
12355 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
12356 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
12357 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
12358 return FALSE;
12360 /* Constants are converted into offsets from labels. */
12361 if (!MEM_P (op))
12362 return FALSE;
12364 ind = XEXP (op, 0);
12366 if (reload_completed
12367 && (GET_CODE (ind) == LABEL_REF
12368 || (GET_CODE (ind) == CONST
12369 && GET_CODE (XEXP (ind, 0)) == PLUS
12370 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
12371 && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
12372 return TRUE;
12374 /* Match: (mem (reg)). */
12375 if (REG_P (ind))
12376 return arm_address_register_rtx_p (ind, 0);
12378 /* vldm/vstm allows POST_INC (ia) and PRE_DEC (db). */
12379 if (GET_CODE (ind) == POST_INC
12380 || GET_CODE (ind) == PRE_DEC)
12381 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
12383 return FALSE;
12386 /* Return true if X is a register that will be eliminated later on. */
12388 arm_eliminable_register (rtx x)
12390 return REG_P (x) && (REGNO (x) == FRAME_POINTER_REGNUM
12391 || REGNO (x) == ARG_POINTER_REGNUM
12392 || (REGNO (x) >= FIRST_VIRTUAL_REGISTER
12393 && REGNO (x) <= LAST_VIRTUAL_REGISTER));
12396 /* Return GENERAL_REGS if a scratch register required to reload x to/from
12397 coprocessor registers. Otherwise return NO_REGS. */
12399 enum reg_class
12400 coproc_secondary_reload_class (machine_mode mode, rtx x, bool wb)
12402 if (mode == HFmode)
12404 if (!TARGET_NEON_FP16 && !TARGET_VFP_FP16INST)
12405 return GENERAL_REGS;
12406 if (s_register_operand (x, mode) || neon_vector_mem_operand (x, 2, true))
12407 return NO_REGS;
12408 return GENERAL_REGS;
12411 /* The neon move patterns handle all legitimate vector and struct
12412 addresses. */
12413 if (TARGET_NEON
12414 && (MEM_P (x) || GET_CODE (x) == CONST_VECTOR)
12415 && (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
12416 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT
12417 || VALID_NEON_STRUCT_MODE (mode)))
12418 return NO_REGS;
12420 if (arm_coproc_mem_operand (x, wb) || s_register_operand (x, mode))
12421 return NO_REGS;
12423 return GENERAL_REGS;
12426 /* Values which must be returned in the most-significant end of the return
12427 register. */
12429 static bool
12430 arm_return_in_msb (const_tree valtype)
12432 return (TARGET_AAPCS_BASED
12433 && BYTES_BIG_ENDIAN
12434 && (AGGREGATE_TYPE_P (valtype)
12435 || TREE_CODE (valtype) == COMPLEX_TYPE
12436 || FIXED_POINT_TYPE_P (valtype)));
12439 /* Return TRUE if X references a SYMBOL_REF. */
12441 symbol_mentioned_p (rtx x)
12443 const char * fmt;
12444 int i;
12446 if (GET_CODE (x) == SYMBOL_REF)
12447 return 1;
12449 /* UNSPEC_TLS entries for a symbol include the SYMBOL_REF, but they
12450 are constant offsets, not symbols. */
12451 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
12452 return 0;
12454 fmt = GET_RTX_FORMAT (GET_CODE (x));
12456 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
12458 if (fmt[i] == 'E')
12460 int j;
12462 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
12463 if (symbol_mentioned_p (XVECEXP (x, i, j)))
12464 return 1;
12466 else if (fmt[i] == 'e' && symbol_mentioned_p (XEXP (x, i)))
12467 return 1;
12470 return 0;
12473 /* Return TRUE if X references a LABEL_REF. */
12475 label_mentioned_p (rtx x)
12477 const char * fmt;
12478 int i;
12480 if (GET_CODE (x) == LABEL_REF)
12481 return 1;
12483 /* UNSPEC_TLS entries for a symbol include a LABEL_REF for the referencing
12484 instruction, but they are constant offsets, not symbols. */
12485 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
12486 return 0;
12488 fmt = GET_RTX_FORMAT (GET_CODE (x));
12489 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
12491 if (fmt[i] == 'E')
12493 int j;
12495 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
12496 if (label_mentioned_p (XVECEXP (x, i, j)))
12497 return 1;
12499 else if (fmt[i] == 'e' && label_mentioned_p (XEXP (x, i)))
12500 return 1;
12503 return 0;
12507 tls_mentioned_p (rtx x)
12509 switch (GET_CODE (x))
12511 case CONST:
12512 return tls_mentioned_p (XEXP (x, 0));
12514 case UNSPEC:
12515 if (XINT (x, 1) == UNSPEC_TLS)
12516 return 1;
12518 /* Fall through. */
12519 default:
12520 return 0;
12524 /* Must not copy any rtx that uses a pc-relative address.
12525 Also, disallow copying of load-exclusive instructions that
12526 may appear after splitting of compare-and-swap-style operations
12527 so as to prevent those loops from being transformed away from their
12528 canonical forms (see PR 69904). */
12530 static bool
12531 arm_cannot_copy_insn_p (rtx_insn *insn)
12533 /* The tls call insn cannot be copied, as it is paired with a data
12534 word. */
12535 if (recog_memoized (insn) == CODE_FOR_tlscall)
12536 return true;
12538 subrtx_iterator::array_type array;
12539 FOR_EACH_SUBRTX (iter, array, PATTERN (insn), ALL)
12541 const_rtx x = *iter;
12542 if (GET_CODE (x) == UNSPEC
12543 && (XINT (x, 1) == UNSPEC_PIC_BASE
12544 || XINT (x, 1) == UNSPEC_PIC_UNIFIED))
12545 return true;
12548 rtx set = single_set (insn);
12549 if (set)
12551 rtx src = SET_SRC (set);
12552 if (GET_CODE (src) == ZERO_EXTEND)
12553 src = XEXP (src, 0);
12555 /* Catch the load-exclusive and load-acquire operations. */
12556 if (GET_CODE (src) == UNSPEC_VOLATILE
12557 && (XINT (src, 1) == VUNSPEC_LL
12558 || XINT (src, 1) == VUNSPEC_LAX))
12559 return true;
12561 return false;
12564 enum rtx_code
12565 minmax_code (rtx x)
12567 enum rtx_code code = GET_CODE (x);
12569 switch (code)
12571 case SMAX:
12572 return GE;
12573 case SMIN:
12574 return LE;
12575 case UMIN:
12576 return LEU;
12577 case UMAX:
12578 return GEU;
12579 default:
12580 gcc_unreachable ();
12584 /* Match pair of min/max operators that can be implemented via usat/ssat. */
12586 bool
12587 arm_sat_operator_match (rtx lo_bound, rtx hi_bound,
12588 int *mask, bool *signed_sat)
12590 /* The high bound must be a power of two minus one. */
12591 int log = exact_log2 (INTVAL (hi_bound) + 1);
12592 if (log == -1)
12593 return false;
12595 /* The low bound is either zero (for usat) or one less than the
12596 negation of the high bound (for ssat). */
12597 if (INTVAL (lo_bound) == 0)
12599 if (mask)
12600 *mask = log;
12601 if (signed_sat)
12602 *signed_sat = false;
12604 return true;
12607 if (INTVAL (lo_bound) == -INTVAL (hi_bound) - 1)
12609 if (mask)
12610 *mask = log + 1;
12611 if (signed_sat)
12612 *signed_sat = true;
12614 return true;
12617 return false;
12620 /* Return 1 if memory locations are adjacent. */
12622 adjacent_mem_locations (rtx a, rtx b)
12624 /* We don't guarantee to preserve the order of these memory refs. */
12625 if (volatile_refs_p (a) || volatile_refs_p (b))
12626 return 0;
12628 if ((REG_P (XEXP (a, 0))
12629 || (GET_CODE (XEXP (a, 0)) == PLUS
12630 && CONST_INT_P (XEXP (XEXP (a, 0), 1))))
12631 && (REG_P (XEXP (b, 0))
12632 || (GET_CODE (XEXP (b, 0)) == PLUS
12633 && CONST_INT_P (XEXP (XEXP (b, 0), 1)))))
12635 HOST_WIDE_INT val0 = 0, val1 = 0;
12636 rtx reg0, reg1;
12637 int val_diff;
12639 if (GET_CODE (XEXP (a, 0)) == PLUS)
12641 reg0 = XEXP (XEXP (a, 0), 0);
12642 val0 = INTVAL (XEXP (XEXP (a, 0), 1));
12644 else
12645 reg0 = XEXP (a, 0);
12647 if (GET_CODE (XEXP (b, 0)) == PLUS)
12649 reg1 = XEXP (XEXP (b, 0), 0);
12650 val1 = INTVAL (XEXP (XEXP (b, 0), 1));
12652 else
12653 reg1 = XEXP (b, 0);
12655 /* Don't accept any offset that will require multiple
12656 instructions to handle, since this would cause the
12657 arith_adjacentmem pattern to output an overlong sequence. */
12658 if (!const_ok_for_op (val0, PLUS) || !const_ok_for_op (val1, PLUS))
12659 return 0;
12661 /* Don't allow an eliminable register: register elimination can make
12662 the offset too large. */
12663 if (arm_eliminable_register (reg0))
12664 return 0;
12666 val_diff = val1 - val0;
12668 if (arm_ld_sched)
12670 /* If the target has load delay slots, then there's no benefit
12671 to using an ldm instruction unless the offset is zero and
12672 we are optimizing for size. */
12673 return (optimize_size && (REGNO (reg0) == REGNO (reg1))
12674 && (val0 == 0 || val1 == 0 || val0 == 4 || val1 == 4)
12675 && (val_diff == 4 || val_diff == -4));
12678 return ((REGNO (reg0) == REGNO (reg1))
12679 && (val_diff == 4 || val_diff == -4));
12682 return 0;
12685 /* Return true if OP is a valid load or store multiple operation. LOAD is true
12686 for load operations, false for store operations. CONSECUTIVE is true
12687 if the register numbers in the operation must be consecutive in the register
12688 bank. RETURN_PC is true if value is to be loaded in PC.
12689 The pattern we are trying to match for load is:
12690 [(SET (R_d0) (MEM (PLUS (addr) (offset))))
12691 (SET (R_d1) (MEM (PLUS (addr) (offset + <reg_increment>))))
12694 (SET (R_dn) (MEM (PLUS (addr) (offset + n * <reg_increment>))))
12696 where
12697 1. If offset is 0, first insn should be (SET (R_d0) (MEM (src_addr))).
12698 2. REGNO (R_d0) < REGNO (R_d1) < ... < REGNO (R_dn).
12699 3. If consecutive is TRUE, then for kth register being loaded,
12700 REGNO (R_dk) = REGNO (R_d0) + k.
12701 The pattern for store is similar. */
12702 bool
12703 ldm_stm_operation_p (rtx op, bool load, machine_mode mode,
12704 bool consecutive, bool return_pc)
12706 HOST_WIDE_INT count = XVECLEN (op, 0);
12707 rtx reg, mem, addr;
12708 unsigned regno;
12709 unsigned first_regno;
12710 HOST_WIDE_INT i = 1, base = 0, offset = 0;
12711 rtx elt;
12712 bool addr_reg_in_reglist = false;
12713 bool update = false;
12714 int reg_increment;
12715 int offset_adj;
12716 int regs_per_val;
12718 /* If not in SImode, then registers must be consecutive
12719 (e.g., VLDM instructions for DFmode). */
12720 gcc_assert ((mode == SImode) || consecutive);
12721 /* Setting return_pc for stores is illegal. */
12722 gcc_assert (!return_pc || load);
12724 /* Set up the increments and the regs per val based on the mode. */
12725 reg_increment = GET_MODE_SIZE (mode);
12726 regs_per_val = reg_increment / 4;
12727 offset_adj = return_pc ? 1 : 0;
12729 if (count <= 1
12730 || GET_CODE (XVECEXP (op, 0, offset_adj)) != SET
12731 || (load && !REG_P (SET_DEST (XVECEXP (op, 0, offset_adj)))))
12732 return false;
12734 /* Check if this is a write-back. */
12735 elt = XVECEXP (op, 0, offset_adj);
12736 if (GET_CODE (SET_SRC (elt)) == PLUS)
12738 i++;
12739 base = 1;
12740 update = true;
12742 /* The offset adjustment must be the number of registers being
12743 popped times the size of a single register. */
12744 if (!REG_P (SET_DEST (elt))
12745 || !REG_P (XEXP (SET_SRC (elt), 0))
12746 || (REGNO (SET_DEST (elt)) != REGNO (XEXP (SET_SRC (elt), 0)))
12747 || !CONST_INT_P (XEXP (SET_SRC (elt), 1))
12748 || INTVAL (XEXP (SET_SRC (elt), 1)) !=
12749 ((count - 1 - offset_adj) * reg_increment))
12750 return false;
12753 i = i + offset_adj;
12754 base = base + offset_adj;
12755 /* Perform a quick check so we don't blow up below. If only one reg is loaded,
12756 success depends on the type: VLDM can do just one reg,
12757 LDM must do at least two. */
12758 if ((count <= i) && (mode == SImode))
12759 return false;
12761 elt = XVECEXP (op, 0, i - 1);
12762 if (GET_CODE (elt) != SET)
12763 return false;
12765 if (load)
12767 reg = SET_DEST (elt);
12768 mem = SET_SRC (elt);
12770 else
12772 reg = SET_SRC (elt);
12773 mem = SET_DEST (elt);
12776 if (!REG_P (reg) || !MEM_P (mem))
12777 return false;
12779 regno = REGNO (reg);
12780 first_regno = regno;
12781 addr = XEXP (mem, 0);
12782 if (GET_CODE (addr) == PLUS)
12784 if (!CONST_INT_P (XEXP (addr, 1)))
12785 return false;
12787 offset = INTVAL (XEXP (addr, 1));
12788 addr = XEXP (addr, 0);
12791 if (!REG_P (addr))
12792 return false;
12794 /* Don't allow SP to be loaded unless it is also the base register. It
12795 guarantees that SP is reset correctly when an LDM instruction
12796 is interrupted. Otherwise, we might end up with a corrupt stack. */
12797 if (load && (REGNO (reg) == SP_REGNUM) && (REGNO (addr) != SP_REGNUM))
12798 return false;
12800 for (; i < count; i++)
12802 elt = XVECEXP (op, 0, i);
12803 if (GET_CODE (elt) != SET)
12804 return false;
12806 if (load)
12808 reg = SET_DEST (elt);
12809 mem = SET_SRC (elt);
12811 else
12813 reg = SET_SRC (elt);
12814 mem = SET_DEST (elt);
12817 if (!REG_P (reg)
12818 || GET_MODE (reg) != mode
12819 || REGNO (reg) <= regno
12820 || (consecutive
12821 && (REGNO (reg) !=
12822 (unsigned int) (first_regno + regs_per_val * (i - base))))
12823 /* Don't allow SP to be loaded unless it is also the base register. It
12824 guarantees that SP is reset correctly when an LDM instruction
12825 is interrupted. Otherwise, we might end up with a corrupt stack. */
12826 || (load && (REGNO (reg) == SP_REGNUM) && (REGNO (addr) != SP_REGNUM))
12827 || !MEM_P (mem)
12828 || GET_MODE (mem) != mode
12829 || ((GET_CODE (XEXP (mem, 0)) != PLUS
12830 || !rtx_equal_p (XEXP (XEXP (mem, 0), 0), addr)
12831 || !CONST_INT_P (XEXP (XEXP (mem, 0), 1))
12832 || (INTVAL (XEXP (XEXP (mem, 0), 1)) !=
12833 offset + (i - base) * reg_increment))
12834 && (!REG_P (XEXP (mem, 0))
12835 || offset + (i - base) * reg_increment != 0)))
12836 return false;
12838 regno = REGNO (reg);
12839 if (regno == REGNO (addr))
12840 addr_reg_in_reglist = true;
12843 if (load)
12845 if (update && addr_reg_in_reglist)
12846 return false;
12848 /* For Thumb-1, address register is always modified - either by write-back
12849 or by explicit load. If the pattern does not describe an update,
12850 then the address register must be in the list of loaded registers. */
12851 if (TARGET_THUMB1)
12852 return update || addr_reg_in_reglist;
12855 return true;
12858 /* Return true iff it would be profitable to turn a sequence of NOPS loads
12859 or stores (depending on IS_STORE) into a load-multiple or store-multiple
12860 instruction. ADD_OFFSET is nonzero if the base address register needs
12861 to be modified with an add instruction before we can use it. */
12863 static bool
12864 multiple_operation_profitable_p (bool is_store ATTRIBUTE_UNUSED,
12865 int nops, HOST_WIDE_INT add_offset)
12867 /* For ARM8,9 & StrongARM, 2 ldr instructions are faster than an ldm
12868 if the offset isn't small enough. The reason 2 ldrs are faster
12869 is because these ARMs are able to do more than one cache access
12870 in a single cycle. The ARM9 and StrongARM have Harvard caches,
12871 whilst the ARM8 has a double bandwidth cache. This means that
12872 these cores can do both an instruction fetch and a data fetch in
12873 a single cycle, so the trick of calculating the address into a
12874 scratch register (one of the result regs) and then doing a load
12875 multiple actually becomes slower (and no smaller in code size).
12876 That is the transformation
12878 ldr rd1, [rbase + offset]
12879 ldr rd2, [rbase + offset + 4]
12883 add rd1, rbase, offset
12884 ldmia rd1, {rd1, rd2}
12886 produces worse code -- '3 cycles + any stalls on rd2' instead of
12887 '2 cycles + any stalls on rd2'. On ARMs with only one cache
12888 access per cycle, the first sequence could never complete in less
12889 than 6 cycles, whereas the ldm sequence would only take 5 and
12890 would make better use of sequential accesses if not hitting the
12891 cache.
12893 We cheat here and test 'arm_ld_sched' which we currently know to
12894 only be true for the ARM8, ARM9 and StrongARM. If this ever
12895 changes, then the test below needs to be reworked. */
12896 if (nops == 2 && arm_ld_sched && add_offset != 0)
12897 return false;
12899 /* XScale has load-store double instructions, but they have stricter
12900 alignment requirements than load-store multiple, so we cannot
12901 use them.
12903 For XScale ldm requires 2 + NREGS cycles to complete and blocks
12904 the pipeline until completion.
12906 NREGS CYCLES
12912 An ldr instruction takes 1-3 cycles, but does not block the
12913 pipeline.
12915 NREGS CYCLES
12916 1 1-3
12917 2 2-6
12918 3 3-9
12919 4 4-12
12921 Best case ldr will always win. However, the more ldr instructions
12922 we issue, the less likely we are to be able to schedule them well.
12923 Using ldr instructions also increases code size.
12925 As a compromise, we use ldr for counts of 1 or 2 regs, and ldm
12926 for counts of 3 or 4 regs. */
12927 if (nops <= 2 && arm_tune_xscale && !optimize_size)
12928 return false;
12929 return true;
12932 /* Subroutine of load_multiple_sequence and store_multiple_sequence.
12933 Given an array of UNSORTED_OFFSETS, of which there are NOPS, compute
12934 an array ORDER which describes the sequence to use when accessing the
12935 offsets that produces an ascending order. In this sequence, each
12936 offset must be larger by exactly 4 than the previous one. ORDER[0]
12937 must have been filled in with the lowest offset by the caller.
12938 If UNSORTED_REGS is nonnull, it is an array of register numbers that
12939 we use to verify that ORDER produces an ascending order of registers.
12940 Return true if it was possible to construct such an order, false if
12941 not. */
12943 static bool
12944 compute_offset_order (int nops, HOST_WIDE_INT *unsorted_offsets, int *order,
12945 int *unsorted_regs)
12947 int i;
12948 for (i = 1; i < nops; i++)
12950 int j;
12952 order[i] = order[i - 1];
12953 for (j = 0; j < nops; j++)
12954 if (unsorted_offsets[j] == unsorted_offsets[order[i - 1]] + 4)
12956 /* We must find exactly one offset that is higher than the
12957 previous one by 4. */
12958 if (order[i] != order[i - 1])
12959 return false;
12960 order[i] = j;
12962 if (order[i] == order[i - 1])
12963 return false;
12964 /* The register numbers must be ascending. */
12965 if (unsorted_regs != NULL
12966 && unsorted_regs[order[i]] <= unsorted_regs[order[i - 1]])
12967 return false;
12969 return true;
12972 /* Used to determine in a peephole whether a sequence of load
12973 instructions can be changed into a load-multiple instruction.
12974 NOPS is the number of separate load instructions we are examining. The
12975 first NOPS entries in OPERANDS are the destination registers, the
12976 next NOPS entries are memory operands. If this function is
12977 successful, *BASE is set to the common base register of the memory
12978 accesses; *LOAD_OFFSET is set to the first memory location's offset
12979 from that base register.
12980 REGS is an array filled in with the destination register numbers.
12981 SAVED_ORDER (if nonnull), is an array filled in with an order that maps
12982 insn numbers to an ascending order of stores. If CHECK_REGS is true,
12983 the sequence of registers in REGS matches the loads from ascending memory
12984 locations, and the function verifies that the register numbers are
12985 themselves ascending. If CHECK_REGS is false, the register numbers
12986 are stored in the order they are found in the operands. */
12987 static int
12988 load_multiple_sequence (rtx *operands, int nops, int *regs, int *saved_order,
12989 int *base, HOST_WIDE_INT *load_offset, bool check_regs)
12991 int unsorted_regs[MAX_LDM_STM_OPS];
12992 HOST_WIDE_INT unsorted_offsets[MAX_LDM_STM_OPS];
12993 int order[MAX_LDM_STM_OPS];
12994 rtx base_reg_rtx = NULL;
12995 int base_reg = -1;
12996 int i, ldm_case;
12998 /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
12999 easily extended if required. */
13000 gcc_assert (nops >= 2 && nops <= MAX_LDM_STM_OPS);
13002 memset (order, 0, MAX_LDM_STM_OPS * sizeof (int));
13004 /* Loop over the operands and check that the memory references are
13005 suitable (i.e. immediate offsets from the same base register). At
13006 the same time, extract the target register, and the memory
13007 offsets. */
13008 for (i = 0; i < nops; i++)
13010 rtx reg;
13011 rtx offset;
13013 /* Convert a subreg of a mem into the mem itself. */
13014 if (GET_CODE (operands[nops + i]) == SUBREG)
13015 operands[nops + i] = alter_subreg (operands + (nops + i), true);
13017 gcc_assert (MEM_P (operands[nops + i]));
13019 /* Don't reorder volatile memory references; it doesn't seem worth
13020 looking for the case where the order is ok anyway. */
13021 if (MEM_VOLATILE_P (operands[nops + i]))
13022 return 0;
13024 offset = const0_rtx;
13026 if ((REG_P (reg = XEXP (operands[nops + i], 0))
13027 || (GET_CODE (reg) == SUBREG
13028 && REG_P (reg = SUBREG_REG (reg))))
13029 || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
13030 && ((REG_P (reg = XEXP (XEXP (operands[nops + i], 0), 0)))
13031 || (GET_CODE (reg) == SUBREG
13032 && REG_P (reg = SUBREG_REG (reg))))
13033 && (CONST_INT_P (offset
13034 = XEXP (XEXP (operands[nops + i], 0), 1)))))
13036 if (i == 0)
13038 base_reg = REGNO (reg);
13039 base_reg_rtx = reg;
13040 if (TARGET_THUMB1 && base_reg > LAST_LO_REGNUM)
13041 return 0;
13043 else if (base_reg != (int) REGNO (reg))
13044 /* Not addressed from the same base register. */
13045 return 0;
13047 unsorted_regs[i] = (REG_P (operands[i])
13048 ? REGNO (operands[i])
13049 : REGNO (SUBREG_REG (operands[i])));
13051 /* If it isn't an integer register, or if it overwrites the
13052 base register but isn't the last insn in the list, then
13053 we can't do this. */
13054 if (unsorted_regs[i] < 0
13055 || (TARGET_THUMB1 && unsorted_regs[i] > LAST_LO_REGNUM)
13056 || unsorted_regs[i] > 14
13057 || (i != nops - 1 && unsorted_regs[i] == base_reg))
13058 return 0;
13060 /* Don't allow SP to be loaded unless it is also the base
13061 register. It guarantees that SP is reset correctly when
13062 an LDM instruction is interrupted. Otherwise, we might
13063 end up with a corrupt stack. */
13064 if (unsorted_regs[i] == SP_REGNUM && base_reg != SP_REGNUM)
13065 return 0;
13067 unsorted_offsets[i] = INTVAL (offset);
13068 if (i == 0 || unsorted_offsets[i] < unsorted_offsets[order[0]])
13069 order[0] = i;
13071 else
13072 /* Not a suitable memory address. */
13073 return 0;
13076 /* All the useful information has now been extracted from the
13077 operands into unsorted_regs and unsorted_offsets; additionally,
13078 order[0] has been set to the lowest offset in the list. Sort
13079 the offsets into order, verifying that they are adjacent, and
13080 check that the register numbers are ascending. */
13081 if (!compute_offset_order (nops, unsorted_offsets, order,
13082 check_regs ? unsorted_regs : NULL))
13083 return 0;
13085 if (saved_order)
13086 memcpy (saved_order, order, sizeof order);
13088 if (base)
13090 *base = base_reg;
13092 for (i = 0; i < nops; i++)
13093 regs[i] = unsorted_regs[check_regs ? order[i] : i];
13095 *load_offset = unsorted_offsets[order[0]];
13098 if (TARGET_THUMB1
13099 && !peep2_reg_dead_p (nops, base_reg_rtx))
13100 return 0;
13102 if (unsorted_offsets[order[0]] == 0)
13103 ldm_case = 1; /* ldmia */
13104 else if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
13105 ldm_case = 2; /* ldmib */
13106 else if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
13107 ldm_case = 3; /* ldmda */
13108 else if (TARGET_32BIT && unsorted_offsets[order[nops - 1]] == -4)
13109 ldm_case = 4; /* ldmdb */
13110 else if (const_ok_for_arm (unsorted_offsets[order[0]])
13111 || const_ok_for_arm (-unsorted_offsets[order[0]]))
13112 ldm_case = 5;
13113 else
13114 return 0;
13116 if (!multiple_operation_profitable_p (false, nops,
13117 ldm_case == 5
13118 ? unsorted_offsets[order[0]] : 0))
13119 return 0;
13121 return ldm_case;
13124 /* Used to determine in a peephole whether a sequence of store instructions can
13125 be changed into a store-multiple instruction.
13126 NOPS is the number of separate store instructions we are examining.
13127 NOPS_TOTAL is the total number of instructions recognized by the peephole
13128 pattern.
13129 The first NOPS entries in OPERANDS are the source registers, the next
13130 NOPS entries are memory operands. If this function is successful, *BASE is
13131 set to the common base register of the memory accesses; *LOAD_OFFSET is set
13132 to the first memory location's offset from that base register. REGS is an
13133 array filled in with the source register numbers, REG_RTXS (if nonnull) is
13134 likewise filled with the corresponding rtx's.
13135 SAVED_ORDER (if nonnull), is an array filled in with an order that maps insn
13136 numbers to an ascending order of stores.
13137 If CHECK_REGS is true, the sequence of registers in *REGS matches the stores
13138 from ascending memory locations, and the function verifies that the register
13139 numbers are themselves ascending. If CHECK_REGS is false, the register
13140 numbers are stored in the order they are found in the operands. */
13141 static int
13142 store_multiple_sequence (rtx *operands, int nops, int nops_total,
13143 int *regs, rtx *reg_rtxs, int *saved_order, int *base,
13144 HOST_WIDE_INT *load_offset, bool check_regs)
13146 int unsorted_regs[MAX_LDM_STM_OPS];
13147 rtx unsorted_reg_rtxs[MAX_LDM_STM_OPS];
13148 HOST_WIDE_INT unsorted_offsets[MAX_LDM_STM_OPS];
13149 int order[MAX_LDM_STM_OPS];
13150 int base_reg = -1;
13151 rtx base_reg_rtx = NULL;
13152 int i, stm_case;
13154 /* Write back of base register is currently only supported for Thumb 1. */
13155 int base_writeback = TARGET_THUMB1;
13157 /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
13158 easily extended if required. */
13159 gcc_assert (nops >= 2 && nops <= MAX_LDM_STM_OPS);
13161 memset (order, 0, MAX_LDM_STM_OPS * sizeof (int));
13163 /* Loop over the operands and check that the memory references are
13164 suitable (i.e. immediate offsets from the same base register). At
13165 the same time, extract the target register, and the memory
13166 offsets. */
13167 for (i = 0; i < nops; i++)
13169 rtx reg;
13170 rtx offset;
13172 /* Convert a subreg of a mem into the mem itself. */
13173 if (GET_CODE (operands[nops + i]) == SUBREG)
13174 operands[nops + i] = alter_subreg (operands + (nops + i), true);
13176 gcc_assert (MEM_P (operands[nops + i]));
13178 /* Don't reorder volatile memory references; it doesn't seem worth
13179 looking for the case where the order is ok anyway. */
13180 if (MEM_VOLATILE_P (operands[nops + i]))
13181 return 0;
13183 offset = const0_rtx;
13185 if ((REG_P (reg = XEXP (operands[nops + i], 0))
13186 || (GET_CODE (reg) == SUBREG
13187 && REG_P (reg = SUBREG_REG (reg))))
13188 || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
13189 && ((REG_P (reg = XEXP (XEXP (operands[nops + i], 0), 0)))
13190 || (GET_CODE (reg) == SUBREG
13191 && REG_P (reg = SUBREG_REG (reg))))
13192 && (CONST_INT_P (offset
13193 = XEXP (XEXP (operands[nops + i], 0), 1)))))
13195 unsorted_reg_rtxs[i] = (REG_P (operands[i])
13196 ? operands[i] : SUBREG_REG (operands[i]));
13197 unsorted_regs[i] = REGNO (unsorted_reg_rtxs[i]);
13199 if (i == 0)
13201 base_reg = REGNO (reg);
13202 base_reg_rtx = reg;
13203 if (TARGET_THUMB1 && base_reg > LAST_LO_REGNUM)
13204 return 0;
13206 else if (base_reg != (int) REGNO (reg))
13207 /* Not addressed from the same base register. */
13208 return 0;
13210 /* If it isn't an integer register, then we can't do this. */
13211 if (unsorted_regs[i] < 0
13212 || (TARGET_THUMB1 && unsorted_regs[i] > LAST_LO_REGNUM)
13213 /* The effects are unpredictable if the base register is
13214 both updated and stored. */
13215 || (base_writeback && unsorted_regs[i] == base_reg)
13216 || (TARGET_THUMB2 && unsorted_regs[i] == SP_REGNUM)
13217 || unsorted_regs[i] > 14)
13218 return 0;
13220 unsorted_offsets[i] = INTVAL (offset);
13221 if (i == 0 || unsorted_offsets[i] < unsorted_offsets[order[0]])
13222 order[0] = i;
13224 else
13225 /* Not a suitable memory address. */
13226 return 0;
13229 /* All the useful information has now been extracted from the
13230 operands into unsorted_regs and unsorted_offsets; additionally,
13231 order[0] has been set to the lowest offset in the list. Sort
13232 the offsets into order, verifying that they are adjacent, and
13233 check that the register numbers are ascending. */
13234 if (!compute_offset_order (nops, unsorted_offsets, order,
13235 check_regs ? unsorted_regs : NULL))
13236 return 0;
13238 if (saved_order)
13239 memcpy (saved_order, order, sizeof order);
13241 if (base)
13243 *base = base_reg;
13245 for (i = 0; i < nops; i++)
13247 regs[i] = unsorted_regs[check_regs ? order[i] : i];
13248 if (reg_rtxs)
13249 reg_rtxs[i] = unsorted_reg_rtxs[check_regs ? order[i] : i];
13252 *load_offset = unsorted_offsets[order[0]];
13255 if (TARGET_THUMB1
13256 && !peep2_reg_dead_p (nops_total, base_reg_rtx))
13257 return 0;
13259 if (unsorted_offsets[order[0]] == 0)
13260 stm_case = 1; /* stmia */
13261 else if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
13262 stm_case = 2; /* stmib */
13263 else if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
13264 stm_case = 3; /* stmda */
13265 else if (TARGET_32BIT && unsorted_offsets[order[nops - 1]] == -4)
13266 stm_case = 4; /* stmdb */
13267 else
13268 return 0;
13270 if (!multiple_operation_profitable_p (false, nops, 0))
13271 return 0;
13273 return stm_case;
13276 /* Routines for use in generating RTL. */
13278 /* Generate a load-multiple instruction. COUNT is the number of loads in
13279 the instruction; REGS and MEMS are arrays containing the operands.
13280 BASEREG is the base register to be used in addressing the memory operands.
13281 WBACK_OFFSET is nonzero if the instruction should update the base
13282 register. */
13284 static rtx
13285 arm_gen_load_multiple_1 (int count, int *regs, rtx *mems, rtx basereg,
13286 HOST_WIDE_INT wback_offset)
13288 int i = 0, j;
13289 rtx result;
13291 if (!multiple_operation_profitable_p (false, count, 0))
13293 rtx seq;
13295 start_sequence ();
13297 for (i = 0; i < count; i++)
13298 emit_move_insn (gen_rtx_REG (SImode, regs[i]), mems[i]);
13300 if (wback_offset != 0)
13301 emit_move_insn (basereg, plus_constant (Pmode, basereg, wback_offset));
13303 seq = get_insns ();
13304 end_sequence ();
13306 return seq;
13309 result = gen_rtx_PARALLEL (VOIDmode,
13310 rtvec_alloc (count + (wback_offset != 0 ? 1 : 0)));
13311 if (wback_offset != 0)
13313 XVECEXP (result, 0, 0)
13314 = gen_rtx_SET (basereg, plus_constant (Pmode, basereg, wback_offset));
13315 i = 1;
13316 count++;
13319 for (j = 0; i < count; i++, j++)
13320 XVECEXP (result, 0, i)
13321 = gen_rtx_SET (gen_rtx_REG (SImode, regs[j]), mems[j]);
13323 return result;
13326 /* Generate a store-multiple instruction. COUNT is the number of stores in
13327 the instruction; REGS and MEMS are arrays containing the operands.
13328 BASEREG is the base register to be used in addressing the memory operands.
13329 WBACK_OFFSET is nonzero if the instruction should update the base
13330 register. */
13332 static rtx
13333 arm_gen_store_multiple_1 (int count, int *regs, rtx *mems, rtx basereg,
13334 HOST_WIDE_INT wback_offset)
13336 int i = 0, j;
13337 rtx result;
13339 if (GET_CODE (basereg) == PLUS)
13340 basereg = XEXP (basereg, 0);
13342 if (!multiple_operation_profitable_p (false, count, 0))
13344 rtx seq;
13346 start_sequence ();
13348 for (i = 0; i < count; i++)
13349 emit_move_insn (mems[i], gen_rtx_REG (SImode, regs[i]));
13351 if (wback_offset != 0)
13352 emit_move_insn (basereg, plus_constant (Pmode, basereg, wback_offset));
13354 seq = get_insns ();
13355 end_sequence ();
13357 return seq;
13360 result = gen_rtx_PARALLEL (VOIDmode,
13361 rtvec_alloc (count + (wback_offset != 0 ? 1 : 0)));
13362 if (wback_offset != 0)
13364 XVECEXP (result, 0, 0)
13365 = gen_rtx_SET (basereg, plus_constant (Pmode, basereg, wback_offset));
13366 i = 1;
13367 count++;
13370 for (j = 0; i < count; i++, j++)
13371 XVECEXP (result, 0, i)
13372 = gen_rtx_SET (mems[j], gen_rtx_REG (SImode, regs[j]));
13374 return result;
13377 /* Generate either a load-multiple or a store-multiple instruction. This
13378 function can be used in situations where we can start with a single MEM
13379 rtx and adjust its address upwards.
13380 COUNT is the number of operations in the instruction, not counting a
13381 possible update of the base register. REGS is an array containing the
13382 register operands.
13383 BASEREG is the base register to be used in addressing the memory operands,
13384 which are constructed from BASEMEM.
13385 WRITE_BACK specifies whether the generated instruction should include an
13386 update of the base register.
13387 OFFSETP is used to pass an offset to and from this function; this offset
13388 is not used when constructing the address (instead BASEMEM should have an
13389 appropriate offset in its address), it is used only for setting
13390 MEM_OFFSET. It is updated only if WRITE_BACK is true.*/
13392 static rtx
13393 arm_gen_multiple_op (bool is_load, int *regs, int count, rtx basereg,
13394 bool write_back, rtx basemem, HOST_WIDE_INT *offsetp)
13396 rtx mems[MAX_LDM_STM_OPS];
13397 HOST_WIDE_INT offset = *offsetp;
13398 int i;
13400 gcc_assert (count <= MAX_LDM_STM_OPS);
13402 if (GET_CODE (basereg) == PLUS)
13403 basereg = XEXP (basereg, 0);
13405 for (i = 0; i < count; i++)
13407 rtx addr = plus_constant (Pmode, basereg, i * 4);
13408 mems[i] = adjust_automodify_address_nv (basemem, SImode, addr, offset);
13409 offset += 4;
13412 if (write_back)
13413 *offsetp = offset;
13415 if (is_load)
13416 return arm_gen_load_multiple_1 (count, regs, mems, basereg,
13417 write_back ? 4 * count : 0);
13418 else
13419 return arm_gen_store_multiple_1 (count, regs, mems, basereg,
13420 write_back ? 4 * count : 0);
13424 arm_gen_load_multiple (int *regs, int count, rtx basereg, int write_back,
13425 rtx basemem, HOST_WIDE_INT *offsetp)
13427 return arm_gen_multiple_op (TRUE, regs, count, basereg, write_back, basemem,
13428 offsetp);
13432 arm_gen_store_multiple (int *regs, int count, rtx basereg, int write_back,
13433 rtx basemem, HOST_WIDE_INT *offsetp)
13435 return arm_gen_multiple_op (FALSE, regs, count, basereg, write_back, basemem,
13436 offsetp);
13439 /* Called from a peephole2 expander to turn a sequence of loads into an
13440 LDM instruction. OPERANDS are the operands found by the peephole matcher;
13441 NOPS indicates how many separate loads we are trying to combine. SORT_REGS
13442 is true if we can reorder the registers because they are used commutatively
13443 subsequently.
13444 Returns true iff we could generate a new instruction. */
13446 bool
13447 gen_ldm_seq (rtx *operands, int nops, bool sort_regs)
13449 int regs[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
13450 rtx mems[MAX_LDM_STM_OPS];
13451 int i, j, base_reg;
13452 rtx base_reg_rtx;
13453 HOST_WIDE_INT offset;
13454 int write_back = FALSE;
13455 int ldm_case;
13456 rtx addr;
13458 ldm_case = load_multiple_sequence (operands, nops, regs, mem_order,
13459 &base_reg, &offset, !sort_regs);
13461 if (ldm_case == 0)
13462 return false;
13464 if (sort_regs)
13465 for (i = 0; i < nops - 1; i++)
13466 for (j = i + 1; j < nops; j++)
13467 if (regs[i] > regs[j])
13469 int t = regs[i];
13470 regs[i] = regs[j];
13471 regs[j] = t;
13473 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
13475 if (TARGET_THUMB1)
13477 gcc_assert (peep2_reg_dead_p (nops, base_reg_rtx));
13478 gcc_assert (ldm_case == 1 || ldm_case == 5);
13479 write_back = TRUE;
13482 if (ldm_case == 5)
13484 rtx newbase = TARGET_THUMB1 ? base_reg_rtx : gen_rtx_REG (SImode, regs[0]);
13485 emit_insn (gen_addsi3 (newbase, base_reg_rtx, GEN_INT (offset)));
13486 offset = 0;
13487 if (!TARGET_THUMB1)
13489 base_reg = regs[0];
13490 base_reg_rtx = newbase;
13494 for (i = 0; i < nops; i++)
13496 addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
13497 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
13498 SImode, addr, 0);
13500 emit_insn (arm_gen_load_multiple_1 (nops, regs, mems, base_reg_rtx,
13501 write_back ? offset + i * 4 : 0));
13502 return true;
13505 /* Called from a peephole2 expander to turn a sequence of stores into an
13506 STM instruction. OPERANDS are the operands found by the peephole matcher;
13507 NOPS indicates how many separate stores we are trying to combine.
13508 Returns true iff we could generate a new instruction. */
13510 bool
13511 gen_stm_seq (rtx *operands, int nops)
13513 int i;
13514 int regs[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
13515 rtx mems[MAX_LDM_STM_OPS];
13516 int base_reg;
13517 rtx base_reg_rtx;
13518 HOST_WIDE_INT offset;
13519 int write_back = FALSE;
13520 int stm_case;
13521 rtx addr;
13522 bool base_reg_dies;
13524 stm_case = store_multiple_sequence (operands, nops, nops, regs, NULL,
13525 mem_order, &base_reg, &offset, true);
13527 if (stm_case == 0)
13528 return false;
13530 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
13532 base_reg_dies = peep2_reg_dead_p (nops, base_reg_rtx);
13533 if (TARGET_THUMB1)
13535 gcc_assert (base_reg_dies);
13536 write_back = TRUE;
13539 if (stm_case == 5)
13541 gcc_assert (base_reg_dies);
13542 emit_insn (gen_addsi3 (base_reg_rtx, base_reg_rtx, GEN_INT (offset)));
13543 offset = 0;
13546 addr = plus_constant (Pmode, base_reg_rtx, offset);
13548 for (i = 0; i < nops; i++)
13550 addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
13551 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
13552 SImode, addr, 0);
13554 emit_insn (arm_gen_store_multiple_1 (nops, regs, mems, base_reg_rtx,
13555 write_back ? offset + i * 4 : 0));
13556 return true;
13559 /* Called from a peephole2 expander to turn a sequence of stores that are
13560 preceded by constant loads into an STM instruction. OPERANDS are the
13561 operands found by the peephole matcher; NOPS indicates how many
13562 separate stores we are trying to combine; there are 2 * NOPS
13563 instructions in the peephole.
13564 Returns true iff we could generate a new instruction. */
13566 bool
13567 gen_const_stm_seq (rtx *operands, int nops)
13569 int regs[MAX_LDM_STM_OPS], sorted_regs[MAX_LDM_STM_OPS];
13570 int reg_order[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
13571 rtx reg_rtxs[MAX_LDM_STM_OPS], orig_reg_rtxs[MAX_LDM_STM_OPS];
13572 rtx mems[MAX_LDM_STM_OPS];
13573 int base_reg;
13574 rtx base_reg_rtx;
13575 HOST_WIDE_INT offset;
13576 int write_back = FALSE;
13577 int stm_case;
13578 rtx addr;
13579 bool base_reg_dies;
13580 int i, j;
13581 HARD_REG_SET allocated;
13583 stm_case = store_multiple_sequence (operands, nops, 2 * nops, regs, reg_rtxs,
13584 mem_order, &base_reg, &offset, false);
13586 if (stm_case == 0)
13587 return false;
13589 memcpy (orig_reg_rtxs, reg_rtxs, sizeof orig_reg_rtxs);
13591 /* If the same register is used more than once, try to find a free
13592 register. */
13593 CLEAR_HARD_REG_SET (allocated);
13594 for (i = 0; i < nops; i++)
13596 for (j = i + 1; j < nops; j++)
13597 if (regs[i] == regs[j])
13599 rtx t = peep2_find_free_register (0, nops * 2,
13600 TARGET_THUMB1 ? "l" : "r",
13601 SImode, &allocated);
13602 if (t == NULL_RTX)
13603 return false;
13604 reg_rtxs[i] = t;
13605 regs[i] = REGNO (t);
13609 /* Compute an ordering that maps the register numbers to an ascending
13610 sequence. */
13611 reg_order[0] = 0;
13612 for (i = 0; i < nops; i++)
13613 if (regs[i] < regs[reg_order[0]])
13614 reg_order[0] = i;
13616 for (i = 1; i < nops; i++)
13618 int this_order = reg_order[i - 1];
13619 for (j = 0; j < nops; j++)
13620 if (regs[j] > regs[reg_order[i - 1]]
13621 && (this_order == reg_order[i - 1]
13622 || regs[j] < regs[this_order]))
13623 this_order = j;
13624 reg_order[i] = this_order;
13627 /* Ensure that registers that must be live after the instruction end
13628 up with the correct value. */
13629 for (i = 0; i < nops; i++)
13631 int this_order = reg_order[i];
13632 if ((this_order != mem_order[i]
13633 || orig_reg_rtxs[this_order] != reg_rtxs[this_order])
13634 && !peep2_reg_dead_p (nops * 2, orig_reg_rtxs[this_order]))
13635 return false;
13638 /* Load the constants. */
13639 for (i = 0; i < nops; i++)
13641 rtx op = operands[2 * nops + mem_order[i]];
13642 sorted_regs[i] = regs[reg_order[i]];
13643 emit_move_insn (reg_rtxs[reg_order[i]], op);
13646 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
13648 base_reg_dies = peep2_reg_dead_p (nops * 2, base_reg_rtx);
13649 if (TARGET_THUMB1)
13651 gcc_assert (base_reg_dies);
13652 write_back = TRUE;
13655 if (stm_case == 5)
13657 gcc_assert (base_reg_dies);
13658 emit_insn (gen_addsi3 (base_reg_rtx, base_reg_rtx, GEN_INT (offset)));
13659 offset = 0;
13662 addr = plus_constant (Pmode, base_reg_rtx, offset);
13664 for (i = 0; i < nops; i++)
13666 addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
13667 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
13668 SImode, addr, 0);
13670 emit_insn (arm_gen_store_multiple_1 (nops, sorted_regs, mems, base_reg_rtx,
13671 write_back ? offset + i * 4 : 0));
13672 return true;
13675 /* Copy a block of memory using plain ldr/str/ldrh/strh instructions, to permit
13676 unaligned copies on processors which support unaligned semantics for those
13677 instructions. INTERLEAVE_FACTOR can be used to attempt to hide load latency
13678 (using more registers) by doing e.g. load/load/store/store for a factor of 2.
13679 An interleave factor of 1 (the minimum) will perform no interleaving.
13680 Load/store multiple are used for aligned addresses where possible. */
13682 static void
13683 arm_block_move_unaligned_straight (rtx dstbase, rtx srcbase,
13684 HOST_WIDE_INT length,
13685 unsigned int interleave_factor)
13687 rtx *regs = XALLOCAVEC (rtx, interleave_factor);
13688 int *regnos = XALLOCAVEC (int, interleave_factor);
13689 HOST_WIDE_INT block_size_bytes = interleave_factor * UNITS_PER_WORD;
13690 HOST_WIDE_INT i, j;
13691 HOST_WIDE_INT remaining = length, words;
13692 rtx halfword_tmp = NULL, byte_tmp = NULL;
13693 rtx dst, src;
13694 bool src_aligned = MEM_ALIGN (srcbase) >= BITS_PER_WORD;
13695 bool dst_aligned = MEM_ALIGN (dstbase) >= BITS_PER_WORD;
13696 HOST_WIDE_INT srcoffset, dstoffset;
13697 HOST_WIDE_INT src_autoinc, dst_autoinc;
13698 rtx mem, addr;
13700 gcc_assert (1 <= interleave_factor && interleave_factor <= 4);
13702 /* Use hard registers if we have aligned source or destination so we can use
13703 load/store multiple with contiguous registers. */
13704 if (dst_aligned || src_aligned)
13705 for (i = 0; i < interleave_factor; i++)
13706 regs[i] = gen_rtx_REG (SImode, i);
13707 else
13708 for (i = 0; i < interleave_factor; i++)
13709 regs[i] = gen_reg_rtx (SImode);
13711 dst = copy_addr_to_reg (XEXP (dstbase, 0));
13712 src = copy_addr_to_reg (XEXP (srcbase, 0));
13714 srcoffset = dstoffset = 0;
13716 /* Calls to arm_gen_load_multiple and arm_gen_store_multiple update SRC/DST.
13717 For copying the last bytes we want to subtract this offset again. */
13718 src_autoinc = dst_autoinc = 0;
13720 for (i = 0; i < interleave_factor; i++)
13721 regnos[i] = i;
13723 /* Copy BLOCK_SIZE_BYTES chunks. */
13725 for (i = 0; i + block_size_bytes <= length; i += block_size_bytes)
13727 /* Load words. */
13728 if (src_aligned && interleave_factor > 1)
13730 emit_insn (arm_gen_load_multiple (regnos, interleave_factor, src,
13731 TRUE, srcbase, &srcoffset));
13732 src_autoinc += UNITS_PER_WORD * interleave_factor;
13734 else
13736 for (j = 0; j < interleave_factor; j++)
13738 addr = plus_constant (Pmode, src, (srcoffset + j * UNITS_PER_WORD
13739 - src_autoinc));
13740 mem = adjust_automodify_address (srcbase, SImode, addr,
13741 srcoffset + j * UNITS_PER_WORD);
13742 emit_insn (gen_unaligned_loadsi (regs[j], mem));
13744 srcoffset += block_size_bytes;
13747 /* Store words. */
13748 if (dst_aligned && interleave_factor > 1)
13750 emit_insn (arm_gen_store_multiple (regnos, interleave_factor, dst,
13751 TRUE, dstbase, &dstoffset));
13752 dst_autoinc += UNITS_PER_WORD * interleave_factor;
13754 else
13756 for (j = 0; j < interleave_factor; j++)
13758 addr = plus_constant (Pmode, dst, (dstoffset + j * UNITS_PER_WORD
13759 - dst_autoinc));
13760 mem = adjust_automodify_address (dstbase, SImode, addr,
13761 dstoffset + j * UNITS_PER_WORD);
13762 emit_insn (gen_unaligned_storesi (mem, regs[j]));
13764 dstoffset += block_size_bytes;
13767 remaining -= block_size_bytes;
13770 /* Copy any whole words left (note these aren't interleaved with any
13771 subsequent halfword/byte load/stores in the interests of simplicity). */
13773 words = remaining / UNITS_PER_WORD;
13775 gcc_assert (words < interleave_factor);
13777 if (src_aligned && words > 1)
13779 emit_insn (arm_gen_load_multiple (regnos, words, src, TRUE, srcbase,
13780 &srcoffset));
13781 src_autoinc += UNITS_PER_WORD * words;
13783 else
13785 for (j = 0; j < words; j++)
13787 addr = plus_constant (Pmode, src,
13788 srcoffset + j * UNITS_PER_WORD - src_autoinc);
13789 mem = adjust_automodify_address (srcbase, SImode, addr,
13790 srcoffset + j * UNITS_PER_WORD);
13791 if (src_aligned)
13792 emit_move_insn (regs[j], mem);
13793 else
13794 emit_insn (gen_unaligned_loadsi (regs[j], mem));
13796 srcoffset += words * UNITS_PER_WORD;
13799 if (dst_aligned && words > 1)
13801 emit_insn (arm_gen_store_multiple (regnos, words, dst, TRUE, dstbase,
13802 &dstoffset));
13803 dst_autoinc += words * UNITS_PER_WORD;
13805 else
13807 for (j = 0; j < words; j++)
13809 addr = plus_constant (Pmode, dst,
13810 dstoffset + j * UNITS_PER_WORD - dst_autoinc);
13811 mem = adjust_automodify_address (dstbase, SImode, addr,
13812 dstoffset + j * UNITS_PER_WORD);
13813 if (dst_aligned)
13814 emit_move_insn (mem, regs[j]);
13815 else
13816 emit_insn (gen_unaligned_storesi (mem, regs[j]));
13818 dstoffset += words * UNITS_PER_WORD;
13821 remaining -= words * UNITS_PER_WORD;
13823 gcc_assert (remaining < 4);
13825 /* Copy a halfword if necessary. */
13827 if (remaining >= 2)
13829 halfword_tmp = gen_reg_rtx (SImode);
13831 addr = plus_constant (Pmode, src, srcoffset - src_autoinc);
13832 mem = adjust_automodify_address (srcbase, HImode, addr, srcoffset);
13833 emit_insn (gen_unaligned_loadhiu (halfword_tmp, mem));
13835 /* Either write out immediately, or delay until we've loaded the last
13836 byte, depending on interleave factor. */
13837 if (interleave_factor == 1)
13839 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
13840 mem = adjust_automodify_address (dstbase, HImode, addr, dstoffset);
13841 emit_insn (gen_unaligned_storehi (mem,
13842 gen_lowpart (HImode, halfword_tmp)));
13843 halfword_tmp = NULL;
13844 dstoffset += 2;
13847 remaining -= 2;
13848 srcoffset += 2;
13851 gcc_assert (remaining < 2);
13853 /* Copy last byte. */
13855 if ((remaining & 1) != 0)
13857 byte_tmp = gen_reg_rtx (SImode);
13859 addr = plus_constant (Pmode, src, srcoffset - src_autoinc);
13860 mem = adjust_automodify_address (srcbase, QImode, addr, srcoffset);
13861 emit_move_insn (gen_lowpart (QImode, byte_tmp), mem);
13863 if (interleave_factor == 1)
13865 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
13866 mem = adjust_automodify_address (dstbase, QImode, addr, dstoffset);
13867 emit_move_insn (mem, gen_lowpart (QImode, byte_tmp));
13868 byte_tmp = NULL;
13869 dstoffset++;
13872 remaining--;
13873 srcoffset++;
13876 /* Store last halfword if we haven't done so already. */
13878 if (halfword_tmp)
13880 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
13881 mem = adjust_automodify_address (dstbase, HImode, addr, dstoffset);
13882 emit_insn (gen_unaligned_storehi (mem,
13883 gen_lowpart (HImode, halfword_tmp)));
13884 dstoffset += 2;
13887 /* Likewise for last byte. */
13889 if (byte_tmp)
13891 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
13892 mem = adjust_automodify_address (dstbase, QImode, addr, dstoffset);
13893 emit_move_insn (mem, gen_lowpart (QImode, byte_tmp));
13894 dstoffset++;
13897 gcc_assert (remaining == 0 && srcoffset == dstoffset);
13900 /* From mips_adjust_block_mem:
13902 Helper function for doing a loop-based block operation on memory
13903 reference MEM. Each iteration of the loop will operate on LENGTH
13904 bytes of MEM.
13906 Create a new base register for use within the loop and point it to
13907 the start of MEM. Create a new memory reference that uses this
13908 register. Store them in *LOOP_REG and *LOOP_MEM respectively. */
13910 static void
13911 arm_adjust_block_mem (rtx mem, HOST_WIDE_INT length, rtx *loop_reg,
13912 rtx *loop_mem)
13914 *loop_reg = copy_addr_to_reg (XEXP (mem, 0));
13916 /* Although the new mem does not refer to a known location,
13917 it does keep up to LENGTH bytes of alignment. */
13918 *loop_mem = change_address (mem, BLKmode, *loop_reg);
13919 set_mem_align (*loop_mem, MIN (MEM_ALIGN (mem), length * BITS_PER_UNIT));
13922 /* From mips_block_move_loop:
13924 Move LENGTH bytes from SRC to DEST using a loop that moves BYTES_PER_ITER
13925 bytes at a time. LENGTH must be at least BYTES_PER_ITER. Assume that
13926 the memory regions do not overlap. */
13928 static void
13929 arm_block_move_unaligned_loop (rtx dest, rtx src, HOST_WIDE_INT length,
13930 unsigned int interleave_factor,
13931 HOST_WIDE_INT bytes_per_iter)
13933 rtx src_reg, dest_reg, final_src, test;
13934 HOST_WIDE_INT leftover;
13936 leftover = length % bytes_per_iter;
13937 length -= leftover;
13939 /* Create registers and memory references for use within the loop. */
13940 arm_adjust_block_mem (src, bytes_per_iter, &src_reg, &src);
13941 arm_adjust_block_mem (dest, bytes_per_iter, &dest_reg, &dest);
13943 /* Calculate the value that SRC_REG should have after the last iteration of
13944 the loop. */
13945 final_src = expand_simple_binop (Pmode, PLUS, src_reg, GEN_INT (length),
13946 0, 0, OPTAB_WIDEN);
13948 /* Emit the start of the loop. */
13949 rtx_code_label *label = gen_label_rtx ();
13950 emit_label (label);
13952 /* Emit the loop body. */
13953 arm_block_move_unaligned_straight (dest, src, bytes_per_iter,
13954 interleave_factor);
13956 /* Move on to the next block. */
13957 emit_move_insn (src_reg, plus_constant (Pmode, src_reg, bytes_per_iter));
13958 emit_move_insn (dest_reg, plus_constant (Pmode, dest_reg, bytes_per_iter));
13960 /* Emit the loop condition. */
13961 test = gen_rtx_NE (VOIDmode, src_reg, final_src);
13962 emit_jump_insn (gen_cbranchsi4 (test, src_reg, final_src, label));
13964 /* Mop up any left-over bytes. */
13965 if (leftover)
13966 arm_block_move_unaligned_straight (dest, src, leftover, interleave_factor);
13969 /* Emit a block move when either the source or destination is unaligned (not
13970 aligned to a four-byte boundary). This may need further tuning depending on
13971 core type, optimize_size setting, etc. */
13973 static int
13974 arm_movmemqi_unaligned (rtx *operands)
13976 HOST_WIDE_INT length = INTVAL (operands[2]);
13978 if (optimize_size)
13980 bool src_aligned = MEM_ALIGN (operands[1]) >= BITS_PER_WORD;
13981 bool dst_aligned = MEM_ALIGN (operands[0]) >= BITS_PER_WORD;
13982 /* Inlined memcpy using ldr/str/ldrh/strh can be quite big: try to limit
13983 size of code if optimizing for size. We'll use ldm/stm if src_aligned
13984 or dst_aligned though: allow more interleaving in those cases since the
13985 resulting code can be smaller. */
13986 unsigned int interleave_factor = (src_aligned || dst_aligned) ? 2 : 1;
13987 HOST_WIDE_INT bytes_per_iter = (src_aligned || dst_aligned) ? 8 : 4;
13989 if (length > 12)
13990 arm_block_move_unaligned_loop (operands[0], operands[1], length,
13991 interleave_factor, bytes_per_iter);
13992 else
13993 arm_block_move_unaligned_straight (operands[0], operands[1], length,
13994 interleave_factor);
13996 else
13998 /* Note that the loop created by arm_block_move_unaligned_loop may be
13999 subject to loop unrolling, which makes tuning this condition a little
14000 redundant. */
14001 if (length > 32)
14002 arm_block_move_unaligned_loop (operands[0], operands[1], length, 4, 16);
14003 else
14004 arm_block_move_unaligned_straight (operands[0], operands[1], length, 4);
14007 return 1;
14011 arm_gen_movmemqi (rtx *operands)
14013 HOST_WIDE_INT in_words_to_go, out_words_to_go, last_bytes;
14014 HOST_WIDE_INT srcoffset, dstoffset;
14015 int i;
14016 rtx src, dst, srcbase, dstbase;
14017 rtx part_bytes_reg = NULL;
14018 rtx mem;
14020 if (!CONST_INT_P (operands[2])
14021 || !CONST_INT_P (operands[3])
14022 || INTVAL (operands[2]) > 64)
14023 return 0;
14025 if (unaligned_access && (INTVAL (operands[3]) & 3) != 0)
14026 return arm_movmemqi_unaligned (operands);
14028 if (INTVAL (operands[3]) & 3)
14029 return 0;
14031 dstbase = operands[0];
14032 srcbase = operands[1];
14034 dst = copy_to_mode_reg (SImode, XEXP (dstbase, 0));
14035 src = copy_to_mode_reg (SImode, XEXP (srcbase, 0));
14037 in_words_to_go = ARM_NUM_INTS (INTVAL (operands[2]));
14038 out_words_to_go = INTVAL (operands[2]) / 4;
14039 last_bytes = INTVAL (operands[2]) & 3;
14040 dstoffset = srcoffset = 0;
14042 if (out_words_to_go != in_words_to_go && ((in_words_to_go - 1) & 3) != 0)
14043 part_bytes_reg = gen_rtx_REG (SImode, (in_words_to_go - 1) & 3);
14045 for (i = 0; in_words_to_go >= 2; i+=4)
14047 if (in_words_to_go > 4)
14048 emit_insn (arm_gen_load_multiple (arm_regs_in_sequence, 4, src,
14049 TRUE, srcbase, &srcoffset));
14050 else
14051 emit_insn (arm_gen_load_multiple (arm_regs_in_sequence, in_words_to_go,
14052 src, FALSE, srcbase,
14053 &srcoffset));
14055 if (out_words_to_go)
14057 if (out_words_to_go > 4)
14058 emit_insn (arm_gen_store_multiple (arm_regs_in_sequence, 4, dst,
14059 TRUE, dstbase, &dstoffset));
14060 else if (out_words_to_go != 1)
14061 emit_insn (arm_gen_store_multiple (arm_regs_in_sequence,
14062 out_words_to_go, dst,
14063 (last_bytes == 0
14064 ? FALSE : TRUE),
14065 dstbase, &dstoffset));
14066 else
14068 mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
14069 emit_move_insn (mem, gen_rtx_REG (SImode, R0_REGNUM));
14070 if (last_bytes != 0)
14072 emit_insn (gen_addsi3 (dst, dst, GEN_INT (4)));
14073 dstoffset += 4;
14078 in_words_to_go -= in_words_to_go < 4 ? in_words_to_go : 4;
14079 out_words_to_go -= out_words_to_go < 4 ? out_words_to_go : 4;
14082 /* OUT_WORDS_TO_GO will be zero here if there are byte stores to do. */
14083 if (out_words_to_go)
14085 rtx sreg;
14087 mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
14088 sreg = copy_to_reg (mem);
14090 mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
14091 emit_move_insn (mem, sreg);
14092 in_words_to_go--;
14094 gcc_assert (!in_words_to_go); /* Sanity check */
14097 if (in_words_to_go)
14099 gcc_assert (in_words_to_go > 0);
14101 mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
14102 part_bytes_reg = copy_to_mode_reg (SImode, mem);
14105 gcc_assert (!last_bytes || part_bytes_reg);
14107 if (BYTES_BIG_ENDIAN && last_bytes)
14109 rtx tmp = gen_reg_rtx (SImode);
14111 /* The bytes we want are in the top end of the word. */
14112 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg,
14113 GEN_INT (8 * (4 - last_bytes))));
14114 part_bytes_reg = tmp;
14116 while (last_bytes)
14118 mem = adjust_automodify_address (dstbase, QImode,
14119 plus_constant (Pmode, dst,
14120 last_bytes - 1),
14121 dstoffset + last_bytes - 1);
14122 emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
14124 if (--last_bytes)
14126 tmp = gen_reg_rtx (SImode);
14127 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (8)));
14128 part_bytes_reg = tmp;
14133 else
14135 if (last_bytes > 1)
14137 mem = adjust_automodify_address (dstbase, HImode, dst, dstoffset);
14138 emit_move_insn (mem, gen_lowpart (HImode, part_bytes_reg));
14139 last_bytes -= 2;
14140 if (last_bytes)
14142 rtx tmp = gen_reg_rtx (SImode);
14143 emit_insn (gen_addsi3 (dst, dst, const2_rtx));
14144 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (16)));
14145 part_bytes_reg = tmp;
14146 dstoffset += 2;
14150 if (last_bytes)
14152 mem = adjust_automodify_address (dstbase, QImode, dst, dstoffset);
14153 emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
14157 return 1;
14160 /* Helper for gen_movmem_ldrd_strd. Increase the address of memory rtx
14161 by mode size. */
14162 inline static rtx
14163 next_consecutive_mem (rtx mem)
14165 machine_mode mode = GET_MODE (mem);
14166 HOST_WIDE_INT offset = GET_MODE_SIZE (mode);
14167 rtx addr = plus_constant (Pmode, XEXP (mem, 0), offset);
14169 return adjust_automodify_address (mem, mode, addr, offset);
14172 /* Copy using LDRD/STRD instructions whenever possible.
14173 Returns true upon success. */
14174 bool
14175 gen_movmem_ldrd_strd (rtx *operands)
14177 unsigned HOST_WIDE_INT len;
14178 HOST_WIDE_INT align;
14179 rtx src, dst, base;
14180 rtx reg0;
14181 bool src_aligned, dst_aligned;
14182 bool src_volatile, dst_volatile;
14184 gcc_assert (CONST_INT_P (operands[2]));
14185 gcc_assert (CONST_INT_P (operands[3]));
14187 len = UINTVAL (operands[2]);
14188 if (len > 64)
14189 return false;
14191 /* Maximum alignment we can assume for both src and dst buffers. */
14192 align = INTVAL (operands[3]);
14194 if ((!unaligned_access) && (len >= 4) && ((align & 3) != 0))
14195 return false;
14197 /* Place src and dst addresses in registers
14198 and update the corresponding mem rtx. */
14199 dst = operands[0];
14200 dst_volatile = MEM_VOLATILE_P (dst);
14201 dst_aligned = MEM_ALIGN (dst) >= BITS_PER_WORD;
14202 base = copy_to_mode_reg (SImode, XEXP (dst, 0));
14203 dst = adjust_automodify_address (dst, VOIDmode, base, 0);
14205 src = operands[1];
14206 src_volatile = MEM_VOLATILE_P (src);
14207 src_aligned = MEM_ALIGN (src) >= BITS_PER_WORD;
14208 base = copy_to_mode_reg (SImode, XEXP (src, 0));
14209 src = adjust_automodify_address (src, VOIDmode, base, 0);
14211 if (!unaligned_access && !(src_aligned && dst_aligned))
14212 return false;
14214 if (src_volatile || dst_volatile)
14215 return false;
14217 /* If we cannot generate any LDRD/STRD, try to generate LDM/STM. */
14218 if (!(dst_aligned || src_aligned))
14219 return arm_gen_movmemqi (operands);
14221 /* If the either src or dst is unaligned we'll be accessing it as pairs
14222 of unaligned SImode accesses. Otherwise we can generate DImode
14223 ldrd/strd instructions. */
14224 src = adjust_address (src, src_aligned ? DImode : SImode, 0);
14225 dst = adjust_address (dst, dst_aligned ? DImode : SImode, 0);
14227 while (len >= 8)
14229 len -= 8;
14230 reg0 = gen_reg_rtx (DImode);
14231 rtx low_reg = NULL_RTX;
14232 rtx hi_reg = NULL_RTX;
14234 if (!src_aligned || !dst_aligned)
14236 low_reg = gen_lowpart (SImode, reg0);
14237 hi_reg = gen_highpart_mode (SImode, DImode, reg0);
14239 if (src_aligned)
14240 emit_move_insn (reg0, src);
14241 else
14243 emit_insn (gen_unaligned_loadsi (low_reg, src));
14244 src = next_consecutive_mem (src);
14245 emit_insn (gen_unaligned_loadsi (hi_reg, src));
14248 if (dst_aligned)
14249 emit_move_insn (dst, reg0);
14250 else
14252 emit_insn (gen_unaligned_storesi (dst, low_reg));
14253 dst = next_consecutive_mem (dst);
14254 emit_insn (gen_unaligned_storesi (dst, hi_reg));
14257 src = next_consecutive_mem (src);
14258 dst = next_consecutive_mem (dst);
14261 gcc_assert (len < 8);
14262 if (len >= 4)
14264 /* More than a word but less than a double-word to copy. Copy a word. */
14265 reg0 = gen_reg_rtx (SImode);
14266 src = adjust_address (src, SImode, 0);
14267 dst = adjust_address (dst, SImode, 0);
14268 if (src_aligned)
14269 emit_move_insn (reg0, src);
14270 else
14271 emit_insn (gen_unaligned_loadsi (reg0, src));
14273 if (dst_aligned)
14274 emit_move_insn (dst, reg0);
14275 else
14276 emit_insn (gen_unaligned_storesi (dst, reg0));
14278 src = next_consecutive_mem (src);
14279 dst = next_consecutive_mem (dst);
14280 len -= 4;
14283 if (len == 0)
14284 return true;
14286 /* Copy the remaining bytes. */
14287 if (len >= 2)
14289 dst = adjust_address (dst, HImode, 0);
14290 src = adjust_address (src, HImode, 0);
14291 reg0 = gen_reg_rtx (SImode);
14292 if (src_aligned)
14293 emit_insn (gen_zero_extendhisi2 (reg0, src));
14294 else
14295 emit_insn (gen_unaligned_loadhiu (reg0, src));
14297 if (dst_aligned)
14298 emit_insn (gen_movhi (dst, gen_lowpart(HImode, reg0)));
14299 else
14300 emit_insn (gen_unaligned_storehi (dst, gen_lowpart (HImode, reg0)));
14302 src = next_consecutive_mem (src);
14303 dst = next_consecutive_mem (dst);
14304 if (len == 2)
14305 return true;
14308 dst = adjust_address (dst, QImode, 0);
14309 src = adjust_address (src, QImode, 0);
14310 reg0 = gen_reg_rtx (QImode);
14311 emit_move_insn (reg0, src);
14312 emit_move_insn (dst, reg0);
14313 return true;
14316 /* Select a dominance comparison mode if possible for a test of the general
14317 form (OP (COND_OR (X) (Y)) (const_int 0)). We support three forms.
14318 COND_OR == DOM_CC_X_AND_Y => (X && Y)
14319 COND_OR == DOM_CC_NX_OR_Y => ((! X) || Y)
14320 COND_OR == DOM_CC_X_OR_Y => (X || Y)
14321 In all cases OP will be either EQ or NE, but we don't need to know which
14322 here. If we are unable to support a dominance comparison we return
14323 CC mode. This will then fail to match for the RTL expressions that
14324 generate this call. */
14325 machine_mode
14326 arm_select_dominance_cc_mode (rtx x, rtx y, HOST_WIDE_INT cond_or)
14328 enum rtx_code cond1, cond2;
14329 int swapped = 0;
14331 /* Currently we will probably get the wrong result if the individual
14332 comparisons are not simple. This also ensures that it is safe to
14333 reverse a comparison if necessary. */
14334 if ((arm_select_cc_mode (cond1 = GET_CODE (x), XEXP (x, 0), XEXP (x, 1))
14335 != CCmode)
14336 || (arm_select_cc_mode (cond2 = GET_CODE (y), XEXP (y, 0), XEXP (y, 1))
14337 != CCmode))
14338 return CCmode;
14340 /* The if_then_else variant of this tests the second condition if the
14341 first passes, but is true if the first fails. Reverse the first
14342 condition to get a true "inclusive-or" expression. */
14343 if (cond_or == DOM_CC_NX_OR_Y)
14344 cond1 = reverse_condition (cond1);
14346 /* If the comparisons are not equal, and one doesn't dominate the other,
14347 then we can't do this. */
14348 if (cond1 != cond2
14349 && !comparison_dominates_p (cond1, cond2)
14350 && (swapped = 1, !comparison_dominates_p (cond2, cond1)))
14351 return CCmode;
14353 if (swapped)
14354 std::swap (cond1, cond2);
14356 switch (cond1)
14358 case EQ:
14359 if (cond_or == DOM_CC_X_AND_Y)
14360 return CC_DEQmode;
14362 switch (cond2)
14364 case EQ: return CC_DEQmode;
14365 case LE: return CC_DLEmode;
14366 case LEU: return CC_DLEUmode;
14367 case GE: return CC_DGEmode;
14368 case GEU: return CC_DGEUmode;
14369 default: gcc_unreachable ();
14372 case LT:
14373 if (cond_or == DOM_CC_X_AND_Y)
14374 return CC_DLTmode;
14376 switch (cond2)
14378 case LT:
14379 return CC_DLTmode;
14380 case LE:
14381 return CC_DLEmode;
14382 case NE:
14383 return CC_DNEmode;
14384 default:
14385 gcc_unreachable ();
14388 case GT:
14389 if (cond_or == DOM_CC_X_AND_Y)
14390 return CC_DGTmode;
14392 switch (cond2)
14394 case GT:
14395 return CC_DGTmode;
14396 case GE:
14397 return CC_DGEmode;
14398 case NE:
14399 return CC_DNEmode;
14400 default:
14401 gcc_unreachable ();
14404 case LTU:
14405 if (cond_or == DOM_CC_X_AND_Y)
14406 return CC_DLTUmode;
14408 switch (cond2)
14410 case LTU:
14411 return CC_DLTUmode;
14412 case LEU:
14413 return CC_DLEUmode;
14414 case NE:
14415 return CC_DNEmode;
14416 default:
14417 gcc_unreachable ();
14420 case GTU:
14421 if (cond_or == DOM_CC_X_AND_Y)
14422 return CC_DGTUmode;
14424 switch (cond2)
14426 case GTU:
14427 return CC_DGTUmode;
14428 case GEU:
14429 return CC_DGEUmode;
14430 case NE:
14431 return CC_DNEmode;
14432 default:
14433 gcc_unreachable ();
14436 /* The remaining cases only occur when both comparisons are the
14437 same. */
14438 case NE:
14439 gcc_assert (cond1 == cond2);
14440 return CC_DNEmode;
14442 case LE:
14443 gcc_assert (cond1 == cond2);
14444 return CC_DLEmode;
14446 case GE:
14447 gcc_assert (cond1 == cond2);
14448 return CC_DGEmode;
14450 case LEU:
14451 gcc_assert (cond1 == cond2);
14452 return CC_DLEUmode;
14454 case GEU:
14455 gcc_assert (cond1 == cond2);
14456 return CC_DGEUmode;
14458 default:
14459 gcc_unreachable ();
14463 machine_mode
14464 arm_select_cc_mode (enum rtx_code op, rtx x, rtx y)
14466 /* All floating point compares return CCFP if it is an equality
14467 comparison, and CCFPE otherwise. */
14468 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
14470 switch (op)
14472 case EQ:
14473 case NE:
14474 case UNORDERED:
14475 case ORDERED:
14476 case UNLT:
14477 case UNLE:
14478 case UNGT:
14479 case UNGE:
14480 case UNEQ:
14481 case LTGT:
14482 return CCFPmode;
14484 case LT:
14485 case LE:
14486 case GT:
14487 case GE:
14488 return CCFPEmode;
14490 default:
14491 gcc_unreachable ();
14495 /* A compare with a shifted operand. Because of canonicalization, the
14496 comparison will have to be swapped when we emit the assembler. */
14497 if (GET_MODE (y) == SImode
14498 && (REG_P (y) || (GET_CODE (y) == SUBREG))
14499 && (GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
14500 || GET_CODE (x) == LSHIFTRT || GET_CODE (x) == ROTATE
14501 || GET_CODE (x) == ROTATERT))
14502 return CC_SWPmode;
14504 /* This operation is performed swapped, but since we only rely on the Z
14505 flag we don't need an additional mode. */
14506 if (GET_MODE (y) == SImode
14507 && (REG_P (y) || (GET_CODE (y) == SUBREG))
14508 && GET_CODE (x) == NEG
14509 && (op == EQ || op == NE))
14510 return CC_Zmode;
14512 /* This is a special case that is used by combine to allow a
14513 comparison of a shifted byte load to be split into a zero-extend
14514 followed by a comparison of the shifted integer (only valid for
14515 equalities and unsigned inequalities). */
14516 if (GET_MODE (x) == SImode
14517 && GET_CODE (x) == ASHIFT
14518 && CONST_INT_P (XEXP (x, 1)) && INTVAL (XEXP (x, 1)) == 24
14519 && GET_CODE (XEXP (x, 0)) == SUBREG
14520 && MEM_P (SUBREG_REG (XEXP (x, 0)))
14521 && GET_MODE (SUBREG_REG (XEXP (x, 0))) == QImode
14522 && (op == EQ || op == NE
14523 || op == GEU || op == GTU || op == LTU || op == LEU)
14524 && CONST_INT_P (y))
14525 return CC_Zmode;
14527 /* A construct for a conditional compare, if the false arm contains
14528 0, then both conditions must be true, otherwise either condition
14529 must be true. Not all conditions are possible, so CCmode is
14530 returned if it can't be done. */
14531 if (GET_CODE (x) == IF_THEN_ELSE
14532 && (XEXP (x, 2) == const0_rtx
14533 || XEXP (x, 2) == const1_rtx)
14534 && COMPARISON_P (XEXP (x, 0))
14535 && COMPARISON_P (XEXP (x, 1)))
14536 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
14537 INTVAL (XEXP (x, 2)));
14539 /* Alternate canonicalizations of the above. These are somewhat cleaner. */
14540 if (GET_CODE (x) == AND
14541 && (op == EQ || op == NE)
14542 && COMPARISON_P (XEXP (x, 0))
14543 && COMPARISON_P (XEXP (x, 1)))
14544 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
14545 DOM_CC_X_AND_Y);
14547 if (GET_CODE (x) == IOR
14548 && (op == EQ || op == NE)
14549 && COMPARISON_P (XEXP (x, 0))
14550 && COMPARISON_P (XEXP (x, 1)))
14551 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
14552 DOM_CC_X_OR_Y);
14554 /* An operation (on Thumb) where we want to test for a single bit.
14555 This is done by shifting that bit up into the top bit of a
14556 scratch register; we can then branch on the sign bit. */
14557 if (TARGET_THUMB1
14558 && GET_MODE (x) == SImode
14559 && (op == EQ || op == NE)
14560 && GET_CODE (x) == ZERO_EXTRACT
14561 && XEXP (x, 1) == const1_rtx)
14562 return CC_Nmode;
14564 /* An operation that sets the condition codes as a side-effect, the
14565 V flag is not set correctly, so we can only use comparisons where
14566 this doesn't matter. (For LT and GE we can use "mi" and "pl"
14567 instead.) */
14568 /* ??? Does the ZERO_EXTRACT case really apply to thumb2? */
14569 if (GET_MODE (x) == SImode
14570 && y == const0_rtx
14571 && (op == EQ || op == NE || op == LT || op == GE)
14572 && (GET_CODE (x) == PLUS || GET_CODE (x) == MINUS
14573 || GET_CODE (x) == AND || GET_CODE (x) == IOR
14574 || GET_CODE (x) == XOR || GET_CODE (x) == MULT
14575 || GET_CODE (x) == NOT || GET_CODE (x) == NEG
14576 || GET_CODE (x) == LSHIFTRT
14577 || GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
14578 || GET_CODE (x) == ROTATERT
14579 || (TARGET_32BIT && GET_CODE (x) == ZERO_EXTRACT)))
14580 return CC_NOOVmode;
14582 if (GET_MODE (x) == QImode && (op == EQ || op == NE))
14583 return CC_Zmode;
14585 if (GET_MODE (x) == SImode && (op == LTU || op == GEU)
14586 && GET_CODE (x) == PLUS
14587 && (rtx_equal_p (XEXP (x, 0), y) || rtx_equal_p (XEXP (x, 1), y)))
14588 return CC_Cmode;
14590 if (GET_MODE (x) == DImode || GET_MODE (y) == DImode)
14592 switch (op)
14594 case EQ:
14595 case NE:
14596 /* A DImode comparison against zero can be implemented by
14597 or'ing the two halves together. */
14598 if (y == const0_rtx)
14599 return CC_Zmode;
14601 /* We can do an equality test in three Thumb instructions. */
14602 if (!TARGET_32BIT)
14603 return CC_Zmode;
14605 /* FALLTHROUGH */
14607 case LTU:
14608 case LEU:
14609 case GTU:
14610 case GEU:
14611 /* DImode unsigned comparisons can be implemented by cmp +
14612 cmpeq without a scratch register. Not worth doing in
14613 Thumb-2. */
14614 if (TARGET_32BIT)
14615 return CC_CZmode;
14617 /* FALLTHROUGH */
14619 case LT:
14620 case LE:
14621 case GT:
14622 case GE:
14623 /* DImode signed and unsigned comparisons can be implemented
14624 by cmp + sbcs with a scratch register, but that does not
14625 set the Z flag - we must reverse GT/LE/GTU/LEU. */
14626 gcc_assert (op != EQ && op != NE);
14627 return CC_NCVmode;
14629 default:
14630 gcc_unreachable ();
14634 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_CC)
14635 return GET_MODE (x);
14637 return CCmode;
14640 /* X and Y are two things to compare using CODE. Emit the compare insn and
14641 return the rtx for register 0 in the proper mode. FP means this is a
14642 floating point compare: I don't think that it is needed on the arm. */
14644 arm_gen_compare_reg (enum rtx_code code, rtx x, rtx y, rtx scratch)
14646 machine_mode mode;
14647 rtx cc_reg;
14648 int dimode_comparison = GET_MODE (x) == DImode || GET_MODE (y) == DImode;
14650 /* We might have X as a constant, Y as a register because of the predicates
14651 used for cmpdi. If so, force X to a register here. */
14652 if (dimode_comparison && !REG_P (x))
14653 x = force_reg (DImode, x);
14655 mode = SELECT_CC_MODE (code, x, y);
14656 cc_reg = gen_rtx_REG (mode, CC_REGNUM);
14658 if (dimode_comparison
14659 && mode != CC_CZmode)
14661 rtx clobber, set;
14663 /* To compare two non-zero values for equality, XOR them and
14664 then compare against zero. Not used for ARM mode; there
14665 CC_CZmode is cheaper. */
14666 if (mode == CC_Zmode && y != const0_rtx)
14668 gcc_assert (!reload_completed);
14669 x = expand_binop (DImode, xor_optab, x, y, NULL_RTX, 0, OPTAB_WIDEN);
14670 y = const0_rtx;
14673 /* A scratch register is required. */
14674 if (reload_completed)
14675 gcc_assert (scratch != NULL && GET_MODE (scratch) == SImode);
14676 else
14677 scratch = gen_rtx_SCRATCH (SImode);
14679 clobber = gen_rtx_CLOBBER (VOIDmode, scratch);
14680 set = gen_rtx_SET (cc_reg, gen_rtx_COMPARE (mode, x, y));
14681 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, set, clobber)));
14683 else
14684 emit_set_insn (cc_reg, gen_rtx_COMPARE (mode, x, y));
14686 return cc_reg;
14689 /* Generate a sequence of insns that will generate the correct return
14690 address mask depending on the physical architecture that the program
14691 is running on. */
14693 arm_gen_return_addr_mask (void)
14695 rtx reg = gen_reg_rtx (Pmode);
14697 emit_insn (gen_return_addr_mask (reg));
14698 return reg;
14701 void
14702 arm_reload_in_hi (rtx *operands)
14704 rtx ref = operands[1];
14705 rtx base, scratch;
14706 HOST_WIDE_INT offset = 0;
14708 if (GET_CODE (ref) == SUBREG)
14710 offset = SUBREG_BYTE (ref);
14711 ref = SUBREG_REG (ref);
14714 if (REG_P (ref))
14716 /* We have a pseudo which has been spilt onto the stack; there
14717 are two cases here: the first where there is a simple
14718 stack-slot replacement and a second where the stack-slot is
14719 out of range, or is used as a subreg. */
14720 if (reg_equiv_mem (REGNO (ref)))
14722 ref = reg_equiv_mem (REGNO (ref));
14723 base = find_replacement (&XEXP (ref, 0));
14725 else
14726 /* The slot is out of range, or was dressed up in a SUBREG. */
14727 base = reg_equiv_address (REGNO (ref));
14729 /* PR 62554: If there is no equivalent memory location then just move
14730 the value as an SImode register move. This happens when the target
14731 architecture variant does not have an HImode register move. */
14732 if (base == NULL)
14734 gcc_assert (REG_P (operands[0]));
14735 emit_insn (gen_movsi (gen_rtx_SUBREG (SImode, operands[0], 0),
14736 gen_rtx_SUBREG (SImode, ref, 0)));
14737 return;
14740 else
14741 base = find_replacement (&XEXP (ref, 0));
14743 /* Handle the case where the address is too complex to be offset by 1. */
14744 if (GET_CODE (base) == MINUS
14745 || (GET_CODE (base) == PLUS && !CONST_INT_P (XEXP (base, 1))))
14747 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
14749 emit_set_insn (base_plus, base);
14750 base = base_plus;
14752 else if (GET_CODE (base) == PLUS)
14754 /* The addend must be CONST_INT, or we would have dealt with it above. */
14755 HOST_WIDE_INT hi, lo;
14757 offset += INTVAL (XEXP (base, 1));
14758 base = XEXP (base, 0);
14760 /* Rework the address into a legal sequence of insns. */
14761 /* Valid range for lo is -4095 -> 4095 */
14762 lo = (offset >= 0
14763 ? (offset & 0xfff)
14764 : -((-offset) & 0xfff));
14766 /* Corner case, if lo is the max offset then we would be out of range
14767 once we have added the additional 1 below, so bump the msb into the
14768 pre-loading insn(s). */
14769 if (lo == 4095)
14770 lo &= 0x7ff;
14772 hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
14773 ^ (HOST_WIDE_INT) 0x80000000)
14774 - (HOST_WIDE_INT) 0x80000000);
14776 gcc_assert (hi + lo == offset);
14778 if (hi != 0)
14780 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
14782 /* Get the base address; addsi3 knows how to handle constants
14783 that require more than one insn. */
14784 emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
14785 base = base_plus;
14786 offset = lo;
14790 /* Operands[2] may overlap operands[0] (though it won't overlap
14791 operands[1]), that's why we asked for a DImode reg -- so we can
14792 use the bit that does not overlap. */
14793 if (REGNO (operands[2]) == REGNO (operands[0]))
14794 scratch = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
14795 else
14796 scratch = gen_rtx_REG (SImode, REGNO (operands[2]));
14798 emit_insn (gen_zero_extendqisi2 (scratch,
14799 gen_rtx_MEM (QImode,
14800 plus_constant (Pmode, base,
14801 offset))));
14802 emit_insn (gen_zero_extendqisi2 (gen_rtx_SUBREG (SImode, operands[0], 0),
14803 gen_rtx_MEM (QImode,
14804 plus_constant (Pmode, base,
14805 offset + 1))));
14806 if (!BYTES_BIG_ENDIAN)
14807 emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
14808 gen_rtx_IOR (SImode,
14809 gen_rtx_ASHIFT
14810 (SImode,
14811 gen_rtx_SUBREG (SImode, operands[0], 0),
14812 GEN_INT (8)),
14813 scratch));
14814 else
14815 emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
14816 gen_rtx_IOR (SImode,
14817 gen_rtx_ASHIFT (SImode, scratch,
14818 GEN_INT (8)),
14819 gen_rtx_SUBREG (SImode, operands[0], 0)));
14822 /* Handle storing a half-word to memory during reload by synthesizing as two
14823 byte stores. Take care not to clobber the input values until after we
14824 have moved them somewhere safe. This code assumes that if the DImode
14825 scratch in operands[2] overlaps either the input value or output address
14826 in some way, then that value must die in this insn (we absolutely need
14827 two scratch registers for some corner cases). */
14828 void
14829 arm_reload_out_hi (rtx *operands)
14831 rtx ref = operands[0];
14832 rtx outval = operands[1];
14833 rtx base, scratch;
14834 HOST_WIDE_INT offset = 0;
14836 if (GET_CODE (ref) == SUBREG)
14838 offset = SUBREG_BYTE (ref);
14839 ref = SUBREG_REG (ref);
14842 if (REG_P (ref))
14844 /* We have a pseudo which has been spilt onto the stack; there
14845 are two cases here: the first where there is a simple
14846 stack-slot replacement and a second where the stack-slot is
14847 out of range, or is used as a subreg. */
14848 if (reg_equiv_mem (REGNO (ref)))
14850 ref = reg_equiv_mem (REGNO (ref));
14851 base = find_replacement (&XEXP (ref, 0));
14853 else
14854 /* The slot is out of range, or was dressed up in a SUBREG. */
14855 base = reg_equiv_address (REGNO (ref));
14857 /* PR 62254: If there is no equivalent memory location then just move
14858 the value as an SImode register move. This happens when the target
14859 architecture variant does not have an HImode register move. */
14860 if (base == NULL)
14862 gcc_assert (REG_P (outval) || SUBREG_P (outval));
14864 if (REG_P (outval))
14866 emit_insn (gen_movsi (gen_rtx_SUBREG (SImode, ref, 0),
14867 gen_rtx_SUBREG (SImode, outval, 0)));
14869 else /* SUBREG_P (outval) */
14871 if (GET_MODE (SUBREG_REG (outval)) == SImode)
14872 emit_insn (gen_movsi (gen_rtx_SUBREG (SImode, ref, 0),
14873 SUBREG_REG (outval)));
14874 else
14875 /* FIXME: Handle other cases ? */
14876 gcc_unreachable ();
14878 return;
14881 else
14882 base = find_replacement (&XEXP (ref, 0));
14884 scratch = gen_rtx_REG (SImode, REGNO (operands[2]));
14886 /* Handle the case where the address is too complex to be offset by 1. */
14887 if (GET_CODE (base) == MINUS
14888 || (GET_CODE (base) == PLUS && !CONST_INT_P (XEXP (base, 1))))
14890 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
14892 /* Be careful not to destroy OUTVAL. */
14893 if (reg_overlap_mentioned_p (base_plus, outval))
14895 /* Updating base_plus might destroy outval, see if we can
14896 swap the scratch and base_plus. */
14897 if (!reg_overlap_mentioned_p (scratch, outval))
14898 std::swap (scratch, base_plus);
14899 else
14901 rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));
14903 /* Be conservative and copy OUTVAL into the scratch now,
14904 this should only be necessary if outval is a subreg
14905 of something larger than a word. */
14906 /* XXX Might this clobber base? I can't see how it can,
14907 since scratch is known to overlap with OUTVAL, and
14908 must be wider than a word. */
14909 emit_insn (gen_movhi (scratch_hi, outval));
14910 outval = scratch_hi;
14914 emit_set_insn (base_plus, base);
14915 base = base_plus;
14917 else if (GET_CODE (base) == PLUS)
14919 /* The addend must be CONST_INT, or we would have dealt with it above. */
14920 HOST_WIDE_INT hi, lo;
14922 offset += INTVAL (XEXP (base, 1));
14923 base = XEXP (base, 0);
14925 /* Rework the address into a legal sequence of insns. */
14926 /* Valid range for lo is -4095 -> 4095 */
14927 lo = (offset >= 0
14928 ? (offset & 0xfff)
14929 : -((-offset) & 0xfff));
14931 /* Corner case, if lo is the max offset then we would be out of range
14932 once we have added the additional 1 below, so bump the msb into the
14933 pre-loading insn(s). */
14934 if (lo == 4095)
14935 lo &= 0x7ff;
14937 hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
14938 ^ (HOST_WIDE_INT) 0x80000000)
14939 - (HOST_WIDE_INT) 0x80000000);
14941 gcc_assert (hi + lo == offset);
14943 if (hi != 0)
14945 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
14947 /* Be careful not to destroy OUTVAL. */
14948 if (reg_overlap_mentioned_p (base_plus, outval))
14950 /* Updating base_plus might destroy outval, see if we
14951 can swap the scratch and base_plus. */
14952 if (!reg_overlap_mentioned_p (scratch, outval))
14953 std::swap (scratch, base_plus);
14954 else
14956 rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));
14958 /* Be conservative and copy outval into scratch now,
14959 this should only be necessary if outval is a
14960 subreg of something larger than a word. */
14961 /* XXX Might this clobber base? I can't see how it
14962 can, since scratch is known to overlap with
14963 outval. */
14964 emit_insn (gen_movhi (scratch_hi, outval));
14965 outval = scratch_hi;
14969 /* Get the base address; addsi3 knows how to handle constants
14970 that require more than one insn. */
14971 emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
14972 base = base_plus;
14973 offset = lo;
14977 if (BYTES_BIG_ENDIAN)
14979 emit_insn (gen_movqi (gen_rtx_MEM (QImode,
14980 plus_constant (Pmode, base,
14981 offset + 1)),
14982 gen_lowpart (QImode, outval)));
14983 emit_insn (gen_lshrsi3 (scratch,
14984 gen_rtx_SUBREG (SImode, outval, 0),
14985 GEN_INT (8)));
14986 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, base,
14987 offset)),
14988 gen_lowpart (QImode, scratch)));
14990 else
14992 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, base,
14993 offset)),
14994 gen_lowpart (QImode, outval)));
14995 emit_insn (gen_lshrsi3 (scratch,
14996 gen_rtx_SUBREG (SImode, outval, 0),
14997 GEN_INT (8)));
14998 emit_insn (gen_movqi (gen_rtx_MEM (QImode,
14999 plus_constant (Pmode, base,
15000 offset + 1)),
15001 gen_lowpart (QImode, scratch)));
15005 /* Return true if a type must be passed in memory. For AAPCS, small aggregates
15006 (padded to the size of a word) should be passed in a register. */
15008 static bool
15009 arm_must_pass_in_stack (machine_mode mode, const_tree type)
15011 if (TARGET_AAPCS_BASED)
15012 return must_pass_in_stack_var_size (mode, type);
15013 else
15014 return must_pass_in_stack_var_size_or_pad (mode, type);
15018 /* For use by FUNCTION_ARG_PADDING (MODE, TYPE).
15019 Return true if an argument passed on the stack should be padded upwards,
15020 i.e. if the least-significant byte has useful data.
15021 For legacy APCS ABIs we use the default. For AAPCS based ABIs small
15022 aggregate types are placed in the lowest memory address. */
15024 bool
15025 arm_pad_arg_upward (machine_mode mode ATTRIBUTE_UNUSED, const_tree type)
15027 if (!TARGET_AAPCS_BASED)
15028 return DEFAULT_FUNCTION_ARG_PADDING(mode, type) == upward;
15030 if (type && BYTES_BIG_ENDIAN && INTEGRAL_TYPE_P (type))
15031 return false;
15033 return true;
15037 /* Similarly, for use by BLOCK_REG_PADDING (MODE, TYPE, FIRST).
15038 Return !BYTES_BIG_ENDIAN if the least significant byte of the
15039 register has useful data, and return the opposite if the most
15040 significant byte does. */
15042 bool
15043 arm_pad_reg_upward (machine_mode mode,
15044 tree type, int first ATTRIBUTE_UNUSED)
15046 if (TARGET_AAPCS_BASED && BYTES_BIG_ENDIAN)
15048 /* For AAPCS, small aggregates, small fixed-point types,
15049 and small complex types are always padded upwards. */
15050 if (type)
15052 if ((AGGREGATE_TYPE_P (type)
15053 || TREE_CODE (type) == COMPLEX_TYPE
15054 || FIXED_POINT_TYPE_P (type))
15055 && int_size_in_bytes (type) <= 4)
15056 return true;
15058 else
15060 if ((COMPLEX_MODE_P (mode) || ALL_FIXED_POINT_MODE_P (mode))
15061 && GET_MODE_SIZE (mode) <= 4)
15062 return true;
15066 /* Otherwise, use default padding. */
15067 return !BYTES_BIG_ENDIAN;
15070 /* Returns true iff OFFSET is valid for use in an LDRD/STRD instruction,
15071 assuming that the address in the base register is word aligned. */
15072 bool
15073 offset_ok_for_ldrd_strd (HOST_WIDE_INT offset)
15075 HOST_WIDE_INT max_offset;
15077 /* Offset must be a multiple of 4 in Thumb mode. */
15078 if (TARGET_THUMB2 && ((offset & 3) != 0))
15079 return false;
15081 if (TARGET_THUMB2)
15082 max_offset = 1020;
15083 else if (TARGET_ARM)
15084 max_offset = 255;
15085 else
15086 return false;
15088 return ((offset <= max_offset) && (offset >= -max_offset));
15091 /* Checks whether the operands are valid for use in an LDRD/STRD instruction.
15092 Assumes that RT, RT2, and RN are REG. This is guaranteed by the patterns.
15093 Assumes that the address in the base register RN is word aligned. Pattern
15094 guarantees that both memory accesses use the same base register,
15095 the offsets are constants within the range, and the gap between the offsets is 4.
15096 If preload complete then check that registers are legal. WBACK indicates whether
15097 address is updated. LOAD indicates whether memory access is load or store. */
15098 bool
15099 operands_ok_ldrd_strd (rtx rt, rtx rt2, rtx rn, HOST_WIDE_INT offset,
15100 bool wback, bool load)
15102 unsigned int t, t2, n;
15104 if (!reload_completed)
15105 return true;
15107 if (!offset_ok_for_ldrd_strd (offset))
15108 return false;
15110 t = REGNO (rt);
15111 t2 = REGNO (rt2);
15112 n = REGNO (rn);
15114 if ((TARGET_THUMB2)
15115 && ((wback && (n == t || n == t2))
15116 || (t == SP_REGNUM)
15117 || (t == PC_REGNUM)
15118 || (t2 == SP_REGNUM)
15119 || (t2 == PC_REGNUM)
15120 || (!load && (n == PC_REGNUM))
15121 || (load && (t == t2))
15122 /* Triggers Cortex-M3 LDRD errata. */
15123 || (!wback && load && fix_cm3_ldrd && (n == t))))
15124 return false;
15126 if ((TARGET_ARM)
15127 && ((wback && (n == t || n == t2))
15128 || (t2 == PC_REGNUM)
15129 || (t % 2 != 0) /* First destination register is not even. */
15130 || (t2 != t + 1)
15131 /* PC can be used as base register (for offset addressing only),
15132 but it is depricated. */
15133 || (n == PC_REGNUM)))
15134 return false;
15136 return true;
15139 /* Helper for gen_operands_ldrd_strd. Returns true iff the memory
15140 operand MEM's address contains an immediate offset from the base
15141 register and has no side effects, in which case it sets BASE and
15142 OFFSET accordingly. */
15143 static bool
15144 mem_ok_for_ldrd_strd (rtx mem, rtx *base, rtx *offset)
15146 rtx addr;
15148 gcc_assert (base != NULL && offset != NULL);
15150 /* TODO: Handle more general memory operand patterns, such as
15151 PRE_DEC and PRE_INC. */
15153 if (side_effects_p (mem))
15154 return false;
15156 /* Can't deal with subregs. */
15157 if (GET_CODE (mem) == SUBREG)
15158 return false;
15160 gcc_assert (MEM_P (mem));
15162 *offset = const0_rtx;
15164 addr = XEXP (mem, 0);
15166 /* If addr isn't valid for DImode, then we can't handle it. */
15167 if (!arm_legitimate_address_p (DImode, addr,
15168 reload_in_progress || reload_completed))
15169 return false;
15171 if (REG_P (addr))
15173 *base = addr;
15174 return true;
15176 else if (GET_CODE (addr) == PLUS || GET_CODE (addr) == MINUS)
15178 *base = XEXP (addr, 0);
15179 *offset = XEXP (addr, 1);
15180 return (REG_P (*base) && CONST_INT_P (*offset));
15183 return false;
15186 /* Called from a peephole2 to replace two word-size accesses with a
15187 single LDRD/STRD instruction. Returns true iff we can generate a
15188 new instruction sequence. That is, both accesses use the same base
15189 register and the gap between constant offsets is 4. This function
15190 may reorder its operands to match ldrd/strd RTL templates.
15191 OPERANDS are the operands found by the peephole matcher;
15192 OPERANDS[0,1] are register operands, and OPERANDS[2,3] are the
15193 corresponding memory operands. LOAD indicaates whether the access
15194 is load or store. CONST_STORE indicates a store of constant
15195 integer values held in OPERANDS[4,5] and assumes that the pattern
15196 is of length 4 insn, for the purpose of checking dead registers.
15197 COMMUTE indicates that register operands may be reordered. */
15198 bool
15199 gen_operands_ldrd_strd (rtx *operands, bool load,
15200 bool const_store, bool commute)
15202 int nops = 2;
15203 HOST_WIDE_INT offsets[2], offset;
15204 rtx base = NULL_RTX;
15205 rtx cur_base, cur_offset, tmp;
15206 int i, gap;
15207 HARD_REG_SET regset;
15209 gcc_assert (!const_store || !load);
15210 /* Check that the memory references are immediate offsets from the
15211 same base register. Extract the base register, the destination
15212 registers, and the corresponding memory offsets. */
15213 for (i = 0; i < nops; i++)
15215 if (!mem_ok_for_ldrd_strd (operands[nops+i], &cur_base, &cur_offset))
15216 return false;
15218 if (i == 0)
15219 base = cur_base;
15220 else if (REGNO (base) != REGNO (cur_base))
15221 return false;
15223 offsets[i] = INTVAL (cur_offset);
15224 if (GET_CODE (operands[i]) == SUBREG)
15226 tmp = SUBREG_REG (operands[i]);
15227 gcc_assert (GET_MODE (operands[i]) == GET_MODE (tmp));
15228 operands[i] = tmp;
15232 /* Make sure there is no dependency between the individual loads. */
15233 if (load && REGNO (operands[0]) == REGNO (base))
15234 return false; /* RAW */
15236 if (load && REGNO (operands[0]) == REGNO (operands[1]))
15237 return false; /* WAW */
15239 /* If the same input register is used in both stores
15240 when storing different constants, try to find a free register.
15241 For example, the code
15242 mov r0, 0
15243 str r0, [r2]
15244 mov r0, 1
15245 str r0, [r2, #4]
15246 can be transformed into
15247 mov r1, 0
15248 mov r0, 1
15249 strd r1, r0, [r2]
15250 in Thumb mode assuming that r1 is free.
15251 For ARM mode do the same but only if the starting register
15252 can be made to be even. */
15253 if (const_store
15254 && REGNO (operands[0]) == REGNO (operands[1])
15255 && INTVAL (operands[4]) != INTVAL (operands[5]))
15257 if (TARGET_THUMB2)
15259 CLEAR_HARD_REG_SET (regset);
15260 tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
15261 if (tmp == NULL_RTX)
15262 return false;
15264 /* Use the new register in the first load to ensure that
15265 if the original input register is not dead after peephole,
15266 then it will have the correct constant value. */
15267 operands[0] = tmp;
15269 else if (TARGET_ARM)
15271 int regno = REGNO (operands[0]);
15272 if (!peep2_reg_dead_p (4, operands[0]))
15274 /* When the input register is even and is not dead after the
15275 pattern, it has to hold the second constant but we cannot
15276 form a legal STRD in ARM mode with this register as the second
15277 register. */
15278 if (regno % 2 == 0)
15279 return false;
15281 /* Is regno-1 free? */
15282 SET_HARD_REG_SET (regset);
15283 CLEAR_HARD_REG_BIT(regset, regno - 1);
15284 tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
15285 if (tmp == NULL_RTX)
15286 return false;
15288 operands[0] = tmp;
15290 else
15292 /* Find a DImode register. */
15293 CLEAR_HARD_REG_SET (regset);
15294 tmp = peep2_find_free_register (0, 4, "r", DImode, &regset);
15295 if (tmp != NULL_RTX)
15297 operands[0] = simplify_gen_subreg (SImode, tmp, DImode, 0);
15298 operands[1] = simplify_gen_subreg (SImode, tmp, DImode, 4);
15300 else
15302 /* Can we use the input register to form a DI register? */
15303 SET_HARD_REG_SET (regset);
15304 CLEAR_HARD_REG_BIT(regset,
15305 regno % 2 == 0 ? regno + 1 : regno - 1);
15306 tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
15307 if (tmp == NULL_RTX)
15308 return false;
15309 operands[regno % 2 == 1 ? 0 : 1] = tmp;
15313 gcc_assert (operands[0] != NULL_RTX);
15314 gcc_assert (operands[1] != NULL_RTX);
15315 gcc_assert (REGNO (operands[0]) % 2 == 0);
15316 gcc_assert (REGNO (operands[1]) == REGNO (operands[0]) + 1);
15320 /* Make sure the instructions are ordered with lower memory access first. */
15321 if (offsets[0] > offsets[1])
15323 gap = offsets[0] - offsets[1];
15324 offset = offsets[1];
15326 /* Swap the instructions such that lower memory is accessed first. */
15327 std::swap (operands[0], operands[1]);
15328 std::swap (operands[2], operands[3]);
15329 if (const_store)
15330 std::swap (operands[4], operands[5]);
15332 else
15334 gap = offsets[1] - offsets[0];
15335 offset = offsets[0];
15338 /* Make sure accesses are to consecutive memory locations. */
15339 if (gap != 4)
15340 return false;
15342 /* Make sure we generate legal instructions. */
15343 if (operands_ok_ldrd_strd (operands[0], operands[1], base, offset,
15344 false, load))
15345 return true;
15347 /* In Thumb state, where registers are almost unconstrained, there
15348 is little hope to fix it. */
15349 if (TARGET_THUMB2)
15350 return false;
15352 if (load && commute)
15354 /* Try reordering registers. */
15355 std::swap (operands[0], operands[1]);
15356 if (operands_ok_ldrd_strd (operands[0], operands[1], base, offset,
15357 false, load))
15358 return true;
15361 if (const_store)
15363 /* If input registers are dead after this pattern, they can be
15364 reordered or replaced by other registers that are free in the
15365 current pattern. */
15366 if (!peep2_reg_dead_p (4, operands[0])
15367 || !peep2_reg_dead_p (4, operands[1]))
15368 return false;
15370 /* Try to reorder the input registers. */
15371 /* For example, the code
15372 mov r0, 0
15373 mov r1, 1
15374 str r1, [r2]
15375 str r0, [r2, #4]
15376 can be transformed into
15377 mov r1, 0
15378 mov r0, 1
15379 strd r0, [r2]
15381 if (operands_ok_ldrd_strd (operands[1], operands[0], base, offset,
15382 false, false))
15384 std::swap (operands[0], operands[1]);
15385 return true;
15388 /* Try to find a free DI register. */
15389 CLEAR_HARD_REG_SET (regset);
15390 add_to_hard_reg_set (&regset, SImode, REGNO (operands[0]));
15391 add_to_hard_reg_set (&regset, SImode, REGNO (operands[1]));
15392 while (true)
15394 tmp = peep2_find_free_register (0, 4, "r", DImode, &regset);
15395 if (tmp == NULL_RTX)
15396 return false;
15398 /* DREG must be an even-numbered register in DImode.
15399 Split it into SI registers. */
15400 operands[0] = simplify_gen_subreg (SImode, tmp, DImode, 0);
15401 operands[1] = simplify_gen_subreg (SImode, tmp, DImode, 4);
15402 gcc_assert (operands[0] != NULL_RTX);
15403 gcc_assert (operands[1] != NULL_RTX);
15404 gcc_assert (REGNO (operands[0]) % 2 == 0);
15405 gcc_assert (REGNO (operands[0]) + 1 == REGNO (operands[1]));
15407 return (operands_ok_ldrd_strd (operands[0], operands[1],
15408 base, offset,
15409 false, load));
15413 return false;
15419 /* Print a symbolic form of X to the debug file, F. */
15420 static void
15421 arm_print_value (FILE *f, rtx x)
15423 switch (GET_CODE (x))
15425 case CONST_INT:
15426 fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (x));
15427 return;
15429 case CONST_DOUBLE:
15430 fprintf (f, "<0x%lx,0x%lx>", (long)XWINT (x, 2), (long)XWINT (x, 3));
15431 return;
15433 case CONST_VECTOR:
15435 int i;
15437 fprintf (f, "<");
15438 for (i = 0; i < CONST_VECTOR_NUNITS (x); i++)
15440 fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (CONST_VECTOR_ELT (x, i)));
15441 if (i < (CONST_VECTOR_NUNITS (x) - 1))
15442 fputc (',', f);
15444 fprintf (f, ">");
15446 return;
15448 case CONST_STRING:
15449 fprintf (f, "\"%s\"", XSTR (x, 0));
15450 return;
15452 case SYMBOL_REF:
15453 fprintf (f, "`%s'", XSTR (x, 0));
15454 return;
15456 case LABEL_REF:
15457 fprintf (f, "L%d", INSN_UID (XEXP (x, 0)));
15458 return;
15460 case CONST:
15461 arm_print_value (f, XEXP (x, 0));
15462 return;
15464 case PLUS:
15465 arm_print_value (f, XEXP (x, 0));
15466 fprintf (f, "+");
15467 arm_print_value (f, XEXP (x, 1));
15468 return;
15470 case PC:
15471 fprintf (f, "pc");
15472 return;
15474 default:
15475 fprintf (f, "????");
15476 return;
15480 /* Routines for manipulation of the constant pool. */
15482 /* Arm instructions cannot load a large constant directly into a
15483 register; they have to come from a pc relative load. The constant
15484 must therefore be placed in the addressable range of the pc
15485 relative load. Depending on the precise pc relative load
15486 instruction the range is somewhere between 256 bytes and 4k. This
15487 means that we often have to dump a constant inside a function, and
15488 generate code to branch around it.
15490 It is important to minimize this, since the branches will slow
15491 things down and make the code larger.
15493 Normally we can hide the table after an existing unconditional
15494 branch so that there is no interruption of the flow, but in the
15495 worst case the code looks like this:
15497 ldr rn, L1
15499 b L2
15500 align
15501 L1: .long value
15505 ldr rn, L3
15507 b L4
15508 align
15509 L3: .long value
15513 We fix this by performing a scan after scheduling, which notices
15514 which instructions need to have their operands fetched from the
15515 constant table and builds the table.
15517 The algorithm starts by building a table of all the constants that
15518 need fixing up and all the natural barriers in the function (places
15519 where a constant table can be dropped without breaking the flow).
15520 For each fixup we note how far the pc-relative replacement will be
15521 able to reach and the offset of the instruction into the function.
15523 Having built the table we then group the fixes together to form
15524 tables that are as large as possible (subject to addressing
15525 constraints) and emit each table of constants after the last
15526 barrier that is within range of all the instructions in the group.
15527 If a group does not contain a barrier, then we forcibly create one
15528 by inserting a jump instruction into the flow. Once the table has
15529 been inserted, the insns are then modified to reference the
15530 relevant entry in the pool.
15532 Possible enhancements to the algorithm (not implemented) are:
15534 1) For some processors and object formats, there may be benefit in
15535 aligning the pools to the start of cache lines; this alignment
15536 would need to be taken into account when calculating addressability
15537 of a pool. */
15539 /* These typedefs are located at the start of this file, so that
15540 they can be used in the prototypes there. This comment is to
15541 remind readers of that fact so that the following structures
15542 can be understood more easily.
15544 typedef struct minipool_node Mnode;
15545 typedef struct minipool_fixup Mfix; */
15547 struct minipool_node
15549 /* Doubly linked chain of entries. */
15550 Mnode * next;
15551 Mnode * prev;
15552 /* The maximum offset into the code that this entry can be placed. While
15553 pushing fixes for forward references, all entries are sorted in order
15554 of increasing max_address. */
15555 HOST_WIDE_INT max_address;
15556 /* Similarly for an entry inserted for a backwards ref. */
15557 HOST_WIDE_INT min_address;
15558 /* The number of fixes referencing this entry. This can become zero
15559 if we "unpush" an entry. In this case we ignore the entry when we
15560 come to emit the code. */
15561 int refcount;
15562 /* The offset from the start of the minipool. */
15563 HOST_WIDE_INT offset;
15564 /* The value in table. */
15565 rtx value;
15566 /* The mode of value. */
15567 machine_mode mode;
15568 /* The size of the value. With iWMMXt enabled
15569 sizes > 4 also imply an alignment of 8-bytes. */
15570 int fix_size;
15573 struct minipool_fixup
15575 Mfix * next;
15576 rtx_insn * insn;
15577 HOST_WIDE_INT address;
15578 rtx * loc;
15579 machine_mode mode;
15580 int fix_size;
15581 rtx value;
15582 Mnode * minipool;
15583 HOST_WIDE_INT forwards;
15584 HOST_WIDE_INT backwards;
15587 /* Fixes less than a word need padding out to a word boundary. */
15588 #define MINIPOOL_FIX_SIZE(mode) \
15589 (GET_MODE_SIZE ((mode)) >= 4 ? GET_MODE_SIZE ((mode)) : 4)
15591 static Mnode * minipool_vector_head;
15592 static Mnode * minipool_vector_tail;
15593 static rtx_code_label *minipool_vector_label;
15594 static int minipool_pad;
15596 /* The linked list of all minipool fixes required for this function. */
15597 Mfix * minipool_fix_head;
15598 Mfix * minipool_fix_tail;
15599 /* The fix entry for the current minipool, once it has been placed. */
15600 Mfix * minipool_barrier;
15602 #ifndef JUMP_TABLES_IN_TEXT_SECTION
15603 #define JUMP_TABLES_IN_TEXT_SECTION 0
15604 #endif
15606 static HOST_WIDE_INT
15607 get_jump_table_size (rtx_jump_table_data *insn)
15609 /* ADDR_VECs only take room if read-only data does into the text
15610 section. */
15611 if (JUMP_TABLES_IN_TEXT_SECTION || readonly_data_section == text_section)
15613 rtx body = PATTERN (insn);
15614 int elt = GET_CODE (body) == ADDR_DIFF_VEC ? 1 : 0;
15615 HOST_WIDE_INT size;
15616 HOST_WIDE_INT modesize;
15618 modesize = GET_MODE_SIZE (GET_MODE (body));
15619 size = modesize * XVECLEN (body, elt);
15620 switch (modesize)
15622 case 1:
15623 /* Round up size of TBB table to a halfword boundary. */
15624 size = (size + 1) & ~HOST_WIDE_INT_1;
15625 break;
15626 case 2:
15627 /* No padding necessary for TBH. */
15628 break;
15629 case 4:
15630 /* Add two bytes for alignment on Thumb. */
15631 if (TARGET_THUMB)
15632 size += 2;
15633 break;
15634 default:
15635 gcc_unreachable ();
15637 return size;
15640 return 0;
15643 /* Return the maximum amount of padding that will be inserted before
15644 label LABEL. */
15646 static HOST_WIDE_INT
15647 get_label_padding (rtx label)
15649 HOST_WIDE_INT align, min_insn_size;
15651 align = 1 << label_to_alignment (label);
15652 min_insn_size = TARGET_THUMB ? 2 : 4;
15653 return align > min_insn_size ? align - min_insn_size : 0;
15656 /* Move a minipool fix MP from its current location to before MAX_MP.
15657 If MAX_MP is NULL, then MP doesn't need moving, but the addressing
15658 constraints may need updating. */
15659 static Mnode *
15660 move_minipool_fix_forward_ref (Mnode *mp, Mnode *max_mp,
15661 HOST_WIDE_INT max_address)
15663 /* The code below assumes these are different. */
15664 gcc_assert (mp != max_mp);
15666 if (max_mp == NULL)
15668 if (max_address < mp->max_address)
15669 mp->max_address = max_address;
15671 else
15673 if (max_address > max_mp->max_address - mp->fix_size)
15674 mp->max_address = max_mp->max_address - mp->fix_size;
15675 else
15676 mp->max_address = max_address;
15678 /* Unlink MP from its current position. Since max_mp is non-null,
15679 mp->prev must be non-null. */
15680 mp->prev->next = mp->next;
15681 if (mp->next != NULL)
15682 mp->next->prev = mp->prev;
15683 else
15684 minipool_vector_tail = mp->prev;
15686 /* Re-insert it before MAX_MP. */
15687 mp->next = max_mp;
15688 mp->prev = max_mp->prev;
15689 max_mp->prev = mp;
15691 if (mp->prev != NULL)
15692 mp->prev->next = mp;
15693 else
15694 minipool_vector_head = mp;
15697 /* Save the new entry. */
15698 max_mp = mp;
15700 /* Scan over the preceding entries and adjust their addresses as
15701 required. */
15702 while (mp->prev != NULL
15703 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
15705 mp->prev->max_address = mp->max_address - mp->prev->fix_size;
15706 mp = mp->prev;
15709 return max_mp;
15712 /* Add a constant to the minipool for a forward reference. Returns the
15713 node added or NULL if the constant will not fit in this pool. */
15714 static Mnode *
15715 add_minipool_forward_ref (Mfix *fix)
15717 /* If set, max_mp is the first pool_entry that has a lower
15718 constraint than the one we are trying to add. */
15719 Mnode * max_mp = NULL;
15720 HOST_WIDE_INT max_address = fix->address + fix->forwards - minipool_pad;
15721 Mnode * mp;
15723 /* If the minipool starts before the end of FIX->INSN then this FIX
15724 can not be placed into the current pool. Furthermore, adding the
15725 new constant pool entry may cause the pool to start FIX_SIZE bytes
15726 earlier. */
15727 if (minipool_vector_head &&
15728 (fix->address + get_attr_length (fix->insn)
15729 >= minipool_vector_head->max_address - fix->fix_size))
15730 return NULL;
15732 /* Scan the pool to see if a constant with the same value has
15733 already been added. While we are doing this, also note the
15734 location where we must insert the constant if it doesn't already
15735 exist. */
15736 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
15738 if (GET_CODE (fix->value) == GET_CODE (mp->value)
15739 && fix->mode == mp->mode
15740 && (!LABEL_P (fix->value)
15741 || (CODE_LABEL_NUMBER (fix->value)
15742 == CODE_LABEL_NUMBER (mp->value)))
15743 && rtx_equal_p (fix->value, mp->value))
15745 /* More than one fix references this entry. */
15746 mp->refcount++;
15747 return move_minipool_fix_forward_ref (mp, max_mp, max_address);
15750 /* Note the insertion point if necessary. */
15751 if (max_mp == NULL
15752 && mp->max_address > max_address)
15753 max_mp = mp;
15755 /* If we are inserting an 8-bytes aligned quantity and
15756 we have not already found an insertion point, then
15757 make sure that all such 8-byte aligned quantities are
15758 placed at the start of the pool. */
15759 if (ARM_DOUBLEWORD_ALIGN
15760 && max_mp == NULL
15761 && fix->fix_size >= 8
15762 && mp->fix_size < 8)
15764 max_mp = mp;
15765 max_address = mp->max_address;
15769 /* The value is not currently in the minipool, so we need to create
15770 a new entry for it. If MAX_MP is NULL, the entry will be put on
15771 the end of the list since the placement is less constrained than
15772 any existing entry. Otherwise, we insert the new fix before
15773 MAX_MP and, if necessary, adjust the constraints on the other
15774 entries. */
15775 mp = XNEW (Mnode);
15776 mp->fix_size = fix->fix_size;
15777 mp->mode = fix->mode;
15778 mp->value = fix->value;
15779 mp->refcount = 1;
15780 /* Not yet required for a backwards ref. */
15781 mp->min_address = -65536;
15783 if (max_mp == NULL)
15785 mp->max_address = max_address;
15786 mp->next = NULL;
15787 mp->prev = minipool_vector_tail;
15789 if (mp->prev == NULL)
15791 minipool_vector_head = mp;
15792 minipool_vector_label = gen_label_rtx ();
15794 else
15795 mp->prev->next = mp;
15797 minipool_vector_tail = mp;
15799 else
15801 if (max_address > max_mp->max_address - mp->fix_size)
15802 mp->max_address = max_mp->max_address - mp->fix_size;
15803 else
15804 mp->max_address = max_address;
15806 mp->next = max_mp;
15807 mp->prev = max_mp->prev;
15808 max_mp->prev = mp;
15809 if (mp->prev != NULL)
15810 mp->prev->next = mp;
15811 else
15812 minipool_vector_head = mp;
15815 /* Save the new entry. */
15816 max_mp = mp;
15818 /* Scan over the preceding entries and adjust their addresses as
15819 required. */
15820 while (mp->prev != NULL
15821 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
15823 mp->prev->max_address = mp->max_address - mp->prev->fix_size;
15824 mp = mp->prev;
15827 return max_mp;
15830 static Mnode *
15831 move_minipool_fix_backward_ref (Mnode *mp, Mnode *min_mp,
15832 HOST_WIDE_INT min_address)
15834 HOST_WIDE_INT offset;
15836 /* The code below assumes these are different. */
15837 gcc_assert (mp != min_mp);
15839 if (min_mp == NULL)
15841 if (min_address > mp->min_address)
15842 mp->min_address = min_address;
15844 else
15846 /* We will adjust this below if it is too loose. */
15847 mp->min_address = min_address;
15849 /* Unlink MP from its current position. Since min_mp is non-null,
15850 mp->next must be non-null. */
15851 mp->next->prev = mp->prev;
15852 if (mp->prev != NULL)
15853 mp->prev->next = mp->next;
15854 else
15855 minipool_vector_head = mp->next;
15857 /* Reinsert it after MIN_MP. */
15858 mp->prev = min_mp;
15859 mp->next = min_mp->next;
15860 min_mp->next = mp;
15861 if (mp->next != NULL)
15862 mp->next->prev = mp;
15863 else
15864 minipool_vector_tail = mp;
15867 min_mp = mp;
15869 offset = 0;
15870 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
15872 mp->offset = offset;
15873 if (mp->refcount > 0)
15874 offset += mp->fix_size;
15876 if (mp->next && mp->next->min_address < mp->min_address + mp->fix_size)
15877 mp->next->min_address = mp->min_address + mp->fix_size;
15880 return min_mp;
15883 /* Add a constant to the minipool for a backward reference. Returns the
15884 node added or NULL if the constant will not fit in this pool.
15886 Note that the code for insertion for a backwards reference can be
15887 somewhat confusing because the calculated offsets for each fix do
15888 not take into account the size of the pool (which is still under
15889 construction. */
15890 static Mnode *
15891 add_minipool_backward_ref (Mfix *fix)
15893 /* If set, min_mp is the last pool_entry that has a lower constraint
15894 than the one we are trying to add. */
15895 Mnode *min_mp = NULL;
15896 /* This can be negative, since it is only a constraint. */
15897 HOST_WIDE_INT min_address = fix->address - fix->backwards;
15898 Mnode *mp;
15900 /* If we can't reach the current pool from this insn, or if we can't
15901 insert this entry at the end of the pool without pushing other
15902 fixes out of range, then we don't try. This ensures that we
15903 can't fail later on. */
15904 if (min_address >= minipool_barrier->address
15905 || (minipool_vector_tail->min_address + fix->fix_size
15906 >= minipool_barrier->address))
15907 return NULL;
15909 /* Scan the pool to see if a constant with the same value has
15910 already been added. While we are doing this, also note the
15911 location where we must insert the constant if it doesn't already
15912 exist. */
15913 for (mp = minipool_vector_tail; mp != NULL; mp = mp->prev)
15915 if (GET_CODE (fix->value) == GET_CODE (mp->value)
15916 && fix->mode == mp->mode
15917 && (!LABEL_P (fix->value)
15918 || (CODE_LABEL_NUMBER (fix->value)
15919 == CODE_LABEL_NUMBER (mp->value)))
15920 && rtx_equal_p (fix->value, mp->value)
15921 /* Check that there is enough slack to move this entry to the
15922 end of the table (this is conservative). */
15923 && (mp->max_address
15924 > (minipool_barrier->address
15925 + minipool_vector_tail->offset
15926 + minipool_vector_tail->fix_size)))
15928 mp->refcount++;
15929 return move_minipool_fix_backward_ref (mp, min_mp, min_address);
15932 if (min_mp != NULL)
15933 mp->min_address += fix->fix_size;
15934 else
15936 /* Note the insertion point if necessary. */
15937 if (mp->min_address < min_address)
15939 /* For now, we do not allow the insertion of 8-byte alignment
15940 requiring nodes anywhere but at the start of the pool. */
15941 if (ARM_DOUBLEWORD_ALIGN
15942 && fix->fix_size >= 8 && mp->fix_size < 8)
15943 return NULL;
15944 else
15945 min_mp = mp;
15947 else if (mp->max_address
15948 < minipool_barrier->address + mp->offset + fix->fix_size)
15950 /* Inserting before this entry would push the fix beyond
15951 its maximum address (which can happen if we have
15952 re-located a forwards fix); force the new fix to come
15953 after it. */
15954 if (ARM_DOUBLEWORD_ALIGN
15955 && fix->fix_size >= 8 && mp->fix_size < 8)
15956 return NULL;
15957 else
15959 min_mp = mp;
15960 min_address = mp->min_address + fix->fix_size;
15963 /* Do not insert a non-8-byte aligned quantity before 8-byte
15964 aligned quantities. */
15965 else if (ARM_DOUBLEWORD_ALIGN
15966 && fix->fix_size < 8
15967 && mp->fix_size >= 8)
15969 min_mp = mp;
15970 min_address = mp->min_address + fix->fix_size;
15975 /* We need to create a new entry. */
15976 mp = XNEW (Mnode);
15977 mp->fix_size = fix->fix_size;
15978 mp->mode = fix->mode;
15979 mp->value = fix->value;
15980 mp->refcount = 1;
15981 mp->max_address = minipool_barrier->address + 65536;
15983 mp->min_address = min_address;
15985 if (min_mp == NULL)
15987 mp->prev = NULL;
15988 mp->next = minipool_vector_head;
15990 if (mp->next == NULL)
15992 minipool_vector_tail = mp;
15993 minipool_vector_label = gen_label_rtx ();
15995 else
15996 mp->next->prev = mp;
15998 minipool_vector_head = mp;
16000 else
16002 mp->next = min_mp->next;
16003 mp->prev = min_mp;
16004 min_mp->next = mp;
16006 if (mp->next != NULL)
16007 mp->next->prev = mp;
16008 else
16009 minipool_vector_tail = mp;
16012 /* Save the new entry. */
16013 min_mp = mp;
16015 if (mp->prev)
16016 mp = mp->prev;
16017 else
16018 mp->offset = 0;
16020 /* Scan over the following entries and adjust their offsets. */
16021 while (mp->next != NULL)
16023 if (mp->next->min_address < mp->min_address + mp->fix_size)
16024 mp->next->min_address = mp->min_address + mp->fix_size;
16026 if (mp->refcount)
16027 mp->next->offset = mp->offset + mp->fix_size;
16028 else
16029 mp->next->offset = mp->offset;
16031 mp = mp->next;
16034 return min_mp;
16037 static void
16038 assign_minipool_offsets (Mfix *barrier)
16040 HOST_WIDE_INT offset = 0;
16041 Mnode *mp;
16043 minipool_barrier = barrier;
16045 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
16047 mp->offset = offset;
16049 if (mp->refcount > 0)
16050 offset += mp->fix_size;
16054 /* Output the literal table */
16055 static void
16056 dump_minipool (rtx_insn *scan)
16058 Mnode * mp;
16059 Mnode * nmp;
16060 int align64 = 0;
16062 if (ARM_DOUBLEWORD_ALIGN)
16063 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
16064 if (mp->refcount > 0 && mp->fix_size >= 8)
16066 align64 = 1;
16067 break;
16070 if (dump_file)
16071 fprintf (dump_file,
16072 ";; Emitting minipool after insn %u; address %ld; align %d (bytes)\n",
16073 INSN_UID (scan), (unsigned long) minipool_barrier->address, align64 ? 8 : 4);
16075 scan = emit_label_after (gen_label_rtx (), scan);
16076 scan = emit_insn_after (align64 ? gen_align_8 () : gen_align_4 (), scan);
16077 scan = emit_label_after (minipool_vector_label, scan);
16079 for (mp = minipool_vector_head; mp != NULL; mp = nmp)
16081 if (mp->refcount > 0)
16083 if (dump_file)
16085 fprintf (dump_file,
16086 ";; Offset %u, min %ld, max %ld ",
16087 (unsigned) mp->offset, (unsigned long) mp->min_address,
16088 (unsigned long) mp->max_address);
16089 arm_print_value (dump_file, mp->value);
16090 fputc ('\n', dump_file);
16093 rtx val = copy_rtx (mp->value);
16095 switch (GET_MODE_SIZE (mp->mode))
16097 #ifdef HAVE_consttable_1
16098 case 1:
16099 scan = emit_insn_after (gen_consttable_1 (val), scan);
16100 break;
16102 #endif
16103 #ifdef HAVE_consttable_2
16104 case 2:
16105 scan = emit_insn_after (gen_consttable_2 (val), scan);
16106 break;
16108 #endif
16109 #ifdef HAVE_consttable_4
16110 case 4:
16111 scan = emit_insn_after (gen_consttable_4 (val), scan);
16112 break;
16114 #endif
16115 #ifdef HAVE_consttable_8
16116 case 8:
16117 scan = emit_insn_after (gen_consttable_8 (val), scan);
16118 break;
16120 #endif
16121 #ifdef HAVE_consttable_16
16122 case 16:
16123 scan = emit_insn_after (gen_consttable_16 (val), scan);
16124 break;
16126 #endif
16127 default:
16128 gcc_unreachable ();
16132 nmp = mp->next;
16133 free (mp);
16136 minipool_vector_head = minipool_vector_tail = NULL;
16137 scan = emit_insn_after (gen_consttable_end (), scan);
16138 scan = emit_barrier_after (scan);
16141 /* Return the cost of forcibly inserting a barrier after INSN. */
16142 static int
16143 arm_barrier_cost (rtx_insn *insn)
16145 /* Basing the location of the pool on the loop depth is preferable,
16146 but at the moment, the basic block information seems to be
16147 corrupt by this stage of the compilation. */
16148 int base_cost = 50;
16149 rtx_insn *next = next_nonnote_insn (insn);
16151 if (next != NULL && LABEL_P (next))
16152 base_cost -= 20;
16154 switch (GET_CODE (insn))
16156 case CODE_LABEL:
16157 /* It will always be better to place the table before the label, rather
16158 than after it. */
16159 return 50;
16161 case INSN:
16162 case CALL_INSN:
16163 return base_cost;
16165 case JUMP_INSN:
16166 return base_cost - 10;
16168 default:
16169 return base_cost + 10;
16173 /* Find the best place in the insn stream in the range
16174 (FIX->address,MAX_ADDRESS) to forcibly insert a minipool barrier.
16175 Create the barrier by inserting a jump and add a new fix entry for
16176 it. */
16177 static Mfix *
16178 create_fix_barrier (Mfix *fix, HOST_WIDE_INT max_address)
16180 HOST_WIDE_INT count = 0;
16181 rtx_barrier *barrier;
16182 rtx_insn *from = fix->insn;
16183 /* The instruction after which we will insert the jump. */
16184 rtx_insn *selected = NULL;
16185 int selected_cost;
16186 /* The address at which the jump instruction will be placed. */
16187 HOST_WIDE_INT selected_address;
16188 Mfix * new_fix;
16189 HOST_WIDE_INT max_count = max_address - fix->address;
16190 rtx_code_label *label = gen_label_rtx ();
16192 selected_cost = arm_barrier_cost (from);
16193 selected_address = fix->address;
16195 while (from && count < max_count)
16197 rtx_jump_table_data *tmp;
16198 int new_cost;
16200 /* This code shouldn't have been called if there was a natural barrier
16201 within range. */
16202 gcc_assert (!BARRIER_P (from));
16204 /* Count the length of this insn. This must stay in sync with the
16205 code that pushes minipool fixes. */
16206 if (LABEL_P (from))
16207 count += get_label_padding (from);
16208 else
16209 count += get_attr_length (from);
16211 /* If there is a jump table, add its length. */
16212 if (tablejump_p (from, NULL, &tmp))
16214 count += get_jump_table_size (tmp);
16216 /* Jump tables aren't in a basic block, so base the cost on
16217 the dispatch insn. If we select this location, we will
16218 still put the pool after the table. */
16219 new_cost = arm_barrier_cost (from);
16221 if (count < max_count
16222 && (!selected || new_cost <= selected_cost))
16224 selected = tmp;
16225 selected_cost = new_cost;
16226 selected_address = fix->address + count;
16229 /* Continue after the dispatch table. */
16230 from = NEXT_INSN (tmp);
16231 continue;
16234 new_cost = arm_barrier_cost (from);
16236 if (count < max_count
16237 && (!selected || new_cost <= selected_cost))
16239 selected = from;
16240 selected_cost = new_cost;
16241 selected_address = fix->address + count;
16244 from = NEXT_INSN (from);
16247 /* Make sure that we found a place to insert the jump. */
16248 gcc_assert (selected);
16250 /* Make sure we do not split a call and its corresponding
16251 CALL_ARG_LOCATION note. */
16252 if (CALL_P (selected))
16254 rtx_insn *next = NEXT_INSN (selected);
16255 if (next && NOTE_P (next)
16256 && NOTE_KIND (next) == NOTE_INSN_CALL_ARG_LOCATION)
16257 selected = next;
16260 /* Create a new JUMP_INSN that branches around a barrier. */
16261 from = emit_jump_insn_after (gen_jump (label), selected);
16262 JUMP_LABEL (from) = label;
16263 barrier = emit_barrier_after (from);
16264 emit_label_after (label, barrier);
16266 /* Create a minipool barrier entry for the new barrier. */
16267 new_fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* new_fix));
16268 new_fix->insn = barrier;
16269 new_fix->address = selected_address;
16270 new_fix->next = fix->next;
16271 fix->next = new_fix;
16273 return new_fix;
16276 /* Record that there is a natural barrier in the insn stream at
16277 ADDRESS. */
16278 static void
16279 push_minipool_barrier (rtx_insn *insn, HOST_WIDE_INT address)
16281 Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));
16283 fix->insn = insn;
16284 fix->address = address;
16286 fix->next = NULL;
16287 if (minipool_fix_head != NULL)
16288 minipool_fix_tail->next = fix;
16289 else
16290 minipool_fix_head = fix;
16292 minipool_fix_tail = fix;
16295 /* Record INSN, which will need fixing up to load a value from the
16296 minipool. ADDRESS is the offset of the insn since the start of the
16297 function; LOC is a pointer to the part of the insn which requires
16298 fixing; VALUE is the constant that must be loaded, which is of type
16299 MODE. */
16300 static void
16301 push_minipool_fix (rtx_insn *insn, HOST_WIDE_INT address, rtx *loc,
16302 machine_mode mode, rtx value)
16304 Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));
16306 fix->insn = insn;
16307 fix->address = address;
16308 fix->loc = loc;
16309 fix->mode = mode;
16310 fix->fix_size = MINIPOOL_FIX_SIZE (mode);
16311 fix->value = value;
16312 fix->forwards = get_attr_pool_range (insn);
16313 fix->backwards = get_attr_neg_pool_range (insn);
16314 fix->minipool = NULL;
16316 /* If an insn doesn't have a range defined for it, then it isn't
16317 expecting to be reworked by this code. Better to stop now than
16318 to generate duff assembly code. */
16319 gcc_assert (fix->forwards || fix->backwards);
16321 /* If an entry requires 8-byte alignment then assume all constant pools
16322 require 4 bytes of padding. Trying to do this later on a per-pool
16323 basis is awkward because existing pool entries have to be modified. */
16324 if (ARM_DOUBLEWORD_ALIGN && fix->fix_size >= 8)
16325 minipool_pad = 4;
16327 if (dump_file)
16329 fprintf (dump_file,
16330 ";; %smode fixup for i%d; addr %lu, range (%ld,%ld): ",
16331 GET_MODE_NAME (mode),
16332 INSN_UID (insn), (unsigned long) address,
16333 -1 * (long)fix->backwards, (long)fix->forwards);
16334 arm_print_value (dump_file, fix->value);
16335 fprintf (dump_file, "\n");
16338 /* Add it to the chain of fixes. */
16339 fix->next = NULL;
16341 if (minipool_fix_head != NULL)
16342 minipool_fix_tail->next = fix;
16343 else
16344 minipool_fix_head = fix;
16346 minipool_fix_tail = fix;
16349 /* Return maximum allowed cost of synthesizing a 64-bit constant VAL inline.
16350 Returns the number of insns needed, or 99 if we always want to synthesize
16351 the value. */
16353 arm_max_const_double_inline_cost ()
16355 /* Let the value get synthesized to avoid the use of literal pools. */
16356 if (arm_disable_literal_pool)
16357 return 99;
16359 return ((optimize_size || arm_ld_sched) ? 3 : 4);
16362 /* Return the cost of synthesizing a 64-bit constant VAL inline.
16363 Returns the number of insns needed, or 99 if we don't know how to
16364 do it. */
16366 arm_const_double_inline_cost (rtx val)
16368 rtx lowpart, highpart;
16369 machine_mode mode;
16371 mode = GET_MODE (val);
16373 if (mode == VOIDmode)
16374 mode = DImode;
16376 gcc_assert (GET_MODE_SIZE (mode) == 8);
16378 lowpart = gen_lowpart (SImode, val);
16379 highpart = gen_highpart_mode (SImode, mode, val);
16381 gcc_assert (CONST_INT_P (lowpart));
16382 gcc_assert (CONST_INT_P (highpart));
16384 return (arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (lowpart),
16385 NULL_RTX, NULL_RTX, 0, 0)
16386 + arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (highpart),
16387 NULL_RTX, NULL_RTX, 0, 0));
16390 /* Cost of loading a SImode constant. */
16391 static inline int
16392 arm_const_inline_cost (enum rtx_code code, rtx val)
16394 return arm_gen_constant (code, SImode, NULL_RTX, INTVAL (val),
16395 NULL_RTX, NULL_RTX, 1, 0);
16398 /* Return true if it is worthwhile to split a 64-bit constant into two
16399 32-bit operations. This is the case if optimizing for size, or
16400 if we have load delay slots, or if one 32-bit part can be done with
16401 a single data operation. */
16402 bool
16403 arm_const_double_by_parts (rtx val)
16405 machine_mode mode = GET_MODE (val);
16406 rtx part;
16408 if (optimize_size || arm_ld_sched)
16409 return true;
16411 if (mode == VOIDmode)
16412 mode = DImode;
16414 part = gen_highpart_mode (SImode, mode, val);
16416 gcc_assert (CONST_INT_P (part));
16418 if (const_ok_for_arm (INTVAL (part))
16419 || const_ok_for_arm (~INTVAL (part)))
16420 return true;
16422 part = gen_lowpart (SImode, val);
16424 gcc_assert (CONST_INT_P (part));
16426 if (const_ok_for_arm (INTVAL (part))
16427 || const_ok_for_arm (~INTVAL (part)))
16428 return true;
16430 return false;
16433 /* Return true if it is possible to inline both the high and low parts
16434 of a 64-bit constant into 32-bit data processing instructions. */
16435 bool
16436 arm_const_double_by_immediates (rtx val)
16438 machine_mode mode = GET_MODE (val);
16439 rtx part;
16441 if (mode == VOIDmode)
16442 mode = DImode;
16444 part = gen_highpart_mode (SImode, mode, val);
16446 gcc_assert (CONST_INT_P (part));
16448 if (!const_ok_for_arm (INTVAL (part)))
16449 return false;
16451 part = gen_lowpart (SImode, val);
16453 gcc_assert (CONST_INT_P (part));
16455 if (!const_ok_for_arm (INTVAL (part)))
16456 return false;
16458 return true;
16461 /* Scan INSN and note any of its operands that need fixing.
16462 If DO_PUSHES is false we do not actually push any of the fixups
16463 needed. */
16464 static void
16465 note_invalid_constants (rtx_insn *insn, HOST_WIDE_INT address, int do_pushes)
16467 int opno;
16469 extract_constrain_insn (insn);
16471 if (recog_data.n_alternatives == 0)
16472 return;
16474 /* Fill in recog_op_alt with information about the constraints of
16475 this insn. */
16476 preprocess_constraints (insn);
16478 const operand_alternative *op_alt = which_op_alt ();
16479 for (opno = 0; opno < recog_data.n_operands; opno++)
16481 /* Things we need to fix can only occur in inputs. */
16482 if (recog_data.operand_type[opno] != OP_IN)
16483 continue;
16485 /* If this alternative is a memory reference, then any mention
16486 of constants in this alternative is really to fool reload
16487 into allowing us to accept one there. We need to fix them up
16488 now so that we output the right code. */
16489 if (op_alt[opno].memory_ok)
16491 rtx op = recog_data.operand[opno];
16493 if (CONSTANT_P (op))
16495 if (do_pushes)
16496 push_minipool_fix (insn, address, recog_data.operand_loc[opno],
16497 recog_data.operand_mode[opno], op);
16499 else if (MEM_P (op)
16500 && GET_CODE (XEXP (op, 0)) == SYMBOL_REF
16501 && CONSTANT_POOL_ADDRESS_P (XEXP (op, 0)))
16503 if (do_pushes)
16505 rtx cop = avoid_constant_pool_reference (op);
16507 /* Casting the address of something to a mode narrower
16508 than a word can cause avoid_constant_pool_reference()
16509 to return the pool reference itself. That's no good to
16510 us here. Lets just hope that we can use the
16511 constant pool value directly. */
16512 if (op == cop)
16513 cop = get_pool_constant (XEXP (op, 0));
16515 push_minipool_fix (insn, address,
16516 recog_data.operand_loc[opno],
16517 recog_data.operand_mode[opno], cop);
16524 return;
16527 /* This function computes the clear mask and PADDING_BITS_TO_CLEAR for structs
16528 and unions in the context of ARMv8-M Security Extensions. It is used as a
16529 helper function for both 'cmse_nonsecure_call' and 'cmse_nonsecure_entry'
16530 functions. The PADDING_BITS_TO_CLEAR pointer can be the base to either one
16531 or four masks, depending on whether it is being computed for a
16532 'cmse_nonsecure_entry' return value or a 'cmse_nonsecure_call' argument
16533 respectively. The tree for the type of the argument or a field within an
16534 argument is passed in ARG_TYPE, the current register this argument or field
16535 starts in is kept in the pointer REGNO and updated accordingly, the bit this
16536 argument or field starts at is passed in STARTING_BIT and the last used bit
16537 is kept in LAST_USED_BIT which is also updated accordingly. */
16539 static unsigned HOST_WIDE_INT
16540 comp_not_to_clear_mask_str_un (tree arg_type, int * regno,
16541 uint32_t * padding_bits_to_clear,
16542 unsigned starting_bit, int * last_used_bit)
16545 unsigned HOST_WIDE_INT not_to_clear_reg_mask = 0;
16547 if (TREE_CODE (arg_type) == RECORD_TYPE)
16549 unsigned current_bit = starting_bit;
16550 tree field;
16551 long int offset, size;
16554 field = TYPE_FIELDS (arg_type);
16555 while (field)
16557 /* The offset within a structure is always an offset from
16558 the start of that structure. Make sure we take that into the
16559 calculation of the register based offset that we use here. */
16560 offset = starting_bit;
16561 offset += TREE_INT_CST_ELT (DECL_FIELD_BIT_OFFSET (field), 0);
16562 offset %= 32;
16564 /* This is the actual size of the field, for bitfields this is the
16565 bitfield width and not the container size. */
16566 size = TREE_INT_CST_ELT (DECL_SIZE (field), 0);
16568 if (*last_used_bit != offset)
16570 if (offset < *last_used_bit)
16572 /* This field's offset is before the 'last_used_bit', that
16573 means this field goes on the next register. So we need to
16574 pad the rest of the current register and increase the
16575 register number. */
16576 uint32_t mask;
16577 mask = ((uint32_t)-1) - ((uint32_t) 1 << *last_used_bit);
16578 mask++;
16580 padding_bits_to_clear[*regno] |= mask;
16581 not_to_clear_reg_mask |= HOST_WIDE_INT_1U << *regno;
16582 (*regno)++;
16584 else
16586 /* Otherwise we pad the bits between the last field's end and
16587 the start of the new field. */
16588 uint32_t mask;
16590 mask = ((uint32_t)-1) >> (32 - offset);
16591 mask -= ((uint32_t) 1 << *last_used_bit) - 1;
16592 padding_bits_to_clear[*regno] |= mask;
16594 current_bit = offset;
16597 /* Calculate further padding bits for inner structs/unions too. */
16598 if (RECORD_OR_UNION_TYPE_P (TREE_TYPE (field)))
16600 *last_used_bit = current_bit;
16601 not_to_clear_reg_mask
16602 |= comp_not_to_clear_mask_str_un (TREE_TYPE (field), regno,
16603 padding_bits_to_clear, offset,
16604 last_used_bit);
16606 else
16608 /* Update 'current_bit' with this field's size. If the
16609 'current_bit' lies in a subsequent register, update 'regno' and
16610 reset 'current_bit' to point to the current bit in that new
16611 register. */
16612 current_bit += size;
16613 while (current_bit >= 32)
16615 current_bit-=32;
16616 not_to_clear_reg_mask |= HOST_WIDE_INT_1U << *regno;
16617 (*regno)++;
16619 *last_used_bit = current_bit;
16622 field = TREE_CHAIN (field);
16624 not_to_clear_reg_mask |= HOST_WIDE_INT_1U << *regno;
16626 else if (TREE_CODE (arg_type) == UNION_TYPE)
16628 tree field, field_t;
16629 int i, regno_t, field_size;
16630 int max_reg = -1;
16631 int max_bit = -1;
16632 uint32_t mask;
16633 uint32_t padding_bits_to_clear_res[NUM_ARG_REGS]
16634 = {-1, -1, -1, -1};
16636 /* To compute the padding bits in a union we only consider bits as
16637 padding bits if they are always either a padding bit or fall outside a
16638 fields size for all fields in the union. */
16639 field = TYPE_FIELDS (arg_type);
16640 while (field)
16642 uint32_t padding_bits_to_clear_t[NUM_ARG_REGS]
16643 = {0U, 0U, 0U, 0U};
16644 int last_used_bit_t = *last_used_bit;
16645 regno_t = *regno;
16646 field_t = TREE_TYPE (field);
16648 /* If the field's type is either a record or a union make sure to
16649 compute their padding bits too. */
16650 if (RECORD_OR_UNION_TYPE_P (field_t))
16651 not_to_clear_reg_mask
16652 |= comp_not_to_clear_mask_str_un (field_t, &regno_t,
16653 &padding_bits_to_clear_t[0],
16654 starting_bit, &last_used_bit_t);
16655 else
16657 field_size = TREE_INT_CST_ELT (DECL_SIZE (field), 0);
16658 regno_t = (field_size / 32) + *regno;
16659 last_used_bit_t = (starting_bit + field_size) % 32;
16662 for (i = *regno; i < regno_t; i++)
16664 /* For all but the last register used by this field only keep the
16665 padding bits that were padding bits in this field. */
16666 padding_bits_to_clear_res[i] &= padding_bits_to_clear_t[i];
16669 /* For the last register, keep all padding bits that were padding
16670 bits in this field and any padding bits that are still valid
16671 as padding bits but fall outside of this field's size. */
16672 mask = (((uint32_t) -1) - ((uint32_t) 1 << last_used_bit_t)) + 1;
16673 padding_bits_to_clear_res[regno_t]
16674 &= padding_bits_to_clear_t[regno_t] | mask;
16676 /* Update the maximum size of the fields in terms of registers used
16677 ('max_reg') and the 'last_used_bit' in said register. */
16678 if (max_reg < regno_t)
16680 max_reg = regno_t;
16681 max_bit = last_used_bit_t;
16683 else if (max_reg == regno_t && max_bit < last_used_bit_t)
16684 max_bit = last_used_bit_t;
16686 field = TREE_CHAIN (field);
16689 /* Update the current padding_bits_to_clear using the intersection of the
16690 padding bits of all the fields. */
16691 for (i=*regno; i < max_reg; i++)
16692 padding_bits_to_clear[i] |= padding_bits_to_clear_res[i];
16694 /* Do not keep trailing padding bits, we do not know yet whether this
16695 is the end of the argument. */
16696 mask = ((uint32_t) 1 << max_bit) - 1;
16697 padding_bits_to_clear[max_reg]
16698 |= padding_bits_to_clear_res[max_reg] & mask;
16700 *regno = max_reg;
16701 *last_used_bit = max_bit;
16703 else
16704 /* This function should only be used for structs and unions. */
16705 gcc_unreachable ();
16707 return not_to_clear_reg_mask;
16710 /* In the context of ARMv8-M Security Extensions, this function is used for both
16711 'cmse_nonsecure_call' and 'cmse_nonsecure_entry' functions to compute what
16712 registers are used when returning or passing arguments, which is then
16713 returned as a mask. It will also compute a mask to indicate padding/unused
16714 bits for each of these registers, and passes this through the
16715 PADDING_BITS_TO_CLEAR pointer. The tree of the argument type is passed in
16716 ARG_TYPE, the rtl representation of the argument is passed in ARG_RTX and
16717 the starting register used to pass this argument or return value is passed
16718 in REGNO. It makes use of 'comp_not_to_clear_mask_str_un' to compute these
16719 for struct and union types. */
16721 static unsigned HOST_WIDE_INT
16722 compute_not_to_clear_mask (tree arg_type, rtx arg_rtx, int regno,
16723 uint32_t * padding_bits_to_clear)
16726 int last_used_bit = 0;
16727 unsigned HOST_WIDE_INT not_to_clear_mask;
16729 if (RECORD_OR_UNION_TYPE_P (arg_type))
16731 not_to_clear_mask
16732 = comp_not_to_clear_mask_str_un (arg_type, &regno,
16733 padding_bits_to_clear, 0,
16734 &last_used_bit);
16737 /* If the 'last_used_bit' is not zero, that means we are still using a
16738 part of the last 'regno'. In such cases we must clear the trailing
16739 bits. Otherwise we are not using regno and we should mark it as to
16740 clear. */
16741 if (last_used_bit != 0)
16742 padding_bits_to_clear[regno]
16743 |= ((uint32_t)-1) - ((uint32_t) 1 << last_used_bit) + 1;
16744 else
16745 not_to_clear_mask &= ~(HOST_WIDE_INT_1U << regno);
16747 else
16749 not_to_clear_mask = 0;
16750 /* We are not dealing with structs nor unions. So these arguments may be
16751 passed in floating point registers too. In some cases a BLKmode is
16752 used when returning or passing arguments in multiple VFP registers. */
16753 if (GET_MODE (arg_rtx) == BLKmode)
16755 int i, arg_regs;
16756 rtx reg;
16758 /* This should really only occur when dealing with the hard-float
16759 ABI. */
16760 gcc_assert (TARGET_HARD_FLOAT_ABI);
16762 for (i = 0; i < XVECLEN (arg_rtx, 0); i++)
16764 reg = XEXP (XVECEXP (arg_rtx, 0, i), 0);
16765 gcc_assert (REG_P (reg));
16767 not_to_clear_mask |= HOST_WIDE_INT_1U << REGNO (reg);
16769 /* If we are dealing with DF mode, make sure we don't
16770 clear either of the registers it addresses. */
16771 arg_regs = ARM_NUM_REGS (GET_MODE (reg));
16772 if (arg_regs > 1)
16774 unsigned HOST_WIDE_INT mask;
16775 mask = HOST_WIDE_INT_1U << (REGNO (reg) + arg_regs);
16776 mask -= HOST_WIDE_INT_1U << REGNO (reg);
16777 not_to_clear_mask |= mask;
16781 else
16783 /* Otherwise we can rely on the MODE to determine how many registers
16784 are being used by this argument. */
16785 int arg_regs = ARM_NUM_REGS (GET_MODE (arg_rtx));
16786 not_to_clear_mask |= HOST_WIDE_INT_1U << REGNO (arg_rtx);
16787 if (arg_regs > 1)
16789 unsigned HOST_WIDE_INT
16790 mask = HOST_WIDE_INT_1U << (REGNO (arg_rtx) + arg_regs);
16791 mask -= HOST_WIDE_INT_1U << REGNO (arg_rtx);
16792 not_to_clear_mask |= mask;
16797 return not_to_clear_mask;
16800 /* Saves callee saved registers, clears callee saved registers and caller saved
16801 registers not used to pass arguments before a cmse_nonsecure_call. And
16802 restores the callee saved registers after. */
16804 static void
16805 cmse_nonsecure_call_clear_caller_saved (void)
16807 basic_block bb;
16809 FOR_EACH_BB_FN (bb, cfun)
16811 rtx_insn *insn;
16813 FOR_BB_INSNS (bb, insn)
16815 uint64_t to_clear_mask, float_mask;
16816 rtx_insn *seq;
16817 rtx pat, call, unspec, reg, cleared_reg, tmp;
16818 unsigned int regno, maxregno;
16819 rtx address;
16820 CUMULATIVE_ARGS args_so_far_v;
16821 cumulative_args_t args_so_far;
16822 tree arg_type, fntype;
16823 bool using_r4, first_param = true;
16824 function_args_iterator args_iter;
16825 uint32_t padding_bits_to_clear[4] = {0U, 0U, 0U, 0U};
16826 uint32_t * padding_bits_to_clear_ptr = &padding_bits_to_clear[0];
16828 if (!NONDEBUG_INSN_P (insn))
16829 continue;
16831 if (!CALL_P (insn))
16832 continue;
16834 pat = PATTERN (insn);
16835 gcc_assert (GET_CODE (pat) == PARALLEL && XVECLEN (pat, 0) > 0);
16836 call = XVECEXP (pat, 0, 0);
16838 /* Get the real call RTX if the insn sets a value, ie. returns. */
16839 if (GET_CODE (call) == SET)
16840 call = SET_SRC (call);
16842 /* Check if it is a cmse_nonsecure_call. */
16843 unspec = XEXP (call, 0);
16844 if (GET_CODE (unspec) != UNSPEC
16845 || XINT (unspec, 1) != UNSPEC_NONSECURE_MEM)
16846 continue;
16848 /* Determine the caller-saved registers we need to clear. */
16849 to_clear_mask = (1LL << (NUM_ARG_REGS)) - 1;
16850 maxregno = NUM_ARG_REGS - 1;
16851 /* Only look at the caller-saved floating point registers in case of
16852 -mfloat-abi=hard. For -mfloat-abi=softfp we will be using the
16853 lazy store and loads which clear both caller- and callee-saved
16854 registers. */
16855 if (TARGET_HARD_FLOAT_ABI)
16857 float_mask = (1LL << (D7_VFP_REGNUM + 1)) - 1;
16858 float_mask &= ~((1LL << FIRST_VFP_REGNUM) - 1);
16859 to_clear_mask |= float_mask;
16860 maxregno = D7_VFP_REGNUM;
16863 /* Make sure the register used to hold the function address is not
16864 cleared. */
16865 address = RTVEC_ELT (XVEC (unspec, 0), 0);
16866 gcc_assert (MEM_P (address));
16867 gcc_assert (REG_P (XEXP (address, 0)));
16868 to_clear_mask &= ~(1LL << REGNO (XEXP (address, 0)));
16870 /* Set basic block of call insn so that df rescan is performed on
16871 insns inserted here. */
16872 set_block_for_insn (insn, bb);
16873 df_set_flags (DF_DEFER_INSN_RESCAN);
16874 start_sequence ();
16876 /* Make sure the scheduler doesn't schedule other insns beyond
16877 here. */
16878 emit_insn (gen_blockage ());
16880 /* Walk through all arguments and clear registers appropriately.
16882 fntype = TREE_TYPE (MEM_EXPR (address));
16883 arm_init_cumulative_args (&args_so_far_v, fntype, NULL_RTX,
16884 NULL_TREE);
16885 args_so_far = pack_cumulative_args (&args_so_far_v);
16886 FOREACH_FUNCTION_ARGS (fntype, arg_type, args_iter)
16888 rtx arg_rtx;
16889 machine_mode arg_mode = TYPE_MODE (arg_type);
16891 if (VOID_TYPE_P (arg_type))
16892 continue;
16894 if (!first_param)
16895 arm_function_arg_advance (args_so_far, arg_mode, arg_type,
16896 true);
16898 arg_rtx = arm_function_arg (args_so_far, arg_mode, arg_type,
16899 true);
16900 gcc_assert (REG_P (arg_rtx));
16901 to_clear_mask
16902 &= ~compute_not_to_clear_mask (arg_type, arg_rtx,
16903 REGNO (arg_rtx),
16904 padding_bits_to_clear_ptr);
16906 first_param = false;
16909 /* Clear padding bits where needed. */
16910 cleared_reg = XEXP (address, 0);
16911 reg = gen_rtx_REG (SImode, IP_REGNUM);
16912 using_r4 = false;
16913 for (regno = R0_REGNUM; regno < NUM_ARG_REGS; regno++)
16915 if (padding_bits_to_clear[regno] == 0)
16916 continue;
16918 /* If this is a Thumb-1 target copy the address of the function
16919 we are calling from 'r4' into 'ip' such that we can use r4 to
16920 clear the unused bits in the arguments. */
16921 if (TARGET_THUMB1 && !using_r4)
16923 using_r4 = true;
16924 reg = cleared_reg;
16925 emit_move_insn (gen_rtx_REG (SImode, IP_REGNUM),
16926 reg);
16929 tmp = GEN_INT ((((~padding_bits_to_clear[regno]) << 16u) >> 16u));
16930 emit_move_insn (reg, tmp);
16931 /* Also fill the top half of the negated
16932 padding_bits_to_clear. */
16933 if (((~padding_bits_to_clear[regno]) >> 16) > 0)
16935 tmp = GEN_INT ((~padding_bits_to_clear[regno]) >> 16);
16936 emit_insn (gen_rtx_SET (gen_rtx_ZERO_EXTRACT (SImode, reg,
16937 GEN_INT (16),
16938 GEN_INT (16)),
16939 tmp));
16942 emit_insn (gen_andsi3 (gen_rtx_REG (SImode, regno),
16943 gen_rtx_REG (SImode, regno),
16944 reg));
16947 if (using_r4)
16948 emit_move_insn (cleared_reg,
16949 gen_rtx_REG (SImode, IP_REGNUM));
16951 /* We use right shift and left shift to clear the LSB of the address
16952 we jump to instead of using bic, to avoid having to use an extra
16953 register on Thumb-1. */
16954 tmp = gen_rtx_LSHIFTRT (SImode, cleared_reg, const1_rtx);
16955 emit_insn (gen_rtx_SET (cleared_reg, tmp));
16956 tmp = gen_rtx_ASHIFT (SImode, cleared_reg, const1_rtx);
16957 emit_insn (gen_rtx_SET (cleared_reg, tmp));
16959 /* Clearing all registers that leak before doing a non-secure
16960 call. */
16961 for (regno = R0_REGNUM; regno <= maxregno; regno++)
16963 if (!(to_clear_mask & (1LL << regno)))
16964 continue;
16966 /* If regno is an even vfp register and its successor is also to
16967 be cleared, use vmov. */
16968 if (IS_VFP_REGNUM (regno))
16970 if (TARGET_VFP_DOUBLE
16971 && VFP_REGNO_OK_FOR_DOUBLE (regno)
16972 && to_clear_mask & (1LL << (regno + 1)))
16973 emit_move_insn (gen_rtx_REG (DFmode, regno++),
16974 CONST0_RTX (DFmode));
16975 else
16976 emit_move_insn (gen_rtx_REG (SFmode, regno),
16977 CONST0_RTX (SFmode));
16979 else
16980 emit_move_insn (gen_rtx_REG (SImode, regno), cleared_reg);
16983 seq = get_insns ();
16984 end_sequence ();
16985 emit_insn_before (seq, insn);
16991 /* Rewrite move insn into subtract of 0 if the condition codes will
16992 be useful in next conditional jump insn. */
16994 static void
16995 thumb1_reorg (void)
16997 basic_block bb;
16999 FOR_EACH_BB_FN (bb, cfun)
17001 rtx dest, src;
17002 rtx cmp, op0, op1, set = NULL;
17003 rtx_insn *prev, *insn = BB_END (bb);
17004 bool insn_clobbered = false;
17006 while (insn != BB_HEAD (bb) && !NONDEBUG_INSN_P (insn))
17007 insn = PREV_INSN (insn);
17009 /* Find the last cbranchsi4_insn in basic block BB. */
17010 if (insn == BB_HEAD (bb)
17011 || INSN_CODE (insn) != CODE_FOR_cbranchsi4_insn)
17012 continue;
17014 /* Get the register with which we are comparing. */
17015 cmp = XEXP (SET_SRC (PATTERN (insn)), 0);
17016 op0 = XEXP (cmp, 0);
17017 op1 = XEXP (cmp, 1);
17019 /* Check that comparison is against ZERO. */
17020 if (!CONST_INT_P (op1) || INTVAL (op1) != 0)
17021 continue;
17023 /* Find the first flag setting insn before INSN in basic block BB. */
17024 gcc_assert (insn != BB_HEAD (bb));
17025 for (prev = PREV_INSN (insn);
17026 (!insn_clobbered
17027 && prev != BB_HEAD (bb)
17028 && (NOTE_P (prev)
17029 || DEBUG_INSN_P (prev)
17030 || ((set = single_set (prev)) != NULL
17031 && get_attr_conds (prev) == CONDS_NOCOND)));
17032 prev = PREV_INSN (prev))
17034 if (reg_set_p (op0, prev))
17035 insn_clobbered = true;
17038 /* Skip if op0 is clobbered by insn other than prev. */
17039 if (insn_clobbered)
17040 continue;
17042 if (!set)
17043 continue;
17045 dest = SET_DEST (set);
17046 src = SET_SRC (set);
17047 if (!low_register_operand (dest, SImode)
17048 || !low_register_operand (src, SImode))
17049 continue;
17051 /* Rewrite move into subtract of 0 if its operand is compared with ZERO
17052 in INSN. Both src and dest of the move insn are checked. */
17053 if (REGNO (op0) == REGNO (src) || REGNO (op0) == REGNO (dest))
17055 dest = copy_rtx (dest);
17056 src = copy_rtx (src);
17057 src = gen_rtx_MINUS (SImode, src, const0_rtx);
17058 PATTERN (prev) = gen_rtx_SET (dest, src);
17059 INSN_CODE (prev) = -1;
17060 /* Set test register in INSN to dest. */
17061 XEXP (cmp, 0) = copy_rtx (dest);
17062 INSN_CODE (insn) = -1;
17067 /* Convert instructions to their cc-clobbering variant if possible, since
17068 that allows us to use smaller encodings. */
17070 static void
17071 thumb2_reorg (void)
17073 basic_block bb;
17074 regset_head live;
17076 INIT_REG_SET (&live);
17078 /* We are freeing block_for_insn in the toplev to keep compatibility
17079 with old MDEP_REORGS that are not CFG based. Recompute it now. */
17080 compute_bb_for_insn ();
17081 df_analyze ();
17083 enum Convert_Action {SKIP, CONV, SWAP_CONV};
17085 FOR_EACH_BB_FN (bb, cfun)
17087 if ((current_tune->disparage_flag_setting_t16_encodings
17088 == tune_params::DISPARAGE_FLAGS_ALL)
17089 && optimize_bb_for_speed_p (bb))
17090 continue;
17092 rtx_insn *insn;
17093 Convert_Action action = SKIP;
17094 Convert_Action action_for_partial_flag_setting
17095 = ((current_tune->disparage_flag_setting_t16_encodings
17096 != tune_params::DISPARAGE_FLAGS_NEITHER)
17097 && optimize_bb_for_speed_p (bb))
17098 ? SKIP : CONV;
17100 COPY_REG_SET (&live, DF_LR_OUT (bb));
17101 df_simulate_initialize_backwards (bb, &live);
17102 FOR_BB_INSNS_REVERSE (bb, insn)
17104 if (NONJUMP_INSN_P (insn)
17105 && !REGNO_REG_SET_P (&live, CC_REGNUM)
17106 && GET_CODE (PATTERN (insn)) == SET)
17108 action = SKIP;
17109 rtx pat = PATTERN (insn);
17110 rtx dst = XEXP (pat, 0);
17111 rtx src = XEXP (pat, 1);
17112 rtx op0 = NULL_RTX, op1 = NULL_RTX;
17114 if (UNARY_P (src) || BINARY_P (src))
17115 op0 = XEXP (src, 0);
17117 if (BINARY_P (src))
17118 op1 = XEXP (src, 1);
17120 if (low_register_operand (dst, SImode))
17122 switch (GET_CODE (src))
17124 case PLUS:
17125 /* Adding two registers and storing the result
17126 in the first source is already a 16-bit
17127 operation. */
17128 if (rtx_equal_p (dst, op0)
17129 && register_operand (op1, SImode))
17130 break;
17132 if (low_register_operand (op0, SImode))
17134 /* ADDS <Rd>,<Rn>,<Rm> */
17135 if (low_register_operand (op1, SImode))
17136 action = CONV;
17137 /* ADDS <Rdn>,#<imm8> */
17138 /* SUBS <Rdn>,#<imm8> */
17139 else if (rtx_equal_p (dst, op0)
17140 && CONST_INT_P (op1)
17141 && IN_RANGE (INTVAL (op1), -255, 255))
17142 action = CONV;
17143 /* ADDS <Rd>,<Rn>,#<imm3> */
17144 /* SUBS <Rd>,<Rn>,#<imm3> */
17145 else if (CONST_INT_P (op1)
17146 && IN_RANGE (INTVAL (op1), -7, 7))
17147 action = CONV;
17149 /* ADCS <Rd>, <Rn> */
17150 else if (GET_CODE (XEXP (src, 0)) == PLUS
17151 && rtx_equal_p (XEXP (XEXP (src, 0), 0), dst)
17152 && low_register_operand (XEXP (XEXP (src, 0), 1),
17153 SImode)
17154 && COMPARISON_P (op1)
17155 && cc_register (XEXP (op1, 0), VOIDmode)
17156 && maybe_get_arm_condition_code (op1) == ARM_CS
17157 && XEXP (op1, 1) == const0_rtx)
17158 action = CONV;
17159 break;
17161 case MINUS:
17162 /* RSBS <Rd>,<Rn>,#0
17163 Not handled here: see NEG below. */
17164 /* SUBS <Rd>,<Rn>,#<imm3>
17165 SUBS <Rdn>,#<imm8>
17166 Not handled here: see PLUS above. */
17167 /* SUBS <Rd>,<Rn>,<Rm> */
17168 if (low_register_operand (op0, SImode)
17169 && low_register_operand (op1, SImode))
17170 action = CONV;
17171 break;
17173 case MULT:
17174 /* MULS <Rdm>,<Rn>,<Rdm>
17175 As an exception to the rule, this is only used
17176 when optimizing for size since MULS is slow on all
17177 known implementations. We do not even want to use
17178 MULS in cold code, if optimizing for speed, so we
17179 test the global flag here. */
17180 if (!optimize_size)
17181 break;
17182 /* Fall through. */
17183 case AND:
17184 case IOR:
17185 case XOR:
17186 /* ANDS <Rdn>,<Rm> */
17187 if (rtx_equal_p (dst, op0)
17188 && low_register_operand (op1, SImode))
17189 action = action_for_partial_flag_setting;
17190 else if (rtx_equal_p (dst, op1)
17191 && low_register_operand (op0, SImode))
17192 action = action_for_partial_flag_setting == SKIP
17193 ? SKIP : SWAP_CONV;
17194 break;
17196 case ASHIFTRT:
17197 case ASHIFT:
17198 case LSHIFTRT:
17199 /* ASRS <Rdn>,<Rm> */
17200 /* LSRS <Rdn>,<Rm> */
17201 /* LSLS <Rdn>,<Rm> */
17202 if (rtx_equal_p (dst, op0)
17203 && low_register_operand (op1, SImode))
17204 action = action_for_partial_flag_setting;
17205 /* ASRS <Rd>,<Rm>,#<imm5> */
17206 /* LSRS <Rd>,<Rm>,#<imm5> */
17207 /* LSLS <Rd>,<Rm>,#<imm5> */
17208 else if (low_register_operand (op0, SImode)
17209 && CONST_INT_P (op1)
17210 && IN_RANGE (INTVAL (op1), 0, 31))
17211 action = action_for_partial_flag_setting;
17212 break;
17214 case ROTATERT:
17215 /* RORS <Rdn>,<Rm> */
17216 if (rtx_equal_p (dst, op0)
17217 && low_register_operand (op1, SImode))
17218 action = action_for_partial_flag_setting;
17219 break;
17221 case NOT:
17222 /* MVNS <Rd>,<Rm> */
17223 if (low_register_operand (op0, SImode))
17224 action = action_for_partial_flag_setting;
17225 break;
17227 case NEG:
17228 /* NEGS <Rd>,<Rm> (a.k.a RSBS) */
17229 if (low_register_operand (op0, SImode))
17230 action = CONV;
17231 break;
17233 case CONST_INT:
17234 /* MOVS <Rd>,#<imm8> */
17235 if (CONST_INT_P (src)
17236 && IN_RANGE (INTVAL (src), 0, 255))
17237 action = action_for_partial_flag_setting;
17238 break;
17240 case REG:
17241 /* MOVS and MOV<c> with registers have different
17242 encodings, so are not relevant here. */
17243 break;
17245 default:
17246 break;
17250 if (action != SKIP)
17252 rtx ccreg = gen_rtx_REG (CCmode, CC_REGNUM);
17253 rtx clobber = gen_rtx_CLOBBER (VOIDmode, ccreg);
17254 rtvec vec;
17256 if (action == SWAP_CONV)
17258 src = copy_rtx (src);
17259 XEXP (src, 0) = op1;
17260 XEXP (src, 1) = op0;
17261 pat = gen_rtx_SET (dst, src);
17262 vec = gen_rtvec (2, pat, clobber);
17264 else /* action == CONV */
17265 vec = gen_rtvec (2, pat, clobber);
17267 PATTERN (insn) = gen_rtx_PARALLEL (VOIDmode, vec);
17268 INSN_CODE (insn) = -1;
17272 if (NONDEBUG_INSN_P (insn))
17273 df_simulate_one_insn_backwards (bb, insn, &live);
17277 CLEAR_REG_SET (&live);
17280 /* Gcc puts the pool in the wrong place for ARM, since we can only
17281 load addresses a limited distance around the pc. We do some
17282 special munging to move the constant pool values to the correct
17283 point in the code. */
17284 static void
17285 arm_reorg (void)
17287 rtx_insn *insn;
17288 HOST_WIDE_INT address = 0;
17289 Mfix * fix;
17291 if (use_cmse)
17292 cmse_nonsecure_call_clear_caller_saved ();
17293 if (TARGET_THUMB1)
17294 thumb1_reorg ();
17295 else if (TARGET_THUMB2)
17296 thumb2_reorg ();
17298 /* Ensure all insns that must be split have been split at this point.
17299 Otherwise, the pool placement code below may compute incorrect
17300 insn lengths. Note that when optimizing, all insns have already
17301 been split at this point. */
17302 if (!optimize)
17303 split_all_insns_noflow ();
17305 minipool_fix_head = minipool_fix_tail = NULL;
17307 /* The first insn must always be a note, or the code below won't
17308 scan it properly. */
17309 insn = get_insns ();
17310 gcc_assert (NOTE_P (insn));
17311 minipool_pad = 0;
17313 /* Scan all the insns and record the operands that will need fixing. */
17314 for (insn = next_nonnote_insn (insn); insn; insn = next_nonnote_insn (insn))
17316 if (BARRIER_P (insn))
17317 push_minipool_barrier (insn, address);
17318 else if (INSN_P (insn))
17320 rtx_jump_table_data *table;
17322 note_invalid_constants (insn, address, true);
17323 address += get_attr_length (insn);
17325 /* If the insn is a vector jump, add the size of the table
17326 and skip the table. */
17327 if (tablejump_p (insn, NULL, &table))
17329 address += get_jump_table_size (table);
17330 insn = table;
17333 else if (LABEL_P (insn))
17334 /* Add the worst-case padding due to alignment. We don't add
17335 the _current_ padding because the minipool insertions
17336 themselves might change it. */
17337 address += get_label_padding (insn);
17340 fix = minipool_fix_head;
17342 /* Now scan the fixups and perform the required changes. */
17343 while (fix)
17345 Mfix * ftmp;
17346 Mfix * fdel;
17347 Mfix * last_added_fix;
17348 Mfix * last_barrier = NULL;
17349 Mfix * this_fix;
17351 /* Skip any further barriers before the next fix. */
17352 while (fix && BARRIER_P (fix->insn))
17353 fix = fix->next;
17355 /* No more fixes. */
17356 if (fix == NULL)
17357 break;
17359 last_added_fix = NULL;
17361 for (ftmp = fix; ftmp; ftmp = ftmp->next)
17363 if (BARRIER_P (ftmp->insn))
17365 if (ftmp->address >= minipool_vector_head->max_address)
17366 break;
17368 last_barrier = ftmp;
17370 else if ((ftmp->minipool = add_minipool_forward_ref (ftmp)) == NULL)
17371 break;
17373 last_added_fix = ftmp; /* Keep track of the last fix added. */
17376 /* If we found a barrier, drop back to that; any fixes that we
17377 could have reached but come after the barrier will now go in
17378 the next mini-pool. */
17379 if (last_barrier != NULL)
17381 /* Reduce the refcount for those fixes that won't go into this
17382 pool after all. */
17383 for (fdel = last_barrier->next;
17384 fdel && fdel != ftmp;
17385 fdel = fdel->next)
17387 fdel->minipool->refcount--;
17388 fdel->minipool = NULL;
17391 ftmp = last_barrier;
17393 else
17395 /* ftmp is first fix that we can't fit into this pool and
17396 there no natural barriers that we could use. Insert a
17397 new barrier in the code somewhere between the previous
17398 fix and this one, and arrange to jump around it. */
17399 HOST_WIDE_INT max_address;
17401 /* The last item on the list of fixes must be a barrier, so
17402 we can never run off the end of the list of fixes without
17403 last_barrier being set. */
17404 gcc_assert (ftmp);
17406 max_address = minipool_vector_head->max_address;
17407 /* Check that there isn't another fix that is in range that
17408 we couldn't fit into this pool because the pool was
17409 already too large: we need to put the pool before such an
17410 instruction. The pool itself may come just after the
17411 fix because create_fix_barrier also allows space for a
17412 jump instruction. */
17413 if (ftmp->address < max_address)
17414 max_address = ftmp->address + 1;
17416 last_barrier = create_fix_barrier (last_added_fix, max_address);
17419 assign_minipool_offsets (last_barrier);
17421 while (ftmp)
17423 if (!BARRIER_P (ftmp->insn)
17424 && ((ftmp->minipool = add_minipool_backward_ref (ftmp))
17425 == NULL))
17426 break;
17428 ftmp = ftmp->next;
17431 /* Scan over the fixes we have identified for this pool, fixing them
17432 up and adding the constants to the pool itself. */
17433 for (this_fix = fix; this_fix && ftmp != this_fix;
17434 this_fix = this_fix->next)
17435 if (!BARRIER_P (this_fix->insn))
17437 rtx addr
17438 = plus_constant (Pmode,
17439 gen_rtx_LABEL_REF (VOIDmode,
17440 minipool_vector_label),
17441 this_fix->minipool->offset);
17442 *this_fix->loc = gen_rtx_MEM (this_fix->mode, addr);
17445 dump_minipool (last_barrier->insn);
17446 fix = ftmp;
17449 /* From now on we must synthesize any constants that we can't handle
17450 directly. This can happen if the RTL gets split during final
17451 instruction generation. */
17452 cfun->machine->after_arm_reorg = 1;
17454 /* Free the minipool memory. */
17455 obstack_free (&minipool_obstack, minipool_startobj);
17458 /* Routines to output assembly language. */
17460 /* Return string representation of passed in real value. */
17461 static const char *
17462 fp_const_from_val (REAL_VALUE_TYPE *r)
17464 if (!fp_consts_inited)
17465 init_fp_table ();
17467 gcc_assert (real_equal (r, &value_fp0));
17468 return "0";
17471 /* OPERANDS[0] is the entire list of insns that constitute pop,
17472 OPERANDS[1] is the base register, RETURN_PC is true iff return insn
17473 is in the list, UPDATE is true iff the list contains explicit
17474 update of base register. */
17475 void
17476 arm_output_multireg_pop (rtx *operands, bool return_pc, rtx cond, bool reverse,
17477 bool update)
17479 int i;
17480 char pattern[100];
17481 int offset;
17482 const char *conditional;
17483 int num_saves = XVECLEN (operands[0], 0);
17484 unsigned int regno;
17485 unsigned int regno_base = REGNO (operands[1]);
17486 bool interrupt_p = IS_INTERRUPT (arm_current_func_type ());
17488 offset = 0;
17489 offset += update ? 1 : 0;
17490 offset += return_pc ? 1 : 0;
17492 /* Is the base register in the list? */
17493 for (i = offset; i < num_saves; i++)
17495 regno = REGNO (XEXP (XVECEXP (operands[0], 0, i), 0));
17496 /* If SP is in the list, then the base register must be SP. */
17497 gcc_assert ((regno != SP_REGNUM) || (regno_base == SP_REGNUM));
17498 /* If base register is in the list, there must be no explicit update. */
17499 if (regno == regno_base)
17500 gcc_assert (!update);
17503 conditional = reverse ? "%?%D0" : "%?%d0";
17504 /* Can't use POP if returning from an interrupt. */
17505 if ((regno_base == SP_REGNUM) && update && !(interrupt_p && return_pc))
17506 sprintf (pattern, "pop%s\t{", conditional);
17507 else
17509 /* Output ldmfd when the base register is SP, otherwise output ldmia.
17510 It's just a convention, their semantics are identical. */
17511 if (regno_base == SP_REGNUM)
17512 sprintf (pattern, "ldmfd%s\t", conditional);
17513 else if (update)
17514 sprintf (pattern, "ldmia%s\t", conditional);
17515 else
17516 sprintf (pattern, "ldm%s\t", conditional);
17518 strcat (pattern, reg_names[regno_base]);
17519 if (update)
17520 strcat (pattern, "!, {");
17521 else
17522 strcat (pattern, ", {");
17525 /* Output the first destination register. */
17526 strcat (pattern,
17527 reg_names[REGNO (XEXP (XVECEXP (operands[0], 0, offset), 0))]);
17529 /* Output the rest of the destination registers. */
17530 for (i = offset + 1; i < num_saves; i++)
17532 strcat (pattern, ", ");
17533 strcat (pattern,
17534 reg_names[REGNO (XEXP (XVECEXP (operands[0], 0, i), 0))]);
17537 strcat (pattern, "}");
17539 if (interrupt_p && return_pc)
17540 strcat (pattern, "^");
17542 output_asm_insn (pattern, &cond);
17546 /* Output the assembly for a store multiple. */
17548 const char *
17549 vfp_output_vstmd (rtx * operands)
17551 char pattern[100];
17552 int p;
17553 int base;
17554 int i;
17555 rtx addr_reg = REG_P (XEXP (operands[0], 0))
17556 ? XEXP (operands[0], 0)
17557 : XEXP (XEXP (operands[0], 0), 0);
17558 bool push_p = REGNO (addr_reg) == SP_REGNUM;
17560 if (push_p)
17561 strcpy (pattern, "vpush%?.64\t{%P1");
17562 else
17563 strcpy (pattern, "vstmdb%?.64\t%m0!, {%P1");
17565 p = strlen (pattern);
17567 gcc_assert (REG_P (operands[1]));
17569 base = (REGNO (operands[1]) - FIRST_VFP_REGNUM) / 2;
17570 for (i = 1; i < XVECLEN (operands[2], 0); i++)
17572 p += sprintf (&pattern[p], ", d%d", base + i);
17574 strcpy (&pattern[p], "}");
17576 output_asm_insn (pattern, operands);
17577 return "";
17581 /* Emit RTL to save block of VFP register pairs to the stack. Returns the
17582 number of bytes pushed. */
17584 static int
17585 vfp_emit_fstmd (int base_reg, int count)
17587 rtx par;
17588 rtx dwarf;
17589 rtx tmp, reg;
17590 int i;
17592 /* Workaround ARM10 VFPr1 bug. Data corruption can occur when exactly two
17593 register pairs are stored by a store multiple insn. We avoid this
17594 by pushing an extra pair. */
17595 if (count == 2 && !arm_arch6)
17597 if (base_reg == LAST_VFP_REGNUM - 3)
17598 base_reg -= 2;
17599 count++;
17602 /* FSTMD may not store more than 16 doubleword registers at once. Split
17603 larger stores into multiple parts (up to a maximum of two, in
17604 practice). */
17605 if (count > 16)
17607 int saved;
17608 /* NOTE: base_reg is an internal register number, so each D register
17609 counts as 2. */
17610 saved = vfp_emit_fstmd (base_reg + 32, count - 16);
17611 saved += vfp_emit_fstmd (base_reg, 16);
17612 return saved;
17615 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (count));
17616 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (count + 1));
17618 reg = gen_rtx_REG (DFmode, base_reg);
17619 base_reg += 2;
17621 XVECEXP (par, 0, 0)
17622 = gen_rtx_SET (gen_frame_mem
17623 (BLKmode,
17624 gen_rtx_PRE_MODIFY (Pmode,
17625 stack_pointer_rtx,
17626 plus_constant
17627 (Pmode, stack_pointer_rtx,
17628 - (count * 8)))
17630 gen_rtx_UNSPEC (BLKmode,
17631 gen_rtvec (1, reg),
17632 UNSPEC_PUSH_MULT));
17634 tmp = gen_rtx_SET (stack_pointer_rtx,
17635 plus_constant (Pmode, stack_pointer_rtx, -(count * 8)));
17636 RTX_FRAME_RELATED_P (tmp) = 1;
17637 XVECEXP (dwarf, 0, 0) = tmp;
17639 tmp = gen_rtx_SET (gen_frame_mem (DFmode, stack_pointer_rtx), reg);
17640 RTX_FRAME_RELATED_P (tmp) = 1;
17641 XVECEXP (dwarf, 0, 1) = tmp;
17643 for (i = 1; i < count; i++)
17645 reg = gen_rtx_REG (DFmode, base_reg);
17646 base_reg += 2;
17647 XVECEXP (par, 0, i) = gen_rtx_USE (VOIDmode, reg);
17649 tmp = gen_rtx_SET (gen_frame_mem (DFmode,
17650 plus_constant (Pmode,
17651 stack_pointer_rtx,
17652 i * 8)),
17653 reg);
17654 RTX_FRAME_RELATED_P (tmp) = 1;
17655 XVECEXP (dwarf, 0, i + 1) = tmp;
17658 par = emit_insn (par);
17659 add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
17660 RTX_FRAME_RELATED_P (par) = 1;
17662 return count * 8;
17665 /* Returns true if -mcmse has been passed and the function pointed to by 'addr'
17666 has the cmse_nonsecure_call attribute and returns false otherwise. */
17668 bool
17669 detect_cmse_nonsecure_call (tree addr)
17671 if (!addr)
17672 return FALSE;
17674 tree fntype = TREE_TYPE (addr);
17675 if (use_cmse && lookup_attribute ("cmse_nonsecure_call",
17676 TYPE_ATTRIBUTES (fntype)))
17677 return TRUE;
17678 return FALSE;
17682 /* Emit a call instruction with pattern PAT. ADDR is the address of
17683 the call target. */
17685 void
17686 arm_emit_call_insn (rtx pat, rtx addr, bool sibcall)
17688 rtx insn;
17690 insn = emit_call_insn (pat);
17692 /* The PIC register is live on entry to VxWorks PIC PLT entries.
17693 If the call might use such an entry, add a use of the PIC register
17694 to the instruction's CALL_INSN_FUNCTION_USAGE. */
17695 if (TARGET_VXWORKS_RTP
17696 && flag_pic
17697 && !sibcall
17698 && GET_CODE (addr) == SYMBOL_REF
17699 && (SYMBOL_REF_DECL (addr)
17700 ? !targetm.binds_local_p (SYMBOL_REF_DECL (addr))
17701 : !SYMBOL_REF_LOCAL_P (addr)))
17703 require_pic_register ();
17704 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), cfun->machine->pic_reg);
17707 if (TARGET_AAPCS_BASED)
17709 /* For AAPCS, IP and CC can be clobbered by veneers inserted by the
17710 linker. We need to add an IP clobber to allow setting
17711 TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS to true. A CC clobber
17712 is not needed since it's a fixed register. */
17713 rtx *fusage = &CALL_INSN_FUNCTION_USAGE (insn);
17714 clobber_reg (fusage, gen_rtx_REG (word_mode, IP_REGNUM));
17718 /* Output a 'call' insn. */
17719 const char *
17720 output_call (rtx *operands)
17722 gcc_assert (!arm_arch5); /* Patterns should call blx <reg> directly. */
17724 /* Handle calls to lr using ip (which may be clobbered in subr anyway). */
17725 if (REGNO (operands[0]) == LR_REGNUM)
17727 operands[0] = gen_rtx_REG (SImode, IP_REGNUM);
17728 output_asm_insn ("mov%?\t%0, %|lr", operands);
17731 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
17733 if (TARGET_INTERWORK || arm_arch4t)
17734 output_asm_insn ("bx%?\t%0", operands);
17735 else
17736 output_asm_insn ("mov%?\t%|pc, %0", operands);
17738 return "";
17741 /* Output a move from arm registers to arm registers of a long double
17742 OPERANDS[0] is the destination.
17743 OPERANDS[1] is the source. */
17744 const char *
17745 output_mov_long_double_arm_from_arm (rtx *operands)
17747 /* We have to be careful here because the two might overlap. */
17748 int dest_start = REGNO (operands[0]);
17749 int src_start = REGNO (operands[1]);
17750 rtx ops[2];
17751 int i;
17753 if (dest_start < src_start)
17755 for (i = 0; i < 3; i++)
17757 ops[0] = gen_rtx_REG (SImode, dest_start + i);
17758 ops[1] = gen_rtx_REG (SImode, src_start + i);
17759 output_asm_insn ("mov%?\t%0, %1", ops);
17762 else
17764 for (i = 2; i >= 0; i--)
17766 ops[0] = gen_rtx_REG (SImode, dest_start + i);
17767 ops[1] = gen_rtx_REG (SImode, src_start + i);
17768 output_asm_insn ("mov%?\t%0, %1", ops);
17772 return "";
17775 void
17776 arm_emit_movpair (rtx dest, rtx src)
17778 /* If the src is an immediate, simplify it. */
17779 if (CONST_INT_P (src))
17781 HOST_WIDE_INT val = INTVAL (src);
17782 emit_set_insn (dest, GEN_INT (val & 0x0000ffff));
17783 if ((val >> 16) & 0x0000ffff)
17785 emit_set_insn (gen_rtx_ZERO_EXTRACT (SImode, dest, GEN_INT (16),
17786 GEN_INT (16)),
17787 GEN_INT ((val >> 16) & 0x0000ffff));
17788 rtx_insn *insn = get_last_insn ();
17789 set_unique_reg_note (insn, REG_EQUAL, copy_rtx (src));
17791 return;
17793 emit_set_insn (dest, gen_rtx_HIGH (SImode, src));
17794 emit_set_insn (dest, gen_rtx_LO_SUM (SImode, dest, src));
17795 rtx_insn *insn = get_last_insn ();
17796 set_unique_reg_note (insn, REG_EQUAL, copy_rtx (src));
17799 /* Output a move between double words. It must be REG<-MEM
17800 or MEM<-REG. */
17801 const char *
17802 output_move_double (rtx *operands, bool emit, int *count)
17804 enum rtx_code code0 = GET_CODE (operands[0]);
17805 enum rtx_code code1 = GET_CODE (operands[1]);
17806 rtx otherops[3];
17807 if (count)
17808 *count = 1;
17810 /* The only case when this might happen is when
17811 you are looking at the length of a DImode instruction
17812 that has an invalid constant in it. */
17813 if (code0 == REG && code1 != MEM)
17815 gcc_assert (!emit);
17816 *count = 2;
17817 return "";
17820 if (code0 == REG)
17822 unsigned int reg0 = REGNO (operands[0]);
17824 otherops[0] = gen_rtx_REG (SImode, 1 + reg0);
17826 gcc_assert (code1 == MEM); /* Constraints should ensure this. */
17828 switch (GET_CODE (XEXP (operands[1], 0)))
17830 case REG:
17832 if (emit)
17834 if (TARGET_LDRD
17835 && !(fix_cm3_ldrd && reg0 == REGNO(XEXP (operands[1], 0))))
17836 output_asm_insn ("ldrd%?\t%0, [%m1]", operands);
17837 else
17838 output_asm_insn ("ldmia%?\t%m1, %M0", operands);
17840 break;
17842 case PRE_INC:
17843 gcc_assert (TARGET_LDRD);
17844 if (emit)
17845 output_asm_insn ("ldrd%?\t%0, [%m1, #8]!", operands);
17846 break;
17848 case PRE_DEC:
17849 if (emit)
17851 if (TARGET_LDRD)
17852 output_asm_insn ("ldrd%?\t%0, [%m1, #-8]!", operands);
17853 else
17854 output_asm_insn ("ldmdb%?\t%m1!, %M0", operands);
17856 break;
17858 case POST_INC:
17859 if (emit)
17861 if (TARGET_LDRD)
17862 output_asm_insn ("ldrd%?\t%0, [%m1], #8", operands);
17863 else
17864 output_asm_insn ("ldmia%?\t%m1!, %M0", operands);
17866 break;
17868 case POST_DEC:
17869 gcc_assert (TARGET_LDRD);
17870 if (emit)
17871 output_asm_insn ("ldrd%?\t%0, [%m1], #-8", operands);
17872 break;
17874 case PRE_MODIFY:
17875 case POST_MODIFY:
17876 /* Autoicrement addressing modes should never have overlapping
17877 base and destination registers, and overlapping index registers
17878 are already prohibited, so this doesn't need to worry about
17879 fix_cm3_ldrd. */
17880 otherops[0] = operands[0];
17881 otherops[1] = XEXP (XEXP (XEXP (operands[1], 0), 1), 0);
17882 otherops[2] = XEXP (XEXP (XEXP (operands[1], 0), 1), 1);
17884 if (GET_CODE (XEXP (operands[1], 0)) == PRE_MODIFY)
17886 if (reg_overlap_mentioned_p (otherops[0], otherops[2]))
17888 /* Registers overlap so split out the increment. */
17889 if (emit)
17891 output_asm_insn ("add%?\t%1, %1, %2", otherops);
17892 output_asm_insn ("ldrd%?\t%0, [%1] @split", otherops);
17894 if (count)
17895 *count = 2;
17897 else
17899 /* Use a single insn if we can.
17900 FIXME: IWMMXT allows offsets larger than ldrd can
17901 handle, fix these up with a pair of ldr. */
17902 if (TARGET_THUMB2
17903 || !CONST_INT_P (otherops[2])
17904 || (INTVAL (otherops[2]) > -256
17905 && INTVAL (otherops[2]) < 256))
17907 if (emit)
17908 output_asm_insn ("ldrd%?\t%0, [%1, %2]!", otherops);
17910 else
17912 if (emit)
17914 output_asm_insn ("ldr%?\t%0, [%1, %2]!", otherops);
17915 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
17917 if (count)
17918 *count = 2;
17923 else
17925 /* Use a single insn if we can.
17926 FIXME: IWMMXT allows offsets larger than ldrd can handle,
17927 fix these up with a pair of ldr. */
17928 if (TARGET_THUMB2
17929 || !CONST_INT_P (otherops[2])
17930 || (INTVAL (otherops[2]) > -256
17931 && INTVAL (otherops[2]) < 256))
17933 if (emit)
17934 output_asm_insn ("ldrd%?\t%0, [%1], %2", otherops);
17936 else
17938 if (emit)
17940 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
17941 output_asm_insn ("ldr%?\t%0, [%1], %2", otherops);
17943 if (count)
17944 *count = 2;
17947 break;
17949 case LABEL_REF:
17950 case CONST:
17951 /* We might be able to use ldrd %0, %1 here. However the range is
17952 different to ldr/adr, and it is broken on some ARMv7-M
17953 implementations. */
17954 /* Use the second register of the pair to avoid problematic
17955 overlap. */
17956 otherops[1] = operands[1];
17957 if (emit)
17958 output_asm_insn ("adr%?\t%0, %1", otherops);
17959 operands[1] = otherops[0];
17960 if (emit)
17962 if (TARGET_LDRD)
17963 output_asm_insn ("ldrd%?\t%0, [%1]", operands);
17964 else
17965 output_asm_insn ("ldmia%?\t%1, %M0", operands);
17968 if (count)
17969 *count = 2;
17970 break;
17972 /* ??? This needs checking for thumb2. */
17973 default:
17974 if (arm_add_operand (XEXP (XEXP (operands[1], 0), 1),
17975 GET_MODE (XEXP (XEXP (operands[1], 0), 1))))
17977 otherops[0] = operands[0];
17978 otherops[1] = XEXP (XEXP (operands[1], 0), 0);
17979 otherops[2] = XEXP (XEXP (operands[1], 0), 1);
17981 if (GET_CODE (XEXP (operands[1], 0)) == PLUS)
17983 if (CONST_INT_P (otherops[2]) && !TARGET_LDRD)
17985 switch ((int) INTVAL (otherops[2]))
17987 case -8:
17988 if (emit)
17989 output_asm_insn ("ldmdb%?\t%1, %M0", otherops);
17990 return "";
17991 case -4:
17992 if (TARGET_THUMB2)
17993 break;
17994 if (emit)
17995 output_asm_insn ("ldmda%?\t%1, %M0", otherops);
17996 return "";
17997 case 4:
17998 if (TARGET_THUMB2)
17999 break;
18000 if (emit)
18001 output_asm_insn ("ldmib%?\t%1, %M0", otherops);
18002 return "";
18005 otherops[0] = gen_rtx_REG(SImode, REGNO(operands[0]) + 1);
18006 operands[1] = otherops[0];
18007 if (TARGET_LDRD
18008 && (REG_P (otherops[2])
18009 || TARGET_THUMB2
18010 || (CONST_INT_P (otherops[2])
18011 && INTVAL (otherops[2]) > -256
18012 && INTVAL (otherops[2]) < 256)))
18014 if (reg_overlap_mentioned_p (operands[0],
18015 otherops[2]))
18017 /* Swap base and index registers over to
18018 avoid a conflict. */
18019 std::swap (otherops[1], otherops[2]);
18021 /* If both registers conflict, it will usually
18022 have been fixed by a splitter. */
18023 if (reg_overlap_mentioned_p (operands[0], otherops[2])
18024 || (fix_cm3_ldrd && reg0 == REGNO (otherops[1])))
18026 if (emit)
18028 output_asm_insn ("add%?\t%0, %1, %2", otherops);
18029 output_asm_insn ("ldrd%?\t%0, [%1]", operands);
18031 if (count)
18032 *count = 2;
18034 else
18036 otherops[0] = operands[0];
18037 if (emit)
18038 output_asm_insn ("ldrd%?\t%0, [%1, %2]", otherops);
18040 return "";
18043 if (CONST_INT_P (otherops[2]))
18045 if (emit)
18047 if (!(const_ok_for_arm (INTVAL (otherops[2]))))
18048 output_asm_insn ("sub%?\t%0, %1, #%n2", otherops);
18049 else
18050 output_asm_insn ("add%?\t%0, %1, %2", otherops);
18053 else
18055 if (emit)
18056 output_asm_insn ("add%?\t%0, %1, %2", otherops);
18059 else
18061 if (emit)
18062 output_asm_insn ("sub%?\t%0, %1, %2", otherops);
18065 if (count)
18066 *count = 2;
18068 if (TARGET_LDRD)
18069 return "ldrd%?\t%0, [%1]";
18071 return "ldmia%?\t%1, %M0";
18073 else
18075 otherops[1] = adjust_address (operands[1], SImode, 4);
18076 /* Take care of overlapping base/data reg. */
18077 if (reg_mentioned_p (operands[0], operands[1]))
18079 if (emit)
18081 output_asm_insn ("ldr%?\t%0, %1", otherops);
18082 output_asm_insn ("ldr%?\t%0, %1", operands);
18084 if (count)
18085 *count = 2;
18088 else
18090 if (emit)
18092 output_asm_insn ("ldr%?\t%0, %1", operands);
18093 output_asm_insn ("ldr%?\t%0, %1", otherops);
18095 if (count)
18096 *count = 2;
18101 else
18103 /* Constraints should ensure this. */
18104 gcc_assert (code0 == MEM && code1 == REG);
18105 gcc_assert ((REGNO (operands[1]) != IP_REGNUM)
18106 || (TARGET_ARM && TARGET_LDRD));
18108 switch (GET_CODE (XEXP (operands[0], 0)))
18110 case REG:
18111 if (emit)
18113 if (TARGET_LDRD)
18114 output_asm_insn ("strd%?\t%1, [%m0]", operands);
18115 else
18116 output_asm_insn ("stm%?\t%m0, %M1", operands);
18118 break;
18120 case PRE_INC:
18121 gcc_assert (TARGET_LDRD);
18122 if (emit)
18123 output_asm_insn ("strd%?\t%1, [%m0, #8]!", operands);
18124 break;
18126 case PRE_DEC:
18127 if (emit)
18129 if (TARGET_LDRD)
18130 output_asm_insn ("strd%?\t%1, [%m0, #-8]!", operands);
18131 else
18132 output_asm_insn ("stmdb%?\t%m0!, %M1", operands);
18134 break;
18136 case POST_INC:
18137 if (emit)
18139 if (TARGET_LDRD)
18140 output_asm_insn ("strd%?\t%1, [%m0], #8", operands);
18141 else
18142 output_asm_insn ("stm%?\t%m0!, %M1", operands);
18144 break;
18146 case POST_DEC:
18147 gcc_assert (TARGET_LDRD);
18148 if (emit)
18149 output_asm_insn ("strd%?\t%1, [%m0], #-8", operands);
18150 break;
18152 case PRE_MODIFY:
18153 case POST_MODIFY:
18154 otherops[0] = operands[1];
18155 otherops[1] = XEXP (XEXP (XEXP (operands[0], 0), 1), 0);
18156 otherops[2] = XEXP (XEXP (XEXP (operands[0], 0), 1), 1);
18158 /* IWMMXT allows offsets larger than ldrd can handle,
18159 fix these up with a pair of ldr. */
18160 if (!TARGET_THUMB2
18161 && CONST_INT_P (otherops[2])
18162 && (INTVAL(otherops[2]) <= -256
18163 || INTVAL(otherops[2]) >= 256))
18165 if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
18167 if (emit)
18169 output_asm_insn ("str%?\t%0, [%1, %2]!", otherops);
18170 output_asm_insn ("str%?\t%H0, [%1, #4]", otherops);
18172 if (count)
18173 *count = 2;
18175 else
18177 if (emit)
18179 output_asm_insn ("str%?\t%H0, [%1, #4]", otherops);
18180 output_asm_insn ("str%?\t%0, [%1], %2", otherops);
18182 if (count)
18183 *count = 2;
18186 else if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
18188 if (emit)
18189 output_asm_insn ("strd%?\t%0, [%1, %2]!", otherops);
18191 else
18193 if (emit)
18194 output_asm_insn ("strd%?\t%0, [%1], %2", otherops);
18196 break;
18198 case PLUS:
18199 otherops[2] = XEXP (XEXP (operands[0], 0), 1);
18200 if (CONST_INT_P (otherops[2]) && !TARGET_LDRD)
18202 switch ((int) INTVAL (XEXP (XEXP (operands[0], 0), 1)))
18204 case -8:
18205 if (emit)
18206 output_asm_insn ("stmdb%?\t%m0, %M1", operands);
18207 return "";
18209 case -4:
18210 if (TARGET_THUMB2)
18211 break;
18212 if (emit)
18213 output_asm_insn ("stmda%?\t%m0, %M1", operands);
18214 return "";
18216 case 4:
18217 if (TARGET_THUMB2)
18218 break;
18219 if (emit)
18220 output_asm_insn ("stmib%?\t%m0, %M1", operands);
18221 return "";
18224 if (TARGET_LDRD
18225 && (REG_P (otherops[2])
18226 || TARGET_THUMB2
18227 || (CONST_INT_P (otherops[2])
18228 && INTVAL (otherops[2]) > -256
18229 && INTVAL (otherops[2]) < 256)))
18231 otherops[0] = operands[1];
18232 otherops[1] = XEXP (XEXP (operands[0], 0), 0);
18233 if (emit)
18234 output_asm_insn ("strd%?\t%0, [%1, %2]", otherops);
18235 return "";
18237 /* Fall through */
18239 default:
18240 otherops[0] = adjust_address (operands[0], SImode, 4);
18241 otherops[1] = operands[1];
18242 if (emit)
18244 output_asm_insn ("str%?\t%1, %0", operands);
18245 output_asm_insn ("str%?\t%H1, %0", otherops);
18247 if (count)
18248 *count = 2;
18252 return "";
18255 /* Output a move, load or store for quad-word vectors in ARM registers. Only
18256 handles MEMs accepted by neon_vector_mem_operand with TYPE=1. */
18258 const char *
18259 output_move_quad (rtx *operands)
18261 if (REG_P (operands[0]))
18263 /* Load, or reg->reg move. */
18265 if (MEM_P (operands[1]))
18267 switch (GET_CODE (XEXP (operands[1], 0)))
18269 case REG:
18270 output_asm_insn ("ldmia%?\t%m1, %M0", operands);
18271 break;
18273 case LABEL_REF:
18274 case CONST:
18275 output_asm_insn ("adr%?\t%0, %1", operands);
18276 output_asm_insn ("ldmia%?\t%0, %M0", operands);
18277 break;
18279 default:
18280 gcc_unreachable ();
18283 else
18285 rtx ops[2];
18286 int dest, src, i;
18288 gcc_assert (REG_P (operands[1]));
18290 dest = REGNO (operands[0]);
18291 src = REGNO (operands[1]);
18293 /* This seems pretty dumb, but hopefully GCC won't try to do it
18294 very often. */
18295 if (dest < src)
18296 for (i = 0; i < 4; i++)
18298 ops[0] = gen_rtx_REG (SImode, dest + i);
18299 ops[1] = gen_rtx_REG (SImode, src + i);
18300 output_asm_insn ("mov%?\t%0, %1", ops);
18302 else
18303 for (i = 3; i >= 0; i--)
18305 ops[0] = gen_rtx_REG (SImode, dest + i);
18306 ops[1] = gen_rtx_REG (SImode, src + i);
18307 output_asm_insn ("mov%?\t%0, %1", ops);
18311 else
18313 gcc_assert (MEM_P (operands[0]));
18314 gcc_assert (REG_P (operands[1]));
18315 gcc_assert (!reg_overlap_mentioned_p (operands[1], operands[0]));
18317 switch (GET_CODE (XEXP (operands[0], 0)))
18319 case REG:
18320 output_asm_insn ("stm%?\t%m0, %M1", operands);
18321 break;
18323 default:
18324 gcc_unreachable ();
18328 return "";
18331 /* Output a VFP load or store instruction. */
18333 const char *
18334 output_move_vfp (rtx *operands)
18336 rtx reg, mem, addr, ops[2];
18337 int load = REG_P (operands[0]);
18338 int dp = GET_MODE_SIZE (GET_MODE (operands[0])) == 8;
18339 int sp = (!TARGET_VFP_FP16INST
18340 || GET_MODE_SIZE (GET_MODE (operands[0])) == 4);
18341 int integer_p = GET_MODE_CLASS (GET_MODE (operands[0])) == MODE_INT;
18342 const char *templ;
18343 char buff[50];
18344 machine_mode mode;
18346 reg = operands[!load];
18347 mem = operands[load];
18349 mode = GET_MODE (reg);
18351 gcc_assert (REG_P (reg));
18352 gcc_assert (IS_VFP_REGNUM (REGNO (reg)));
18353 gcc_assert ((mode == HFmode && TARGET_HARD_FLOAT)
18354 || mode == SFmode
18355 || mode == DFmode
18356 || mode == HImode
18357 || mode == SImode
18358 || mode == DImode
18359 || (TARGET_NEON && VALID_NEON_DREG_MODE (mode)));
18360 gcc_assert (MEM_P (mem));
18362 addr = XEXP (mem, 0);
18364 switch (GET_CODE (addr))
18366 case PRE_DEC:
18367 templ = "v%smdb%%?.%s\t%%0!, {%%%s1}%s";
18368 ops[0] = XEXP (addr, 0);
18369 ops[1] = reg;
18370 break;
18372 case POST_INC:
18373 templ = "v%smia%%?.%s\t%%0!, {%%%s1}%s";
18374 ops[0] = XEXP (addr, 0);
18375 ops[1] = reg;
18376 break;
18378 default:
18379 templ = "v%sr%%?.%s\t%%%s0, %%1%s";
18380 ops[0] = reg;
18381 ops[1] = mem;
18382 break;
18385 sprintf (buff, templ,
18386 load ? "ld" : "st",
18387 dp ? "64" : sp ? "32" : "16",
18388 dp ? "P" : "",
18389 integer_p ? "\t%@ int" : "");
18390 output_asm_insn (buff, ops);
18392 return "";
18395 /* Output a Neon double-word or quad-word load or store, or a load
18396 or store for larger structure modes.
18398 WARNING: The ordering of elements is weird in big-endian mode,
18399 because the EABI requires that vectors stored in memory appear
18400 as though they were stored by a VSTM, as required by the EABI.
18401 GCC RTL defines element ordering based on in-memory order.
18402 This can be different from the architectural ordering of elements
18403 within a NEON register. The intrinsics defined in arm_neon.h use the
18404 NEON register element ordering, not the GCC RTL element ordering.
18406 For example, the in-memory ordering of a big-endian a quadword
18407 vector with 16-bit elements when stored from register pair {d0,d1}
18408 will be (lowest address first, d0[N] is NEON register element N):
18410 [d0[3], d0[2], d0[1], d0[0], d1[7], d1[6], d1[5], d1[4]]
18412 When necessary, quadword registers (dN, dN+1) are moved to ARM
18413 registers from rN in the order:
18415 dN -> (rN+1, rN), dN+1 -> (rN+3, rN+2)
18417 So that STM/LDM can be used on vectors in ARM registers, and the
18418 same memory layout will result as if VSTM/VLDM were used.
18420 Instead of VSTM/VLDM we prefer to use VST1.64/VLD1.64 where
18421 possible, which allows use of appropriate alignment tags.
18422 Note that the choice of "64" is independent of the actual vector
18423 element size; this size simply ensures that the behavior is
18424 equivalent to VSTM/VLDM in both little-endian and big-endian mode.
18426 Due to limitations of those instructions, use of VST1.64/VLD1.64
18427 is not possible if:
18428 - the address contains PRE_DEC, or
18429 - the mode refers to more than 4 double-word registers
18431 In those cases, it would be possible to replace VSTM/VLDM by a
18432 sequence of instructions; this is not currently implemented since
18433 this is not certain to actually improve performance. */
18435 const char *
18436 output_move_neon (rtx *operands)
18438 rtx reg, mem, addr, ops[2];
18439 int regno, nregs, load = REG_P (operands[0]);
18440 const char *templ;
18441 char buff[50];
18442 machine_mode mode;
18444 reg = operands[!load];
18445 mem = operands[load];
18447 mode = GET_MODE (reg);
18449 gcc_assert (REG_P (reg));
18450 regno = REGNO (reg);
18451 nregs = HARD_REGNO_NREGS (regno, mode) / 2;
18452 gcc_assert (VFP_REGNO_OK_FOR_DOUBLE (regno)
18453 || NEON_REGNO_OK_FOR_QUAD (regno));
18454 gcc_assert (VALID_NEON_DREG_MODE (mode)
18455 || VALID_NEON_QREG_MODE (mode)
18456 || VALID_NEON_STRUCT_MODE (mode));
18457 gcc_assert (MEM_P (mem));
18459 addr = XEXP (mem, 0);
18461 /* Strip off const from addresses like (const (plus (...))). */
18462 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS)
18463 addr = XEXP (addr, 0);
18465 switch (GET_CODE (addr))
18467 case POST_INC:
18468 /* We have to use vldm / vstm for too-large modes. */
18469 if (nregs > 4)
18471 templ = "v%smia%%?\t%%0!, %%h1";
18472 ops[0] = XEXP (addr, 0);
18474 else
18476 templ = "v%s1.64\t%%h1, %%A0";
18477 ops[0] = mem;
18479 ops[1] = reg;
18480 break;
18482 case PRE_DEC:
18483 /* We have to use vldm / vstm in this case, since there is no
18484 pre-decrement form of the vld1 / vst1 instructions. */
18485 templ = "v%smdb%%?\t%%0!, %%h1";
18486 ops[0] = XEXP (addr, 0);
18487 ops[1] = reg;
18488 break;
18490 case POST_MODIFY:
18491 /* FIXME: Not currently enabled in neon_vector_mem_operand. */
18492 gcc_unreachable ();
18494 case REG:
18495 /* We have to use vldm / vstm for too-large modes. */
18496 if (nregs > 1)
18498 if (nregs > 4)
18499 templ = "v%smia%%?\t%%m0, %%h1";
18500 else
18501 templ = "v%s1.64\t%%h1, %%A0";
18503 ops[0] = mem;
18504 ops[1] = reg;
18505 break;
18507 /* Fall through. */
18508 case LABEL_REF:
18509 case PLUS:
18511 int i;
18512 int overlap = -1;
18513 for (i = 0; i < nregs; i++)
18515 /* We're only using DImode here because it's a convenient size. */
18516 ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * i);
18517 ops[1] = adjust_address (mem, DImode, 8 * i);
18518 if (reg_overlap_mentioned_p (ops[0], mem))
18520 gcc_assert (overlap == -1);
18521 overlap = i;
18523 else
18525 sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
18526 output_asm_insn (buff, ops);
18529 if (overlap != -1)
18531 ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * overlap);
18532 ops[1] = adjust_address (mem, SImode, 8 * overlap);
18533 sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
18534 output_asm_insn (buff, ops);
18537 return "";
18540 default:
18541 gcc_unreachable ();
18544 sprintf (buff, templ, load ? "ld" : "st");
18545 output_asm_insn (buff, ops);
18547 return "";
18550 /* Compute and return the length of neon_mov<mode>, where <mode> is
18551 one of VSTRUCT modes: EI, OI, CI or XI. */
18553 arm_attr_length_move_neon (rtx_insn *insn)
18555 rtx reg, mem, addr;
18556 int load;
18557 machine_mode mode;
18559 extract_insn_cached (insn);
18561 if (REG_P (recog_data.operand[0]) && REG_P (recog_data.operand[1]))
18563 mode = GET_MODE (recog_data.operand[0]);
18564 switch (mode)
18566 case EImode:
18567 case OImode:
18568 return 8;
18569 case CImode:
18570 return 12;
18571 case XImode:
18572 return 16;
18573 default:
18574 gcc_unreachable ();
18578 load = REG_P (recog_data.operand[0]);
18579 reg = recog_data.operand[!load];
18580 mem = recog_data.operand[load];
18582 gcc_assert (MEM_P (mem));
18584 mode = GET_MODE (reg);
18585 addr = XEXP (mem, 0);
18587 /* Strip off const from addresses like (const (plus (...))). */
18588 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS)
18589 addr = XEXP (addr, 0);
18591 if (GET_CODE (addr) == LABEL_REF || GET_CODE (addr) == PLUS)
18593 int insns = HARD_REGNO_NREGS (REGNO (reg), mode) / 2;
18594 return insns * 4;
18596 else
18597 return 4;
18600 /* Return nonzero if the offset in the address is an immediate. Otherwise,
18601 return zero. */
18604 arm_address_offset_is_imm (rtx_insn *insn)
18606 rtx mem, addr;
18608 extract_insn_cached (insn);
18610 if (REG_P (recog_data.operand[0]))
18611 return 0;
18613 mem = recog_data.operand[0];
18615 gcc_assert (MEM_P (mem));
18617 addr = XEXP (mem, 0);
18619 if (REG_P (addr)
18620 || (GET_CODE (addr) == PLUS
18621 && REG_P (XEXP (addr, 0))
18622 && CONST_INT_P (XEXP (addr, 1))))
18623 return 1;
18624 else
18625 return 0;
18628 /* Output an ADD r, s, #n where n may be too big for one instruction.
18629 If adding zero to one register, output nothing. */
18630 const char *
18631 output_add_immediate (rtx *operands)
18633 HOST_WIDE_INT n = INTVAL (operands[2]);
18635 if (n != 0 || REGNO (operands[0]) != REGNO (operands[1]))
18637 if (n < 0)
18638 output_multi_immediate (operands,
18639 "sub%?\t%0, %1, %2", "sub%?\t%0, %0, %2", 2,
18640 -n);
18641 else
18642 output_multi_immediate (operands,
18643 "add%?\t%0, %1, %2", "add%?\t%0, %0, %2", 2,
18647 return "";
18650 /* Output a multiple immediate operation.
18651 OPERANDS is the vector of operands referred to in the output patterns.
18652 INSTR1 is the output pattern to use for the first constant.
18653 INSTR2 is the output pattern to use for subsequent constants.
18654 IMMED_OP is the index of the constant slot in OPERANDS.
18655 N is the constant value. */
18656 static const char *
18657 output_multi_immediate (rtx *operands, const char *instr1, const char *instr2,
18658 int immed_op, HOST_WIDE_INT n)
18660 #if HOST_BITS_PER_WIDE_INT > 32
18661 n &= 0xffffffff;
18662 #endif
18664 if (n == 0)
18666 /* Quick and easy output. */
18667 operands[immed_op] = const0_rtx;
18668 output_asm_insn (instr1, operands);
18670 else
18672 int i;
18673 const char * instr = instr1;
18675 /* Note that n is never zero here (which would give no output). */
18676 for (i = 0; i < 32; i += 2)
18678 if (n & (3 << i))
18680 operands[immed_op] = GEN_INT (n & (255 << i));
18681 output_asm_insn (instr, operands);
18682 instr = instr2;
18683 i += 6;
18688 return "";
18691 /* Return the name of a shifter operation. */
18692 static const char *
18693 arm_shift_nmem(enum rtx_code code)
18695 switch (code)
18697 case ASHIFT:
18698 return ARM_LSL_NAME;
18700 case ASHIFTRT:
18701 return "asr";
18703 case LSHIFTRT:
18704 return "lsr";
18706 case ROTATERT:
18707 return "ror";
18709 default:
18710 abort();
18714 /* Return the appropriate ARM instruction for the operation code.
18715 The returned result should not be overwritten. OP is the rtx of the
18716 operation. SHIFT_FIRST_ARG is TRUE if the first argument of the operator
18717 was shifted. */
18718 const char *
18719 arithmetic_instr (rtx op, int shift_first_arg)
18721 switch (GET_CODE (op))
18723 case PLUS:
18724 return "add";
18726 case MINUS:
18727 return shift_first_arg ? "rsb" : "sub";
18729 case IOR:
18730 return "orr";
18732 case XOR:
18733 return "eor";
18735 case AND:
18736 return "and";
18738 case ASHIFT:
18739 case ASHIFTRT:
18740 case LSHIFTRT:
18741 case ROTATERT:
18742 return arm_shift_nmem(GET_CODE(op));
18744 default:
18745 gcc_unreachable ();
18749 /* Ensure valid constant shifts and return the appropriate shift mnemonic
18750 for the operation code. The returned result should not be overwritten.
18751 OP is the rtx code of the shift.
18752 On exit, *AMOUNTP will be -1 if the shift is by a register, or a constant
18753 shift. */
18754 static const char *
18755 shift_op (rtx op, HOST_WIDE_INT *amountp)
18757 const char * mnem;
18758 enum rtx_code code = GET_CODE (op);
18760 switch (code)
18762 case ROTATE:
18763 if (!CONST_INT_P (XEXP (op, 1)))
18765 output_operand_lossage ("invalid shift operand");
18766 return NULL;
18769 code = ROTATERT;
18770 *amountp = 32 - INTVAL (XEXP (op, 1));
18771 mnem = "ror";
18772 break;
18774 case ASHIFT:
18775 case ASHIFTRT:
18776 case LSHIFTRT:
18777 case ROTATERT:
18778 mnem = arm_shift_nmem(code);
18779 if (CONST_INT_P (XEXP (op, 1)))
18781 *amountp = INTVAL (XEXP (op, 1));
18783 else if (REG_P (XEXP (op, 1)))
18785 *amountp = -1;
18786 return mnem;
18788 else
18790 output_operand_lossage ("invalid shift operand");
18791 return NULL;
18793 break;
18795 case MULT:
18796 /* We never have to worry about the amount being other than a
18797 power of 2, since this case can never be reloaded from a reg. */
18798 if (!CONST_INT_P (XEXP (op, 1)))
18800 output_operand_lossage ("invalid shift operand");
18801 return NULL;
18804 *amountp = INTVAL (XEXP (op, 1)) & 0xFFFFFFFF;
18806 /* Amount must be a power of two. */
18807 if (*amountp & (*amountp - 1))
18809 output_operand_lossage ("invalid shift operand");
18810 return NULL;
18813 *amountp = exact_log2 (*amountp);
18814 gcc_assert (IN_RANGE (*amountp, 0, 31));
18815 return ARM_LSL_NAME;
18817 default:
18818 output_operand_lossage ("invalid shift operand");
18819 return NULL;
18822 /* This is not 100% correct, but follows from the desire to merge
18823 multiplication by a power of 2 with the recognizer for a
18824 shift. >=32 is not a valid shift for "lsl", so we must try and
18825 output a shift that produces the correct arithmetical result.
18826 Using lsr #32 is identical except for the fact that the carry bit
18827 is not set correctly if we set the flags; but we never use the
18828 carry bit from such an operation, so we can ignore that. */
18829 if (code == ROTATERT)
18830 /* Rotate is just modulo 32. */
18831 *amountp &= 31;
18832 else if (*amountp != (*amountp & 31))
18834 if (code == ASHIFT)
18835 mnem = "lsr";
18836 *amountp = 32;
18839 /* Shifts of 0 are no-ops. */
18840 if (*amountp == 0)
18841 return NULL;
18843 return mnem;
18846 /* Output a .ascii pseudo-op, keeping track of lengths. This is
18847 because /bin/as is horribly restrictive. The judgement about
18848 whether or not each character is 'printable' (and can be output as
18849 is) or not (and must be printed with an octal escape) must be made
18850 with reference to the *host* character set -- the situation is
18851 similar to that discussed in the comments above pp_c_char in
18852 c-pretty-print.c. */
18854 #define MAX_ASCII_LEN 51
18856 void
18857 output_ascii_pseudo_op (FILE *stream, const unsigned char *p, int len)
18859 int i;
18860 int len_so_far = 0;
18862 fputs ("\t.ascii\t\"", stream);
18864 for (i = 0; i < len; i++)
18866 int c = p[i];
18868 if (len_so_far >= MAX_ASCII_LEN)
18870 fputs ("\"\n\t.ascii\t\"", stream);
18871 len_so_far = 0;
18874 if (ISPRINT (c))
18876 if (c == '\\' || c == '\"')
18878 putc ('\\', stream);
18879 len_so_far++;
18881 putc (c, stream);
18882 len_so_far++;
18884 else
18886 fprintf (stream, "\\%03o", c);
18887 len_so_far += 4;
18891 fputs ("\"\n", stream);
18894 /* Whether a register is callee saved or not. This is necessary because high
18895 registers are marked as caller saved when optimizing for size on Thumb-1
18896 targets despite being callee saved in order to avoid using them. */
18897 #define callee_saved_reg_p(reg) \
18898 (!call_used_regs[reg] \
18899 || (TARGET_THUMB1 && optimize_size \
18900 && reg >= FIRST_HI_REGNUM && reg <= LAST_HI_REGNUM))
18902 /* Compute the register save mask for registers 0 through 12
18903 inclusive. This code is used by arm_compute_save_reg_mask. */
18905 static unsigned long
18906 arm_compute_save_reg0_reg12_mask (void)
18908 unsigned long func_type = arm_current_func_type ();
18909 unsigned long save_reg_mask = 0;
18910 unsigned int reg;
18912 if (IS_INTERRUPT (func_type))
18914 unsigned int max_reg;
18915 /* Interrupt functions must not corrupt any registers,
18916 even call clobbered ones. If this is a leaf function
18917 we can just examine the registers used by the RTL, but
18918 otherwise we have to assume that whatever function is
18919 called might clobber anything, and so we have to save
18920 all the call-clobbered registers as well. */
18921 if (ARM_FUNC_TYPE (func_type) == ARM_FT_FIQ)
18922 /* FIQ handlers have registers r8 - r12 banked, so
18923 we only need to check r0 - r7, Normal ISRs only
18924 bank r14 and r15, so we must check up to r12.
18925 r13 is the stack pointer which is always preserved,
18926 so we do not need to consider it here. */
18927 max_reg = 7;
18928 else
18929 max_reg = 12;
18931 for (reg = 0; reg <= max_reg; reg++)
18932 if (df_regs_ever_live_p (reg)
18933 || (! crtl->is_leaf && call_used_regs[reg]))
18934 save_reg_mask |= (1 << reg);
18936 /* Also save the pic base register if necessary. */
18937 if (flag_pic
18938 && !TARGET_SINGLE_PIC_BASE
18939 && arm_pic_register != INVALID_REGNUM
18940 && crtl->uses_pic_offset_table)
18941 save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
18943 else if (IS_VOLATILE(func_type))
18945 /* For noreturn functions we historically omitted register saves
18946 altogether. However this really messes up debugging. As a
18947 compromise save just the frame pointers. Combined with the link
18948 register saved elsewhere this should be sufficient to get
18949 a backtrace. */
18950 if (frame_pointer_needed)
18951 save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;
18952 if (df_regs_ever_live_p (ARM_HARD_FRAME_POINTER_REGNUM))
18953 save_reg_mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;
18954 if (df_regs_ever_live_p (THUMB_HARD_FRAME_POINTER_REGNUM))
18955 save_reg_mask |= 1 << THUMB_HARD_FRAME_POINTER_REGNUM;
18957 else
18959 /* In the normal case we only need to save those registers
18960 which are call saved and which are used by this function. */
18961 for (reg = 0; reg <= 11; reg++)
18962 if (df_regs_ever_live_p (reg) && callee_saved_reg_p (reg))
18963 save_reg_mask |= (1 << reg);
18965 /* Handle the frame pointer as a special case. */
18966 if (frame_pointer_needed)
18967 save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;
18969 /* If we aren't loading the PIC register,
18970 don't stack it even though it may be live. */
18971 if (flag_pic
18972 && !TARGET_SINGLE_PIC_BASE
18973 && arm_pic_register != INVALID_REGNUM
18974 && (df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM)
18975 || crtl->uses_pic_offset_table))
18976 save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
18978 /* The prologue will copy SP into R0, so save it. */
18979 if (IS_STACKALIGN (func_type))
18980 save_reg_mask |= 1;
18983 /* Save registers so the exception handler can modify them. */
18984 if (crtl->calls_eh_return)
18986 unsigned int i;
18988 for (i = 0; ; i++)
18990 reg = EH_RETURN_DATA_REGNO (i);
18991 if (reg == INVALID_REGNUM)
18992 break;
18993 save_reg_mask |= 1 << reg;
18997 return save_reg_mask;
19000 /* Return true if r3 is live at the start of the function. */
19002 static bool
19003 arm_r3_live_at_start_p (void)
19005 /* Just look at cfg info, which is still close enough to correct at this
19006 point. This gives false positives for broken functions that might use
19007 uninitialized data that happens to be allocated in r3, but who cares? */
19008 return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun)), 3);
19011 /* Compute the number of bytes used to store the static chain register on the
19012 stack, above the stack frame. We need to know this accurately to get the
19013 alignment of the rest of the stack frame correct. */
19015 static int
19016 arm_compute_static_chain_stack_bytes (void)
19018 /* See the defining assertion in arm_expand_prologue. */
19019 if (IS_NESTED (arm_current_func_type ())
19020 && ((TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
19021 || (flag_stack_check == STATIC_BUILTIN_STACK_CHECK
19022 && !df_regs_ever_live_p (LR_REGNUM)))
19023 && arm_r3_live_at_start_p ()
19024 && crtl->args.pretend_args_size == 0)
19025 return 4;
19027 return 0;
19030 /* Compute a bit mask of which registers need to be
19031 saved on the stack for the current function.
19032 This is used by arm_get_frame_offsets, which may add extra registers. */
19034 static unsigned long
19035 arm_compute_save_reg_mask (void)
19037 unsigned int save_reg_mask = 0;
19038 unsigned long func_type = arm_current_func_type ();
19039 unsigned int reg;
19041 if (IS_NAKED (func_type))
19042 /* This should never really happen. */
19043 return 0;
19045 /* If we are creating a stack frame, then we must save the frame pointer,
19046 IP (which will hold the old stack pointer), LR and the PC. */
19047 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
19048 save_reg_mask |=
19049 (1 << ARM_HARD_FRAME_POINTER_REGNUM)
19050 | (1 << IP_REGNUM)
19051 | (1 << LR_REGNUM)
19052 | (1 << PC_REGNUM);
19054 save_reg_mask |= arm_compute_save_reg0_reg12_mask ();
19056 /* Decide if we need to save the link register.
19057 Interrupt routines have their own banked link register,
19058 so they never need to save it.
19059 Otherwise if we do not use the link register we do not need to save
19060 it. If we are pushing other registers onto the stack however, we
19061 can save an instruction in the epilogue by pushing the link register
19062 now and then popping it back into the PC. This incurs extra memory
19063 accesses though, so we only do it when optimizing for size, and only
19064 if we know that we will not need a fancy return sequence. */
19065 if (df_regs_ever_live_p (LR_REGNUM)
19066 || (save_reg_mask
19067 && optimize_size
19068 && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
19069 && !crtl->tail_call_emit
19070 && !crtl->calls_eh_return))
19071 save_reg_mask |= 1 << LR_REGNUM;
19073 if (cfun->machine->lr_save_eliminated)
19074 save_reg_mask &= ~ (1 << LR_REGNUM);
19076 if (TARGET_REALLY_IWMMXT
19077 && ((bit_count (save_reg_mask)
19078 + ARM_NUM_INTS (crtl->args.pretend_args_size +
19079 arm_compute_static_chain_stack_bytes())
19080 ) % 2) != 0)
19082 /* The total number of registers that are going to be pushed
19083 onto the stack is odd. We need to ensure that the stack
19084 is 64-bit aligned before we start to save iWMMXt registers,
19085 and also before we start to create locals. (A local variable
19086 might be a double or long long which we will load/store using
19087 an iWMMXt instruction). Therefore we need to push another
19088 ARM register, so that the stack will be 64-bit aligned. We
19089 try to avoid using the arg registers (r0 -r3) as they might be
19090 used to pass values in a tail call. */
19091 for (reg = 4; reg <= 12; reg++)
19092 if ((save_reg_mask & (1 << reg)) == 0)
19093 break;
19095 if (reg <= 12)
19096 save_reg_mask |= (1 << reg);
19097 else
19099 cfun->machine->sibcall_blocked = 1;
19100 save_reg_mask |= (1 << 3);
19104 /* We may need to push an additional register for use initializing the
19105 PIC base register. */
19106 if (TARGET_THUMB2 && IS_NESTED (func_type) && flag_pic
19107 && (save_reg_mask & THUMB2_WORK_REGS) == 0)
19109 reg = thumb_find_work_register (1 << 4);
19110 if (!call_used_regs[reg])
19111 save_reg_mask |= (1 << reg);
19114 return save_reg_mask;
19117 /* Compute a bit mask of which registers need to be
19118 saved on the stack for the current function. */
19119 static unsigned long
19120 thumb1_compute_save_reg_mask (void)
19122 unsigned long mask;
19123 unsigned reg;
19125 mask = 0;
19126 for (reg = 0; reg < 12; reg ++)
19127 if (df_regs_ever_live_p (reg) && callee_saved_reg_p (reg))
19128 mask |= 1 << reg;
19130 /* Handle the frame pointer as a special case. */
19131 if (frame_pointer_needed)
19132 mask |= 1 << HARD_FRAME_POINTER_REGNUM;
19134 if (flag_pic
19135 && !TARGET_SINGLE_PIC_BASE
19136 && arm_pic_register != INVALID_REGNUM
19137 && crtl->uses_pic_offset_table)
19138 mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
19140 /* See if we might need r11 for calls to _interwork_r11_call_via_rN(). */
19141 if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
19142 mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;
19144 /* LR will also be pushed if any lo regs are pushed. */
19145 if (mask & 0xff || thumb_force_lr_save ())
19146 mask |= (1 << LR_REGNUM);
19148 /* Make sure we have a low work register if we need one.
19149 We will need one if we are going to push a high register,
19150 but we are not currently intending to push a low register. */
19151 if ((mask & 0xff) == 0
19152 && ((mask & 0x0f00) || TARGET_BACKTRACE))
19154 /* Use thumb_find_work_register to choose which register
19155 we will use. If the register is live then we will
19156 have to push it. Use LAST_LO_REGNUM as our fallback
19157 choice for the register to select. */
19158 reg = thumb_find_work_register (1 << LAST_LO_REGNUM);
19159 /* Make sure the register returned by thumb_find_work_register is
19160 not part of the return value. */
19161 if (reg * UNITS_PER_WORD <= (unsigned) arm_size_return_regs ())
19162 reg = LAST_LO_REGNUM;
19164 if (callee_saved_reg_p (reg))
19165 mask |= 1 << reg;
19168 /* The 504 below is 8 bytes less than 512 because there are two possible
19169 alignment words. We can't tell here if they will be present or not so we
19170 have to play it safe and assume that they are. */
19171 if ((CALLER_INTERWORKING_SLOT_SIZE +
19172 ROUND_UP_WORD (get_frame_size ()) +
19173 crtl->outgoing_args_size) >= 504)
19175 /* This is the same as the code in thumb1_expand_prologue() which
19176 determines which register to use for stack decrement. */
19177 for (reg = LAST_ARG_REGNUM + 1; reg <= LAST_LO_REGNUM; reg++)
19178 if (mask & (1 << reg))
19179 break;
19181 if (reg > LAST_LO_REGNUM)
19183 /* Make sure we have a register available for stack decrement. */
19184 mask |= 1 << LAST_LO_REGNUM;
19188 return mask;
19192 /* Return the number of bytes required to save VFP registers. */
19193 static int
19194 arm_get_vfp_saved_size (void)
19196 unsigned int regno;
19197 int count;
19198 int saved;
19200 saved = 0;
19201 /* Space for saved VFP registers. */
19202 if (TARGET_HARD_FLOAT)
19204 count = 0;
19205 for (regno = FIRST_VFP_REGNUM;
19206 regno < LAST_VFP_REGNUM;
19207 regno += 2)
19209 if ((!df_regs_ever_live_p (regno) || call_used_regs[regno])
19210 && (!df_regs_ever_live_p (regno + 1) || call_used_regs[regno + 1]))
19212 if (count > 0)
19214 /* Workaround ARM10 VFPr1 bug. */
19215 if (count == 2 && !arm_arch6)
19216 count++;
19217 saved += count * 8;
19219 count = 0;
19221 else
19222 count++;
19224 if (count > 0)
19226 if (count == 2 && !arm_arch6)
19227 count++;
19228 saved += count * 8;
19231 return saved;
19235 /* Generate a function exit sequence. If REALLY_RETURN is false, then do
19236 everything bar the final return instruction. If simple_return is true,
19237 then do not output epilogue, because it has already been emitted in RTL. */
19238 const char *
19239 output_return_instruction (rtx operand, bool really_return, bool reverse,
19240 bool simple_return)
19242 char conditional[10];
19243 char instr[100];
19244 unsigned reg;
19245 unsigned long live_regs_mask;
19246 unsigned long func_type;
19247 arm_stack_offsets *offsets;
19249 func_type = arm_current_func_type ();
19251 if (IS_NAKED (func_type))
19252 return "";
19254 if (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN)
19256 /* If this function was declared non-returning, and we have
19257 found a tail call, then we have to trust that the called
19258 function won't return. */
19259 if (really_return)
19261 rtx ops[2];
19263 /* Otherwise, trap an attempted return by aborting. */
19264 ops[0] = operand;
19265 ops[1] = gen_rtx_SYMBOL_REF (Pmode, NEED_PLT_RELOC ? "abort(PLT)"
19266 : "abort");
19267 assemble_external_libcall (ops[1]);
19268 output_asm_insn (reverse ? "bl%D0\t%a1" : "bl%d0\t%a1", ops);
19271 return "";
19274 gcc_assert (!cfun->calls_alloca || really_return);
19276 sprintf (conditional, "%%?%%%c0", reverse ? 'D' : 'd');
19278 cfun->machine->return_used_this_function = 1;
19280 offsets = arm_get_frame_offsets ();
19281 live_regs_mask = offsets->saved_regs_mask;
19283 if (!simple_return && live_regs_mask)
19285 const char * return_reg;
19287 /* If we do not have any special requirements for function exit
19288 (e.g. interworking) then we can load the return address
19289 directly into the PC. Otherwise we must load it into LR. */
19290 if (really_return
19291 && !IS_CMSE_ENTRY (func_type)
19292 && (IS_INTERRUPT (func_type) || !TARGET_INTERWORK))
19293 return_reg = reg_names[PC_REGNUM];
19294 else
19295 return_reg = reg_names[LR_REGNUM];
19297 if ((live_regs_mask & (1 << IP_REGNUM)) == (1 << IP_REGNUM))
19299 /* There are three possible reasons for the IP register
19300 being saved. 1) a stack frame was created, in which case
19301 IP contains the old stack pointer, or 2) an ISR routine
19302 corrupted it, or 3) it was saved to align the stack on
19303 iWMMXt. In case 1, restore IP into SP, otherwise just
19304 restore IP. */
19305 if (frame_pointer_needed)
19307 live_regs_mask &= ~ (1 << IP_REGNUM);
19308 live_regs_mask |= (1 << SP_REGNUM);
19310 else
19311 gcc_assert (IS_INTERRUPT (func_type) || TARGET_REALLY_IWMMXT);
19314 /* On some ARM architectures it is faster to use LDR rather than
19315 LDM to load a single register. On other architectures, the
19316 cost is the same. In 26 bit mode, or for exception handlers,
19317 we have to use LDM to load the PC so that the CPSR is also
19318 restored. */
19319 for (reg = 0; reg <= LAST_ARM_REGNUM; reg++)
19320 if (live_regs_mask == (1U << reg))
19321 break;
19323 if (reg <= LAST_ARM_REGNUM
19324 && (reg != LR_REGNUM
19325 || ! really_return
19326 || ! IS_INTERRUPT (func_type)))
19328 sprintf (instr, "ldr%s\t%%|%s, [%%|sp], #4", conditional,
19329 (reg == LR_REGNUM) ? return_reg : reg_names[reg]);
19331 else
19333 char *p;
19334 int first = 1;
19336 /* Generate the load multiple instruction to restore the
19337 registers. Note we can get here, even if
19338 frame_pointer_needed is true, but only if sp already
19339 points to the base of the saved core registers. */
19340 if (live_regs_mask & (1 << SP_REGNUM))
19342 unsigned HOST_WIDE_INT stack_adjust;
19344 stack_adjust = offsets->outgoing_args - offsets->saved_regs;
19345 gcc_assert (stack_adjust == 0 || stack_adjust == 4);
19347 if (stack_adjust && arm_arch5 && TARGET_ARM)
19348 sprintf (instr, "ldmib%s\t%%|sp, {", conditional);
19349 else
19351 /* If we can't use ldmib (SA110 bug),
19352 then try to pop r3 instead. */
19353 if (stack_adjust)
19354 live_regs_mask |= 1 << 3;
19356 sprintf (instr, "ldmfd%s\t%%|sp, {", conditional);
19359 /* For interrupt returns we have to use an LDM rather than
19360 a POP so that we can use the exception return variant. */
19361 else if (IS_INTERRUPT (func_type))
19362 sprintf (instr, "ldmfd%s\t%%|sp!, {", conditional);
19363 else
19364 sprintf (instr, "pop%s\t{", conditional);
19366 p = instr + strlen (instr);
19368 for (reg = 0; reg <= SP_REGNUM; reg++)
19369 if (live_regs_mask & (1 << reg))
19371 int l = strlen (reg_names[reg]);
19373 if (first)
19374 first = 0;
19375 else
19377 memcpy (p, ", ", 2);
19378 p += 2;
19381 memcpy (p, "%|", 2);
19382 memcpy (p + 2, reg_names[reg], l);
19383 p += l + 2;
19386 if (live_regs_mask & (1 << LR_REGNUM))
19388 sprintf (p, "%s%%|%s}", first ? "" : ", ", return_reg);
19389 /* If returning from an interrupt, restore the CPSR. */
19390 if (IS_INTERRUPT (func_type))
19391 strcat (p, "^");
19393 else
19394 strcpy (p, "}");
19397 output_asm_insn (instr, & operand);
19399 /* See if we need to generate an extra instruction to
19400 perform the actual function return. */
19401 if (really_return
19402 && func_type != ARM_FT_INTERWORKED
19403 && (live_regs_mask & (1 << LR_REGNUM)) != 0)
19405 /* The return has already been handled
19406 by loading the LR into the PC. */
19407 return "";
19411 if (really_return)
19413 switch ((int) ARM_FUNC_TYPE (func_type))
19415 case ARM_FT_ISR:
19416 case ARM_FT_FIQ:
19417 /* ??? This is wrong for unified assembly syntax. */
19418 sprintf (instr, "sub%ss\t%%|pc, %%|lr, #4", conditional);
19419 break;
19421 case ARM_FT_INTERWORKED:
19422 gcc_assert (arm_arch5 || arm_arch4t);
19423 sprintf (instr, "bx%s\t%%|lr", conditional);
19424 break;
19426 case ARM_FT_EXCEPTION:
19427 /* ??? This is wrong for unified assembly syntax. */
19428 sprintf (instr, "mov%ss\t%%|pc, %%|lr", conditional);
19429 break;
19431 default:
19432 if (IS_CMSE_ENTRY (func_type))
19434 /* Check if we have to clear the 'GE bits' which is only used if
19435 parallel add and subtraction instructions are available. */
19436 if (TARGET_INT_SIMD)
19437 snprintf (instr, sizeof (instr),
19438 "msr%s\tAPSR_nzcvqg, %%|lr", conditional);
19439 else
19440 snprintf (instr, sizeof (instr),
19441 "msr%s\tAPSR_nzcvq, %%|lr", conditional);
19443 output_asm_insn (instr, & operand);
19444 if (TARGET_HARD_FLOAT && !TARGET_THUMB1)
19446 /* Clear the cumulative exception-status bits (0-4,7) and the
19447 condition code bits (28-31) of the FPSCR. We need to
19448 remember to clear the first scratch register used (IP) and
19449 save and restore the second (r4). */
19450 snprintf (instr, sizeof (instr), "push\t{%%|r4}");
19451 output_asm_insn (instr, & operand);
19452 snprintf (instr, sizeof (instr), "vmrs\t%%|ip, fpscr");
19453 output_asm_insn (instr, & operand);
19454 snprintf (instr, sizeof (instr), "movw\t%%|r4, #65376");
19455 output_asm_insn (instr, & operand);
19456 snprintf (instr, sizeof (instr), "movt\t%%|r4, #4095");
19457 output_asm_insn (instr, & operand);
19458 snprintf (instr, sizeof (instr), "and\t%%|ip, %%|r4");
19459 output_asm_insn (instr, & operand);
19460 snprintf (instr, sizeof (instr), "vmsr\tfpscr, %%|ip");
19461 output_asm_insn (instr, & operand);
19462 snprintf (instr, sizeof (instr), "pop\t{%%|r4}");
19463 output_asm_insn (instr, & operand);
19464 snprintf (instr, sizeof (instr), "mov\t%%|ip, %%|lr");
19465 output_asm_insn (instr, & operand);
19467 snprintf (instr, sizeof (instr), "bxns\t%%|lr");
19469 /* Use bx if it's available. */
19470 else if (arm_arch5 || arm_arch4t)
19471 sprintf (instr, "bx%s\t%%|lr", conditional);
19472 else
19473 sprintf (instr, "mov%s\t%%|pc, %%|lr", conditional);
19474 break;
19477 output_asm_insn (instr, & operand);
19480 return "";
19483 /* Output in FILE asm statements needed to declare the NAME of the function
19484 defined by its DECL node. */
19486 void
19487 arm_asm_declare_function_name (FILE *file, const char *name, tree decl)
19489 size_t cmse_name_len;
19490 char *cmse_name = 0;
19491 char cmse_prefix[] = "__acle_se_";
19493 /* When compiling with ARMv8-M Security Extensions enabled, we should print an
19494 extra function label for each function with the 'cmse_nonsecure_entry'
19495 attribute. This extra function label should be prepended with
19496 '__acle_se_', telling the linker that it needs to create secure gateway
19497 veneers for this function. */
19498 if (use_cmse && lookup_attribute ("cmse_nonsecure_entry",
19499 DECL_ATTRIBUTES (decl)))
19501 cmse_name_len = sizeof (cmse_prefix) + strlen (name);
19502 cmse_name = XALLOCAVEC (char, cmse_name_len);
19503 snprintf (cmse_name, cmse_name_len, "%s%s", cmse_prefix, name);
19504 targetm.asm_out.globalize_label (file, cmse_name);
19506 ARM_DECLARE_FUNCTION_NAME (file, cmse_name, decl);
19507 ASM_OUTPUT_TYPE_DIRECTIVE (file, cmse_name, "function");
19510 ARM_DECLARE_FUNCTION_NAME (file, name, decl);
19511 ASM_OUTPUT_TYPE_DIRECTIVE (file, name, "function");
19512 ASM_DECLARE_RESULT (file, DECL_RESULT (decl));
19513 ASM_OUTPUT_LABEL (file, name);
19515 if (cmse_name)
19516 ASM_OUTPUT_LABEL (file, cmse_name);
19518 ARM_OUTPUT_FN_UNWIND (file, TRUE);
19521 /* Write the function name into the code section, directly preceding
19522 the function prologue.
19524 Code will be output similar to this:
19526 .ascii "arm_poke_function_name", 0
19527 .align
19529 .word 0xff000000 + (t1 - t0)
19530 arm_poke_function_name
19531 mov ip, sp
19532 stmfd sp!, {fp, ip, lr, pc}
19533 sub fp, ip, #4
19535 When performing a stack backtrace, code can inspect the value
19536 of 'pc' stored at 'fp' + 0. If the trace function then looks
19537 at location pc - 12 and the top 8 bits are set, then we know
19538 that there is a function name embedded immediately preceding this
19539 location and has length ((pc[-3]) & 0xff000000).
19541 We assume that pc is declared as a pointer to an unsigned long.
19543 It is of no benefit to output the function name if we are assembling
19544 a leaf function. These function types will not contain a stack
19545 backtrace structure, therefore it is not possible to determine the
19546 function name. */
19547 void
19548 arm_poke_function_name (FILE *stream, const char *name)
19550 unsigned long alignlength;
19551 unsigned long length;
19552 rtx x;
19554 length = strlen (name) + 1;
19555 alignlength = ROUND_UP_WORD (length);
19557 ASM_OUTPUT_ASCII (stream, name, length);
19558 ASM_OUTPUT_ALIGN (stream, 2);
19559 x = GEN_INT ((unsigned HOST_WIDE_INT) 0xff000000 + alignlength);
19560 assemble_aligned_integer (UNITS_PER_WORD, x);
19563 /* Place some comments into the assembler stream
19564 describing the current function. */
19565 static void
19566 arm_output_function_prologue (FILE *f, HOST_WIDE_INT frame_size)
19568 unsigned long func_type;
19570 /* Sanity check. */
19571 gcc_assert (!arm_ccfsm_state && !arm_target_insn);
19573 func_type = arm_current_func_type ();
19575 switch ((int) ARM_FUNC_TYPE (func_type))
19577 default:
19578 case ARM_FT_NORMAL:
19579 break;
19580 case ARM_FT_INTERWORKED:
19581 asm_fprintf (f, "\t%@ Function supports interworking.\n");
19582 break;
19583 case ARM_FT_ISR:
19584 asm_fprintf (f, "\t%@ Interrupt Service Routine.\n");
19585 break;
19586 case ARM_FT_FIQ:
19587 asm_fprintf (f, "\t%@ Fast Interrupt Service Routine.\n");
19588 break;
19589 case ARM_FT_EXCEPTION:
19590 asm_fprintf (f, "\t%@ ARM Exception Handler.\n");
19591 break;
19594 if (IS_NAKED (func_type))
19595 asm_fprintf (f, "\t%@ Naked Function: prologue and epilogue provided by programmer.\n");
19597 if (IS_VOLATILE (func_type))
19598 asm_fprintf (f, "\t%@ Volatile: function does not return.\n");
19600 if (IS_NESTED (func_type))
19601 asm_fprintf (f, "\t%@ Nested: function declared inside another function.\n");
19602 if (IS_STACKALIGN (func_type))
19603 asm_fprintf (f, "\t%@ Stack Align: May be called with mis-aligned SP.\n");
19604 if (IS_CMSE_ENTRY (func_type))
19605 asm_fprintf (f, "\t%@ Non-secure entry function: called from non-secure code.\n");
19607 asm_fprintf (f, "\t%@ args = %d, pretend = %d, frame = %wd\n",
19608 crtl->args.size,
19609 crtl->args.pretend_args_size, frame_size);
19611 asm_fprintf (f, "\t%@ frame_needed = %d, uses_anonymous_args = %d\n",
19612 frame_pointer_needed,
19613 cfun->machine->uses_anonymous_args);
19615 if (cfun->machine->lr_save_eliminated)
19616 asm_fprintf (f, "\t%@ link register save eliminated.\n");
19618 if (crtl->calls_eh_return)
19619 asm_fprintf (f, "\t@ Calls __builtin_eh_return.\n");
19623 static void
19624 arm_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
19625 HOST_WIDE_INT frame_size ATTRIBUTE_UNUSED)
19627 arm_stack_offsets *offsets;
19629 if (TARGET_THUMB1)
19631 int regno;
19633 /* Emit any call-via-reg trampolines that are needed for v4t support
19634 of call_reg and call_value_reg type insns. */
19635 for (regno = 0; regno < LR_REGNUM; regno++)
19637 rtx label = cfun->machine->call_via[regno];
19639 if (label != NULL)
19641 switch_to_section (function_section (current_function_decl));
19642 targetm.asm_out.internal_label (asm_out_file, "L",
19643 CODE_LABEL_NUMBER (label));
19644 asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
19648 /* ??? Probably not safe to set this here, since it assumes that a
19649 function will be emitted as assembly immediately after we generate
19650 RTL for it. This does not happen for inline functions. */
19651 cfun->machine->return_used_this_function = 0;
19653 else /* TARGET_32BIT */
19655 /* We need to take into account any stack-frame rounding. */
19656 offsets = arm_get_frame_offsets ();
19658 gcc_assert (!use_return_insn (FALSE, NULL)
19659 || (cfun->machine->return_used_this_function != 0)
19660 || offsets->saved_regs == offsets->outgoing_args
19661 || frame_pointer_needed);
19665 /* Generate and emit a sequence of insns equivalent to PUSH, but using
19666 STR and STRD. If an even number of registers are being pushed, one
19667 or more STRD patterns are created for each register pair. If an
19668 odd number of registers are pushed, emit an initial STR followed by
19669 as many STRD instructions as are needed. This works best when the
19670 stack is initially 64-bit aligned (the normal case), since it
19671 ensures that each STRD is also 64-bit aligned. */
19672 static void
19673 thumb2_emit_strd_push (unsigned long saved_regs_mask)
19675 int num_regs = 0;
19676 int i;
19677 int regno;
19678 rtx par = NULL_RTX;
19679 rtx dwarf = NULL_RTX;
19680 rtx tmp;
19681 bool first = true;
19683 num_regs = bit_count (saved_regs_mask);
19685 /* Must be at least one register to save, and can't save SP or PC. */
19686 gcc_assert (num_regs > 0 && num_regs <= 14);
19687 gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
19688 gcc_assert (!(saved_regs_mask & (1 << PC_REGNUM)));
19690 /* Create sequence for DWARF info. All the frame-related data for
19691 debugging is held in this wrapper. */
19692 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_regs + 1));
19694 /* Describe the stack adjustment. */
19695 tmp = gen_rtx_SET (stack_pointer_rtx,
19696 plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
19697 RTX_FRAME_RELATED_P (tmp) = 1;
19698 XVECEXP (dwarf, 0, 0) = tmp;
19700 /* Find the first register. */
19701 for (regno = 0; (saved_regs_mask & (1 << regno)) == 0; regno++)
19704 i = 0;
19706 /* If there's an odd number of registers to push. Start off by
19707 pushing a single register. This ensures that subsequent strd
19708 operations are dword aligned (assuming that SP was originally
19709 64-bit aligned). */
19710 if ((num_regs & 1) != 0)
19712 rtx reg, mem, insn;
19714 reg = gen_rtx_REG (SImode, regno);
19715 if (num_regs == 1)
19716 mem = gen_frame_mem (Pmode, gen_rtx_PRE_DEC (Pmode,
19717 stack_pointer_rtx));
19718 else
19719 mem = gen_frame_mem (Pmode,
19720 gen_rtx_PRE_MODIFY
19721 (Pmode, stack_pointer_rtx,
19722 plus_constant (Pmode, stack_pointer_rtx,
19723 -4 * num_regs)));
19725 tmp = gen_rtx_SET (mem, reg);
19726 RTX_FRAME_RELATED_P (tmp) = 1;
19727 insn = emit_insn (tmp);
19728 RTX_FRAME_RELATED_P (insn) = 1;
19729 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
19730 tmp = gen_rtx_SET (gen_frame_mem (Pmode, stack_pointer_rtx), reg);
19731 RTX_FRAME_RELATED_P (tmp) = 1;
19732 i++;
19733 regno++;
19734 XVECEXP (dwarf, 0, i) = tmp;
19735 first = false;
19738 while (i < num_regs)
19739 if (saved_regs_mask & (1 << regno))
19741 rtx reg1, reg2, mem1, mem2;
19742 rtx tmp0, tmp1, tmp2;
19743 int regno2;
19745 /* Find the register to pair with this one. */
19746 for (regno2 = regno + 1; (saved_regs_mask & (1 << regno2)) == 0;
19747 regno2++)
19750 reg1 = gen_rtx_REG (SImode, regno);
19751 reg2 = gen_rtx_REG (SImode, regno2);
19753 if (first)
19755 rtx insn;
19757 first = false;
19758 mem1 = gen_frame_mem (Pmode, plus_constant (Pmode,
19759 stack_pointer_rtx,
19760 -4 * num_regs));
19761 mem2 = gen_frame_mem (Pmode, plus_constant (Pmode,
19762 stack_pointer_rtx,
19763 -4 * (num_regs - 1)));
19764 tmp0 = gen_rtx_SET (stack_pointer_rtx,
19765 plus_constant (Pmode, stack_pointer_rtx,
19766 -4 * (num_regs)));
19767 tmp1 = gen_rtx_SET (mem1, reg1);
19768 tmp2 = gen_rtx_SET (mem2, reg2);
19769 RTX_FRAME_RELATED_P (tmp0) = 1;
19770 RTX_FRAME_RELATED_P (tmp1) = 1;
19771 RTX_FRAME_RELATED_P (tmp2) = 1;
19772 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (3));
19773 XVECEXP (par, 0, 0) = tmp0;
19774 XVECEXP (par, 0, 1) = tmp1;
19775 XVECEXP (par, 0, 2) = tmp2;
19776 insn = emit_insn (par);
19777 RTX_FRAME_RELATED_P (insn) = 1;
19778 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
19780 else
19782 mem1 = gen_frame_mem (Pmode, plus_constant (Pmode,
19783 stack_pointer_rtx,
19784 4 * i));
19785 mem2 = gen_frame_mem (Pmode, plus_constant (Pmode,
19786 stack_pointer_rtx,
19787 4 * (i + 1)));
19788 tmp1 = gen_rtx_SET (mem1, reg1);
19789 tmp2 = gen_rtx_SET (mem2, reg2);
19790 RTX_FRAME_RELATED_P (tmp1) = 1;
19791 RTX_FRAME_RELATED_P (tmp2) = 1;
19792 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
19793 XVECEXP (par, 0, 0) = tmp1;
19794 XVECEXP (par, 0, 1) = tmp2;
19795 emit_insn (par);
19798 /* Create unwind information. This is an approximation. */
19799 tmp1 = gen_rtx_SET (gen_frame_mem (Pmode,
19800 plus_constant (Pmode,
19801 stack_pointer_rtx,
19802 4 * i)),
19803 reg1);
19804 tmp2 = gen_rtx_SET (gen_frame_mem (Pmode,
19805 plus_constant (Pmode,
19806 stack_pointer_rtx,
19807 4 * (i + 1))),
19808 reg2);
19810 RTX_FRAME_RELATED_P (tmp1) = 1;
19811 RTX_FRAME_RELATED_P (tmp2) = 1;
19812 XVECEXP (dwarf, 0, i + 1) = tmp1;
19813 XVECEXP (dwarf, 0, i + 2) = tmp2;
19814 i += 2;
19815 regno = regno2 + 1;
19817 else
19818 regno++;
19820 return;
19823 /* STRD in ARM mode requires consecutive registers. This function emits STRD
19824 whenever possible, otherwise it emits single-word stores. The first store
19825 also allocates stack space for all saved registers, using writeback with
19826 post-addressing mode. All other stores use offset addressing. If no STRD
19827 can be emitted, this function emits a sequence of single-word stores,
19828 and not an STM as before, because single-word stores provide more freedom
19829 scheduling and can be turned into an STM by peephole optimizations. */
19830 static void
19831 arm_emit_strd_push (unsigned long saved_regs_mask)
19833 int num_regs = 0;
19834 int i, j, dwarf_index = 0;
19835 int offset = 0;
19836 rtx dwarf = NULL_RTX;
19837 rtx insn = NULL_RTX;
19838 rtx tmp, mem;
19840 /* TODO: A more efficient code can be emitted by changing the
19841 layout, e.g., first push all pairs that can use STRD to keep the
19842 stack aligned, and then push all other registers. */
19843 for (i = 0; i <= LAST_ARM_REGNUM; i++)
19844 if (saved_regs_mask & (1 << i))
19845 num_regs++;
19847 gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
19848 gcc_assert (!(saved_regs_mask & (1 << PC_REGNUM)));
19849 gcc_assert (num_regs > 0);
19851 /* Create sequence for DWARF info. */
19852 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_regs + 1));
19854 /* For dwarf info, we generate explicit stack update. */
19855 tmp = gen_rtx_SET (stack_pointer_rtx,
19856 plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
19857 RTX_FRAME_RELATED_P (tmp) = 1;
19858 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
19860 /* Save registers. */
19861 offset = - 4 * num_regs;
19862 j = 0;
19863 while (j <= LAST_ARM_REGNUM)
19864 if (saved_regs_mask & (1 << j))
19866 if ((j % 2 == 0)
19867 && (saved_regs_mask & (1 << (j + 1))))
19869 /* Current register and previous register form register pair for
19870 which STRD can be generated. */
19871 if (offset < 0)
19873 /* Allocate stack space for all saved registers. */
19874 tmp = plus_constant (Pmode, stack_pointer_rtx, offset);
19875 tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
19876 mem = gen_frame_mem (DImode, tmp);
19877 offset = 0;
19879 else if (offset > 0)
19880 mem = gen_frame_mem (DImode,
19881 plus_constant (Pmode,
19882 stack_pointer_rtx,
19883 offset));
19884 else
19885 mem = gen_frame_mem (DImode, stack_pointer_rtx);
19887 tmp = gen_rtx_SET (mem, gen_rtx_REG (DImode, j));
19888 RTX_FRAME_RELATED_P (tmp) = 1;
19889 tmp = emit_insn (tmp);
19891 /* Record the first store insn. */
19892 if (dwarf_index == 1)
19893 insn = tmp;
19895 /* Generate dwarf info. */
19896 mem = gen_frame_mem (SImode,
19897 plus_constant (Pmode,
19898 stack_pointer_rtx,
19899 offset));
19900 tmp = gen_rtx_SET (mem, gen_rtx_REG (SImode, j));
19901 RTX_FRAME_RELATED_P (tmp) = 1;
19902 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
19904 mem = gen_frame_mem (SImode,
19905 plus_constant (Pmode,
19906 stack_pointer_rtx,
19907 offset + 4));
19908 tmp = gen_rtx_SET (mem, gen_rtx_REG (SImode, j + 1));
19909 RTX_FRAME_RELATED_P (tmp) = 1;
19910 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
19912 offset += 8;
19913 j += 2;
19915 else
19917 /* Emit a single word store. */
19918 if (offset < 0)
19920 /* Allocate stack space for all saved registers. */
19921 tmp = plus_constant (Pmode, stack_pointer_rtx, offset);
19922 tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
19923 mem = gen_frame_mem (SImode, tmp);
19924 offset = 0;
19926 else if (offset > 0)
19927 mem = gen_frame_mem (SImode,
19928 plus_constant (Pmode,
19929 stack_pointer_rtx,
19930 offset));
19931 else
19932 mem = gen_frame_mem (SImode, stack_pointer_rtx);
19934 tmp = gen_rtx_SET (mem, gen_rtx_REG (SImode, j));
19935 RTX_FRAME_RELATED_P (tmp) = 1;
19936 tmp = emit_insn (tmp);
19938 /* Record the first store insn. */
19939 if (dwarf_index == 1)
19940 insn = tmp;
19942 /* Generate dwarf info. */
19943 mem = gen_frame_mem (SImode,
19944 plus_constant(Pmode,
19945 stack_pointer_rtx,
19946 offset));
19947 tmp = gen_rtx_SET (mem, gen_rtx_REG (SImode, j));
19948 RTX_FRAME_RELATED_P (tmp) = 1;
19949 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
19951 offset += 4;
19952 j += 1;
19955 else
19956 j++;
19958 /* Attach dwarf info to the first insn we generate. */
19959 gcc_assert (insn != NULL_RTX);
19960 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
19961 RTX_FRAME_RELATED_P (insn) = 1;
19964 /* Generate and emit an insn that we will recognize as a push_multi.
19965 Unfortunately, since this insn does not reflect very well the actual
19966 semantics of the operation, we need to annotate the insn for the benefit
19967 of DWARF2 frame unwind information. DWARF_REGS_MASK is a subset of
19968 MASK for registers that should be annotated for DWARF2 frame unwind
19969 information. */
19970 static rtx
19971 emit_multi_reg_push (unsigned long mask, unsigned long dwarf_regs_mask)
19973 int num_regs = 0;
19974 int num_dwarf_regs = 0;
19975 int i, j;
19976 rtx par;
19977 rtx dwarf;
19978 int dwarf_par_index;
19979 rtx tmp, reg;
19981 /* We don't record the PC in the dwarf frame information. */
19982 dwarf_regs_mask &= ~(1 << PC_REGNUM);
19984 for (i = 0; i <= LAST_ARM_REGNUM; i++)
19986 if (mask & (1 << i))
19987 num_regs++;
19988 if (dwarf_regs_mask & (1 << i))
19989 num_dwarf_regs++;
19992 gcc_assert (num_regs && num_regs <= 16);
19993 gcc_assert ((dwarf_regs_mask & ~mask) == 0);
19995 /* For the body of the insn we are going to generate an UNSPEC in
19996 parallel with several USEs. This allows the insn to be recognized
19997 by the push_multi pattern in the arm.md file.
19999 The body of the insn looks something like this:
20001 (parallel [
20002 (set (mem:BLK (pre_modify:SI (reg:SI sp)
20003 (const_int:SI <num>)))
20004 (unspec:BLK [(reg:SI r4)] UNSPEC_PUSH_MULT))
20005 (use (reg:SI XX))
20006 (use (reg:SI YY))
20010 For the frame note however, we try to be more explicit and actually
20011 show each register being stored into the stack frame, plus a (single)
20012 decrement of the stack pointer. We do it this way in order to be
20013 friendly to the stack unwinding code, which only wants to see a single
20014 stack decrement per instruction. The RTL we generate for the note looks
20015 something like this:
20017 (sequence [
20018 (set (reg:SI sp) (plus:SI (reg:SI sp) (const_int -20)))
20019 (set (mem:SI (reg:SI sp)) (reg:SI r4))
20020 (set (mem:SI (plus:SI (reg:SI sp) (const_int 4))) (reg:SI XX))
20021 (set (mem:SI (plus:SI (reg:SI sp) (const_int 8))) (reg:SI YY))
20025 FIXME:: In an ideal world the PRE_MODIFY would not exist and
20026 instead we'd have a parallel expression detailing all
20027 the stores to the various memory addresses so that debug
20028 information is more up-to-date. Remember however while writing
20029 this to take care of the constraints with the push instruction.
20031 Note also that this has to be taken care of for the VFP registers.
20033 For more see PR43399. */
20035 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs));
20036 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_dwarf_regs + 1));
20037 dwarf_par_index = 1;
20039 for (i = 0; i <= LAST_ARM_REGNUM; i++)
20041 if (mask & (1 << i))
20043 reg = gen_rtx_REG (SImode, i);
20045 XVECEXP (par, 0, 0)
20046 = gen_rtx_SET (gen_frame_mem
20047 (BLKmode,
20048 gen_rtx_PRE_MODIFY (Pmode,
20049 stack_pointer_rtx,
20050 plus_constant
20051 (Pmode, stack_pointer_rtx,
20052 -4 * num_regs))
20054 gen_rtx_UNSPEC (BLKmode,
20055 gen_rtvec (1, reg),
20056 UNSPEC_PUSH_MULT));
20058 if (dwarf_regs_mask & (1 << i))
20060 tmp = gen_rtx_SET (gen_frame_mem (SImode, stack_pointer_rtx),
20061 reg);
20062 RTX_FRAME_RELATED_P (tmp) = 1;
20063 XVECEXP (dwarf, 0, dwarf_par_index++) = tmp;
20066 break;
20070 for (j = 1, i++; j < num_regs; i++)
20072 if (mask & (1 << i))
20074 reg = gen_rtx_REG (SImode, i);
20076 XVECEXP (par, 0, j) = gen_rtx_USE (VOIDmode, reg);
20078 if (dwarf_regs_mask & (1 << i))
20081 = gen_rtx_SET (gen_frame_mem
20082 (SImode,
20083 plus_constant (Pmode, stack_pointer_rtx,
20084 4 * j)),
20085 reg);
20086 RTX_FRAME_RELATED_P (tmp) = 1;
20087 XVECEXP (dwarf, 0, dwarf_par_index++) = tmp;
20090 j++;
20094 par = emit_insn (par);
20096 tmp = gen_rtx_SET (stack_pointer_rtx,
20097 plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
20098 RTX_FRAME_RELATED_P (tmp) = 1;
20099 XVECEXP (dwarf, 0, 0) = tmp;
20101 add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
20103 return par;
20106 /* Add a REG_CFA_ADJUST_CFA REG note to INSN.
20107 SIZE is the offset to be adjusted.
20108 DEST and SRC might be stack_pointer_rtx or hard_frame_pointer_rtx. */
20109 static void
20110 arm_add_cfa_adjust_cfa_note (rtx insn, int size, rtx dest, rtx src)
20112 rtx dwarf;
20114 RTX_FRAME_RELATED_P (insn) = 1;
20115 dwarf = gen_rtx_SET (dest, plus_constant (Pmode, src, size));
20116 add_reg_note (insn, REG_CFA_ADJUST_CFA, dwarf);
20119 /* Generate and emit an insn pattern that we will recognize as a pop_multi.
20120 SAVED_REGS_MASK shows which registers need to be restored.
20122 Unfortunately, since this insn does not reflect very well the actual
20123 semantics of the operation, we need to annotate the insn for the benefit
20124 of DWARF2 frame unwind information. */
20125 static void
20126 arm_emit_multi_reg_pop (unsigned long saved_regs_mask)
20128 int num_regs = 0;
20129 int i, j;
20130 rtx par;
20131 rtx dwarf = NULL_RTX;
20132 rtx tmp, reg;
20133 bool return_in_pc = saved_regs_mask & (1 << PC_REGNUM);
20134 int offset_adj;
20135 int emit_update;
20137 offset_adj = return_in_pc ? 1 : 0;
20138 for (i = 0; i <= LAST_ARM_REGNUM; i++)
20139 if (saved_regs_mask & (1 << i))
20140 num_regs++;
20142 gcc_assert (num_regs && num_regs <= 16);
20144 /* If SP is in reglist, then we don't emit SP update insn. */
20145 emit_update = (saved_regs_mask & (1 << SP_REGNUM)) ? 0 : 1;
20147 /* The parallel needs to hold num_regs SETs
20148 and one SET for the stack update. */
20149 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs + emit_update + offset_adj));
20151 if (return_in_pc)
20152 XVECEXP (par, 0, 0) = ret_rtx;
20154 if (emit_update)
20156 /* Increment the stack pointer, based on there being
20157 num_regs 4-byte registers to restore. */
20158 tmp = gen_rtx_SET (stack_pointer_rtx,
20159 plus_constant (Pmode,
20160 stack_pointer_rtx,
20161 4 * num_regs));
20162 RTX_FRAME_RELATED_P (tmp) = 1;
20163 XVECEXP (par, 0, offset_adj) = tmp;
20166 /* Now restore every reg, which may include PC. */
20167 for (j = 0, i = 0; j < num_regs; i++)
20168 if (saved_regs_mask & (1 << i))
20170 reg = gen_rtx_REG (SImode, i);
20171 if ((num_regs == 1) && emit_update && !return_in_pc)
20173 /* Emit single load with writeback. */
20174 tmp = gen_frame_mem (SImode,
20175 gen_rtx_POST_INC (Pmode,
20176 stack_pointer_rtx));
20177 tmp = emit_insn (gen_rtx_SET (reg, tmp));
20178 REG_NOTES (tmp) = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20179 return;
20182 tmp = gen_rtx_SET (reg,
20183 gen_frame_mem
20184 (SImode,
20185 plus_constant (Pmode, stack_pointer_rtx, 4 * j)));
20186 RTX_FRAME_RELATED_P (tmp) = 1;
20187 XVECEXP (par, 0, j + emit_update + offset_adj) = tmp;
20189 /* We need to maintain a sequence for DWARF info too. As dwarf info
20190 should not have PC, skip PC. */
20191 if (i != PC_REGNUM)
20192 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20194 j++;
20197 if (return_in_pc)
20198 par = emit_jump_insn (par);
20199 else
20200 par = emit_insn (par);
20202 REG_NOTES (par) = dwarf;
20203 if (!return_in_pc)
20204 arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD * num_regs,
20205 stack_pointer_rtx, stack_pointer_rtx);
20208 /* Generate and emit an insn pattern that we will recognize as a pop_multi
20209 of NUM_REGS consecutive VFP regs, starting at FIRST_REG.
20211 Unfortunately, since this insn does not reflect very well the actual
20212 semantics of the operation, we need to annotate the insn for the benefit
20213 of DWARF2 frame unwind information. */
20214 static void
20215 arm_emit_vfp_multi_reg_pop (int first_reg, int num_regs, rtx base_reg)
20217 int i, j;
20218 rtx par;
20219 rtx dwarf = NULL_RTX;
20220 rtx tmp, reg;
20222 gcc_assert (num_regs && num_regs <= 32);
20224 /* Workaround ARM10 VFPr1 bug. */
20225 if (num_regs == 2 && !arm_arch6)
20227 if (first_reg == 15)
20228 first_reg--;
20230 num_regs++;
20233 /* We can emit at most 16 D-registers in a single pop_multi instruction, and
20234 there could be up to 32 D-registers to restore.
20235 If there are more than 16 D-registers, make two recursive calls,
20236 each of which emits one pop_multi instruction. */
20237 if (num_regs > 16)
20239 arm_emit_vfp_multi_reg_pop (first_reg, 16, base_reg);
20240 arm_emit_vfp_multi_reg_pop (first_reg + 16, num_regs - 16, base_reg);
20241 return;
20244 /* The parallel needs to hold num_regs SETs
20245 and one SET for the stack update. */
20246 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs + 1));
20248 /* Increment the stack pointer, based on there being
20249 num_regs 8-byte registers to restore. */
20250 tmp = gen_rtx_SET (base_reg, plus_constant (Pmode, base_reg, 8 * num_regs));
20251 RTX_FRAME_RELATED_P (tmp) = 1;
20252 XVECEXP (par, 0, 0) = tmp;
20254 /* Now show every reg that will be restored, using a SET for each. */
20255 for (j = 0, i=first_reg; j < num_regs; i += 2)
20257 reg = gen_rtx_REG (DFmode, i);
20259 tmp = gen_rtx_SET (reg,
20260 gen_frame_mem
20261 (DFmode,
20262 plus_constant (Pmode, base_reg, 8 * j)));
20263 RTX_FRAME_RELATED_P (tmp) = 1;
20264 XVECEXP (par, 0, j + 1) = tmp;
20266 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20268 j++;
20271 par = emit_insn (par);
20272 REG_NOTES (par) = dwarf;
20274 /* Make sure cfa doesn't leave with IP_REGNUM to allow unwinding fron FP. */
20275 if (REGNO (base_reg) == IP_REGNUM)
20277 RTX_FRAME_RELATED_P (par) = 1;
20278 add_reg_note (par, REG_CFA_DEF_CFA, hard_frame_pointer_rtx);
20280 else
20281 arm_add_cfa_adjust_cfa_note (par, 2 * UNITS_PER_WORD * num_regs,
20282 base_reg, base_reg);
20285 /* Generate and emit a pattern that will be recognized as LDRD pattern. If even
20286 number of registers are being popped, multiple LDRD patterns are created for
20287 all register pairs. If odd number of registers are popped, last register is
20288 loaded by using LDR pattern. */
20289 static void
20290 thumb2_emit_ldrd_pop (unsigned long saved_regs_mask)
20292 int num_regs = 0;
20293 int i, j;
20294 rtx par = NULL_RTX;
20295 rtx dwarf = NULL_RTX;
20296 rtx tmp, reg, tmp1;
20297 bool return_in_pc = saved_regs_mask & (1 << PC_REGNUM);
20299 for (i = 0; i <= LAST_ARM_REGNUM; i++)
20300 if (saved_regs_mask & (1 << i))
20301 num_regs++;
20303 gcc_assert (num_regs && num_regs <= 16);
20305 /* We cannot generate ldrd for PC. Hence, reduce the count if PC is
20306 to be popped. So, if num_regs is even, now it will become odd,
20307 and we can generate pop with PC. If num_regs is odd, it will be
20308 even now, and ldr with return can be generated for PC. */
20309 if (return_in_pc)
20310 num_regs--;
20312 gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
20314 /* Var j iterates over all the registers to gather all the registers in
20315 saved_regs_mask. Var i gives index of saved registers in stack frame.
20316 A PARALLEL RTX of register-pair is created here, so that pattern for
20317 LDRD can be matched. As PC is always last register to be popped, and
20318 we have already decremented num_regs if PC, we don't have to worry
20319 about PC in this loop. */
20320 for (i = 0, j = 0; i < (num_regs - (num_regs % 2)); j++)
20321 if (saved_regs_mask & (1 << j))
20323 /* Create RTX for memory load. */
20324 reg = gen_rtx_REG (SImode, j);
20325 tmp = gen_rtx_SET (reg,
20326 gen_frame_mem (SImode,
20327 plus_constant (Pmode,
20328 stack_pointer_rtx, 4 * i)));
20329 RTX_FRAME_RELATED_P (tmp) = 1;
20331 if (i % 2 == 0)
20333 /* When saved-register index (i) is even, the RTX to be emitted is
20334 yet to be created. Hence create it first. The LDRD pattern we
20335 are generating is :
20336 [ (SET (reg_t0) (MEM (PLUS (SP) (NUM))))
20337 (SET (reg_t1) (MEM (PLUS (SP) (NUM + 4)))) ]
20338 where target registers need not be consecutive. */
20339 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
20340 dwarf = NULL_RTX;
20343 /* ith register is added in PARALLEL RTX. If i is even, the reg_i is
20344 added as 0th element and if i is odd, reg_i is added as 1st element
20345 of LDRD pattern shown above. */
20346 XVECEXP (par, 0, (i % 2)) = tmp;
20347 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20349 if ((i % 2) == 1)
20351 /* When saved-register index (i) is odd, RTXs for both the registers
20352 to be loaded are generated in above given LDRD pattern, and the
20353 pattern can be emitted now. */
20354 par = emit_insn (par);
20355 REG_NOTES (par) = dwarf;
20356 RTX_FRAME_RELATED_P (par) = 1;
20359 i++;
20362 /* If the number of registers pushed is odd AND return_in_pc is false OR
20363 number of registers are even AND return_in_pc is true, last register is
20364 popped using LDR. It can be PC as well. Hence, adjust the stack first and
20365 then LDR with post increment. */
20367 /* Increment the stack pointer, based on there being
20368 num_regs 4-byte registers to restore. */
20369 tmp = gen_rtx_SET (stack_pointer_rtx,
20370 plus_constant (Pmode, stack_pointer_rtx, 4 * i));
20371 RTX_FRAME_RELATED_P (tmp) = 1;
20372 tmp = emit_insn (tmp);
20373 if (!return_in_pc)
20375 arm_add_cfa_adjust_cfa_note (tmp, UNITS_PER_WORD * i,
20376 stack_pointer_rtx, stack_pointer_rtx);
20379 dwarf = NULL_RTX;
20381 if (((num_regs % 2) == 1 && !return_in_pc)
20382 || ((num_regs % 2) == 0 && return_in_pc))
20384 /* Scan for the single register to be popped. Skip until the saved
20385 register is found. */
20386 for (; (saved_regs_mask & (1 << j)) == 0; j++);
20388 /* Gen LDR with post increment here. */
20389 tmp1 = gen_rtx_MEM (SImode,
20390 gen_rtx_POST_INC (SImode,
20391 stack_pointer_rtx));
20392 set_mem_alias_set (tmp1, get_frame_alias_set ());
20394 reg = gen_rtx_REG (SImode, j);
20395 tmp = gen_rtx_SET (reg, tmp1);
20396 RTX_FRAME_RELATED_P (tmp) = 1;
20397 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20399 if (return_in_pc)
20401 /* If return_in_pc, j must be PC_REGNUM. */
20402 gcc_assert (j == PC_REGNUM);
20403 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
20404 XVECEXP (par, 0, 0) = ret_rtx;
20405 XVECEXP (par, 0, 1) = tmp;
20406 par = emit_jump_insn (par);
20408 else
20410 par = emit_insn (tmp);
20411 REG_NOTES (par) = dwarf;
20412 arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD,
20413 stack_pointer_rtx, stack_pointer_rtx);
20417 else if ((num_regs % 2) == 1 && return_in_pc)
20419 /* There are 2 registers to be popped. So, generate the pattern
20420 pop_multiple_with_stack_update_and_return to pop in PC. */
20421 arm_emit_multi_reg_pop (saved_regs_mask & (~((1 << j) - 1)));
20424 return;
20427 /* LDRD in ARM mode needs consecutive registers as operands. This function
20428 emits LDRD whenever possible, otherwise it emits single-word loads. It uses
20429 offset addressing and then generates one separate stack udpate. This provides
20430 more scheduling freedom, compared to writeback on every load. However,
20431 if the function returns using load into PC directly
20432 (i.e., if PC is in SAVED_REGS_MASK), the stack needs to be updated
20433 before the last load. TODO: Add a peephole optimization to recognize
20434 the new epilogue sequence as an LDM instruction whenever possible. TODO: Add
20435 peephole optimization to merge the load at stack-offset zero
20436 with the stack update instruction using load with writeback
20437 in post-index addressing mode. */
20438 static void
20439 arm_emit_ldrd_pop (unsigned long saved_regs_mask)
20441 int j = 0;
20442 int offset = 0;
20443 rtx par = NULL_RTX;
20444 rtx dwarf = NULL_RTX;
20445 rtx tmp, mem;
20447 /* Restore saved registers. */
20448 gcc_assert (!((saved_regs_mask & (1 << SP_REGNUM))));
20449 j = 0;
20450 while (j <= LAST_ARM_REGNUM)
20451 if (saved_regs_mask & (1 << j))
20453 if ((j % 2) == 0
20454 && (saved_regs_mask & (1 << (j + 1)))
20455 && (j + 1) != PC_REGNUM)
20457 /* Current register and next register form register pair for which
20458 LDRD can be generated. PC is always the last register popped, and
20459 we handle it separately. */
20460 if (offset > 0)
20461 mem = gen_frame_mem (DImode,
20462 plus_constant (Pmode,
20463 stack_pointer_rtx,
20464 offset));
20465 else
20466 mem = gen_frame_mem (DImode, stack_pointer_rtx);
20468 tmp = gen_rtx_SET (gen_rtx_REG (DImode, j), mem);
20469 tmp = emit_insn (tmp);
20470 RTX_FRAME_RELATED_P (tmp) = 1;
20472 /* Generate dwarf info. */
20474 dwarf = alloc_reg_note (REG_CFA_RESTORE,
20475 gen_rtx_REG (SImode, j),
20476 NULL_RTX);
20477 dwarf = alloc_reg_note (REG_CFA_RESTORE,
20478 gen_rtx_REG (SImode, j + 1),
20479 dwarf);
20481 REG_NOTES (tmp) = dwarf;
20483 offset += 8;
20484 j += 2;
20486 else if (j != PC_REGNUM)
20488 /* Emit a single word load. */
20489 if (offset > 0)
20490 mem = gen_frame_mem (SImode,
20491 plus_constant (Pmode,
20492 stack_pointer_rtx,
20493 offset));
20494 else
20495 mem = gen_frame_mem (SImode, stack_pointer_rtx);
20497 tmp = gen_rtx_SET (gen_rtx_REG (SImode, j), mem);
20498 tmp = emit_insn (tmp);
20499 RTX_FRAME_RELATED_P (tmp) = 1;
20501 /* Generate dwarf info. */
20502 REG_NOTES (tmp) = alloc_reg_note (REG_CFA_RESTORE,
20503 gen_rtx_REG (SImode, j),
20504 NULL_RTX);
20506 offset += 4;
20507 j += 1;
20509 else /* j == PC_REGNUM */
20510 j++;
20512 else
20513 j++;
20515 /* Update the stack. */
20516 if (offset > 0)
20518 tmp = gen_rtx_SET (stack_pointer_rtx,
20519 plus_constant (Pmode,
20520 stack_pointer_rtx,
20521 offset));
20522 tmp = emit_insn (tmp);
20523 arm_add_cfa_adjust_cfa_note (tmp, offset,
20524 stack_pointer_rtx, stack_pointer_rtx);
20525 offset = 0;
20528 if (saved_regs_mask & (1 << PC_REGNUM))
20530 /* Only PC is to be popped. */
20531 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
20532 XVECEXP (par, 0, 0) = ret_rtx;
20533 tmp = gen_rtx_SET (gen_rtx_REG (SImode, PC_REGNUM),
20534 gen_frame_mem (SImode,
20535 gen_rtx_POST_INC (SImode,
20536 stack_pointer_rtx)));
20537 RTX_FRAME_RELATED_P (tmp) = 1;
20538 XVECEXP (par, 0, 1) = tmp;
20539 par = emit_jump_insn (par);
20541 /* Generate dwarf info. */
20542 dwarf = alloc_reg_note (REG_CFA_RESTORE,
20543 gen_rtx_REG (SImode, PC_REGNUM),
20544 NULL_RTX);
20545 REG_NOTES (par) = dwarf;
20546 arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD,
20547 stack_pointer_rtx, stack_pointer_rtx);
20551 /* Calculate the size of the return value that is passed in registers. */
20552 static unsigned
20553 arm_size_return_regs (void)
20555 machine_mode mode;
20557 if (crtl->return_rtx != 0)
20558 mode = GET_MODE (crtl->return_rtx);
20559 else
20560 mode = DECL_MODE (DECL_RESULT (current_function_decl));
20562 return GET_MODE_SIZE (mode);
20565 /* Return true if the current function needs to save/restore LR. */
20566 static bool
20567 thumb_force_lr_save (void)
20569 return !cfun->machine->lr_save_eliminated
20570 && (!crtl->is_leaf
20571 || thumb_far_jump_used_p ()
20572 || df_regs_ever_live_p (LR_REGNUM));
20575 /* We do not know if r3 will be available because
20576 we do have an indirect tailcall happening in this
20577 particular case. */
20578 static bool
20579 is_indirect_tailcall_p (rtx call)
20581 rtx pat = PATTERN (call);
20583 /* Indirect tail call. */
20584 pat = XVECEXP (pat, 0, 0);
20585 if (GET_CODE (pat) == SET)
20586 pat = SET_SRC (pat);
20588 pat = XEXP (XEXP (pat, 0), 0);
20589 return REG_P (pat);
20592 /* Return true if r3 is used by any of the tail call insns in the
20593 current function. */
20594 static bool
20595 any_sibcall_could_use_r3 (void)
20597 edge_iterator ei;
20598 edge e;
20600 if (!crtl->tail_call_emit)
20601 return false;
20602 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
20603 if (e->flags & EDGE_SIBCALL)
20605 rtx_insn *call = BB_END (e->src);
20606 if (!CALL_P (call))
20607 call = prev_nonnote_nondebug_insn (call);
20608 gcc_assert (CALL_P (call) && SIBLING_CALL_P (call));
20609 if (find_regno_fusage (call, USE, 3)
20610 || is_indirect_tailcall_p (call))
20611 return true;
20613 return false;
20617 /* Compute the distance from register FROM to register TO.
20618 These can be the arg pointer (26), the soft frame pointer (25),
20619 the stack pointer (13) or the hard frame pointer (11).
20620 In thumb mode r7 is used as the soft frame pointer, if needed.
20621 Typical stack layout looks like this:
20623 old stack pointer -> | |
20624 ----
20625 | | \
20626 | | saved arguments for
20627 | | vararg functions
20628 | | /
20630 hard FP & arg pointer -> | | \
20631 | | stack
20632 | | frame
20633 | | /
20635 | | \
20636 | | call saved
20637 | | registers
20638 soft frame pointer -> | | /
20640 | | \
20641 | | local
20642 | | variables
20643 locals base pointer -> | | /
20645 | | \
20646 | | outgoing
20647 | | arguments
20648 current stack pointer -> | | /
20651 For a given function some or all of these stack components
20652 may not be needed, giving rise to the possibility of
20653 eliminating some of the registers.
20655 The values returned by this function must reflect the behavior
20656 of arm_expand_prologue() and arm_compute_save_reg_mask().
20658 The sign of the number returned reflects the direction of stack
20659 growth, so the values are positive for all eliminations except
20660 from the soft frame pointer to the hard frame pointer.
20662 SFP may point just inside the local variables block to ensure correct
20663 alignment. */
20666 /* Calculate stack offsets. These are used to calculate register elimination
20667 offsets and in prologue/epilogue code. Also calculates which registers
20668 should be saved. */
20670 static arm_stack_offsets *
20671 arm_get_frame_offsets (void)
20673 struct arm_stack_offsets *offsets;
20674 unsigned long func_type;
20675 int saved;
20676 int core_saved;
20677 HOST_WIDE_INT frame_size;
20678 int i;
20680 offsets = &cfun->machine->stack_offsets;
20682 if (reload_completed)
20683 return offsets;
20685 /* Initially this is the size of the local variables. It will translated
20686 into an offset once we have determined the size of preceding data. */
20687 frame_size = ROUND_UP_WORD (get_frame_size ());
20689 /* Space for variadic functions. */
20690 offsets->saved_args = crtl->args.pretend_args_size;
20692 /* In Thumb mode this is incorrect, but never used. */
20693 offsets->frame
20694 = (offsets->saved_args
20695 + arm_compute_static_chain_stack_bytes ()
20696 + (frame_pointer_needed ? 4 : 0));
20698 if (TARGET_32BIT)
20700 unsigned int regno;
20702 offsets->saved_regs_mask = arm_compute_save_reg_mask ();
20703 core_saved = bit_count (offsets->saved_regs_mask) * 4;
20704 saved = core_saved;
20706 /* We know that SP will be doubleword aligned on entry, and we must
20707 preserve that condition at any subroutine call. We also require the
20708 soft frame pointer to be doubleword aligned. */
20710 if (TARGET_REALLY_IWMMXT)
20712 /* Check for the call-saved iWMMXt registers. */
20713 for (regno = FIRST_IWMMXT_REGNUM;
20714 regno <= LAST_IWMMXT_REGNUM;
20715 regno++)
20716 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
20717 saved += 8;
20720 func_type = arm_current_func_type ();
20721 /* Space for saved VFP registers. */
20722 if (! IS_VOLATILE (func_type)
20723 && TARGET_HARD_FLOAT)
20724 saved += arm_get_vfp_saved_size ();
20726 else /* TARGET_THUMB1 */
20728 offsets->saved_regs_mask = thumb1_compute_save_reg_mask ();
20729 core_saved = bit_count (offsets->saved_regs_mask) * 4;
20730 saved = core_saved;
20731 if (TARGET_BACKTRACE)
20732 saved += 16;
20735 /* Saved registers include the stack frame. */
20736 offsets->saved_regs
20737 = offsets->saved_args + arm_compute_static_chain_stack_bytes () + saved;
20738 offsets->soft_frame = offsets->saved_regs + CALLER_INTERWORKING_SLOT_SIZE;
20740 /* A leaf function does not need any stack alignment if it has nothing
20741 on the stack. */
20742 if (crtl->is_leaf && frame_size == 0
20743 /* However if it calls alloca(), we have a dynamically allocated
20744 block of BIGGEST_ALIGNMENT on stack, so still do stack alignment. */
20745 && ! cfun->calls_alloca)
20747 offsets->outgoing_args = offsets->soft_frame;
20748 offsets->locals_base = offsets->soft_frame;
20749 return offsets;
20752 /* Ensure SFP has the correct alignment. */
20753 if (ARM_DOUBLEWORD_ALIGN
20754 && (offsets->soft_frame & 7))
20756 offsets->soft_frame += 4;
20757 /* Try to align stack by pushing an extra reg. Don't bother doing this
20758 when there is a stack frame as the alignment will be rolled into
20759 the normal stack adjustment. */
20760 if (frame_size + crtl->outgoing_args_size == 0)
20762 int reg = -1;
20764 /* Register r3 is caller-saved. Normally it does not need to be
20765 saved on entry by the prologue. However if we choose to save
20766 it for padding then we may confuse the compiler into thinking
20767 a prologue sequence is required when in fact it is not. This
20768 will occur when shrink-wrapping if r3 is used as a scratch
20769 register and there are no other callee-saved writes.
20771 This situation can be avoided when other callee-saved registers
20772 are available and r3 is not mandatory if we choose a callee-saved
20773 register for padding. */
20774 bool prefer_callee_reg_p = false;
20776 /* If it is safe to use r3, then do so. This sometimes
20777 generates better code on Thumb-2 by avoiding the need to
20778 use 32-bit push/pop instructions. */
20779 if (! any_sibcall_could_use_r3 ()
20780 && arm_size_return_regs () <= 12
20781 && (offsets->saved_regs_mask & (1 << 3)) == 0
20782 && (TARGET_THUMB2
20783 || !(TARGET_LDRD && current_tune->prefer_ldrd_strd)))
20785 reg = 3;
20786 if (!TARGET_THUMB2)
20787 prefer_callee_reg_p = true;
20789 if (reg == -1
20790 || prefer_callee_reg_p)
20792 for (i = 4; i <= (TARGET_THUMB1 ? LAST_LO_REGNUM : 11); i++)
20794 /* Avoid fixed registers; they may be changed at
20795 arbitrary times so it's unsafe to restore them
20796 during the epilogue. */
20797 if (!fixed_regs[i]
20798 && (offsets->saved_regs_mask & (1 << i)) == 0)
20800 reg = i;
20801 break;
20806 if (reg != -1)
20808 offsets->saved_regs += 4;
20809 offsets->saved_regs_mask |= (1 << reg);
20814 offsets->locals_base = offsets->soft_frame + frame_size;
20815 offsets->outgoing_args = (offsets->locals_base
20816 + crtl->outgoing_args_size);
20818 if (ARM_DOUBLEWORD_ALIGN)
20820 /* Ensure SP remains doubleword aligned. */
20821 if (offsets->outgoing_args & 7)
20822 offsets->outgoing_args += 4;
20823 gcc_assert (!(offsets->outgoing_args & 7));
20826 return offsets;
20830 /* Calculate the relative offsets for the different stack pointers. Positive
20831 offsets are in the direction of stack growth. */
20833 HOST_WIDE_INT
20834 arm_compute_initial_elimination_offset (unsigned int from, unsigned int to)
20836 arm_stack_offsets *offsets;
20838 offsets = arm_get_frame_offsets ();
20840 /* OK, now we have enough information to compute the distances.
20841 There must be an entry in these switch tables for each pair
20842 of registers in ELIMINABLE_REGS, even if some of the entries
20843 seem to be redundant or useless. */
20844 switch (from)
20846 case ARG_POINTER_REGNUM:
20847 switch (to)
20849 case THUMB_HARD_FRAME_POINTER_REGNUM:
20850 return 0;
20852 case FRAME_POINTER_REGNUM:
20853 /* This is the reverse of the soft frame pointer
20854 to hard frame pointer elimination below. */
20855 return offsets->soft_frame - offsets->saved_args;
20857 case ARM_HARD_FRAME_POINTER_REGNUM:
20858 /* This is only non-zero in the case where the static chain register
20859 is stored above the frame. */
20860 return offsets->frame - offsets->saved_args - 4;
20862 case STACK_POINTER_REGNUM:
20863 /* If nothing has been pushed on the stack at all
20864 then this will return -4. This *is* correct! */
20865 return offsets->outgoing_args - (offsets->saved_args + 4);
20867 default:
20868 gcc_unreachable ();
20870 gcc_unreachable ();
20872 case FRAME_POINTER_REGNUM:
20873 switch (to)
20875 case THUMB_HARD_FRAME_POINTER_REGNUM:
20876 return 0;
20878 case ARM_HARD_FRAME_POINTER_REGNUM:
20879 /* The hard frame pointer points to the top entry in the
20880 stack frame. The soft frame pointer to the bottom entry
20881 in the stack frame. If there is no stack frame at all,
20882 then they are identical. */
20884 return offsets->frame - offsets->soft_frame;
20886 case STACK_POINTER_REGNUM:
20887 return offsets->outgoing_args - offsets->soft_frame;
20889 default:
20890 gcc_unreachable ();
20892 gcc_unreachable ();
20894 default:
20895 /* You cannot eliminate from the stack pointer.
20896 In theory you could eliminate from the hard frame
20897 pointer to the stack pointer, but this will never
20898 happen, since if a stack frame is not needed the
20899 hard frame pointer will never be used. */
20900 gcc_unreachable ();
20904 /* Given FROM and TO register numbers, say whether this elimination is
20905 allowed. Frame pointer elimination is automatically handled.
20907 All eliminations are permissible. Note that ARG_POINTER_REGNUM and
20908 HARD_FRAME_POINTER_REGNUM are in fact the same thing. If we need a frame
20909 pointer, we must eliminate FRAME_POINTER_REGNUM into
20910 HARD_FRAME_POINTER_REGNUM and not into STACK_POINTER_REGNUM or
20911 ARG_POINTER_REGNUM. */
20913 bool
20914 arm_can_eliminate (const int from, const int to)
20916 return ((to == FRAME_POINTER_REGNUM && from == ARG_POINTER_REGNUM) ? false :
20917 (to == STACK_POINTER_REGNUM && frame_pointer_needed) ? false :
20918 (to == ARM_HARD_FRAME_POINTER_REGNUM && TARGET_THUMB) ? false :
20919 (to == THUMB_HARD_FRAME_POINTER_REGNUM && TARGET_ARM) ? false :
20920 true);
20923 /* Emit RTL to save coprocessor registers on function entry. Returns the
20924 number of bytes pushed. */
20926 static int
20927 arm_save_coproc_regs(void)
20929 int saved_size = 0;
20930 unsigned reg;
20931 unsigned start_reg;
20932 rtx insn;
20934 for (reg = LAST_IWMMXT_REGNUM; reg >= FIRST_IWMMXT_REGNUM; reg--)
20935 if (df_regs_ever_live_p (reg) && ! call_used_regs[reg])
20937 insn = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
20938 insn = gen_rtx_MEM (V2SImode, insn);
20939 insn = emit_set_insn (insn, gen_rtx_REG (V2SImode, reg));
20940 RTX_FRAME_RELATED_P (insn) = 1;
20941 saved_size += 8;
20944 if (TARGET_HARD_FLOAT)
20946 start_reg = FIRST_VFP_REGNUM;
20948 for (reg = FIRST_VFP_REGNUM; reg < LAST_VFP_REGNUM; reg += 2)
20950 if ((!df_regs_ever_live_p (reg) || call_used_regs[reg])
20951 && (!df_regs_ever_live_p (reg + 1) || call_used_regs[reg + 1]))
20953 if (start_reg != reg)
20954 saved_size += vfp_emit_fstmd (start_reg,
20955 (reg - start_reg) / 2);
20956 start_reg = reg + 2;
20959 if (start_reg != reg)
20960 saved_size += vfp_emit_fstmd (start_reg,
20961 (reg - start_reg) / 2);
20963 return saved_size;
20967 /* Set the Thumb frame pointer from the stack pointer. */
20969 static void
20970 thumb_set_frame_pointer (arm_stack_offsets *offsets)
20972 HOST_WIDE_INT amount;
20973 rtx insn, dwarf;
20975 amount = offsets->outgoing_args - offsets->locals_base;
20976 if (amount < 1024)
20977 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
20978 stack_pointer_rtx, GEN_INT (amount)));
20979 else
20981 emit_insn (gen_movsi (hard_frame_pointer_rtx, GEN_INT (amount)));
20982 /* Thumb-2 RTL patterns expect sp as the first input. Thumb-1
20983 expects the first two operands to be the same. */
20984 if (TARGET_THUMB2)
20986 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
20987 stack_pointer_rtx,
20988 hard_frame_pointer_rtx));
20990 else
20992 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
20993 hard_frame_pointer_rtx,
20994 stack_pointer_rtx));
20996 dwarf = gen_rtx_SET (hard_frame_pointer_rtx,
20997 plus_constant (Pmode, stack_pointer_rtx, amount));
20998 RTX_FRAME_RELATED_P (dwarf) = 1;
20999 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
21002 RTX_FRAME_RELATED_P (insn) = 1;
21005 struct scratch_reg {
21006 rtx reg;
21007 bool saved;
21010 /* Return a short-lived scratch register for use as a 2nd scratch register on
21011 function entry after the registers are saved in the prologue. This register
21012 must be released by means of release_scratch_register_on_entry. IP is not
21013 considered since it is always used as the 1st scratch register if available.
21015 REGNO1 is the index number of the 1st scratch register and LIVE_REGS is the
21016 mask of live registers. */
21018 static void
21019 get_scratch_register_on_entry (struct scratch_reg *sr, unsigned int regno1,
21020 unsigned long live_regs)
21022 int regno = -1;
21024 sr->saved = false;
21026 if (regno1 != LR_REGNUM && (live_regs & (1 << LR_REGNUM)) != 0)
21027 regno = LR_REGNUM;
21028 else
21030 unsigned int i;
21032 for (i = 4; i < 11; i++)
21033 if (regno1 != i && (live_regs & (1 << i)) != 0)
21035 regno = i;
21036 break;
21039 if (regno < 0)
21041 /* If IP is used as the 1st scratch register for a nested function,
21042 then either r3 wasn't available or is used to preserve IP. */
21043 if (regno1 == IP_REGNUM && IS_NESTED (arm_current_func_type ()))
21044 regno1 = 3;
21045 regno = (regno1 == 3 ? 2 : 3);
21046 sr->saved
21047 = REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun)),
21048 regno);
21052 sr->reg = gen_rtx_REG (SImode, regno);
21053 if (sr->saved)
21055 rtx addr = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
21056 rtx insn = emit_set_insn (gen_frame_mem (SImode, addr), sr->reg);
21057 rtx x = gen_rtx_SET (stack_pointer_rtx,
21058 plus_constant (Pmode, stack_pointer_rtx, -4));
21059 RTX_FRAME_RELATED_P (insn) = 1;
21060 add_reg_note (insn, REG_FRAME_RELATED_EXPR, x);
21064 /* Release a scratch register obtained from the preceding function. */
21066 static void
21067 release_scratch_register_on_entry (struct scratch_reg *sr)
21069 if (sr->saved)
21071 rtx addr = gen_rtx_POST_INC (Pmode, stack_pointer_rtx);
21072 rtx insn = emit_set_insn (sr->reg, gen_frame_mem (SImode, addr));
21073 rtx x = gen_rtx_SET (stack_pointer_rtx,
21074 plus_constant (Pmode, stack_pointer_rtx, 4));
21075 RTX_FRAME_RELATED_P (insn) = 1;
21076 add_reg_note (insn, REG_FRAME_RELATED_EXPR, x);
21080 #define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)
21082 #if PROBE_INTERVAL > 4096
21083 #error Cannot use indexed addressing mode for stack probing
21084 #endif
21086 /* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
21087 inclusive. These are offsets from the current stack pointer. REGNO1
21088 is the index number of the 1st scratch register and LIVE_REGS is the
21089 mask of live registers. */
21091 static void
21092 arm_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size,
21093 unsigned int regno1, unsigned long live_regs)
21095 rtx reg1 = gen_rtx_REG (Pmode, regno1);
21097 /* See if we have a constant small number of probes to generate. If so,
21098 that's the easy case. */
21099 if (size <= PROBE_INTERVAL)
21101 emit_move_insn (reg1, GEN_INT (first + PROBE_INTERVAL));
21102 emit_set_insn (reg1, gen_rtx_MINUS (Pmode, stack_pointer_rtx, reg1));
21103 emit_stack_probe (plus_constant (Pmode, reg1, PROBE_INTERVAL - size));
21106 /* The run-time loop is made up of 10 insns in the generic case while the
21107 compile-time loop is made up of 4+2*(n-2) insns for n # of intervals. */
21108 else if (size <= 5 * PROBE_INTERVAL)
21110 HOST_WIDE_INT i, rem;
21112 emit_move_insn (reg1, GEN_INT (first + PROBE_INTERVAL));
21113 emit_set_insn (reg1, gen_rtx_MINUS (Pmode, stack_pointer_rtx, reg1));
21114 emit_stack_probe (reg1);
21116 /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 2 until
21117 it exceeds SIZE. If only two probes are needed, this will not
21118 generate any code. Then probe at FIRST + SIZE. */
21119 for (i = 2 * PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
21121 emit_set_insn (reg1, plus_constant (Pmode, reg1, -PROBE_INTERVAL));
21122 emit_stack_probe (reg1);
21125 rem = size - (i - PROBE_INTERVAL);
21126 if (rem > 4095 || (TARGET_THUMB2 && rem > 255))
21128 emit_set_insn (reg1, plus_constant (Pmode, reg1, -PROBE_INTERVAL));
21129 emit_stack_probe (plus_constant (Pmode, reg1, PROBE_INTERVAL - rem));
21131 else
21132 emit_stack_probe (plus_constant (Pmode, reg1, -rem));
21135 /* Otherwise, do the same as above, but in a loop. Note that we must be
21136 extra careful with variables wrapping around because we might be at
21137 the very top (or the very bottom) of the address space and we have
21138 to be able to handle this case properly; in particular, we use an
21139 equality test for the loop condition. */
21140 else
21142 HOST_WIDE_INT rounded_size;
21143 struct scratch_reg sr;
21145 get_scratch_register_on_entry (&sr, regno1, live_regs);
21147 emit_move_insn (reg1, GEN_INT (first));
21150 /* Step 1: round SIZE to the previous multiple of the interval. */
21152 rounded_size = size & -PROBE_INTERVAL;
21153 emit_move_insn (sr.reg, GEN_INT (rounded_size));
21156 /* Step 2: compute initial and final value of the loop counter. */
21158 /* TEST_ADDR = SP + FIRST. */
21159 emit_set_insn (reg1, gen_rtx_MINUS (Pmode, stack_pointer_rtx, reg1));
21161 /* LAST_ADDR = SP + FIRST + ROUNDED_SIZE. */
21162 emit_set_insn (sr.reg, gen_rtx_MINUS (Pmode, reg1, sr.reg));
21165 /* Step 3: the loop
21169 TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
21170 probe at TEST_ADDR
21172 while (TEST_ADDR != LAST_ADDR)
21174 probes at FIRST + N * PROBE_INTERVAL for values of N from 1
21175 until it is equal to ROUNDED_SIZE. */
21177 emit_insn (gen_probe_stack_range (reg1, reg1, sr.reg));
21180 /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
21181 that SIZE is equal to ROUNDED_SIZE. */
21183 if (size != rounded_size)
21185 HOST_WIDE_INT rem = size - rounded_size;
21187 if (rem > 4095 || (TARGET_THUMB2 && rem > 255))
21189 emit_set_insn (sr.reg,
21190 plus_constant (Pmode, sr.reg, -PROBE_INTERVAL));
21191 emit_stack_probe (plus_constant (Pmode, sr.reg,
21192 PROBE_INTERVAL - rem));
21194 else
21195 emit_stack_probe (plus_constant (Pmode, sr.reg, -rem));
21198 release_scratch_register_on_entry (&sr);
21201 /* Make sure nothing is scheduled before we are done. */
21202 emit_insn (gen_blockage ());
21205 /* Probe a range of stack addresses from REG1 to REG2 inclusive. These are
21206 absolute addresses. */
21208 const char *
21209 output_probe_stack_range (rtx reg1, rtx reg2)
21211 static int labelno = 0;
21212 char loop_lab[32];
21213 rtx xops[2];
21215 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno++);
21217 /* Loop. */
21218 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
21220 /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL. */
21221 xops[0] = reg1;
21222 xops[1] = GEN_INT (PROBE_INTERVAL);
21223 output_asm_insn ("sub\t%0, %0, %1", xops);
21225 /* Probe at TEST_ADDR. */
21226 output_asm_insn ("str\tr0, [%0, #0]", xops);
21228 /* Test if TEST_ADDR == LAST_ADDR. */
21229 xops[1] = reg2;
21230 output_asm_insn ("cmp\t%0, %1", xops);
21232 /* Branch. */
21233 fputs ("\tbne\t", asm_out_file);
21234 assemble_name_raw (asm_out_file, loop_lab);
21235 fputc ('\n', asm_out_file);
21237 return "";
21240 /* Generate the prologue instructions for entry into an ARM or Thumb-2
21241 function. */
21242 void
21243 arm_expand_prologue (void)
21245 rtx amount;
21246 rtx insn;
21247 rtx ip_rtx;
21248 unsigned long live_regs_mask;
21249 unsigned long func_type;
21250 int fp_offset = 0;
21251 int saved_pretend_args = 0;
21252 int saved_regs = 0;
21253 unsigned HOST_WIDE_INT args_to_push;
21254 HOST_WIDE_INT size;
21255 arm_stack_offsets *offsets;
21256 bool clobber_ip;
21258 func_type = arm_current_func_type ();
21260 /* Naked functions don't have prologues. */
21261 if (IS_NAKED (func_type))
21263 if (flag_stack_usage_info)
21264 current_function_static_stack_size = 0;
21265 return;
21268 /* Make a copy of c_f_p_a_s as we may need to modify it locally. */
21269 args_to_push = crtl->args.pretend_args_size;
21271 /* Compute which register we will have to save onto the stack. */
21272 offsets = arm_get_frame_offsets ();
21273 live_regs_mask = offsets->saved_regs_mask;
21275 ip_rtx = gen_rtx_REG (SImode, IP_REGNUM);
21277 if (IS_STACKALIGN (func_type))
21279 rtx r0, r1;
21281 /* Handle a word-aligned stack pointer. We generate the following:
21283 mov r0, sp
21284 bic r1, r0, #7
21285 mov sp, r1
21286 <save and restore r0 in normal prologue/epilogue>
21287 mov sp, r0
21288 bx lr
21290 The unwinder doesn't need to know about the stack realignment.
21291 Just tell it we saved SP in r0. */
21292 gcc_assert (TARGET_THUMB2 && !arm_arch_notm && args_to_push == 0);
21294 r0 = gen_rtx_REG (SImode, R0_REGNUM);
21295 r1 = gen_rtx_REG (SImode, R1_REGNUM);
21297 insn = emit_insn (gen_movsi (r0, stack_pointer_rtx));
21298 RTX_FRAME_RELATED_P (insn) = 1;
21299 add_reg_note (insn, REG_CFA_REGISTER, NULL);
21301 emit_insn (gen_andsi3 (r1, r0, GEN_INT (~(HOST_WIDE_INT)7)));
21303 /* ??? The CFA changes here, which may cause GDB to conclude that it
21304 has entered a different function. That said, the unwind info is
21305 correct, individually, before and after this instruction because
21306 we've described the save of SP, which will override the default
21307 handling of SP as restoring from the CFA. */
21308 emit_insn (gen_movsi (stack_pointer_rtx, r1));
21311 /* The static chain register is the same as the IP register. If it is
21312 clobbered when creating the frame, we need to save and restore it. */
21313 clobber_ip = IS_NESTED (func_type)
21314 && ((TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
21315 || (flag_stack_check == STATIC_BUILTIN_STACK_CHECK
21316 && !df_regs_ever_live_p (LR_REGNUM)
21317 && arm_r3_live_at_start_p ()));
21319 /* Find somewhere to store IP whilst the frame is being created.
21320 We try the following places in order:
21322 1. The last argument register r3 if it is available.
21323 2. A slot on the stack above the frame if there are no
21324 arguments to push onto the stack.
21325 3. Register r3 again, after pushing the argument registers
21326 onto the stack, if this is a varargs function.
21327 4. The last slot on the stack created for the arguments to
21328 push, if this isn't a varargs function.
21330 Note - we only need to tell the dwarf2 backend about the SP
21331 adjustment in the second variant; the static chain register
21332 doesn't need to be unwound, as it doesn't contain a value
21333 inherited from the caller. */
21334 if (clobber_ip)
21336 if (!arm_r3_live_at_start_p ())
21337 insn = emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
21338 else if (args_to_push == 0)
21340 rtx addr, dwarf;
21342 gcc_assert(arm_compute_static_chain_stack_bytes() == 4);
21343 saved_regs += 4;
21345 addr = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
21346 insn = emit_set_insn (gen_frame_mem (SImode, addr), ip_rtx);
21347 fp_offset = 4;
21349 /* Just tell the dwarf backend that we adjusted SP. */
21350 dwarf = gen_rtx_SET (stack_pointer_rtx,
21351 plus_constant (Pmode, stack_pointer_rtx,
21352 -fp_offset));
21353 RTX_FRAME_RELATED_P (insn) = 1;
21354 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
21356 else
21358 /* Store the args on the stack. */
21359 if (cfun->machine->uses_anonymous_args)
21361 insn = emit_multi_reg_push ((0xf0 >> (args_to_push / 4)) & 0xf,
21362 (0xf0 >> (args_to_push / 4)) & 0xf);
21363 emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
21364 saved_pretend_args = 1;
21366 else
21368 rtx addr, dwarf;
21370 if (args_to_push == 4)
21371 addr = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
21372 else
21373 addr = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx,
21374 plus_constant (Pmode,
21375 stack_pointer_rtx,
21376 -args_to_push));
21378 insn = emit_set_insn (gen_frame_mem (SImode, addr), ip_rtx);
21380 /* Just tell the dwarf backend that we adjusted SP. */
21381 dwarf = gen_rtx_SET (stack_pointer_rtx,
21382 plus_constant (Pmode, stack_pointer_rtx,
21383 -args_to_push));
21384 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
21387 RTX_FRAME_RELATED_P (insn) = 1;
21388 fp_offset = args_to_push;
21389 args_to_push = 0;
21393 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
21395 if (IS_INTERRUPT (func_type))
21397 /* Interrupt functions must not corrupt any registers.
21398 Creating a frame pointer however, corrupts the IP
21399 register, so we must push it first. */
21400 emit_multi_reg_push (1 << IP_REGNUM, 1 << IP_REGNUM);
21402 /* Do not set RTX_FRAME_RELATED_P on this insn.
21403 The dwarf stack unwinding code only wants to see one
21404 stack decrement per function, and this is not it. If
21405 this instruction is labeled as being part of the frame
21406 creation sequence then dwarf2out_frame_debug_expr will
21407 die when it encounters the assignment of IP to FP
21408 later on, since the use of SP here establishes SP as
21409 the CFA register and not IP.
21411 Anyway this instruction is not really part of the stack
21412 frame creation although it is part of the prologue. */
21415 insn = emit_set_insn (ip_rtx,
21416 plus_constant (Pmode, stack_pointer_rtx,
21417 fp_offset));
21418 RTX_FRAME_RELATED_P (insn) = 1;
21421 if (args_to_push)
21423 /* Push the argument registers, or reserve space for them. */
21424 if (cfun->machine->uses_anonymous_args)
21425 insn = emit_multi_reg_push
21426 ((0xf0 >> (args_to_push / 4)) & 0xf,
21427 (0xf0 >> (args_to_push / 4)) & 0xf);
21428 else
21429 insn = emit_insn
21430 (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
21431 GEN_INT (- args_to_push)));
21432 RTX_FRAME_RELATED_P (insn) = 1;
21435 /* If this is an interrupt service routine, and the link register
21436 is going to be pushed, and we're not generating extra
21437 push of IP (needed when frame is needed and frame layout if apcs),
21438 subtracting four from LR now will mean that the function return
21439 can be done with a single instruction. */
21440 if ((func_type == ARM_FT_ISR || func_type == ARM_FT_FIQ)
21441 && (live_regs_mask & (1 << LR_REGNUM)) != 0
21442 && !(frame_pointer_needed && TARGET_APCS_FRAME)
21443 && TARGET_ARM)
21445 rtx lr = gen_rtx_REG (SImode, LR_REGNUM);
21447 emit_set_insn (lr, plus_constant (SImode, lr, -4));
21450 if (live_regs_mask)
21452 unsigned long dwarf_regs_mask = live_regs_mask;
21454 saved_regs += bit_count (live_regs_mask) * 4;
21455 if (optimize_size && !frame_pointer_needed
21456 && saved_regs == offsets->saved_regs - offsets->saved_args)
21458 /* If no coprocessor registers are being pushed and we don't have
21459 to worry about a frame pointer then push extra registers to
21460 create the stack frame. This is done is a way that does not
21461 alter the frame layout, so is independent of the epilogue. */
21462 int n;
21463 int frame;
21464 n = 0;
21465 while (n < 8 && (live_regs_mask & (1 << n)) == 0)
21466 n++;
21467 frame = offsets->outgoing_args - (offsets->saved_args + saved_regs);
21468 if (frame && n * 4 >= frame)
21470 n = frame / 4;
21471 live_regs_mask |= (1 << n) - 1;
21472 saved_regs += frame;
21476 if (TARGET_LDRD
21477 && current_tune->prefer_ldrd_strd
21478 && !optimize_function_for_size_p (cfun))
21480 gcc_checking_assert (live_regs_mask == dwarf_regs_mask);
21481 if (TARGET_THUMB2)
21482 thumb2_emit_strd_push (live_regs_mask);
21483 else if (TARGET_ARM
21484 && !TARGET_APCS_FRAME
21485 && !IS_INTERRUPT (func_type))
21486 arm_emit_strd_push (live_regs_mask);
21487 else
21489 insn = emit_multi_reg_push (live_regs_mask, live_regs_mask);
21490 RTX_FRAME_RELATED_P (insn) = 1;
21493 else
21495 insn = emit_multi_reg_push (live_regs_mask, dwarf_regs_mask);
21496 RTX_FRAME_RELATED_P (insn) = 1;
21500 if (! IS_VOLATILE (func_type))
21501 saved_regs += arm_save_coproc_regs ();
21503 if (frame_pointer_needed && TARGET_ARM)
21505 /* Create the new frame pointer. */
21506 if (TARGET_APCS_FRAME)
21508 insn = GEN_INT (-(4 + args_to_push + fp_offset));
21509 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx, ip_rtx, insn));
21510 RTX_FRAME_RELATED_P (insn) = 1;
21512 else
21514 insn = GEN_INT (saved_regs - (4 + fp_offset));
21515 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
21516 stack_pointer_rtx, insn));
21517 RTX_FRAME_RELATED_P (insn) = 1;
21521 size = offsets->outgoing_args - offsets->saved_args;
21522 if (flag_stack_usage_info)
21523 current_function_static_stack_size = size;
21525 /* If this isn't an interrupt service routine and we have a frame, then do
21526 stack checking. We use IP as the first scratch register, except for the
21527 non-APCS nested functions if LR or r3 are available (see clobber_ip). */
21528 if (!IS_INTERRUPT (func_type)
21529 && flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
21531 unsigned int regno;
21533 if (!IS_NESTED (func_type) || clobber_ip)
21534 regno = IP_REGNUM;
21535 else if (df_regs_ever_live_p (LR_REGNUM))
21536 regno = LR_REGNUM;
21537 else
21538 regno = 3;
21540 if (crtl->is_leaf && !cfun->calls_alloca)
21542 if (size > PROBE_INTERVAL && size > STACK_CHECK_PROTECT)
21543 arm_emit_probe_stack_range (STACK_CHECK_PROTECT,
21544 size - STACK_CHECK_PROTECT,
21545 regno, live_regs_mask);
21547 else if (size > 0)
21548 arm_emit_probe_stack_range (STACK_CHECK_PROTECT, size,
21549 regno, live_regs_mask);
21552 /* Recover the static chain register. */
21553 if (clobber_ip)
21555 if (!arm_r3_live_at_start_p () || saved_pretend_args)
21556 insn = gen_rtx_REG (SImode, 3);
21557 else
21559 insn = plus_constant (Pmode, hard_frame_pointer_rtx, 4);
21560 insn = gen_frame_mem (SImode, insn);
21562 emit_set_insn (ip_rtx, insn);
21563 emit_insn (gen_force_register_use (ip_rtx));
21566 if (offsets->outgoing_args != offsets->saved_args + saved_regs)
21568 /* This add can produce multiple insns for a large constant, so we
21569 need to get tricky. */
21570 rtx_insn *last = get_last_insn ();
21572 amount = GEN_INT (offsets->saved_args + saved_regs
21573 - offsets->outgoing_args);
21575 insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
21576 amount));
21579 last = last ? NEXT_INSN (last) : get_insns ();
21580 RTX_FRAME_RELATED_P (last) = 1;
21582 while (last != insn);
21584 /* If the frame pointer is needed, emit a special barrier that
21585 will prevent the scheduler from moving stores to the frame
21586 before the stack adjustment. */
21587 if (frame_pointer_needed)
21588 insn = emit_insn (gen_stack_tie (stack_pointer_rtx,
21589 hard_frame_pointer_rtx));
21593 if (frame_pointer_needed && TARGET_THUMB2)
21594 thumb_set_frame_pointer (offsets);
21596 if (flag_pic && arm_pic_register != INVALID_REGNUM)
21598 unsigned long mask;
21600 mask = live_regs_mask;
21601 mask &= THUMB2_WORK_REGS;
21602 if (!IS_NESTED (func_type))
21603 mask |= (1 << IP_REGNUM);
21604 arm_load_pic_register (mask);
21607 /* If we are profiling, make sure no instructions are scheduled before
21608 the call to mcount. Similarly if the user has requested no
21609 scheduling in the prolog. Similarly if we want non-call exceptions
21610 using the EABI unwinder, to prevent faulting instructions from being
21611 swapped with a stack adjustment. */
21612 if (crtl->profile || !TARGET_SCHED_PROLOG
21613 || (arm_except_unwind_info (&global_options) == UI_TARGET
21614 && cfun->can_throw_non_call_exceptions))
21615 emit_insn (gen_blockage ());
21617 /* If the link register is being kept alive, with the return address in it,
21618 then make sure that it does not get reused by the ce2 pass. */
21619 if ((live_regs_mask & (1 << LR_REGNUM)) == 0)
21620 cfun->machine->lr_save_eliminated = 1;
21623 /* Print condition code to STREAM. Helper function for arm_print_operand. */
21624 static void
21625 arm_print_condition (FILE *stream)
21627 if (arm_ccfsm_state == 3 || arm_ccfsm_state == 4)
21629 /* Branch conversion is not implemented for Thumb-2. */
21630 if (TARGET_THUMB)
21632 output_operand_lossage ("predicated Thumb instruction");
21633 return;
21635 if (current_insn_predicate != NULL)
21637 output_operand_lossage
21638 ("predicated instruction in conditional sequence");
21639 return;
21642 fputs (arm_condition_codes[arm_current_cc], stream);
21644 else if (current_insn_predicate)
21646 enum arm_cond_code code;
21648 if (TARGET_THUMB1)
21650 output_operand_lossage ("predicated Thumb instruction");
21651 return;
21654 code = get_arm_condition_code (current_insn_predicate);
21655 fputs (arm_condition_codes[code], stream);
21660 /* Globally reserved letters: acln
21661 Puncutation letters currently used: @_|?().!#
21662 Lower case letters currently used: bcdefhimpqtvwxyz
21663 Upper case letters currently used: ABCDFGHJKLMNOPQRSTU
21664 Letters previously used, but now deprecated/obsolete: sVWXYZ.
21666 Note that the global reservation for 'c' is only for CONSTANT_ADDRESS_P.
21668 If CODE is 'd', then the X is a condition operand and the instruction
21669 should only be executed if the condition is true.
21670 if CODE is 'D', then the X is a condition operand and the instruction
21671 should only be executed if the condition is false: however, if the mode
21672 of the comparison is CCFPEmode, then always execute the instruction -- we
21673 do this because in these circumstances !GE does not necessarily imply LT;
21674 in these cases the instruction pattern will take care to make sure that
21675 an instruction containing %d will follow, thereby undoing the effects of
21676 doing this instruction unconditionally.
21677 If CODE is 'N' then X is a floating point operand that must be negated
21678 before output.
21679 If CODE is 'B' then output a bitwise inverted value of X (a const int).
21680 If X is a REG and CODE is `M', output a ldm/stm style multi-reg. */
21681 static void
21682 arm_print_operand (FILE *stream, rtx x, int code)
21684 switch (code)
21686 case '@':
21687 fputs (ASM_COMMENT_START, stream);
21688 return;
21690 case '_':
21691 fputs (user_label_prefix, stream);
21692 return;
21694 case '|':
21695 fputs (REGISTER_PREFIX, stream);
21696 return;
21698 case '?':
21699 arm_print_condition (stream);
21700 return;
21702 case '.':
21703 /* The current condition code for a condition code setting instruction.
21704 Preceded by 's' in unified syntax, otherwise followed by 's'. */
21705 fputc('s', stream);
21706 arm_print_condition (stream);
21707 return;
21709 case '!':
21710 /* If the instruction is conditionally executed then print
21711 the current condition code, otherwise print 's'. */
21712 gcc_assert (TARGET_THUMB2);
21713 if (current_insn_predicate)
21714 arm_print_condition (stream);
21715 else
21716 fputc('s', stream);
21717 break;
21719 /* %# is a "break" sequence. It doesn't output anything, but is used to
21720 separate e.g. operand numbers from following text, if that text consists
21721 of further digits which we don't want to be part of the operand
21722 number. */
21723 case '#':
21724 return;
21726 case 'N':
21728 REAL_VALUE_TYPE r;
21729 r = real_value_negate (CONST_DOUBLE_REAL_VALUE (x));
21730 fprintf (stream, "%s", fp_const_from_val (&r));
21732 return;
21734 /* An integer or symbol address without a preceding # sign. */
21735 case 'c':
21736 switch (GET_CODE (x))
21738 case CONST_INT:
21739 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
21740 break;
21742 case SYMBOL_REF:
21743 output_addr_const (stream, x);
21744 break;
21746 case CONST:
21747 if (GET_CODE (XEXP (x, 0)) == PLUS
21748 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF)
21750 output_addr_const (stream, x);
21751 break;
21753 /* Fall through. */
21755 default:
21756 output_operand_lossage ("Unsupported operand for code '%c'", code);
21758 return;
21760 /* An integer that we want to print in HEX. */
21761 case 'x':
21762 switch (GET_CODE (x))
21764 case CONST_INT:
21765 fprintf (stream, "#" HOST_WIDE_INT_PRINT_HEX, INTVAL (x));
21766 break;
21768 default:
21769 output_operand_lossage ("Unsupported operand for code '%c'", code);
21771 return;
21773 case 'B':
21774 if (CONST_INT_P (x))
21776 HOST_WIDE_INT val;
21777 val = ARM_SIGN_EXTEND (~INTVAL (x));
21778 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, val);
21780 else
21782 putc ('~', stream);
21783 output_addr_const (stream, x);
21785 return;
21787 case 'b':
21788 /* Print the log2 of a CONST_INT. */
21790 HOST_WIDE_INT val;
21792 if (!CONST_INT_P (x)
21793 || (val = exact_log2 (INTVAL (x) & 0xffffffff)) < 0)
21794 output_operand_lossage ("Unsupported operand for code '%c'", code);
21795 else
21796 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, val);
21798 return;
21800 case 'L':
21801 /* The low 16 bits of an immediate constant. */
21802 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL(x) & 0xffff);
21803 return;
21805 case 'i':
21806 fprintf (stream, "%s", arithmetic_instr (x, 1));
21807 return;
21809 case 'I':
21810 fprintf (stream, "%s", arithmetic_instr (x, 0));
21811 return;
21813 case 'S':
21815 HOST_WIDE_INT val;
21816 const char *shift;
21818 shift = shift_op (x, &val);
21820 if (shift)
21822 fprintf (stream, ", %s ", shift);
21823 if (val == -1)
21824 arm_print_operand (stream, XEXP (x, 1), 0);
21825 else
21826 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, val);
21829 return;
21831 /* An explanation of the 'Q', 'R' and 'H' register operands:
21833 In a pair of registers containing a DI or DF value the 'Q'
21834 operand returns the register number of the register containing
21835 the least significant part of the value. The 'R' operand returns
21836 the register number of the register containing the most
21837 significant part of the value.
21839 The 'H' operand returns the higher of the two register numbers.
21840 On a run where WORDS_BIG_ENDIAN is true the 'H' operand is the
21841 same as the 'Q' operand, since the most significant part of the
21842 value is held in the lower number register. The reverse is true
21843 on systems where WORDS_BIG_ENDIAN is false.
21845 The purpose of these operands is to distinguish between cases
21846 where the endian-ness of the values is important (for example
21847 when they are added together), and cases where the endian-ness
21848 is irrelevant, but the order of register operations is important.
21849 For example when loading a value from memory into a register
21850 pair, the endian-ness does not matter. Provided that the value
21851 from the lower memory address is put into the lower numbered
21852 register, and the value from the higher address is put into the
21853 higher numbered register, the load will work regardless of whether
21854 the value being loaded is big-wordian or little-wordian. The
21855 order of the two register loads can matter however, if the address
21856 of the memory location is actually held in one of the registers
21857 being overwritten by the load.
21859 The 'Q' and 'R' constraints are also available for 64-bit
21860 constants. */
21861 case 'Q':
21862 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
21864 rtx part = gen_lowpart (SImode, x);
21865 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, INTVAL (part));
21866 return;
21869 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
21871 output_operand_lossage ("invalid operand for code '%c'", code);
21872 return;
21875 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 1 : 0));
21876 return;
21878 case 'R':
21879 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
21881 machine_mode mode = GET_MODE (x);
21882 rtx part;
21884 if (mode == VOIDmode)
21885 mode = DImode;
21886 part = gen_highpart_mode (SImode, mode, x);
21887 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, INTVAL (part));
21888 return;
21891 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
21893 output_operand_lossage ("invalid operand for code '%c'", code);
21894 return;
21897 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 0 : 1));
21898 return;
21900 case 'H':
21901 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
21903 output_operand_lossage ("invalid operand for code '%c'", code);
21904 return;
21907 asm_fprintf (stream, "%r", REGNO (x) + 1);
21908 return;
21910 case 'J':
21911 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
21913 output_operand_lossage ("invalid operand for code '%c'", code);
21914 return;
21917 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 3 : 2));
21918 return;
21920 case 'K':
21921 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
21923 output_operand_lossage ("invalid operand for code '%c'", code);
21924 return;
21927 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 2 : 3));
21928 return;
21930 case 'm':
21931 asm_fprintf (stream, "%r",
21932 REG_P (XEXP (x, 0))
21933 ? REGNO (XEXP (x, 0)) : REGNO (XEXP (XEXP (x, 0), 0)));
21934 return;
21936 case 'M':
21937 asm_fprintf (stream, "{%r-%r}",
21938 REGNO (x),
21939 REGNO (x) + ARM_NUM_REGS (GET_MODE (x)) - 1);
21940 return;
21942 /* Like 'M', but writing doubleword vector registers, for use by Neon
21943 insns. */
21944 case 'h':
21946 int regno = (REGNO (x) - FIRST_VFP_REGNUM) / 2;
21947 int numregs = ARM_NUM_REGS (GET_MODE (x)) / 2;
21948 if (numregs == 1)
21949 asm_fprintf (stream, "{d%d}", regno);
21950 else
21951 asm_fprintf (stream, "{d%d-d%d}", regno, regno + numregs - 1);
21953 return;
21955 case 'd':
21956 /* CONST_TRUE_RTX means always -- that's the default. */
21957 if (x == const_true_rtx)
21958 return;
21960 if (!COMPARISON_P (x))
21962 output_operand_lossage ("invalid operand for code '%c'", code);
21963 return;
21966 fputs (arm_condition_codes[get_arm_condition_code (x)],
21967 stream);
21968 return;
21970 case 'D':
21971 /* CONST_TRUE_RTX means not always -- i.e. never. We shouldn't ever
21972 want to do that. */
21973 if (x == const_true_rtx)
21975 output_operand_lossage ("instruction never executed");
21976 return;
21978 if (!COMPARISON_P (x))
21980 output_operand_lossage ("invalid operand for code '%c'", code);
21981 return;
21984 fputs (arm_condition_codes[ARM_INVERSE_CONDITION_CODE
21985 (get_arm_condition_code (x))],
21986 stream);
21987 return;
21989 case 's':
21990 case 'V':
21991 case 'W':
21992 case 'X':
21993 case 'Y':
21994 case 'Z':
21995 /* Former Maverick support, removed after GCC-4.7. */
21996 output_operand_lossage ("obsolete Maverick format code '%c'", code);
21997 return;
21999 case 'U':
22000 if (!REG_P (x)
22001 || REGNO (x) < FIRST_IWMMXT_GR_REGNUM
22002 || REGNO (x) > LAST_IWMMXT_GR_REGNUM)
22003 /* Bad value for wCG register number. */
22005 output_operand_lossage ("invalid operand for code '%c'", code);
22006 return;
22009 else
22010 fprintf (stream, "%d", REGNO (x) - FIRST_IWMMXT_GR_REGNUM);
22011 return;
22013 /* Print an iWMMXt control register name. */
22014 case 'w':
22015 if (!CONST_INT_P (x)
22016 || INTVAL (x) < 0
22017 || INTVAL (x) >= 16)
22018 /* Bad value for wC register number. */
22020 output_operand_lossage ("invalid operand for code '%c'", code);
22021 return;
22024 else
22026 static const char * wc_reg_names [16] =
22028 "wCID", "wCon", "wCSSF", "wCASF",
22029 "wC4", "wC5", "wC6", "wC7",
22030 "wCGR0", "wCGR1", "wCGR2", "wCGR3",
22031 "wC12", "wC13", "wC14", "wC15"
22034 fputs (wc_reg_names [INTVAL (x)], stream);
22036 return;
22038 /* Print the high single-precision register of a VFP double-precision
22039 register. */
22040 case 'p':
22042 machine_mode mode = GET_MODE (x);
22043 int regno;
22045 if (GET_MODE_SIZE (mode) != 8 || !REG_P (x))
22047 output_operand_lossage ("invalid operand for code '%c'", code);
22048 return;
22051 regno = REGNO (x);
22052 if (!VFP_REGNO_OK_FOR_DOUBLE (regno))
22054 output_operand_lossage ("invalid operand for code '%c'", code);
22055 return;
22058 fprintf (stream, "s%d", regno - FIRST_VFP_REGNUM + 1);
22060 return;
22062 /* Print a VFP/Neon double precision or quad precision register name. */
22063 case 'P':
22064 case 'q':
22066 machine_mode mode = GET_MODE (x);
22067 int is_quad = (code == 'q');
22068 int regno;
22070 if (GET_MODE_SIZE (mode) != (is_quad ? 16 : 8))
22072 output_operand_lossage ("invalid operand for code '%c'", code);
22073 return;
22076 if (!REG_P (x)
22077 || !IS_VFP_REGNUM (REGNO (x)))
22079 output_operand_lossage ("invalid operand for code '%c'", code);
22080 return;
22083 regno = REGNO (x);
22084 if ((is_quad && !NEON_REGNO_OK_FOR_QUAD (regno))
22085 || (!is_quad && !VFP_REGNO_OK_FOR_DOUBLE (regno)))
22087 output_operand_lossage ("invalid operand for code '%c'", code);
22088 return;
22091 fprintf (stream, "%c%d", is_quad ? 'q' : 'd',
22092 (regno - FIRST_VFP_REGNUM) >> (is_quad ? 2 : 1));
22094 return;
22096 /* These two codes print the low/high doubleword register of a Neon quad
22097 register, respectively. For pair-structure types, can also print
22098 low/high quadword registers. */
22099 case 'e':
22100 case 'f':
22102 machine_mode mode = GET_MODE (x);
22103 int regno;
22105 if ((GET_MODE_SIZE (mode) != 16
22106 && GET_MODE_SIZE (mode) != 32) || !REG_P (x))
22108 output_operand_lossage ("invalid operand for code '%c'", code);
22109 return;
22112 regno = REGNO (x);
22113 if (!NEON_REGNO_OK_FOR_QUAD (regno))
22115 output_operand_lossage ("invalid operand for code '%c'", code);
22116 return;
22119 if (GET_MODE_SIZE (mode) == 16)
22120 fprintf (stream, "d%d", ((regno - FIRST_VFP_REGNUM) >> 1)
22121 + (code == 'f' ? 1 : 0));
22122 else
22123 fprintf (stream, "q%d", ((regno - FIRST_VFP_REGNUM) >> 2)
22124 + (code == 'f' ? 1 : 0));
22126 return;
22128 /* Print a VFPv3 floating-point constant, represented as an integer
22129 index. */
22130 case 'G':
22132 int index = vfp3_const_double_index (x);
22133 gcc_assert (index != -1);
22134 fprintf (stream, "%d", index);
22136 return;
22138 /* Print bits representing opcode features for Neon.
22140 Bit 0 is 1 for signed, 0 for unsigned. Floats count as signed
22141 and polynomials as unsigned.
22143 Bit 1 is 1 for floats and polynomials, 0 for ordinary integers.
22145 Bit 2 is 1 for rounding functions, 0 otherwise. */
22147 /* Identify the type as 's', 'u', 'p' or 'f'. */
22148 case 'T':
22150 HOST_WIDE_INT bits = INTVAL (x);
22151 fputc ("uspf"[bits & 3], stream);
22153 return;
22155 /* Likewise, but signed and unsigned integers are both 'i'. */
22156 case 'F':
22158 HOST_WIDE_INT bits = INTVAL (x);
22159 fputc ("iipf"[bits & 3], stream);
22161 return;
22163 /* As for 'T', but emit 'u' instead of 'p'. */
22164 case 't':
22166 HOST_WIDE_INT bits = INTVAL (x);
22167 fputc ("usuf"[bits & 3], stream);
22169 return;
22171 /* Bit 2: rounding (vs none). */
22172 case 'O':
22174 HOST_WIDE_INT bits = INTVAL (x);
22175 fputs ((bits & 4) != 0 ? "r" : "", stream);
22177 return;
22179 /* Memory operand for vld1/vst1 instruction. */
22180 case 'A':
22182 rtx addr;
22183 bool postinc = FALSE;
22184 rtx postinc_reg = NULL;
22185 unsigned align, memsize, align_bits;
22187 gcc_assert (MEM_P (x));
22188 addr = XEXP (x, 0);
22189 if (GET_CODE (addr) == POST_INC)
22191 postinc = 1;
22192 addr = XEXP (addr, 0);
22194 if (GET_CODE (addr) == POST_MODIFY)
22196 postinc_reg = XEXP( XEXP (addr, 1), 1);
22197 addr = XEXP (addr, 0);
22199 asm_fprintf (stream, "[%r", REGNO (addr));
22201 /* We know the alignment of this access, so we can emit a hint in the
22202 instruction (for some alignments) as an aid to the memory subsystem
22203 of the target. */
22204 align = MEM_ALIGN (x) >> 3;
22205 memsize = MEM_SIZE (x);
22207 /* Only certain alignment specifiers are supported by the hardware. */
22208 if (memsize == 32 && (align % 32) == 0)
22209 align_bits = 256;
22210 else if ((memsize == 16 || memsize == 32) && (align % 16) == 0)
22211 align_bits = 128;
22212 else if (memsize >= 8 && (align % 8) == 0)
22213 align_bits = 64;
22214 else
22215 align_bits = 0;
22217 if (align_bits != 0)
22218 asm_fprintf (stream, ":%d", align_bits);
22220 asm_fprintf (stream, "]");
22222 if (postinc)
22223 fputs("!", stream);
22224 if (postinc_reg)
22225 asm_fprintf (stream, ", %r", REGNO (postinc_reg));
22227 return;
22229 case 'C':
22231 rtx addr;
22233 gcc_assert (MEM_P (x));
22234 addr = XEXP (x, 0);
22235 gcc_assert (REG_P (addr));
22236 asm_fprintf (stream, "[%r]", REGNO (addr));
22238 return;
22240 /* Translate an S register number into a D register number and element index. */
22241 case 'y':
22243 machine_mode mode = GET_MODE (x);
22244 int regno;
22246 if (GET_MODE_SIZE (mode) != 4 || !REG_P (x))
22248 output_operand_lossage ("invalid operand for code '%c'", code);
22249 return;
22252 regno = REGNO (x);
22253 if (!VFP_REGNO_OK_FOR_SINGLE (regno))
22255 output_operand_lossage ("invalid operand for code '%c'", code);
22256 return;
22259 regno = regno - FIRST_VFP_REGNUM;
22260 fprintf (stream, "d%d[%d]", regno / 2, regno % 2);
22262 return;
22264 case 'v':
22265 gcc_assert (CONST_DOUBLE_P (x));
22266 int result;
22267 result = vfp3_const_double_for_fract_bits (x);
22268 if (result == 0)
22269 result = vfp3_const_double_for_bits (x);
22270 fprintf (stream, "#%d", result);
22271 return;
22273 /* Register specifier for vld1.16/vst1.16. Translate the S register
22274 number into a D register number and element index. */
22275 case 'z':
22277 machine_mode mode = GET_MODE (x);
22278 int regno;
22280 if (GET_MODE_SIZE (mode) != 2 || !REG_P (x))
22282 output_operand_lossage ("invalid operand for code '%c'", code);
22283 return;
22286 regno = REGNO (x);
22287 if (!VFP_REGNO_OK_FOR_SINGLE (regno))
22289 output_operand_lossage ("invalid operand for code '%c'", code);
22290 return;
22293 regno = regno - FIRST_VFP_REGNUM;
22294 fprintf (stream, "d%d[%d]", regno/2, ((regno % 2) ? 2 : 0));
22296 return;
22298 default:
22299 if (x == 0)
22301 output_operand_lossage ("missing operand");
22302 return;
22305 switch (GET_CODE (x))
22307 case REG:
22308 asm_fprintf (stream, "%r", REGNO (x));
22309 break;
22311 case MEM:
22312 output_address (GET_MODE (x), XEXP (x, 0));
22313 break;
22315 case CONST_DOUBLE:
22317 char fpstr[20];
22318 real_to_decimal (fpstr, CONST_DOUBLE_REAL_VALUE (x),
22319 sizeof (fpstr), 0, 1);
22320 fprintf (stream, "#%s", fpstr);
22322 break;
22324 default:
22325 gcc_assert (GET_CODE (x) != NEG);
22326 fputc ('#', stream);
22327 if (GET_CODE (x) == HIGH)
22329 fputs (":lower16:", stream);
22330 x = XEXP (x, 0);
22333 output_addr_const (stream, x);
22334 break;
22339 /* Target hook for printing a memory address. */
22340 static void
22341 arm_print_operand_address (FILE *stream, machine_mode mode, rtx x)
22343 if (TARGET_32BIT)
22345 int is_minus = GET_CODE (x) == MINUS;
22347 if (REG_P (x))
22348 asm_fprintf (stream, "[%r]", REGNO (x));
22349 else if (GET_CODE (x) == PLUS || is_minus)
22351 rtx base = XEXP (x, 0);
22352 rtx index = XEXP (x, 1);
22353 HOST_WIDE_INT offset = 0;
22354 if (!REG_P (base)
22355 || (REG_P (index) && REGNO (index) == SP_REGNUM))
22357 /* Ensure that BASE is a register. */
22358 /* (one of them must be). */
22359 /* Also ensure the SP is not used as in index register. */
22360 std::swap (base, index);
22362 switch (GET_CODE (index))
22364 case CONST_INT:
22365 offset = INTVAL (index);
22366 if (is_minus)
22367 offset = -offset;
22368 asm_fprintf (stream, "[%r, #%wd]",
22369 REGNO (base), offset);
22370 break;
22372 case REG:
22373 asm_fprintf (stream, "[%r, %s%r]",
22374 REGNO (base), is_minus ? "-" : "",
22375 REGNO (index));
22376 break;
22378 case MULT:
22379 case ASHIFTRT:
22380 case LSHIFTRT:
22381 case ASHIFT:
22382 case ROTATERT:
22384 asm_fprintf (stream, "[%r, %s%r",
22385 REGNO (base), is_minus ? "-" : "",
22386 REGNO (XEXP (index, 0)));
22387 arm_print_operand (stream, index, 'S');
22388 fputs ("]", stream);
22389 break;
22392 default:
22393 gcc_unreachable ();
22396 else if (GET_CODE (x) == PRE_INC || GET_CODE (x) == POST_INC
22397 || GET_CODE (x) == PRE_DEC || GET_CODE (x) == POST_DEC)
22399 gcc_assert (REG_P (XEXP (x, 0)));
22401 if (GET_CODE (x) == PRE_DEC || GET_CODE (x) == PRE_INC)
22402 asm_fprintf (stream, "[%r, #%s%d]!",
22403 REGNO (XEXP (x, 0)),
22404 GET_CODE (x) == PRE_DEC ? "-" : "",
22405 GET_MODE_SIZE (mode));
22406 else
22407 asm_fprintf (stream, "[%r], #%s%d",
22408 REGNO (XEXP (x, 0)),
22409 GET_CODE (x) == POST_DEC ? "-" : "",
22410 GET_MODE_SIZE (mode));
22412 else if (GET_CODE (x) == PRE_MODIFY)
22414 asm_fprintf (stream, "[%r, ", REGNO (XEXP (x, 0)));
22415 if (CONST_INT_P (XEXP (XEXP (x, 1), 1)))
22416 asm_fprintf (stream, "#%wd]!",
22417 INTVAL (XEXP (XEXP (x, 1), 1)));
22418 else
22419 asm_fprintf (stream, "%r]!",
22420 REGNO (XEXP (XEXP (x, 1), 1)));
22422 else if (GET_CODE (x) == POST_MODIFY)
22424 asm_fprintf (stream, "[%r], ", REGNO (XEXP (x, 0)));
22425 if (CONST_INT_P (XEXP (XEXP (x, 1), 1)))
22426 asm_fprintf (stream, "#%wd",
22427 INTVAL (XEXP (XEXP (x, 1), 1)));
22428 else
22429 asm_fprintf (stream, "%r",
22430 REGNO (XEXP (XEXP (x, 1), 1)));
22432 else output_addr_const (stream, x);
22434 else
22436 if (REG_P (x))
22437 asm_fprintf (stream, "[%r]", REGNO (x));
22438 else if (GET_CODE (x) == POST_INC)
22439 asm_fprintf (stream, "%r!", REGNO (XEXP (x, 0)));
22440 else if (GET_CODE (x) == PLUS)
22442 gcc_assert (REG_P (XEXP (x, 0)));
22443 if (CONST_INT_P (XEXP (x, 1)))
22444 asm_fprintf (stream, "[%r, #%wd]",
22445 REGNO (XEXP (x, 0)),
22446 INTVAL (XEXP (x, 1)));
22447 else
22448 asm_fprintf (stream, "[%r, %r]",
22449 REGNO (XEXP (x, 0)),
22450 REGNO (XEXP (x, 1)));
22452 else
22453 output_addr_const (stream, x);
22457 /* Target hook for indicating whether a punctuation character for
22458 TARGET_PRINT_OPERAND is valid. */
22459 static bool
22460 arm_print_operand_punct_valid_p (unsigned char code)
22462 return (code == '@' || code == '|' || code == '.'
22463 || code == '(' || code == ')' || code == '#'
22464 || (TARGET_32BIT && (code == '?'))
22465 || (TARGET_THUMB2 && (code == '!'))
22466 || (TARGET_THUMB && (code == '_')));
22469 /* Target hook for assembling integer objects. The ARM version needs to
22470 handle word-sized values specially. */
22471 static bool
22472 arm_assemble_integer (rtx x, unsigned int size, int aligned_p)
22474 machine_mode mode;
22476 if (size == UNITS_PER_WORD && aligned_p)
22478 fputs ("\t.word\t", asm_out_file);
22479 output_addr_const (asm_out_file, x);
22481 /* Mark symbols as position independent. We only do this in the
22482 .text segment, not in the .data segment. */
22483 if (NEED_GOT_RELOC && flag_pic && making_const_table &&
22484 (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF))
22486 /* See legitimize_pic_address for an explanation of the
22487 TARGET_VXWORKS_RTP check. */
22488 /* References to weak symbols cannot be resolved locally:
22489 they may be overridden by a non-weak definition at link
22490 time. */
22491 if (!arm_pic_data_is_text_relative
22492 || (GET_CODE (x) == SYMBOL_REF
22493 && (!SYMBOL_REF_LOCAL_P (x)
22494 || (SYMBOL_REF_DECL (x)
22495 ? DECL_WEAK (SYMBOL_REF_DECL (x)) : 0))))
22496 fputs ("(GOT)", asm_out_file);
22497 else
22498 fputs ("(GOTOFF)", asm_out_file);
22500 fputc ('\n', asm_out_file);
22501 return true;
22504 mode = GET_MODE (x);
22506 if (arm_vector_mode_supported_p (mode))
22508 int i, units;
22510 gcc_assert (GET_CODE (x) == CONST_VECTOR);
22512 units = CONST_VECTOR_NUNITS (x);
22513 size = GET_MODE_UNIT_SIZE (mode);
22515 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
22516 for (i = 0; i < units; i++)
22518 rtx elt = CONST_VECTOR_ELT (x, i);
22519 assemble_integer
22520 (elt, size, i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT, 1);
22522 else
22523 for (i = 0; i < units; i++)
22525 rtx elt = CONST_VECTOR_ELT (x, i);
22526 assemble_real
22527 (*CONST_DOUBLE_REAL_VALUE (elt), GET_MODE_INNER (mode),
22528 i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT);
22531 return true;
22534 return default_assemble_integer (x, size, aligned_p);
22537 static void
22538 arm_elf_asm_cdtor (rtx symbol, int priority, bool is_ctor)
22540 section *s;
22542 if (!TARGET_AAPCS_BASED)
22544 (is_ctor ?
22545 default_named_section_asm_out_constructor
22546 : default_named_section_asm_out_destructor) (symbol, priority);
22547 return;
22550 /* Put these in the .init_array section, using a special relocation. */
22551 if (priority != DEFAULT_INIT_PRIORITY)
22553 char buf[18];
22554 sprintf (buf, "%s.%.5u",
22555 is_ctor ? ".init_array" : ".fini_array",
22556 priority);
22557 s = get_section (buf, SECTION_WRITE, NULL_TREE);
22559 else if (is_ctor)
22560 s = ctors_section;
22561 else
22562 s = dtors_section;
22564 switch_to_section (s);
22565 assemble_align (POINTER_SIZE);
22566 fputs ("\t.word\t", asm_out_file);
22567 output_addr_const (asm_out_file, symbol);
22568 fputs ("(target1)\n", asm_out_file);
22571 /* Add a function to the list of static constructors. */
22573 static void
22574 arm_elf_asm_constructor (rtx symbol, int priority)
22576 arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/true);
22579 /* Add a function to the list of static destructors. */
22581 static void
22582 arm_elf_asm_destructor (rtx symbol, int priority)
22584 arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/false);
22587 /* A finite state machine takes care of noticing whether or not instructions
22588 can be conditionally executed, and thus decrease execution time and code
22589 size by deleting branch instructions. The fsm is controlled by
22590 final_prescan_insn, and controls the actions of ASM_OUTPUT_OPCODE. */
22592 /* The state of the fsm controlling condition codes are:
22593 0: normal, do nothing special
22594 1: make ASM_OUTPUT_OPCODE not output this instruction
22595 2: make ASM_OUTPUT_OPCODE not output this instruction
22596 3: make instructions conditional
22597 4: make instructions conditional
22599 State transitions (state->state by whom under condition):
22600 0 -> 1 final_prescan_insn if the `target' is a label
22601 0 -> 2 final_prescan_insn if the `target' is an unconditional branch
22602 1 -> 3 ASM_OUTPUT_OPCODE after not having output the conditional branch
22603 2 -> 4 ASM_OUTPUT_OPCODE after not having output the conditional branch
22604 3 -> 0 (*targetm.asm_out.internal_label) if the `target' label is reached
22605 (the target label has CODE_LABEL_NUMBER equal to arm_target_label).
22606 4 -> 0 final_prescan_insn if the `target' unconditional branch is reached
22607 (the target insn is arm_target_insn).
22609 If the jump clobbers the conditions then we use states 2 and 4.
22611 A similar thing can be done with conditional return insns.
22613 XXX In case the `target' is an unconditional branch, this conditionalising
22614 of the instructions always reduces code size, but not always execution
22615 time. But then, I want to reduce the code size to somewhere near what
22616 /bin/cc produces. */
22618 /* In addition to this, state is maintained for Thumb-2 COND_EXEC
22619 instructions. When a COND_EXEC instruction is seen the subsequent
22620 instructions are scanned so that multiple conditional instructions can be
22621 combined into a single IT block. arm_condexec_count and arm_condexec_mask
22622 specify the length and true/false mask for the IT block. These will be
22623 decremented/zeroed by arm_asm_output_opcode as the insns are output. */
22625 /* Returns the index of the ARM condition code string in
22626 `arm_condition_codes', or ARM_NV if the comparison is invalid.
22627 COMPARISON should be an rtx like `(eq (...) (...))'. */
22629 enum arm_cond_code
22630 maybe_get_arm_condition_code (rtx comparison)
22632 machine_mode mode = GET_MODE (XEXP (comparison, 0));
22633 enum arm_cond_code code;
22634 enum rtx_code comp_code = GET_CODE (comparison);
22636 if (GET_MODE_CLASS (mode) != MODE_CC)
22637 mode = SELECT_CC_MODE (comp_code, XEXP (comparison, 0),
22638 XEXP (comparison, 1));
22640 switch (mode)
22642 case CC_DNEmode: code = ARM_NE; goto dominance;
22643 case CC_DEQmode: code = ARM_EQ; goto dominance;
22644 case CC_DGEmode: code = ARM_GE; goto dominance;
22645 case CC_DGTmode: code = ARM_GT; goto dominance;
22646 case CC_DLEmode: code = ARM_LE; goto dominance;
22647 case CC_DLTmode: code = ARM_LT; goto dominance;
22648 case CC_DGEUmode: code = ARM_CS; goto dominance;
22649 case CC_DGTUmode: code = ARM_HI; goto dominance;
22650 case CC_DLEUmode: code = ARM_LS; goto dominance;
22651 case CC_DLTUmode: code = ARM_CC;
22653 dominance:
22654 if (comp_code == EQ)
22655 return ARM_INVERSE_CONDITION_CODE (code);
22656 if (comp_code == NE)
22657 return code;
22658 return ARM_NV;
22660 case CC_NOOVmode:
22661 switch (comp_code)
22663 case NE: return ARM_NE;
22664 case EQ: return ARM_EQ;
22665 case GE: return ARM_PL;
22666 case LT: return ARM_MI;
22667 default: return ARM_NV;
22670 case CC_Zmode:
22671 switch (comp_code)
22673 case NE: return ARM_NE;
22674 case EQ: return ARM_EQ;
22675 default: return ARM_NV;
22678 case CC_Nmode:
22679 switch (comp_code)
22681 case NE: return ARM_MI;
22682 case EQ: return ARM_PL;
22683 default: return ARM_NV;
22686 case CCFPEmode:
22687 case CCFPmode:
22688 /* We can handle all cases except UNEQ and LTGT. */
22689 switch (comp_code)
22691 case GE: return ARM_GE;
22692 case GT: return ARM_GT;
22693 case LE: return ARM_LS;
22694 case LT: return ARM_MI;
22695 case NE: return ARM_NE;
22696 case EQ: return ARM_EQ;
22697 case ORDERED: return ARM_VC;
22698 case UNORDERED: return ARM_VS;
22699 case UNLT: return ARM_LT;
22700 case UNLE: return ARM_LE;
22701 case UNGT: return ARM_HI;
22702 case UNGE: return ARM_PL;
22703 /* UNEQ and LTGT do not have a representation. */
22704 case UNEQ: /* Fall through. */
22705 case LTGT: /* Fall through. */
22706 default: return ARM_NV;
22709 case CC_SWPmode:
22710 switch (comp_code)
22712 case NE: return ARM_NE;
22713 case EQ: return ARM_EQ;
22714 case GE: return ARM_LE;
22715 case GT: return ARM_LT;
22716 case LE: return ARM_GE;
22717 case LT: return ARM_GT;
22718 case GEU: return ARM_LS;
22719 case GTU: return ARM_CC;
22720 case LEU: return ARM_CS;
22721 case LTU: return ARM_HI;
22722 default: return ARM_NV;
22725 case CC_Cmode:
22726 switch (comp_code)
22728 case LTU: return ARM_CS;
22729 case GEU: return ARM_CC;
22730 case NE: return ARM_CS;
22731 case EQ: return ARM_CC;
22732 default: return ARM_NV;
22735 case CC_CZmode:
22736 switch (comp_code)
22738 case NE: return ARM_NE;
22739 case EQ: return ARM_EQ;
22740 case GEU: return ARM_CS;
22741 case GTU: return ARM_HI;
22742 case LEU: return ARM_LS;
22743 case LTU: return ARM_CC;
22744 default: return ARM_NV;
22747 case CC_NCVmode:
22748 switch (comp_code)
22750 case GE: return ARM_GE;
22751 case LT: return ARM_LT;
22752 case GEU: return ARM_CS;
22753 case LTU: return ARM_CC;
22754 default: return ARM_NV;
22757 case CC_Vmode:
22758 switch (comp_code)
22760 case NE: return ARM_VS;
22761 case EQ: return ARM_VC;
22762 default: return ARM_NV;
22765 case CCmode:
22766 switch (comp_code)
22768 case NE: return ARM_NE;
22769 case EQ: return ARM_EQ;
22770 case GE: return ARM_GE;
22771 case GT: return ARM_GT;
22772 case LE: return ARM_LE;
22773 case LT: return ARM_LT;
22774 case GEU: return ARM_CS;
22775 case GTU: return ARM_HI;
22776 case LEU: return ARM_LS;
22777 case LTU: return ARM_CC;
22778 default: return ARM_NV;
22781 default: gcc_unreachable ();
22785 /* Like maybe_get_arm_condition_code, but never return ARM_NV. */
22786 static enum arm_cond_code
22787 get_arm_condition_code (rtx comparison)
22789 enum arm_cond_code code = maybe_get_arm_condition_code (comparison);
22790 gcc_assert (code != ARM_NV);
22791 return code;
22794 /* Tell arm_asm_output_opcode to output IT blocks for conditionally executed
22795 instructions. */
22796 void
22797 thumb2_final_prescan_insn (rtx_insn *insn)
22799 rtx_insn *first_insn = insn;
22800 rtx body = PATTERN (insn);
22801 rtx predicate;
22802 enum arm_cond_code code;
22803 int n;
22804 int mask;
22805 int max;
22807 /* max_insns_skipped in the tune was already taken into account in the
22808 cost model of ifcvt pass when generating COND_EXEC insns. At this stage
22809 just emit the IT blocks as we can. It does not make sense to split
22810 the IT blocks. */
22811 max = MAX_INSN_PER_IT_BLOCK;
22813 /* Remove the previous insn from the count of insns to be output. */
22814 if (arm_condexec_count)
22815 arm_condexec_count--;
22817 /* Nothing to do if we are already inside a conditional block. */
22818 if (arm_condexec_count)
22819 return;
22821 if (GET_CODE (body) != COND_EXEC)
22822 return;
22824 /* Conditional jumps are implemented directly. */
22825 if (JUMP_P (insn))
22826 return;
22828 predicate = COND_EXEC_TEST (body);
22829 arm_current_cc = get_arm_condition_code (predicate);
22831 n = get_attr_ce_count (insn);
22832 arm_condexec_count = 1;
22833 arm_condexec_mask = (1 << n) - 1;
22834 arm_condexec_masklen = n;
22835 /* See if subsequent instructions can be combined into the same block. */
22836 for (;;)
22838 insn = next_nonnote_insn (insn);
22840 /* Jumping into the middle of an IT block is illegal, so a label or
22841 barrier terminates the block. */
22842 if (!NONJUMP_INSN_P (insn) && !JUMP_P (insn))
22843 break;
22845 body = PATTERN (insn);
22846 /* USE and CLOBBER aren't really insns, so just skip them. */
22847 if (GET_CODE (body) == USE
22848 || GET_CODE (body) == CLOBBER)
22849 continue;
22851 /* ??? Recognize conditional jumps, and combine them with IT blocks. */
22852 if (GET_CODE (body) != COND_EXEC)
22853 break;
22854 /* Maximum number of conditionally executed instructions in a block. */
22855 n = get_attr_ce_count (insn);
22856 if (arm_condexec_masklen + n > max)
22857 break;
22859 predicate = COND_EXEC_TEST (body);
22860 code = get_arm_condition_code (predicate);
22861 mask = (1 << n) - 1;
22862 if (arm_current_cc == code)
22863 arm_condexec_mask |= (mask << arm_condexec_masklen);
22864 else if (arm_current_cc != ARM_INVERSE_CONDITION_CODE(code))
22865 break;
22867 arm_condexec_count++;
22868 arm_condexec_masklen += n;
22870 /* A jump must be the last instruction in a conditional block. */
22871 if (JUMP_P (insn))
22872 break;
22874 /* Restore recog_data (getting the attributes of other insns can
22875 destroy this array, but final.c assumes that it remains intact
22876 across this call). */
22877 extract_constrain_insn_cached (first_insn);
22880 void
22881 arm_final_prescan_insn (rtx_insn *insn)
22883 /* BODY will hold the body of INSN. */
22884 rtx body = PATTERN (insn);
22886 /* This will be 1 if trying to repeat the trick, and things need to be
22887 reversed if it appears to fail. */
22888 int reverse = 0;
22890 /* If we start with a return insn, we only succeed if we find another one. */
22891 int seeking_return = 0;
22892 enum rtx_code return_code = UNKNOWN;
22894 /* START_INSN will hold the insn from where we start looking. This is the
22895 first insn after the following code_label if REVERSE is true. */
22896 rtx_insn *start_insn = insn;
22898 /* If in state 4, check if the target branch is reached, in order to
22899 change back to state 0. */
22900 if (arm_ccfsm_state == 4)
22902 if (insn == arm_target_insn)
22904 arm_target_insn = NULL;
22905 arm_ccfsm_state = 0;
22907 return;
22910 /* If in state 3, it is possible to repeat the trick, if this insn is an
22911 unconditional branch to a label, and immediately following this branch
22912 is the previous target label which is only used once, and the label this
22913 branch jumps to is not too far off. */
22914 if (arm_ccfsm_state == 3)
22916 if (simplejump_p (insn))
22918 start_insn = next_nonnote_insn (start_insn);
22919 if (BARRIER_P (start_insn))
22921 /* XXX Isn't this always a barrier? */
22922 start_insn = next_nonnote_insn (start_insn);
22924 if (LABEL_P (start_insn)
22925 && CODE_LABEL_NUMBER (start_insn) == arm_target_label
22926 && LABEL_NUSES (start_insn) == 1)
22927 reverse = TRUE;
22928 else
22929 return;
22931 else if (ANY_RETURN_P (body))
22933 start_insn = next_nonnote_insn (start_insn);
22934 if (BARRIER_P (start_insn))
22935 start_insn = next_nonnote_insn (start_insn);
22936 if (LABEL_P (start_insn)
22937 && CODE_LABEL_NUMBER (start_insn) == arm_target_label
22938 && LABEL_NUSES (start_insn) == 1)
22940 reverse = TRUE;
22941 seeking_return = 1;
22942 return_code = GET_CODE (body);
22944 else
22945 return;
22947 else
22948 return;
22951 gcc_assert (!arm_ccfsm_state || reverse);
22952 if (!JUMP_P (insn))
22953 return;
22955 /* This jump might be paralleled with a clobber of the condition codes
22956 the jump should always come first */
22957 if (GET_CODE (body) == PARALLEL && XVECLEN (body, 0) > 0)
22958 body = XVECEXP (body, 0, 0);
22960 if (reverse
22961 || (GET_CODE (body) == SET && GET_CODE (SET_DEST (body)) == PC
22962 && GET_CODE (SET_SRC (body)) == IF_THEN_ELSE))
22964 int insns_skipped;
22965 int fail = FALSE, succeed = FALSE;
22966 /* Flag which part of the IF_THEN_ELSE is the LABEL_REF. */
22967 int then_not_else = TRUE;
22968 rtx_insn *this_insn = start_insn;
22969 rtx label = 0;
22971 /* Register the insn jumped to. */
22972 if (reverse)
22974 if (!seeking_return)
22975 label = XEXP (SET_SRC (body), 0);
22977 else if (GET_CODE (XEXP (SET_SRC (body), 1)) == LABEL_REF)
22978 label = XEXP (XEXP (SET_SRC (body), 1), 0);
22979 else if (GET_CODE (XEXP (SET_SRC (body), 2)) == LABEL_REF)
22981 label = XEXP (XEXP (SET_SRC (body), 2), 0);
22982 then_not_else = FALSE;
22984 else if (ANY_RETURN_P (XEXP (SET_SRC (body), 1)))
22986 seeking_return = 1;
22987 return_code = GET_CODE (XEXP (SET_SRC (body), 1));
22989 else if (ANY_RETURN_P (XEXP (SET_SRC (body), 2)))
22991 seeking_return = 1;
22992 return_code = GET_CODE (XEXP (SET_SRC (body), 2));
22993 then_not_else = FALSE;
22995 else
22996 gcc_unreachable ();
22998 /* See how many insns this branch skips, and what kind of insns. If all
22999 insns are okay, and the label or unconditional branch to the same
23000 label is not too far away, succeed. */
23001 for (insns_skipped = 0;
23002 !fail && !succeed && insns_skipped++ < max_insns_skipped;)
23004 rtx scanbody;
23006 this_insn = next_nonnote_insn (this_insn);
23007 if (!this_insn)
23008 break;
23010 switch (GET_CODE (this_insn))
23012 case CODE_LABEL:
23013 /* Succeed if it is the target label, otherwise fail since
23014 control falls in from somewhere else. */
23015 if (this_insn == label)
23017 arm_ccfsm_state = 1;
23018 succeed = TRUE;
23020 else
23021 fail = TRUE;
23022 break;
23024 case BARRIER:
23025 /* Succeed if the following insn is the target label.
23026 Otherwise fail.
23027 If return insns are used then the last insn in a function
23028 will be a barrier. */
23029 this_insn = next_nonnote_insn (this_insn);
23030 if (this_insn && this_insn == label)
23032 arm_ccfsm_state = 1;
23033 succeed = TRUE;
23035 else
23036 fail = TRUE;
23037 break;
23039 case CALL_INSN:
23040 /* The AAPCS says that conditional calls should not be
23041 used since they make interworking inefficient (the
23042 linker can't transform BL<cond> into BLX). That's
23043 only a problem if the machine has BLX. */
23044 if (arm_arch5)
23046 fail = TRUE;
23047 break;
23050 /* Succeed if the following insn is the target label, or
23051 if the following two insns are a barrier and the
23052 target label. */
23053 this_insn = next_nonnote_insn (this_insn);
23054 if (this_insn && BARRIER_P (this_insn))
23055 this_insn = next_nonnote_insn (this_insn);
23057 if (this_insn && this_insn == label
23058 && insns_skipped < max_insns_skipped)
23060 arm_ccfsm_state = 1;
23061 succeed = TRUE;
23063 else
23064 fail = TRUE;
23065 break;
23067 case JUMP_INSN:
23068 /* If this is an unconditional branch to the same label, succeed.
23069 If it is to another label, do nothing. If it is conditional,
23070 fail. */
23071 /* XXX Probably, the tests for SET and the PC are
23072 unnecessary. */
23074 scanbody = PATTERN (this_insn);
23075 if (GET_CODE (scanbody) == SET
23076 && GET_CODE (SET_DEST (scanbody)) == PC)
23078 if (GET_CODE (SET_SRC (scanbody)) == LABEL_REF
23079 && XEXP (SET_SRC (scanbody), 0) == label && !reverse)
23081 arm_ccfsm_state = 2;
23082 succeed = TRUE;
23084 else if (GET_CODE (SET_SRC (scanbody)) == IF_THEN_ELSE)
23085 fail = TRUE;
23087 /* Fail if a conditional return is undesirable (e.g. on a
23088 StrongARM), but still allow this if optimizing for size. */
23089 else if (GET_CODE (scanbody) == return_code
23090 && !use_return_insn (TRUE, NULL)
23091 && !optimize_size)
23092 fail = TRUE;
23093 else if (GET_CODE (scanbody) == return_code)
23095 arm_ccfsm_state = 2;
23096 succeed = TRUE;
23098 else if (GET_CODE (scanbody) == PARALLEL)
23100 switch (get_attr_conds (this_insn))
23102 case CONDS_NOCOND:
23103 break;
23104 default:
23105 fail = TRUE;
23106 break;
23109 else
23110 fail = TRUE; /* Unrecognized jump (e.g. epilogue). */
23112 break;
23114 case INSN:
23115 /* Instructions using or affecting the condition codes make it
23116 fail. */
23117 scanbody = PATTERN (this_insn);
23118 if (!(GET_CODE (scanbody) == SET
23119 || GET_CODE (scanbody) == PARALLEL)
23120 || get_attr_conds (this_insn) != CONDS_NOCOND)
23121 fail = TRUE;
23122 break;
23124 default:
23125 break;
23128 if (succeed)
23130 if ((!seeking_return) && (arm_ccfsm_state == 1 || reverse))
23131 arm_target_label = CODE_LABEL_NUMBER (label);
23132 else
23134 gcc_assert (seeking_return || arm_ccfsm_state == 2);
23136 while (this_insn && GET_CODE (PATTERN (this_insn)) == USE)
23138 this_insn = next_nonnote_insn (this_insn);
23139 gcc_assert (!this_insn
23140 || (!BARRIER_P (this_insn)
23141 && !LABEL_P (this_insn)));
23143 if (!this_insn)
23145 /* Oh, dear! we ran off the end.. give up. */
23146 extract_constrain_insn_cached (insn);
23147 arm_ccfsm_state = 0;
23148 arm_target_insn = NULL;
23149 return;
23151 arm_target_insn = this_insn;
23154 /* If REVERSE is true, ARM_CURRENT_CC needs to be inverted from
23155 what it was. */
23156 if (!reverse)
23157 arm_current_cc = get_arm_condition_code (XEXP (SET_SRC (body), 0));
23159 if (reverse || then_not_else)
23160 arm_current_cc = ARM_INVERSE_CONDITION_CODE (arm_current_cc);
23163 /* Restore recog_data (getting the attributes of other insns can
23164 destroy this array, but final.c assumes that it remains intact
23165 across this call. */
23166 extract_constrain_insn_cached (insn);
23170 /* Output IT instructions. */
23171 void
23172 thumb2_asm_output_opcode (FILE * stream)
23174 char buff[5];
23175 int n;
23177 if (arm_condexec_mask)
23179 for (n = 0; n < arm_condexec_masklen; n++)
23180 buff[n] = (arm_condexec_mask & (1 << n)) ? 't' : 'e';
23181 buff[n] = 0;
23182 asm_fprintf(stream, "i%s\t%s\n\t", buff,
23183 arm_condition_codes[arm_current_cc]);
23184 arm_condexec_mask = 0;
23188 /* Returns true if REGNO is a valid register
23189 for holding a quantity of type MODE. */
23191 arm_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
23193 if (GET_MODE_CLASS (mode) == MODE_CC)
23194 return (regno == CC_REGNUM
23195 || (TARGET_HARD_FLOAT
23196 && regno == VFPCC_REGNUM));
23198 if (regno == CC_REGNUM && GET_MODE_CLASS (mode) != MODE_CC)
23199 return false;
23201 if (TARGET_THUMB1)
23202 /* For the Thumb we only allow values bigger than SImode in
23203 registers 0 - 6, so that there is always a second low
23204 register available to hold the upper part of the value.
23205 We probably we ought to ensure that the register is the
23206 start of an even numbered register pair. */
23207 return (ARM_NUM_REGS (mode) < 2) || (regno < LAST_LO_REGNUM);
23209 if (TARGET_HARD_FLOAT && IS_VFP_REGNUM (regno))
23211 if (mode == SFmode || mode == SImode)
23212 return VFP_REGNO_OK_FOR_SINGLE (regno);
23214 if (mode == DFmode)
23215 return VFP_REGNO_OK_FOR_DOUBLE (regno);
23217 if (mode == HFmode)
23218 return VFP_REGNO_OK_FOR_SINGLE (regno);
23220 /* VFP registers can hold HImode values. */
23221 if (mode == HImode)
23222 return VFP_REGNO_OK_FOR_SINGLE (regno);
23224 if (TARGET_NEON)
23225 return (VALID_NEON_DREG_MODE (mode) && VFP_REGNO_OK_FOR_DOUBLE (regno))
23226 || (VALID_NEON_QREG_MODE (mode)
23227 && NEON_REGNO_OK_FOR_QUAD (regno))
23228 || (mode == TImode && NEON_REGNO_OK_FOR_NREGS (regno, 2))
23229 || (mode == EImode && NEON_REGNO_OK_FOR_NREGS (regno, 3))
23230 || (mode == OImode && NEON_REGNO_OK_FOR_NREGS (regno, 4))
23231 || (mode == CImode && NEON_REGNO_OK_FOR_NREGS (regno, 6))
23232 || (mode == XImode && NEON_REGNO_OK_FOR_NREGS (regno, 8));
23234 return FALSE;
23237 if (TARGET_REALLY_IWMMXT)
23239 if (IS_IWMMXT_GR_REGNUM (regno))
23240 return mode == SImode;
23242 if (IS_IWMMXT_REGNUM (regno))
23243 return VALID_IWMMXT_REG_MODE (mode);
23246 /* We allow almost any value to be stored in the general registers.
23247 Restrict doubleword quantities to even register pairs in ARM state
23248 so that we can use ldrd. Do not allow very large Neon structure
23249 opaque modes in general registers; they would use too many. */
23250 if (regno <= LAST_ARM_REGNUM)
23252 if (ARM_NUM_REGS (mode) > 4)
23253 return FALSE;
23255 if (TARGET_THUMB2)
23256 return TRUE;
23258 return !(TARGET_LDRD && GET_MODE_SIZE (mode) > 4 && (regno & 1) != 0);
23261 if (regno == FRAME_POINTER_REGNUM
23262 || regno == ARG_POINTER_REGNUM)
23263 /* We only allow integers in the fake hard registers. */
23264 return GET_MODE_CLASS (mode) == MODE_INT;
23266 return FALSE;
23269 /* Implement MODES_TIEABLE_P. */
23271 bool
23272 arm_modes_tieable_p (machine_mode mode1, machine_mode mode2)
23274 if (GET_MODE_CLASS (mode1) == GET_MODE_CLASS (mode2))
23275 return true;
23277 /* We specifically want to allow elements of "structure" modes to
23278 be tieable to the structure. This more general condition allows
23279 other rarer situations too. */
23280 if (TARGET_NEON
23281 && (VALID_NEON_DREG_MODE (mode1)
23282 || VALID_NEON_QREG_MODE (mode1)
23283 || VALID_NEON_STRUCT_MODE (mode1))
23284 && (VALID_NEON_DREG_MODE (mode2)
23285 || VALID_NEON_QREG_MODE (mode2)
23286 || VALID_NEON_STRUCT_MODE (mode2)))
23287 return true;
23289 return false;
23292 /* For efficiency and historical reasons LO_REGS, HI_REGS and CC_REGS are
23293 not used in arm mode. */
23295 enum reg_class
23296 arm_regno_class (int regno)
23298 if (regno == PC_REGNUM)
23299 return NO_REGS;
23301 if (TARGET_THUMB1)
23303 if (regno == STACK_POINTER_REGNUM)
23304 return STACK_REG;
23305 if (regno == CC_REGNUM)
23306 return CC_REG;
23307 if (regno < 8)
23308 return LO_REGS;
23309 return HI_REGS;
23312 if (TARGET_THUMB2 && regno < 8)
23313 return LO_REGS;
23315 if ( regno <= LAST_ARM_REGNUM
23316 || regno == FRAME_POINTER_REGNUM
23317 || regno == ARG_POINTER_REGNUM)
23318 return TARGET_THUMB2 ? HI_REGS : GENERAL_REGS;
23320 if (regno == CC_REGNUM || regno == VFPCC_REGNUM)
23321 return TARGET_THUMB2 ? CC_REG : NO_REGS;
23323 if (IS_VFP_REGNUM (regno))
23325 if (regno <= D7_VFP_REGNUM)
23326 return VFP_D0_D7_REGS;
23327 else if (regno <= LAST_LO_VFP_REGNUM)
23328 return VFP_LO_REGS;
23329 else
23330 return VFP_HI_REGS;
23333 if (IS_IWMMXT_REGNUM (regno))
23334 return IWMMXT_REGS;
23336 if (IS_IWMMXT_GR_REGNUM (regno))
23337 return IWMMXT_GR_REGS;
23339 return NO_REGS;
23342 /* Handle a special case when computing the offset
23343 of an argument from the frame pointer. */
23345 arm_debugger_arg_offset (int value, rtx addr)
23347 rtx_insn *insn;
23349 /* We are only interested if dbxout_parms() failed to compute the offset. */
23350 if (value != 0)
23351 return 0;
23353 /* We can only cope with the case where the address is held in a register. */
23354 if (!REG_P (addr))
23355 return 0;
23357 /* If we are using the frame pointer to point at the argument, then
23358 an offset of 0 is correct. */
23359 if (REGNO (addr) == (unsigned) HARD_FRAME_POINTER_REGNUM)
23360 return 0;
23362 /* If we are using the stack pointer to point at the
23363 argument, then an offset of 0 is correct. */
23364 /* ??? Check this is consistent with thumb2 frame layout. */
23365 if ((TARGET_THUMB || !frame_pointer_needed)
23366 && REGNO (addr) == SP_REGNUM)
23367 return 0;
23369 /* Oh dear. The argument is pointed to by a register rather
23370 than being held in a register, or being stored at a known
23371 offset from the frame pointer. Since GDB only understands
23372 those two kinds of argument we must translate the address
23373 held in the register into an offset from the frame pointer.
23374 We do this by searching through the insns for the function
23375 looking to see where this register gets its value. If the
23376 register is initialized from the frame pointer plus an offset
23377 then we are in luck and we can continue, otherwise we give up.
23379 This code is exercised by producing debugging information
23380 for a function with arguments like this:
23382 double func (double a, double b, int c, double d) {return d;}
23384 Without this code the stab for parameter 'd' will be set to
23385 an offset of 0 from the frame pointer, rather than 8. */
23387 /* The if() statement says:
23389 If the insn is a normal instruction
23390 and if the insn is setting the value in a register
23391 and if the register being set is the register holding the address of the argument
23392 and if the address is computing by an addition
23393 that involves adding to a register
23394 which is the frame pointer
23395 a constant integer
23397 then... */
23399 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
23401 if ( NONJUMP_INSN_P (insn)
23402 && GET_CODE (PATTERN (insn)) == SET
23403 && REGNO (XEXP (PATTERN (insn), 0)) == REGNO (addr)
23404 && GET_CODE (XEXP (PATTERN (insn), 1)) == PLUS
23405 && REG_P (XEXP (XEXP (PATTERN (insn), 1), 0))
23406 && REGNO (XEXP (XEXP (PATTERN (insn), 1), 0)) == (unsigned) HARD_FRAME_POINTER_REGNUM
23407 && CONST_INT_P (XEXP (XEXP (PATTERN (insn), 1), 1))
23410 value = INTVAL (XEXP (XEXP (PATTERN (insn), 1), 1));
23412 break;
23416 if (value == 0)
23418 debug_rtx (addr);
23419 warning (0, "unable to compute real location of stacked parameter");
23420 value = 8; /* XXX magic hack */
23423 return value;
23426 /* Implement TARGET_PROMOTED_TYPE. */
23428 static tree
23429 arm_promoted_type (const_tree t)
23431 if (SCALAR_FLOAT_TYPE_P (t)
23432 && TYPE_PRECISION (t) == 16
23433 && TYPE_MAIN_VARIANT (t) == arm_fp16_type_node)
23434 return float_type_node;
23435 return NULL_TREE;
23438 /* Implement TARGET_SCALAR_MODE_SUPPORTED_P.
23439 This simply adds HFmode as a supported mode; even though we don't
23440 implement arithmetic on this type directly, it's supported by
23441 optabs conversions, much the way the double-word arithmetic is
23442 special-cased in the default hook. */
23444 static bool
23445 arm_scalar_mode_supported_p (machine_mode mode)
23447 if (mode == HFmode)
23448 return (arm_fp16_format != ARM_FP16_FORMAT_NONE);
23449 else if (ALL_FIXED_POINT_MODE_P (mode))
23450 return true;
23451 else
23452 return default_scalar_mode_supported_p (mode);
23455 /* Set the value of FLT_EVAL_METHOD.
23456 ISO/IEC TS 18661-3 defines two values that we'd like to make use of:
23458 0: evaluate all operations and constants, whose semantic type has at
23459 most the range and precision of type float, to the range and
23460 precision of float; evaluate all other operations and constants to
23461 the range and precision of the semantic type;
23463 N, where _FloatN is a supported interchange floating type
23464 evaluate all operations and constants, whose semantic type has at
23465 most the range and precision of _FloatN type, to the range and
23466 precision of the _FloatN type; evaluate all other operations and
23467 constants to the range and precision of the semantic type;
23469 If we have the ARMv8.2-A extensions then we support _Float16 in native
23470 precision, so we should set this to 16. Otherwise, we support the type,
23471 but want to evaluate expressions in float precision, so set this to
23472 0. */
23474 static enum flt_eval_method
23475 arm_excess_precision (enum excess_precision_type type)
23477 switch (type)
23479 case EXCESS_PRECISION_TYPE_FAST:
23480 case EXCESS_PRECISION_TYPE_STANDARD:
23481 /* We can calculate either in 16-bit range and precision or
23482 32-bit range and precision. Make that decision based on whether
23483 we have native support for the ARMv8.2-A 16-bit floating-point
23484 instructions or not. */
23485 return (TARGET_VFP_FP16INST
23486 ? FLT_EVAL_METHOD_PROMOTE_TO_FLOAT16
23487 : FLT_EVAL_METHOD_PROMOTE_TO_FLOAT);
23488 case EXCESS_PRECISION_TYPE_IMPLICIT:
23489 return FLT_EVAL_METHOD_PROMOTE_TO_FLOAT16;
23490 default:
23491 gcc_unreachable ();
23493 return FLT_EVAL_METHOD_UNPREDICTABLE;
23497 /* Implement TARGET_FLOATN_MODE. Make very sure that we don't provide
23498 _Float16 if we are using anything other than ieee format for 16-bit
23499 floating point. Otherwise, punt to the default implementation. */
23500 static machine_mode
23501 arm_floatn_mode (int n, bool extended)
23503 if (!extended && n == 16)
23504 return arm_fp16_format == ARM_FP16_FORMAT_IEEE ? HFmode : VOIDmode;
23506 return default_floatn_mode (n, extended);
23510 /* Set up OPERANDS for a register copy from SRC to DEST, taking care
23511 not to early-clobber SRC registers in the process.
23513 We assume that the operands described by SRC and DEST represent a
23514 decomposed copy of OPERANDS[1] into OPERANDS[0]. COUNT is the
23515 number of components into which the copy has been decomposed. */
23516 void
23517 neon_disambiguate_copy (rtx *operands, rtx *dest, rtx *src, unsigned int count)
23519 unsigned int i;
23521 if (!reg_overlap_mentioned_p (operands[0], operands[1])
23522 || REGNO (operands[0]) < REGNO (operands[1]))
23524 for (i = 0; i < count; i++)
23526 operands[2 * i] = dest[i];
23527 operands[2 * i + 1] = src[i];
23530 else
23532 for (i = 0; i < count; i++)
23534 operands[2 * i] = dest[count - i - 1];
23535 operands[2 * i + 1] = src[count - i - 1];
23540 /* Split operands into moves from op[1] + op[2] into op[0]. */
23542 void
23543 neon_split_vcombine (rtx operands[3])
23545 unsigned int dest = REGNO (operands[0]);
23546 unsigned int src1 = REGNO (operands[1]);
23547 unsigned int src2 = REGNO (operands[2]);
23548 machine_mode halfmode = GET_MODE (operands[1]);
23549 unsigned int halfregs = HARD_REGNO_NREGS (src1, halfmode);
23550 rtx destlo, desthi;
23552 if (src1 == dest && src2 == dest + halfregs)
23554 /* No-op move. Can't split to nothing; emit something. */
23555 emit_note (NOTE_INSN_DELETED);
23556 return;
23559 /* Preserve register attributes for variable tracking. */
23560 destlo = gen_rtx_REG_offset (operands[0], halfmode, dest, 0);
23561 desthi = gen_rtx_REG_offset (operands[0], halfmode, dest + halfregs,
23562 GET_MODE_SIZE (halfmode));
23564 /* Special case of reversed high/low parts. Use VSWP. */
23565 if (src2 == dest && src1 == dest + halfregs)
23567 rtx x = gen_rtx_SET (destlo, operands[1]);
23568 rtx y = gen_rtx_SET (desthi, operands[2]);
23569 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, x, y)));
23570 return;
23573 if (!reg_overlap_mentioned_p (operands[2], destlo))
23575 /* Try to avoid unnecessary moves if part of the result
23576 is in the right place already. */
23577 if (src1 != dest)
23578 emit_move_insn (destlo, operands[1]);
23579 if (src2 != dest + halfregs)
23580 emit_move_insn (desthi, operands[2]);
23582 else
23584 if (src2 != dest + halfregs)
23585 emit_move_insn (desthi, operands[2]);
23586 if (src1 != dest)
23587 emit_move_insn (destlo, operands[1]);
23591 /* Return the number (counting from 0) of
23592 the least significant set bit in MASK. */
23594 inline static int
23595 number_of_first_bit_set (unsigned mask)
23597 return ctz_hwi (mask);
23600 /* Like emit_multi_reg_push, but allowing for a different set of
23601 registers to be described as saved. MASK is the set of registers
23602 to be saved; REAL_REGS is the set of registers to be described as
23603 saved. If REAL_REGS is 0, only describe the stack adjustment. */
23605 static rtx_insn *
23606 thumb1_emit_multi_reg_push (unsigned long mask, unsigned long real_regs)
23608 unsigned long regno;
23609 rtx par[10], tmp, reg;
23610 rtx_insn *insn;
23611 int i, j;
23613 /* Build the parallel of the registers actually being stored. */
23614 for (i = 0; mask; ++i, mask &= mask - 1)
23616 regno = ctz_hwi (mask);
23617 reg = gen_rtx_REG (SImode, regno);
23619 if (i == 0)
23620 tmp = gen_rtx_UNSPEC (BLKmode, gen_rtvec (1, reg), UNSPEC_PUSH_MULT);
23621 else
23622 tmp = gen_rtx_USE (VOIDmode, reg);
23624 par[i] = tmp;
23627 tmp = plus_constant (Pmode, stack_pointer_rtx, -4 * i);
23628 tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
23629 tmp = gen_frame_mem (BLKmode, tmp);
23630 tmp = gen_rtx_SET (tmp, par[0]);
23631 par[0] = tmp;
23633 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (i, par));
23634 insn = emit_insn (tmp);
23636 /* Always build the stack adjustment note for unwind info. */
23637 tmp = plus_constant (Pmode, stack_pointer_rtx, -4 * i);
23638 tmp = gen_rtx_SET (stack_pointer_rtx, tmp);
23639 par[0] = tmp;
23641 /* Build the parallel of the registers recorded as saved for unwind. */
23642 for (j = 0; real_regs; ++j, real_regs &= real_regs - 1)
23644 regno = ctz_hwi (real_regs);
23645 reg = gen_rtx_REG (SImode, regno);
23647 tmp = plus_constant (Pmode, stack_pointer_rtx, j * 4);
23648 tmp = gen_frame_mem (SImode, tmp);
23649 tmp = gen_rtx_SET (tmp, reg);
23650 RTX_FRAME_RELATED_P (tmp) = 1;
23651 par[j + 1] = tmp;
23654 if (j == 0)
23655 tmp = par[0];
23656 else
23658 RTX_FRAME_RELATED_P (par[0]) = 1;
23659 tmp = gen_rtx_SEQUENCE (VOIDmode, gen_rtvec_v (j + 1, par));
23662 add_reg_note (insn, REG_FRAME_RELATED_EXPR, tmp);
23664 return insn;
23667 /* Emit code to push or pop registers to or from the stack. F is the
23668 assembly file. MASK is the registers to pop. */
23669 static void
23670 thumb_pop (FILE *f, unsigned long mask)
23672 int regno;
23673 int lo_mask = mask & 0xFF;
23674 int pushed_words = 0;
23676 gcc_assert (mask);
23678 if (lo_mask == 0 && (mask & (1 << PC_REGNUM)))
23680 /* Special case. Do not generate a POP PC statement here, do it in
23681 thumb_exit() */
23682 thumb_exit (f, -1);
23683 return;
23686 fprintf (f, "\tpop\t{");
23688 /* Look at the low registers first. */
23689 for (regno = 0; regno <= LAST_LO_REGNUM; regno++, lo_mask >>= 1)
23691 if (lo_mask & 1)
23693 asm_fprintf (f, "%r", regno);
23695 if ((lo_mask & ~1) != 0)
23696 fprintf (f, ", ");
23698 pushed_words++;
23702 if (mask & (1 << PC_REGNUM))
23704 /* Catch popping the PC. */
23705 if (TARGET_INTERWORK || TARGET_BACKTRACE || crtl->calls_eh_return
23706 || IS_CMSE_ENTRY (arm_current_func_type ()))
23708 /* The PC is never poped directly, instead
23709 it is popped into r3 and then BX is used. */
23710 fprintf (f, "}\n");
23712 thumb_exit (f, -1);
23714 return;
23716 else
23718 if (mask & 0xFF)
23719 fprintf (f, ", ");
23721 asm_fprintf (f, "%r", PC_REGNUM);
23725 fprintf (f, "}\n");
23728 /* Generate code to return from a thumb function.
23729 If 'reg_containing_return_addr' is -1, then the return address is
23730 actually on the stack, at the stack pointer. */
23731 static void
23732 thumb_exit (FILE *f, int reg_containing_return_addr)
23734 unsigned regs_available_for_popping;
23735 unsigned regs_to_pop;
23736 int pops_needed;
23737 unsigned available;
23738 unsigned required;
23739 machine_mode mode;
23740 int size;
23741 int restore_a4 = FALSE;
23743 /* Compute the registers we need to pop. */
23744 regs_to_pop = 0;
23745 pops_needed = 0;
23747 if (reg_containing_return_addr == -1)
23749 regs_to_pop |= 1 << LR_REGNUM;
23750 ++pops_needed;
23753 if (TARGET_BACKTRACE)
23755 /* Restore the (ARM) frame pointer and stack pointer. */
23756 regs_to_pop |= (1 << ARM_HARD_FRAME_POINTER_REGNUM) | (1 << SP_REGNUM);
23757 pops_needed += 2;
23760 /* If there is nothing to pop then just emit the BX instruction and
23761 return. */
23762 if (pops_needed == 0)
23764 if (crtl->calls_eh_return)
23765 asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);
23767 if (IS_CMSE_ENTRY (arm_current_func_type ()))
23769 asm_fprintf (f, "\tmsr\tAPSR_nzcvq, %r\n",
23770 reg_containing_return_addr);
23771 asm_fprintf (f, "\tbxns\t%r\n", reg_containing_return_addr);
23773 else
23774 asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
23775 return;
23777 /* Otherwise if we are not supporting interworking and we have not created
23778 a backtrace structure and the function was not entered in ARM mode then
23779 just pop the return address straight into the PC. */
23780 else if (!TARGET_INTERWORK
23781 && !TARGET_BACKTRACE
23782 && !is_called_in_ARM_mode (current_function_decl)
23783 && !crtl->calls_eh_return
23784 && !IS_CMSE_ENTRY (arm_current_func_type ()))
23786 asm_fprintf (f, "\tpop\t{%r}\n", PC_REGNUM);
23787 return;
23790 /* Find out how many of the (return) argument registers we can corrupt. */
23791 regs_available_for_popping = 0;
23793 /* If returning via __builtin_eh_return, the bottom three registers
23794 all contain information needed for the return. */
23795 if (crtl->calls_eh_return)
23796 size = 12;
23797 else
23799 /* If we can deduce the registers used from the function's
23800 return value. This is more reliable that examining
23801 df_regs_ever_live_p () because that will be set if the register is
23802 ever used in the function, not just if the register is used
23803 to hold a return value. */
23805 if (crtl->return_rtx != 0)
23806 mode = GET_MODE (crtl->return_rtx);
23807 else
23808 mode = DECL_MODE (DECL_RESULT (current_function_decl));
23810 size = GET_MODE_SIZE (mode);
23812 if (size == 0)
23814 /* In a void function we can use any argument register.
23815 In a function that returns a structure on the stack
23816 we can use the second and third argument registers. */
23817 if (mode == VOIDmode)
23818 regs_available_for_popping =
23819 (1 << ARG_REGISTER (1))
23820 | (1 << ARG_REGISTER (2))
23821 | (1 << ARG_REGISTER (3));
23822 else
23823 regs_available_for_popping =
23824 (1 << ARG_REGISTER (2))
23825 | (1 << ARG_REGISTER (3));
23827 else if (size <= 4)
23828 regs_available_for_popping =
23829 (1 << ARG_REGISTER (2))
23830 | (1 << ARG_REGISTER (3));
23831 else if (size <= 8)
23832 regs_available_for_popping =
23833 (1 << ARG_REGISTER (3));
23836 /* Match registers to be popped with registers into which we pop them. */
23837 for (available = regs_available_for_popping,
23838 required = regs_to_pop;
23839 required != 0 && available != 0;
23840 available &= ~(available & - available),
23841 required &= ~(required & - required))
23842 -- pops_needed;
23844 /* If we have any popping registers left over, remove them. */
23845 if (available > 0)
23846 regs_available_for_popping &= ~available;
23848 /* Otherwise if we need another popping register we can use
23849 the fourth argument register. */
23850 else if (pops_needed)
23852 /* If we have not found any free argument registers and
23853 reg a4 contains the return address, we must move it. */
23854 if (regs_available_for_popping == 0
23855 && reg_containing_return_addr == LAST_ARG_REGNUM)
23857 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
23858 reg_containing_return_addr = LR_REGNUM;
23860 else if (size > 12)
23862 /* Register a4 is being used to hold part of the return value,
23863 but we have dire need of a free, low register. */
23864 restore_a4 = TRUE;
23866 asm_fprintf (f, "\tmov\t%r, %r\n",IP_REGNUM, LAST_ARG_REGNUM);
23869 if (reg_containing_return_addr != LAST_ARG_REGNUM)
23871 /* The fourth argument register is available. */
23872 regs_available_for_popping |= 1 << LAST_ARG_REGNUM;
23874 --pops_needed;
23878 /* Pop as many registers as we can. */
23879 thumb_pop (f, regs_available_for_popping);
23881 /* Process the registers we popped. */
23882 if (reg_containing_return_addr == -1)
23884 /* The return address was popped into the lowest numbered register. */
23885 regs_to_pop &= ~(1 << LR_REGNUM);
23887 reg_containing_return_addr =
23888 number_of_first_bit_set (regs_available_for_popping);
23890 /* Remove this register for the mask of available registers, so that
23891 the return address will not be corrupted by further pops. */
23892 regs_available_for_popping &= ~(1 << reg_containing_return_addr);
23895 /* If we popped other registers then handle them here. */
23896 if (regs_available_for_popping)
23898 int frame_pointer;
23900 /* Work out which register currently contains the frame pointer. */
23901 frame_pointer = number_of_first_bit_set (regs_available_for_popping);
23903 /* Move it into the correct place. */
23904 asm_fprintf (f, "\tmov\t%r, %r\n",
23905 ARM_HARD_FRAME_POINTER_REGNUM, frame_pointer);
23907 /* (Temporarily) remove it from the mask of popped registers. */
23908 regs_available_for_popping &= ~(1 << frame_pointer);
23909 regs_to_pop &= ~(1 << ARM_HARD_FRAME_POINTER_REGNUM);
23911 if (regs_available_for_popping)
23913 int stack_pointer;
23915 /* We popped the stack pointer as well,
23916 find the register that contains it. */
23917 stack_pointer = number_of_first_bit_set (regs_available_for_popping);
23919 /* Move it into the stack register. */
23920 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, stack_pointer);
23922 /* At this point we have popped all necessary registers, so
23923 do not worry about restoring regs_available_for_popping
23924 to its correct value:
23926 assert (pops_needed == 0)
23927 assert (regs_available_for_popping == (1 << frame_pointer))
23928 assert (regs_to_pop == (1 << STACK_POINTER)) */
23930 else
23932 /* Since we have just move the popped value into the frame
23933 pointer, the popping register is available for reuse, and
23934 we know that we still have the stack pointer left to pop. */
23935 regs_available_for_popping |= (1 << frame_pointer);
23939 /* If we still have registers left on the stack, but we no longer have
23940 any registers into which we can pop them, then we must move the return
23941 address into the link register and make available the register that
23942 contained it. */
23943 if (regs_available_for_popping == 0 && pops_needed > 0)
23945 regs_available_for_popping |= 1 << reg_containing_return_addr;
23947 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM,
23948 reg_containing_return_addr);
23950 reg_containing_return_addr = LR_REGNUM;
23953 /* If we have registers left on the stack then pop some more.
23954 We know that at most we will want to pop FP and SP. */
23955 if (pops_needed > 0)
23957 int popped_into;
23958 int move_to;
23960 thumb_pop (f, regs_available_for_popping);
23962 /* We have popped either FP or SP.
23963 Move whichever one it is into the correct register. */
23964 popped_into = number_of_first_bit_set (regs_available_for_popping);
23965 move_to = number_of_first_bit_set (regs_to_pop);
23967 asm_fprintf (f, "\tmov\t%r, %r\n", move_to, popped_into);
23969 regs_to_pop &= ~(1 << move_to);
23971 --pops_needed;
23974 /* If we still have not popped everything then we must have only
23975 had one register available to us and we are now popping the SP. */
23976 if (pops_needed > 0)
23978 int popped_into;
23980 thumb_pop (f, regs_available_for_popping);
23982 popped_into = number_of_first_bit_set (regs_available_for_popping);
23984 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, popped_into);
23986 assert (regs_to_pop == (1 << STACK_POINTER))
23987 assert (pops_needed == 1)
23991 /* If necessary restore the a4 register. */
23992 if (restore_a4)
23994 if (reg_containing_return_addr != LR_REGNUM)
23996 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
23997 reg_containing_return_addr = LR_REGNUM;
24000 asm_fprintf (f, "\tmov\t%r, %r\n", LAST_ARG_REGNUM, IP_REGNUM);
24003 if (crtl->calls_eh_return)
24004 asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);
24006 /* Return to caller. */
24007 if (IS_CMSE_ENTRY (arm_current_func_type ()))
24009 /* This is for the cases where LR is not being used to contain the return
24010 address. It may therefore contain information that we might not want
24011 to leak, hence it must be cleared. The value in R0 will never be a
24012 secret at this point, so it is safe to use it, see the clearing code
24013 in 'cmse_nonsecure_entry_clear_before_return'. */
24014 if (reg_containing_return_addr != LR_REGNUM)
24015 asm_fprintf (f, "\tmov\tlr, r0\n");
24017 asm_fprintf (f, "\tmsr\tAPSR_nzcvq, %r\n", reg_containing_return_addr);
24018 asm_fprintf (f, "\tbxns\t%r\n", reg_containing_return_addr);
24020 else
24021 asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
24024 /* Scan INSN just before assembler is output for it.
24025 For Thumb-1, we track the status of the condition codes; this
24026 information is used in the cbranchsi4_insn pattern. */
24027 void
24028 thumb1_final_prescan_insn (rtx_insn *insn)
24030 if (flag_print_asm_name)
24031 asm_fprintf (asm_out_file, "%@ 0x%04x\n",
24032 INSN_ADDRESSES (INSN_UID (insn)));
24033 /* Don't overwrite the previous setter when we get to a cbranch. */
24034 if (INSN_CODE (insn) != CODE_FOR_cbranchsi4_insn)
24036 enum attr_conds conds;
24038 if (cfun->machine->thumb1_cc_insn)
24040 if (modified_in_p (cfun->machine->thumb1_cc_op0, insn)
24041 || modified_in_p (cfun->machine->thumb1_cc_op1, insn))
24042 CC_STATUS_INIT;
24044 conds = get_attr_conds (insn);
24045 if (conds == CONDS_SET)
24047 rtx set = single_set (insn);
24048 cfun->machine->thumb1_cc_insn = insn;
24049 cfun->machine->thumb1_cc_op0 = SET_DEST (set);
24050 cfun->machine->thumb1_cc_op1 = const0_rtx;
24051 cfun->machine->thumb1_cc_mode = CC_NOOVmode;
24052 if (INSN_CODE (insn) == CODE_FOR_thumb1_subsi3_insn)
24054 rtx src1 = XEXP (SET_SRC (set), 1);
24055 if (src1 == const0_rtx)
24056 cfun->machine->thumb1_cc_mode = CCmode;
24058 else if (REG_P (SET_DEST (set)) && REG_P (SET_SRC (set)))
24060 /* Record the src register operand instead of dest because
24061 cprop_hardreg pass propagates src. */
24062 cfun->machine->thumb1_cc_op0 = SET_SRC (set);
24065 else if (conds != CONDS_NOCOND)
24066 cfun->machine->thumb1_cc_insn = NULL_RTX;
24069 /* Check if unexpected far jump is used. */
24070 if (cfun->machine->lr_save_eliminated
24071 && get_attr_far_jump (insn) == FAR_JUMP_YES)
24072 internal_error("Unexpected thumb1 far jump");
24076 thumb_shiftable_const (unsigned HOST_WIDE_INT val)
24078 unsigned HOST_WIDE_INT mask = 0xff;
24079 int i;
24081 val = val & (unsigned HOST_WIDE_INT)0xffffffffu;
24082 if (val == 0) /* XXX */
24083 return 0;
24085 for (i = 0; i < 25; i++)
24086 if ((val & (mask << i)) == val)
24087 return 1;
24089 return 0;
24092 /* Returns nonzero if the current function contains,
24093 or might contain a far jump. */
24094 static int
24095 thumb_far_jump_used_p (void)
24097 rtx_insn *insn;
24098 bool far_jump = false;
24099 unsigned int func_size = 0;
24101 /* If we have already decided that far jumps may be used,
24102 do not bother checking again, and always return true even if
24103 it turns out that they are not being used. Once we have made
24104 the decision that far jumps are present (and that hence the link
24105 register will be pushed onto the stack) we cannot go back on it. */
24106 if (cfun->machine->far_jump_used)
24107 return 1;
24109 /* If this function is not being called from the prologue/epilogue
24110 generation code then it must be being called from the
24111 INITIAL_ELIMINATION_OFFSET macro. */
24112 if (!(ARM_DOUBLEWORD_ALIGN || reload_completed))
24114 /* In this case we know that we are being asked about the elimination
24115 of the arg pointer register. If that register is not being used,
24116 then there are no arguments on the stack, and we do not have to
24117 worry that a far jump might force the prologue to push the link
24118 register, changing the stack offsets. In this case we can just
24119 return false, since the presence of far jumps in the function will
24120 not affect stack offsets.
24122 If the arg pointer is live (or if it was live, but has now been
24123 eliminated and so set to dead) then we do have to test to see if
24124 the function might contain a far jump. This test can lead to some
24125 false negatives, since before reload is completed, then length of
24126 branch instructions is not known, so gcc defaults to returning their
24127 longest length, which in turn sets the far jump attribute to true.
24129 A false negative will not result in bad code being generated, but it
24130 will result in a needless push and pop of the link register. We
24131 hope that this does not occur too often.
24133 If we need doubleword stack alignment this could affect the other
24134 elimination offsets so we can't risk getting it wrong. */
24135 if (df_regs_ever_live_p (ARG_POINTER_REGNUM))
24136 cfun->machine->arg_pointer_live = 1;
24137 else if (!cfun->machine->arg_pointer_live)
24138 return 0;
24141 /* We should not change far_jump_used during or after reload, as there is
24142 no chance to change stack frame layout. */
24143 if (reload_in_progress || reload_completed)
24144 return 0;
24146 /* Check to see if the function contains a branch
24147 insn with the far jump attribute set. */
24148 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
24150 if (JUMP_P (insn) && get_attr_far_jump (insn) == FAR_JUMP_YES)
24152 far_jump = true;
24154 func_size += get_attr_length (insn);
24157 /* Attribute far_jump will always be true for thumb1 before
24158 shorten_branch pass. So checking far_jump attribute before
24159 shorten_branch isn't much useful.
24161 Following heuristic tries to estimate more accurately if a far jump
24162 may finally be used. The heuristic is very conservative as there is
24163 no chance to roll-back the decision of not to use far jump.
24165 Thumb1 long branch offset is -2048 to 2046. The worst case is each
24166 2-byte insn is associated with a 4 byte constant pool. Using
24167 function size 2048/3 as the threshold is conservative enough. */
24168 if (far_jump)
24170 if ((func_size * 3) >= 2048)
24172 /* Record the fact that we have decided that
24173 the function does use far jumps. */
24174 cfun->machine->far_jump_used = 1;
24175 return 1;
24179 return 0;
24182 /* Return nonzero if FUNC must be entered in ARM mode. */
24183 static bool
24184 is_called_in_ARM_mode (tree func)
24186 gcc_assert (TREE_CODE (func) == FUNCTION_DECL);
24188 /* Ignore the problem about functions whose address is taken. */
24189 if (TARGET_CALLEE_INTERWORKING && TREE_PUBLIC (func))
24190 return true;
24192 #ifdef ARM_PE
24193 return lookup_attribute ("interfacearm", DECL_ATTRIBUTES (func)) != NULL_TREE;
24194 #else
24195 return false;
24196 #endif
24199 /* Given the stack offsets and register mask in OFFSETS, decide how
24200 many additional registers to push instead of subtracting a constant
24201 from SP. For epilogues the principle is the same except we use pop.
24202 FOR_PROLOGUE indicates which we're generating. */
24203 static int
24204 thumb1_extra_regs_pushed (arm_stack_offsets *offsets, bool for_prologue)
24206 HOST_WIDE_INT amount;
24207 unsigned long live_regs_mask = offsets->saved_regs_mask;
24208 /* Extract a mask of the ones we can give to the Thumb's push/pop
24209 instruction. */
24210 unsigned long l_mask = live_regs_mask & (for_prologue ? 0x40ff : 0xff);
24211 /* Then count how many other high registers will need to be pushed. */
24212 unsigned long high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
24213 int n_free, reg_base, size;
24215 if (!for_prologue && frame_pointer_needed)
24216 amount = offsets->locals_base - offsets->saved_regs;
24217 else
24218 amount = offsets->outgoing_args - offsets->saved_regs;
24220 /* If the stack frame size is 512 exactly, we can save one load
24221 instruction, which should make this a win even when optimizing
24222 for speed. */
24223 if (!optimize_size && amount != 512)
24224 return 0;
24226 /* Can't do this if there are high registers to push. */
24227 if (high_regs_pushed != 0)
24228 return 0;
24230 /* Shouldn't do it in the prologue if no registers would normally
24231 be pushed at all. In the epilogue, also allow it if we'll have
24232 a pop insn for the PC. */
24233 if (l_mask == 0
24234 && (for_prologue
24235 || TARGET_BACKTRACE
24236 || (live_regs_mask & 1 << LR_REGNUM) == 0
24237 || TARGET_INTERWORK
24238 || crtl->args.pretend_args_size != 0))
24239 return 0;
24241 /* Don't do this if thumb_expand_prologue wants to emit instructions
24242 between the push and the stack frame allocation. */
24243 if (for_prologue
24244 && ((flag_pic && arm_pic_register != INVALID_REGNUM)
24245 || (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)))
24246 return 0;
24248 reg_base = 0;
24249 n_free = 0;
24250 if (!for_prologue)
24252 size = arm_size_return_regs ();
24253 reg_base = ARM_NUM_INTS (size);
24254 live_regs_mask >>= reg_base;
24257 while (reg_base + n_free < 8 && !(live_regs_mask & 1)
24258 && (for_prologue || call_used_regs[reg_base + n_free]))
24260 live_regs_mask >>= 1;
24261 n_free++;
24264 if (n_free == 0)
24265 return 0;
24266 gcc_assert (amount / 4 * 4 == amount);
24268 if (amount >= 512 && (amount - n_free * 4) < 512)
24269 return (amount - 508) / 4;
24270 if (amount <= n_free * 4)
24271 return amount / 4;
24272 return 0;
24275 /* The bits which aren't usefully expanded as rtl. */
24276 const char *
24277 thumb1_unexpanded_epilogue (void)
24279 arm_stack_offsets *offsets;
24280 int regno;
24281 unsigned long live_regs_mask = 0;
24282 int high_regs_pushed = 0;
24283 int extra_pop;
24284 int had_to_push_lr;
24285 int size;
24287 if (cfun->machine->return_used_this_function != 0)
24288 return "";
24290 if (IS_NAKED (arm_current_func_type ()))
24291 return "";
24293 offsets = arm_get_frame_offsets ();
24294 live_regs_mask = offsets->saved_regs_mask;
24295 high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
24297 /* If we can deduce the registers used from the function's return value.
24298 This is more reliable that examining df_regs_ever_live_p () because that
24299 will be set if the register is ever used in the function, not just if
24300 the register is used to hold a return value. */
24301 size = arm_size_return_regs ();
24303 extra_pop = thumb1_extra_regs_pushed (offsets, false);
24304 if (extra_pop > 0)
24306 unsigned long extra_mask = (1 << extra_pop) - 1;
24307 live_regs_mask |= extra_mask << ARM_NUM_INTS (size);
24310 /* The prolog may have pushed some high registers to use as
24311 work registers. e.g. the testsuite file:
24312 gcc/testsuite/gcc/gcc.c-torture/execute/complex-2.c
24313 compiles to produce:
24314 push {r4, r5, r6, r7, lr}
24315 mov r7, r9
24316 mov r6, r8
24317 push {r6, r7}
24318 as part of the prolog. We have to undo that pushing here. */
24320 if (high_regs_pushed)
24322 unsigned long mask = live_regs_mask & 0xff;
24323 int next_hi_reg;
24325 /* The available low registers depend on the size of the value we are
24326 returning. */
24327 if (size <= 12)
24328 mask |= 1 << 3;
24329 if (size <= 8)
24330 mask |= 1 << 2;
24332 if (mask == 0)
24333 /* Oh dear! We have no low registers into which we can pop
24334 high registers! */
24335 internal_error
24336 ("no low registers available for popping high registers");
24338 for (next_hi_reg = 8; next_hi_reg < 13; next_hi_reg++)
24339 if (live_regs_mask & (1 << next_hi_reg))
24340 break;
24342 while (high_regs_pushed)
24344 /* Find lo register(s) into which the high register(s) can
24345 be popped. */
24346 for (regno = 0; regno <= LAST_LO_REGNUM; regno++)
24348 if (mask & (1 << regno))
24349 high_regs_pushed--;
24350 if (high_regs_pushed == 0)
24351 break;
24354 mask &= (2 << regno) - 1; /* A noop if regno == 8 */
24356 /* Pop the values into the low register(s). */
24357 thumb_pop (asm_out_file, mask);
24359 /* Move the value(s) into the high registers. */
24360 for (regno = 0; regno <= LAST_LO_REGNUM; regno++)
24362 if (mask & (1 << regno))
24364 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", next_hi_reg,
24365 regno);
24367 for (next_hi_reg++; next_hi_reg < 13; next_hi_reg++)
24368 if (live_regs_mask & (1 << next_hi_reg))
24369 break;
24373 live_regs_mask &= ~0x0f00;
24376 had_to_push_lr = (live_regs_mask & (1 << LR_REGNUM)) != 0;
24377 live_regs_mask &= 0xff;
24379 if (crtl->args.pretend_args_size == 0 || TARGET_BACKTRACE)
24381 /* Pop the return address into the PC. */
24382 if (had_to_push_lr)
24383 live_regs_mask |= 1 << PC_REGNUM;
24385 /* Either no argument registers were pushed or a backtrace
24386 structure was created which includes an adjusted stack
24387 pointer, so just pop everything. */
24388 if (live_regs_mask)
24389 thumb_pop (asm_out_file, live_regs_mask);
24391 /* We have either just popped the return address into the
24392 PC or it is was kept in LR for the entire function.
24393 Note that thumb_pop has already called thumb_exit if the
24394 PC was in the list. */
24395 if (!had_to_push_lr)
24396 thumb_exit (asm_out_file, LR_REGNUM);
24398 else
24400 /* Pop everything but the return address. */
24401 if (live_regs_mask)
24402 thumb_pop (asm_out_file, live_regs_mask);
24404 if (had_to_push_lr)
24406 if (size > 12)
24408 /* We have no free low regs, so save one. */
24409 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", IP_REGNUM,
24410 LAST_ARG_REGNUM);
24413 /* Get the return address into a temporary register. */
24414 thumb_pop (asm_out_file, 1 << LAST_ARG_REGNUM);
24416 if (size > 12)
24418 /* Move the return address to lr. */
24419 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LR_REGNUM,
24420 LAST_ARG_REGNUM);
24421 /* Restore the low register. */
24422 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LAST_ARG_REGNUM,
24423 IP_REGNUM);
24424 regno = LR_REGNUM;
24426 else
24427 regno = LAST_ARG_REGNUM;
24429 else
24430 regno = LR_REGNUM;
24432 /* Remove the argument registers that were pushed onto the stack. */
24433 asm_fprintf (asm_out_file, "\tadd\t%r, %r, #%d\n",
24434 SP_REGNUM, SP_REGNUM,
24435 crtl->args.pretend_args_size);
24437 thumb_exit (asm_out_file, regno);
24440 return "";
24443 /* Functions to save and restore machine-specific function data. */
24444 static struct machine_function *
24445 arm_init_machine_status (void)
24447 struct machine_function *machine;
24448 machine = ggc_cleared_alloc<machine_function> ();
24450 #if ARM_FT_UNKNOWN != 0
24451 machine->func_type = ARM_FT_UNKNOWN;
24452 #endif
24453 return machine;
24456 /* Return an RTX indicating where the return address to the
24457 calling function can be found. */
24459 arm_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
24461 if (count != 0)
24462 return NULL_RTX;
24464 return get_hard_reg_initial_val (Pmode, LR_REGNUM);
24467 /* Do anything needed before RTL is emitted for each function. */
24468 void
24469 arm_init_expanders (void)
24471 /* Arrange to initialize and mark the machine per-function status. */
24472 init_machine_status = arm_init_machine_status;
24474 /* This is to stop the combine pass optimizing away the alignment
24475 adjustment of va_arg. */
24476 /* ??? It is claimed that this should not be necessary. */
24477 if (cfun)
24478 mark_reg_pointer (arg_pointer_rtx, PARM_BOUNDARY);
24481 /* Check that FUNC is called with a different mode. */
24483 bool
24484 arm_change_mode_p (tree func)
24486 if (TREE_CODE (func) != FUNCTION_DECL)
24487 return false;
24489 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (func);
24491 if (!callee_tree)
24492 callee_tree = target_option_default_node;
24494 struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);
24495 int flags = callee_opts->x_target_flags;
24497 return (TARGET_THUMB_P (flags) != TARGET_THUMB);
24500 /* Like arm_compute_initial_elimination offset. Simpler because there
24501 isn't an ABI specified frame pointer for Thumb. Instead, we set it
24502 to point at the base of the local variables after static stack
24503 space for a function has been allocated. */
24505 HOST_WIDE_INT
24506 thumb_compute_initial_elimination_offset (unsigned int from, unsigned int to)
24508 arm_stack_offsets *offsets;
24510 offsets = arm_get_frame_offsets ();
24512 switch (from)
24514 case ARG_POINTER_REGNUM:
24515 switch (to)
24517 case STACK_POINTER_REGNUM:
24518 return offsets->outgoing_args - offsets->saved_args;
24520 case FRAME_POINTER_REGNUM:
24521 return offsets->soft_frame - offsets->saved_args;
24523 case ARM_HARD_FRAME_POINTER_REGNUM:
24524 return offsets->saved_regs - offsets->saved_args;
24526 case THUMB_HARD_FRAME_POINTER_REGNUM:
24527 return offsets->locals_base - offsets->saved_args;
24529 default:
24530 gcc_unreachable ();
24532 break;
24534 case FRAME_POINTER_REGNUM:
24535 switch (to)
24537 case STACK_POINTER_REGNUM:
24538 return offsets->outgoing_args - offsets->soft_frame;
24540 case ARM_HARD_FRAME_POINTER_REGNUM:
24541 return offsets->saved_regs - offsets->soft_frame;
24543 case THUMB_HARD_FRAME_POINTER_REGNUM:
24544 return offsets->locals_base - offsets->soft_frame;
24546 default:
24547 gcc_unreachable ();
24549 break;
24551 default:
24552 gcc_unreachable ();
24556 /* Generate the function's prologue. */
24558 void
24559 thumb1_expand_prologue (void)
24561 rtx_insn *insn;
24563 HOST_WIDE_INT amount;
24564 HOST_WIDE_INT size;
24565 arm_stack_offsets *offsets;
24566 unsigned long func_type;
24567 int regno;
24568 unsigned long live_regs_mask;
24569 unsigned long l_mask;
24570 unsigned high_regs_pushed = 0;
24571 bool lr_needs_saving;
24573 func_type = arm_current_func_type ();
24575 /* Naked functions don't have prologues. */
24576 if (IS_NAKED (func_type))
24578 if (flag_stack_usage_info)
24579 current_function_static_stack_size = 0;
24580 return;
24583 if (IS_INTERRUPT (func_type))
24585 error ("interrupt Service Routines cannot be coded in Thumb mode");
24586 return;
24589 if (is_called_in_ARM_mode (current_function_decl))
24590 emit_insn (gen_prologue_thumb1_interwork ());
24592 offsets = arm_get_frame_offsets ();
24593 live_regs_mask = offsets->saved_regs_mask;
24594 lr_needs_saving = live_regs_mask & (1 << LR_REGNUM);
24596 /* Extract a mask of the ones we can give to the Thumb's push instruction. */
24597 l_mask = live_regs_mask & 0x40ff;
24598 /* Then count how many other high registers will need to be pushed. */
24599 high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
24601 if (crtl->args.pretend_args_size)
24603 rtx x = GEN_INT (-crtl->args.pretend_args_size);
24605 if (cfun->machine->uses_anonymous_args)
24607 int num_pushes = ARM_NUM_INTS (crtl->args.pretend_args_size);
24608 unsigned long mask;
24610 mask = 1ul << (LAST_ARG_REGNUM + 1);
24611 mask -= 1ul << (LAST_ARG_REGNUM + 1 - num_pushes);
24613 insn = thumb1_emit_multi_reg_push (mask, 0);
24615 else
24617 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
24618 stack_pointer_rtx, x));
24620 RTX_FRAME_RELATED_P (insn) = 1;
24623 if (TARGET_BACKTRACE)
24625 HOST_WIDE_INT offset = 0;
24626 unsigned work_register;
24627 rtx work_reg, x, arm_hfp_rtx;
24629 /* We have been asked to create a stack backtrace structure.
24630 The code looks like this:
24632 0 .align 2
24633 0 func:
24634 0 sub SP, #16 Reserve space for 4 registers.
24635 2 push {R7} Push low registers.
24636 4 add R7, SP, #20 Get the stack pointer before the push.
24637 6 str R7, [SP, #8] Store the stack pointer
24638 (before reserving the space).
24639 8 mov R7, PC Get hold of the start of this code + 12.
24640 10 str R7, [SP, #16] Store it.
24641 12 mov R7, FP Get hold of the current frame pointer.
24642 14 str R7, [SP, #4] Store it.
24643 16 mov R7, LR Get hold of the current return address.
24644 18 str R7, [SP, #12] Store it.
24645 20 add R7, SP, #16 Point at the start of the
24646 backtrace structure.
24647 22 mov FP, R7 Put this value into the frame pointer. */
24649 work_register = thumb_find_work_register (live_regs_mask);
24650 work_reg = gen_rtx_REG (SImode, work_register);
24651 arm_hfp_rtx = gen_rtx_REG (SImode, ARM_HARD_FRAME_POINTER_REGNUM);
24653 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
24654 stack_pointer_rtx, GEN_INT (-16)));
24655 RTX_FRAME_RELATED_P (insn) = 1;
24657 if (l_mask)
24659 insn = thumb1_emit_multi_reg_push (l_mask, l_mask);
24660 RTX_FRAME_RELATED_P (insn) = 1;
24661 lr_needs_saving = false;
24663 offset = bit_count (l_mask) * UNITS_PER_WORD;
24666 x = GEN_INT (offset + 16 + crtl->args.pretend_args_size);
24667 emit_insn (gen_addsi3 (work_reg, stack_pointer_rtx, x));
24669 x = plus_constant (Pmode, stack_pointer_rtx, offset + 4);
24670 x = gen_frame_mem (SImode, x);
24671 emit_move_insn (x, work_reg);
24673 /* Make sure that the instruction fetching the PC is in the right place
24674 to calculate "start of backtrace creation code + 12". */
24675 /* ??? The stores using the common WORK_REG ought to be enough to
24676 prevent the scheduler from doing anything weird. Failing that
24677 we could always move all of the following into an UNSPEC_VOLATILE. */
24678 if (l_mask)
24680 x = gen_rtx_REG (SImode, PC_REGNUM);
24681 emit_move_insn (work_reg, x);
24683 x = plus_constant (Pmode, stack_pointer_rtx, offset + 12);
24684 x = gen_frame_mem (SImode, x);
24685 emit_move_insn (x, work_reg);
24687 emit_move_insn (work_reg, arm_hfp_rtx);
24689 x = plus_constant (Pmode, stack_pointer_rtx, offset);
24690 x = gen_frame_mem (SImode, x);
24691 emit_move_insn (x, work_reg);
24693 else
24695 emit_move_insn (work_reg, arm_hfp_rtx);
24697 x = plus_constant (Pmode, stack_pointer_rtx, offset);
24698 x = gen_frame_mem (SImode, x);
24699 emit_move_insn (x, work_reg);
24701 x = gen_rtx_REG (SImode, PC_REGNUM);
24702 emit_move_insn (work_reg, x);
24704 x = plus_constant (Pmode, stack_pointer_rtx, offset + 12);
24705 x = gen_frame_mem (SImode, x);
24706 emit_move_insn (x, work_reg);
24709 x = gen_rtx_REG (SImode, LR_REGNUM);
24710 emit_move_insn (work_reg, x);
24712 x = plus_constant (Pmode, stack_pointer_rtx, offset + 8);
24713 x = gen_frame_mem (SImode, x);
24714 emit_move_insn (x, work_reg);
24716 x = GEN_INT (offset + 12);
24717 emit_insn (gen_addsi3 (work_reg, stack_pointer_rtx, x));
24719 emit_move_insn (arm_hfp_rtx, work_reg);
24721 /* Optimization: If we are not pushing any low registers but we are going
24722 to push some high registers then delay our first push. This will just
24723 be a push of LR and we can combine it with the push of the first high
24724 register. */
24725 else if ((l_mask & 0xff) != 0
24726 || (high_regs_pushed == 0 && lr_needs_saving))
24728 unsigned long mask = l_mask;
24729 mask |= (1 << thumb1_extra_regs_pushed (offsets, true)) - 1;
24730 insn = thumb1_emit_multi_reg_push (mask, mask);
24731 RTX_FRAME_RELATED_P (insn) = 1;
24732 lr_needs_saving = false;
24735 if (high_regs_pushed)
24737 unsigned pushable_regs;
24738 unsigned next_hi_reg;
24739 unsigned arg_regs_num = TARGET_AAPCS_BASED ? crtl->args.info.aapcs_ncrn
24740 : crtl->args.info.nregs;
24741 unsigned arg_regs_mask = (1 << arg_regs_num) - 1;
24743 for (next_hi_reg = 12; next_hi_reg > LAST_LO_REGNUM; next_hi_reg--)
24744 if (live_regs_mask & (1 << next_hi_reg))
24745 break;
24747 /* Here we need to mask out registers used for passing arguments
24748 even if they can be pushed. This is to avoid using them to stash the high
24749 registers. Such kind of stash may clobber the use of arguments. */
24750 pushable_regs = l_mask & (~arg_regs_mask);
24751 if (lr_needs_saving)
24752 pushable_regs &= ~(1 << LR_REGNUM);
24754 if (pushable_regs == 0)
24755 pushable_regs = 1 << thumb_find_work_register (live_regs_mask);
24757 while (high_regs_pushed > 0)
24759 unsigned long real_regs_mask = 0;
24760 unsigned long push_mask = 0;
24762 for (regno = LR_REGNUM; regno >= 0; regno --)
24764 if (pushable_regs & (1 << regno))
24766 emit_move_insn (gen_rtx_REG (SImode, regno),
24767 gen_rtx_REG (SImode, next_hi_reg));
24769 high_regs_pushed --;
24770 real_regs_mask |= (1 << next_hi_reg);
24771 push_mask |= (1 << regno);
24773 if (high_regs_pushed)
24775 for (next_hi_reg --; next_hi_reg > LAST_LO_REGNUM;
24776 next_hi_reg --)
24777 if (live_regs_mask & (1 << next_hi_reg))
24778 break;
24780 else
24781 break;
24785 /* If we had to find a work register and we have not yet
24786 saved the LR then add it to the list of regs to push. */
24787 if (lr_needs_saving)
24789 push_mask |= 1 << LR_REGNUM;
24790 real_regs_mask |= 1 << LR_REGNUM;
24791 lr_needs_saving = false;
24794 insn = thumb1_emit_multi_reg_push (push_mask, real_regs_mask);
24795 RTX_FRAME_RELATED_P (insn) = 1;
24799 /* Load the pic register before setting the frame pointer,
24800 so we can use r7 as a temporary work register. */
24801 if (flag_pic && arm_pic_register != INVALID_REGNUM)
24802 arm_load_pic_register (live_regs_mask);
24804 if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
24805 emit_move_insn (gen_rtx_REG (Pmode, ARM_HARD_FRAME_POINTER_REGNUM),
24806 stack_pointer_rtx);
24808 size = offsets->outgoing_args - offsets->saved_args;
24809 if (flag_stack_usage_info)
24810 current_function_static_stack_size = size;
24812 /* If we have a frame, then do stack checking. FIXME: not implemented. */
24813 if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK && size)
24814 sorry ("-fstack-check=specific for Thumb-1");
24816 amount = offsets->outgoing_args - offsets->saved_regs;
24817 amount -= 4 * thumb1_extra_regs_pushed (offsets, true);
24818 if (amount)
24820 if (amount < 512)
24822 insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
24823 GEN_INT (- amount)));
24824 RTX_FRAME_RELATED_P (insn) = 1;
24826 else
24828 rtx reg, dwarf;
24830 /* The stack decrement is too big for an immediate value in a single
24831 insn. In theory we could issue multiple subtracts, but after
24832 three of them it becomes more space efficient to place the full
24833 value in the constant pool and load into a register. (Also the
24834 ARM debugger really likes to see only one stack decrement per
24835 function). So instead we look for a scratch register into which
24836 we can load the decrement, and then we subtract this from the
24837 stack pointer. Unfortunately on the thumb the only available
24838 scratch registers are the argument registers, and we cannot use
24839 these as they may hold arguments to the function. Instead we
24840 attempt to locate a call preserved register which is used by this
24841 function. If we can find one, then we know that it will have
24842 been pushed at the start of the prologue and so we can corrupt
24843 it now. */
24844 for (regno = LAST_ARG_REGNUM + 1; regno <= LAST_LO_REGNUM; regno++)
24845 if (live_regs_mask & (1 << regno))
24846 break;
24848 gcc_assert(regno <= LAST_LO_REGNUM);
24850 reg = gen_rtx_REG (SImode, regno);
24852 emit_insn (gen_movsi (reg, GEN_INT (- amount)));
24854 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
24855 stack_pointer_rtx, reg));
24857 dwarf = gen_rtx_SET (stack_pointer_rtx,
24858 plus_constant (Pmode, stack_pointer_rtx,
24859 -amount));
24860 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
24861 RTX_FRAME_RELATED_P (insn) = 1;
24865 if (frame_pointer_needed)
24866 thumb_set_frame_pointer (offsets);
24868 /* If we are profiling, make sure no instructions are scheduled before
24869 the call to mcount. Similarly if the user has requested no
24870 scheduling in the prolog. Similarly if we want non-call exceptions
24871 using the EABI unwinder, to prevent faulting instructions from being
24872 swapped with a stack adjustment. */
24873 if (crtl->profile || !TARGET_SCHED_PROLOG
24874 || (arm_except_unwind_info (&global_options) == UI_TARGET
24875 && cfun->can_throw_non_call_exceptions))
24876 emit_insn (gen_blockage ());
24878 cfun->machine->lr_save_eliminated = !thumb_force_lr_save ();
24879 if (live_regs_mask & 0xff)
24880 cfun->machine->lr_save_eliminated = 0;
24883 /* Clear caller saved registers not used to pass return values and leaked
24884 condition flags before exiting a cmse_nonsecure_entry function. */
24886 void
24887 cmse_nonsecure_entry_clear_before_return (void)
24889 uint64_t to_clear_mask[2];
24890 uint32_t padding_bits_to_clear = 0;
24891 uint32_t * padding_bits_to_clear_ptr = &padding_bits_to_clear;
24892 int regno, maxregno = IP_REGNUM;
24893 tree result_type;
24894 rtx result_rtl;
24896 to_clear_mask[0] = (1ULL << (NUM_ARG_REGS)) - 1;
24897 to_clear_mask[0] |= (1ULL << IP_REGNUM);
24899 /* If we are not dealing with -mfloat-abi=soft we will need to clear VFP
24900 registers. We also check that TARGET_HARD_FLOAT and !TARGET_THUMB1 hold
24901 to make sure the instructions used to clear them are present. */
24902 if (TARGET_HARD_FLOAT && !TARGET_THUMB1)
24904 uint64_t float_mask = (1ULL << (D7_VFP_REGNUM + 1)) - 1;
24905 maxregno = LAST_VFP_REGNUM;
24907 float_mask &= ~((1ULL << FIRST_VFP_REGNUM) - 1);
24908 to_clear_mask[0] |= float_mask;
24910 float_mask = (1ULL << (maxregno - 63)) - 1;
24911 to_clear_mask[1] = float_mask;
24913 /* Make sure we don't clear the two scratch registers used to clear the
24914 relevant FPSCR bits in output_return_instruction. */
24915 emit_use (gen_rtx_REG (SImode, IP_REGNUM));
24916 to_clear_mask[0] &= ~(1ULL << IP_REGNUM);
24917 emit_use (gen_rtx_REG (SImode, 4));
24918 to_clear_mask[0] &= ~(1ULL << 4);
24921 /* If the user has defined registers to be caller saved, these are no longer
24922 restored by the function before returning and must thus be cleared for
24923 security purposes. */
24924 for (regno = NUM_ARG_REGS; regno < LAST_VFP_REGNUM; regno++)
24926 /* We do not touch registers that can be used to pass arguments as per
24927 the AAPCS, since these should never be made callee-saved by user
24928 options. */
24929 if (IN_RANGE (regno, FIRST_VFP_REGNUM, D7_VFP_REGNUM))
24930 continue;
24931 if (IN_RANGE (regno, IP_REGNUM, PC_REGNUM))
24932 continue;
24933 if (call_used_regs[regno])
24934 to_clear_mask[regno / 64] |= (1ULL << (regno % 64));
24937 /* Make sure we do not clear the registers used to return the result in. */
24938 result_type = TREE_TYPE (DECL_RESULT (current_function_decl));
24939 if (!VOID_TYPE_P (result_type))
24941 result_rtl = arm_function_value (result_type, current_function_decl, 0);
24943 /* No need to check that we return in registers, because we don't
24944 support returning on stack yet. */
24945 to_clear_mask[0]
24946 &= ~compute_not_to_clear_mask (result_type, result_rtl, 0,
24947 padding_bits_to_clear_ptr);
24950 if (padding_bits_to_clear != 0)
24952 rtx reg_rtx;
24953 /* Padding bits to clear is not 0 so we know we are dealing with
24954 returning a composite type, which only uses r0. Let's make sure that
24955 r1-r3 is cleared too, we will use r1 as a scratch register. */
24956 gcc_assert ((to_clear_mask[0] & 0xe) == 0xe);
24958 reg_rtx = gen_rtx_REG (SImode, R1_REGNUM);
24960 /* Fill the lower half of the negated padding_bits_to_clear. */
24961 emit_move_insn (reg_rtx,
24962 GEN_INT ((((~padding_bits_to_clear) << 16u) >> 16u)));
24964 /* Also fill the top half of the negated padding_bits_to_clear. */
24965 if (((~padding_bits_to_clear) >> 16) > 0)
24966 emit_insn (gen_rtx_SET (gen_rtx_ZERO_EXTRACT (SImode, reg_rtx,
24967 GEN_INT (16),
24968 GEN_INT (16)),
24969 GEN_INT ((~padding_bits_to_clear) >> 16)));
24971 emit_insn (gen_andsi3 (gen_rtx_REG (SImode, R0_REGNUM),
24972 gen_rtx_REG (SImode, R0_REGNUM),
24973 reg_rtx));
24976 for (regno = R0_REGNUM; regno <= maxregno; regno++)
24978 if (!(to_clear_mask[regno / 64] & (1ULL << (regno % 64))))
24979 continue;
24981 if (IS_VFP_REGNUM (regno))
24983 /* If regno is an even vfp register and its successor is also to
24984 be cleared, use vmov. */
24985 if (TARGET_VFP_DOUBLE
24986 && VFP_REGNO_OK_FOR_DOUBLE (regno)
24987 && to_clear_mask[regno / 64] & (1ULL << ((regno % 64) + 1)))
24989 emit_move_insn (gen_rtx_REG (DFmode, regno),
24990 CONST1_RTX (DFmode));
24991 emit_use (gen_rtx_REG (DFmode, regno));
24992 regno++;
24994 else
24996 emit_move_insn (gen_rtx_REG (SFmode, regno),
24997 CONST1_RTX (SFmode));
24998 emit_use (gen_rtx_REG (SFmode, regno));
25001 else
25003 if (TARGET_THUMB1)
25005 if (regno == R0_REGNUM)
25006 emit_move_insn (gen_rtx_REG (SImode, regno),
25007 const0_rtx);
25008 else
25009 /* R0 has either been cleared before, see code above, or it
25010 holds a return value, either way it is not secret
25011 information. */
25012 emit_move_insn (gen_rtx_REG (SImode, regno),
25013 gen_rtx_REG (SImode, R0_REGNUM));
25014 emit_use (gen_rtx_REG (SImode, regno));
25016 else
25018 emit_move_insn (gen_rtx_REG (SImode, regno),
25019 gen_rtx_REG (SImode, LR_REGNUM));
25020 emit_use (gen_rtx_REG (SImode, regno));
25026 /* Generate pattern *pop_multiple_with_stack_update_and_return if single
25027 POP instruction can be generated. LR should be replaced by PC. All
25028 the checks required are already done by USE_RETURN_INSN (). Hence,
25029 all we really need to check here is if single register is to be
25030 returned, or multiple register return. */
25031 void
25032 thumb2_expand_return (bool simple_return)
25034 int i, num_regs;
25035 unsigned long saved_regs_mask;
25036 arm_stack_offsets *offsets;
25038 offsets = arm_get_frame_offsets ();
25039 saved_regs_mask = offsets->saved_regs_mask;
25041 for (i = 0, num_regs = 0; i <= LAST_ARM_REGNUM; i++)
25042 if (saved_regs_mask & (1 << i))
25043 num_regs++;
25045 if (!simple_return && saved_regs_mask)
25047 /* TODO: Verify that this path is never taken for cmse_nonsecure_entry
25048 functions or adapt code to handle according to ACLE. This path should
25049 not be reachable for cmse_nonsecure_entry functions though we prefer
25050 to assert it for now to ensure that future code changes do not silently
25051 change this behavior. */
25052 gcc_assert (!IS_CMSE_ENTRY (arm_current_func_type ()));
25053 if (num_regs == 1)
25055 rtx par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
25056 rtx reg = gen_rtx_REG (SImode, PC_REGNUM);
25057 rtx addr = gen_rtx_MEM (SImode,
25058 gen_rtx_POST_INC (SImode,
25059 stack_pointer_rtx));
25060 set_mem_alias_set (addr, get_frame_alias_set ());
25061 XVECEXP (par, 0, 0) = ret_rtx;
25062 XVECEXP (par, 0, 1) = gen_rtx_SET (reg, addr);
25063 RTX_FRAME_RELATED_P (XVECEXP (par, 0, 1)) = 1;
25064 emit_jump_insn (par);
25066 else
25068 saved_regs_mask &= ~ (1 << LR_REGNUM);
25069 saved_regs_mask |= (1 << PC_REGNUM);
25070 arm_emit_multi_reg_pop (saved_regs_mask);
25073 else
25075 if (IS_CMSE_ENTRY (arm_current_func_type ()))
25076 cmse_nonsecure_entry_clear_before_return ();
25077 emit_jump_insn (simple_return_rtx);
25081 void
25082 thumb1_expand_epilogue (void)
25084 HOST_WIDE_INT amount;
25085 arm_stack_offsets *offsets;
25086 int regno;
25088 /* Naked functions don't have prologues. */
25089 if (IS_NAKED (arm_current_func_type ()))
25090 return;
25092 offsets = arm_get_frame_offsets ();
25093 amount = offsets->outgoing_args - offsets->saved_regs;
25095 if (frame_pointer_needed)
25097 emit_insn (gen_movsi (stack_pointer_rtx, hard_frame_pointer_rtx));
25098 amount = offsets->locals_base - offsets->saved_regs;
25100 amount -= 4 * thumb1_extra_regs_pushed (offsets, false);
25102 gcc_assert (amount >= 0);
25103 if (amount)
25105 emit_insn (gen_blockage ());
25107 if (amount < 512)
25108 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
25109 GEN_INT (amount)));
25110 else
25112 /* r3 is always free in the epilogue. */
25113 rtx reg = gen_rtx_REG (SImode, LAST_ARG_REGNUM);
25115 emit_insn (gen_movsi (reg, GEN_INT (amount)));
25116 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, reg));
25120 /* Emit a USE (stack_pointer_rtx), so that
25121 the stack adjustment will not be deleted. */
25122 emit_insn (gen_force_register_use (stack_pointer_rtx));
25124 if (crtl->profile || !TARGET_SCHED_PROLOG)
25125 emit_insn (gen_blockage ());
25127 /* Emit a clobber for each insn that will be restored in the epilogue,
25128 so that flow2 will get register lifetimes correct. */
25129 for (regno = 0; regno < 13; regno++)
25130 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
25131 emit_clobber (gen_rtx_REG (SImode, regno));
25133 if (! df_regs_ever_live_p (LR_REGNUM))
25134 emit_use (gen_rtx_REG (SImode, LR_REGNUM));
25136 /* Clear all caller-saved regs that are not used to return. */
25137 if (IS_CMSE_ENTRY (arm_current_func_type ()))
25138 cmse_nonsecure_entry_clear_before_return ();
25141 /* Epilogue code for APCS frame. */
25142 static void
25143 arm_expand_epilogue_apcs_frame (bool really_return)
25145 unsigned long func_type;
25146 unsigned long saved_regs_mask;
25147 int num_regs = 0;
25148 int i;
25149 int floats_from_frame = 0;
25150 arm_stack_offsets *offsets;
25152 gcc_assert (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM);
25153 func_type = arm_current_func_type ();
25155 /* Get frame offsets for ARM. */
25156 offsets = arm_get_frame_offsets ();
25157 saved_regs_mask = offsets->saved_regs_mask;
25159 /* Find the offset of the floating-point save area in the frame. */
25160 floats_from_frame
25161 = (offsets->saved_args
25162 + arm_compute_static_chain_stack_bytes ()
25163 - offsets->frame);
25165 /* Compute how many core registers saved and how far away the floats are. */
25166 for (i = 0; i <= LAST_ARM_REGNUM; i++)
25167 if (saved_regs_mask & (1 << i))
25169 num_regs++;
25170 floats_from_frame += 4;
25173 if (TARGET_HARD_FLOAT)
25175 int start_reg;
25176 rtx ip_rtx = gen_rtx_REG (SImode, IP_REGNUM);
25178 /* The offset is from IP_REGNUM. */
25179 int saved_size = arm_get_vfp_saved_size ();
25180 if (saved_size > 0)
25182 rtx_insn *insn;
25183 floats_from_frame += saved_size;
25184 insn = emit_insn (gen_addsi3 (ip_rtx,
25185 hard_frame_pointer_rtx,
25186 GEN_INT (-floats_from_frame)));
25187 arm_add_cfa_adjust_cfa_note (insn, -floats_from_frame,
25188 ip_rtx, hard_frame_pointer_rtx);
25191 /* Generate VFP register multi-pop. */
25192 start_reg = FIRST_VFP_REGNUM;
25194 for (i = FIRST_VFP_REGNUM; i < LAST_VFP_REGNUM; i += 2)
25195 /* Look for a case where a reg does not need restoring. */
25196 if ((!df_regs_ever_live_p (i) || call_used_regs[i])
25197 && (!df_regs_ever_live_p (i + 1)
25198 || call_used_regs[i + 1]))
25200 if (start_reg != i)
25201 arm_emit_vfp_multi_reg_pop (start_reg,
25202 (i - start_reg) / 2,
25203 gen_rtx_REG (SImode,
25204 IP_REGNUM));
25205 start_reg = i + 2;
25208 /* Restore the remaining regs that we have discovered (or possibly
25209 even all of them, if the conditional in the for loop never
25210 fired). */
25211 if (start_reg != i)
25212 arm_emit_vfp_multi_reg_pop (start_reg,
25213 (i - start_reg) / 2,
25214 gen_rtx_REG (SImode, IP_REGNUM));
25217 if (TARGET_IWMMXT)
25219 /* The frame pointer is guaranteed to be non-double-word aligned, as
25220 it is set to double-word-aligned old_stack_pointer - 4. */
25221 rtx_insn *insn;
25222 int lrm_count = (num_regs % 2) ? (num_regs + 2) : (num_regs + 1);
25224 for (i = LAST_IWMMXT_REGNUM; i >= FIRST_IWMMXT_REGNUM; i--)
25225 if (df_regs_ever_live_p (i) && !call_used_regs[i])
25227 rtx addr = gen_frame_mem (V2SImode,
25228 plus_constant (Pmode, hard_frame_pointer_rtx,
25229 - lrm_count * 4));
25230 insn = emit_insn (gen_movsi (gen_rtx_REG (V2SImode, i), addr));
25231 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
25232 gen_rtx_REG (V2SImode, i),
25233 NULL_RTX);
25234 lrm_count += 2;
25238 /* saved_regs_mask should contain IP which contains old stack pointer
25239 at the time of activation creation. Since SP and IP are adjacent registers,
25240 we can restore the value directly into SP. */
25241 gcc_assert (saved_regs_mask & (1 << IP_REGNUM));
25242 saved_regs_mask &= ~(1 << IP_REGNUM);
25243 saved_regs_mask |= (1 << SP_REGNUM);
25245 /* There are two registers left in saved_regs_mask - LR and PC. We
25246 only need to restore LR (the return address), but to
25247 save time we can load it directly into PC, unless we need a
25248 special function exit sequence, or we are not really returning. */
25249 if (really_return
25250 && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
25251 && !crtl->calls_eh_return)
25252 /* Delete LR from the register mask, so that LR on
25253 the stack is loaded into the PC in the register mask. */
25254 saved_regs_mask &= ~(1 << LR_REGNUM);
25255 else
25256 saved_regs_mask &= ~(1 << PC_REGNUM);
25258 num_regs = bit_count (saved_regs_mask);
25259 if ((offsets->outgoing_args != (1 + num_regs)) || cfun->calls_alloca)
25261 rtx_insn *insn;
25262 emit_insn (gen_blockage ());
25263 /* Unwind the stack to just below the saved registers. */
25264 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
25265 hard_frame_pointer_rtx,
25266 GEN_INT (- 4 * num_regs)));
25268 arm_add_cfa_adjust_cfa_note (insn, - 4 * num_regs,
25269 stack_pointer_rtx, hard_frame_pointer_rtx);
25272 arm_emit_multi_reg_pop (saved_regs_mask);
25274 if (IS_INTERRUPT (func_type))
25276 /* Interrupt handlers will have pushed the
25277 IP onto the stack, so restore it now. */
25278 rtx_insn *insn;
25279 rtx addr = gen_rtx_MEM (SImode,
25280 gen_rtx_POST_INC (SImode,
25281 stack_pointer_rtx));
25282 set_mem_alias_set (addr, get_frame_alias_set ());
25283 insn = emit_insn (gen_movsi (gen_rtx_REG (SImode, IP_REGNUM), addr));
25284 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
25285 gen_rtx_REG (SImode, IP_REGNUM),
25286 NULL_RTX);
25289 if (!really_return || (saved_regs_mask & (1 << PC_REGNUM)))
25290 return;
25292 if (crtl->calls_eh_return)
25293 emit_insn (gen_addsi3 (stack_pointer_rtx,
25294 stack_pointer_rtx,
25295 gen_rtx_REG (SImode, ARM_EH_STACKADJ_REGNUM)));
25297 if (IS_STACKALIGN (func_type))
25298 /* Restore the original stack pointer. Before prologue, the stack was
25299 realigned and the original stack pointer saved in r0. For details,
25300 see comment in arm_expand_prologue. */
25301 emit_insn (gen_movsi (stack_pointer_rtx, gen_rtx_REG (SImode, R0_REGNUM)));
25303 emit_jump_insn (simple_return_rtx);
25306 /* Generate RTL to represent ARM epilogue. Really_return is true if the
25307 function is not a sibcall. */
25308 void
25309 arm_expand_epilogue (bool really_return)
25311 unsigned long func_type;
25312 unsigned long saved_regs_mask;
25313 int num_regs = 0;
25314 int i;
25315 int amount;
25316 arm_stack_offsets *offsets;
25318 func_type = arm_current_func_type ();
25320 /* Naked functions don't have epilogue. Hence, generate return pattern, and
25321 let output_return_instruction take care of instruction emission if any. */
25322 if (IS_NAKED (func_type)
25323 || (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN))
25325 if (really_return)
25326 emit_jump_insn (simple_return_rtx);
25327 return;
25330 /* If we are throwing an exception, then we really must be doing a
25331 return, so we can't tail-call. */
25332 gcc_assert (!crtl->calls_eh_return || really_return);
25334 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
25336 arm_expand_epilogue_apcs_frame (really_return);
25337 return;
25340 /* Get frame offsets for ARM. */
25341 offsets = arm_get_frame_offsets ();
25342 saved_regs_mask = offsets->saved_regs_mask;
25343 num_regs = bit_count (saved_regs_mask);
25345 if (frame_pointer_needed)
25347 rtx_insn *insn;
25348 /* Restore stack pointer if necessary. */
25349 if (TARGET_ARM)
25351 /* In ARM mode, frame pointer points to first saved register.
25352 Restore stack pointer to last saved register. */
25353 amount = offsets->frame - offsets->saved_regs;
25355 /* Force out any pending memory operations that reference stacked data
25356 before stack de-allocation occurs. */
25357 emit_insn (gen_blockage ());
25358 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
25359 hard_frame_pointer_rtx,
25360 GEN_INT (amount)));
25361 arm_add_cfa_adjust_cfa_note (insn, amount,
25362 stack_pointer_rtx,
25363 hard_frame_pointer_rtx);
25365 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is not
25366 deleted. */
25367 emit_insn (gen_force_register_use (stack_pointer_rtx));
25369 else
25371 /* In Thumb-2 mode, the frame pointer points to the last saved
25372 register. */
25373 amount = offsets->locals_base - offsets->saved_regs;
25374 if (amount)
25376 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
25377 hard_frame_pointer_rtx,
25378 GEN_INT (amount)));
25379 arm_add_cfa_adjust_cfa_note (insn, amount,
25380 hard_frame_pointer_rtx,
25381 hard_frame_pointer_rtx);
25384 /* Force out any pending memory operations that reference stacked data
25385 before stack de-allocation occurs. */
25386 emit_insn (gen_blockage ());
25387 insn = emit_insn (gen_movsi (stack_pointer_rtx,
25388 hard_frame_pointer_rtx));
25389 arm_add_cfa_adjust_cfa_note (insn, 0,
25390 stack_pointer_rtx,
25391 hard_frame_pointer_rtx);
25392 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is not
25393 deleted. */
25394 emit_insn (gen_force_register_use (stack_pointer_rtx));
25397 else
25399 /* Pop off outgoing args and local frame to adjust stack pointer to
25400 last saved register. */
25401 amount = offsets->outgoing_args - offsets->saved_regs;
25402 if (amount)
25404 rtx_insn *tmp;
25405 /* Force out any pending memory operations that reference stacked data
25406 before stack de-allocation occurs. */
25407 emit_insn (gen_blockage ());
25408 tmp = emit_insn (gen_addsi3 (stack_pointer_rtx,
25409 stack_pointer_rtx,
25410 GEN_INT (amount)));
25411 arm_add_cfa_adjust_cfa_note (tmp, amount,
25412 stack_pointer_rtx, stack_pointer_rtx);
25413 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is
25414 not deleted. */
25415 emit_insn (gen_force_register_use (stack_pointer_rtx));
25419 if (TARGET_HARD_FLOAT)
25421 /* Generate VFP register multi-pop. */
25422 int end_reg = LAST_VFP_REGNUM + 1;
25424 /* Scan the registers in reverse order. We need to match
25425 any groupings made in the prologue and generate matching
25426 vldm operations. The need to match groups is because,
25427 unlike pop, vldm can only do consecutive regs. */
25428 for (i = LAST_VFP_REGNUM - 1; i >= FIRST_VFP_REGNUM; i -= 2)
25429 /* Look for a case where a reg does not need restoring. */
25430 if ((!df_regs_ever_live_p (i) || call_used_regs[i])
25431 && (!df_regs_ever_live_p (i + 1)
25432 || call_used_regs[i + 1]))
25434 /* Restore the regs discovered so far (from reg+2 to
25435 end_reg). */
25436 if (end_reg > i + 2)
25437 arm_emit_vfp_multi_reg_pop (i + 2,
25438 (end_reg - (i + 2)) / 2,
25439 stack_pointer_rtx);
25440 end_reg = i;
25443 /* Restore the remaining regs that we have discovered (or possibly
25444 even all of them, if the conditional in the for loop never
25445 fired). */
25446 if (end_reg > i + 2)
25447 arm_emit_vfp_multi_reg_pop (i + 2,
25448 (end_reg - (i + 2)) / 2,
25449 stack_pointer_rtx);
25452 if (TARGET_IWMMXT)
25453 for (i = FIRST_IWMMXT_REGNUM; i <= LAST_IWMMXT_REGNUM; i++)
25454 if (df_regs_ever_live_p (i) && !call_used_regs[i])
25456 rtx_insn *insn;
25457 rtx addr = gen_rtx_MEM (V2SImode,
25458 gen_rtx_POST_INC (SImode,
25459 stack_pointer_rtx));
25460 set_mem_alias_set (addr, get_frame_alias_set ());
25461 insn = emit_insn (gen_movsi (gen_rtx_REG (V2SImode, i), addr));
25462 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
25463 gen_rtx_REG (V2SImode, i),
25464 NULL_RTX);
25465 arm_add_cfa_adjust_cfa_note (insn, UNITS_PER_WORD,
25466 stack_pointer_rtx, stack_pointer_rtx);
25469 if (saved_regs_mask)
25471 rtx insn;
25472 bool return_in_pc = false;
25474 if (ARM_FUNC_TYPE (func_type) != ARM_FT_INTERWORKED
25475 && (TARGET_ARM || ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL)
25476 && !IS_CMSE_ENTRY (func_type)
25477 && !IS_STACKALIGN (func_type)
25478 && really_return
25479 && crtl->args.pretend_args_size == 0
25480 && saved_regs_mask & (1 << LR_REGNUM)
25481 && !crtl->calls_eh_return)
25483 saved_regs_mask &= ~(1 << LR_REGNUM);
25484 saved_regs_mask |= (1 << PC_REGNUM);
25485 return_in_pc = true;
25488 if (num_regs == 1 && (!IS_INTERRUPT (func_type) || !return_in_pc))
25490 for (i = 0; i <= LAST_ARM_REGNUM; i++)
25491 if (saved_regs_mask & (1 << i))
25493 rtx addr = gen_rtx_MEM (SImode,
25494 gen_rtx_POST_INC (SImode,
25495 stack_pointer_rtx));
25496 set_mem_alias_set (addr, get_frame_alias_set ());
25498 if (i == PC_REGNUM)
25500 insn = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
25501 XVECEXP (insn, 0, 0) = ret_rtx;
25502 XVECEXP (insn, 0, 1) = gen_rtx_SET (gen_rtx_REG (SImode, i),
25503 addr);
25504 RTX_FRAME_RELATED_P (XVECEXP (insn, 0, 1)) = 1;
25505 insn = emit_jump_insn (insn);
25507 else
25509 insn = emit_insn (gen_movsi (gen_rtx_REG (SImode, i),
25510 addr));
25511 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
25512 gen_rtx_REG (SImode, i),
25513 NULL_RTX);
25514 arm_add_cfa_adjust_cfa_note (insn, UNITS_PER_WORD,
25515 stack_pointer_rtx,
25516 stack_pointer_rtx);
25520 else
25522 if (TARGET_LDRD
25523 && current_tune->prefer_ldrd_strd
25524 && !optimize_function_for_size_p (cfun))
25526 if (TARGET_THUMB2)
25527 thumb2_emit_ldrd_pop (saved_regs_mask);
25528 else if (TARGET_ARM && !IS_INTERRUPT (func_type))
25529 arm_emit_ldrd_pop (saved_regs_mask);
25530 else
25531 arm_emit_multi_reg_pop (saved_regs_mask);
25533 else
25534 arm_emit_multi_reg_pop (saved_regs_mask);
25537 if (return_in_pc)
25538 return;
25541 amount
25542 = crtl->args.pretend_args_size + arm_compute_static_chain_stack_bytes();
25543 if (amount)
25545 int i, j;
25546 rtx dwarf = NULL_RTX;
25547 rtx_insn *tmp =
25548 emit_insn (gen_addsi3 (stack_pointer_rtx,
25549 stack_pointer_rtx,
25550 GEN_INT (amount)));
25552 RTX_FRAME_RELATED_P (tmp) = 1;
25554 if (cfun->machine->uses_anonymous_args)
25556 /* Restore pretend args. Refer arm_expand_prologue on how to save
25557 pretend_args in stack. */
25558 int num_regs = crtl->args.pretend_args_size / 4;
25559 saved_regs_mask = (0xf0 >> num_regs) & 0xf;
25560 for (j = 0, i = 0; j < num_regs; i++)
25561 if (saved_regs_mask & (1 << i))
25563 rtx reg = gen_rtx_REG (SImode, i);
25564 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
25565 j++;
25567 REG_NOTES (tmp) = dwarf;
25569 arm_add_cfa_adjust_cfa_note (tmp, amount,
25570 stack_pointer_rtx, stack_pointer_rtx);
25573 /* Clear all caller-saved regs that are not used to return. */
25574 if (IS_CMSE_ENTRY (arm_current_func_type ()))
25576 /* CMSE_ENTRY always returns. */
25577 gcc_assert (really_return);
25578 cmse_nonsecure_entry_clear_before_return ();
25581 if (!really_return)
25582 return;
25584 if (crtl->calls_eh_return)
25585 emit_insn (gen_addsi3 (stack_pointer_rtx,
25586 stack_pointer_rtx,
25587 gen_rtx_REG (SImode, ARM_EH_STACKADJ_REGNUM)));
25589 if (IS_STACKALIGN (func_type))
25590 /* Restore the original stack pointer. Before prologue, the stack was
25591 realigned and the original stack pointer saved in r0. For details,
25592 see comment in arm_expand_prologue. */
25593 emit_insn (gen_movsi (stack_pointer_rtx, gen_rtx_REG (SImode, R0_REGNUM)));
25595 emit_jump_insn (simple_return_rtx);
25598 /* Implementation of insn prologue_thumb1_interwork. This is the first
25599 "instruction" of a function called in ARM mode. Swap to thumb mode. */
25601 const char *
25602 thumb1_output_interwork (void)
25604 const char * name;
25605 FILE *f = asm_out_file;
25607 gcc_assert (MEM_P (DECL_RTL (current_function_decl)));
25608 gcc_assert (GET_CODE (XEXP (DECL_RTL (current_function_decl), 0))
25609 == SYMBOL_REF);
25610 name = XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0);
25612 /* Generate code sequence to switch us into Thumb mode. */
25613 /* The .code 32 directive has already been emitted by
25614 ASM_DECLARE_FUNCTION_NAME. */
25615 asm_fprintf (f, "\torr\t%r, %r, #1\n", IP_REGNUM, PC_REGNUM);
25616 asm_fprintf (f, "\tbx\t%r\n", IP_REGNUM);
25618 /* Generate a label, so that the debugger will notice the
25619 change in instruction sets. This label is also used by
25620 the assembler to bypass the ARM code when this function
25621 is called from a Thumb encoded function elsewhere in the
25622 same file. Hence the definition of STUB_NAME here must
25623 agree with the definition in gas/config/tc-arm.c. */
25625 #define STUB_NAME ".real_start_of"
25627 fprintf (f, "\t.code\t16\n");
25628 #ifdef ARM_PE
25629 if (arm_dllexport_name_p (name))
25630 name = arm_strip_name_encoding (name);
25631 #endif
25632 asm_fprintf (f, "\t.globl %s%U%s\n", STUB_NAME, name);
25633 fprintf (f, "\t.thumb_func\n");
25634 asm_fprintf (f, "%s%U%s:\n", STUB_NAME, name);
25636 return "";
25639 /* Handle the case of a double word load into a low register from
25640 a computed memory address. The computed address may involve a
25641 register which is overwritten by the load. */
25642 const char *
25643 thumb_load_double_from_address (rtx *operands)
25645 rtx addr;
25646 rtx base;
25647 rtx offset;
25648 rtx arg1;
25649 rtx arg2;
25651 gcc_assert (REG_P (operands[0]));
25652 gcc_assert (MEM_P (operands[1]));
25654 /* Get the memory address. */
25655 addr = XEXP (operands[1], 0);
25657 /* Work out how the memory address is computed. */
25658 switch (GET_CODE (addr))
25660 case REG:
25661 operands[2] = adjust_address (operands[1], SImode, 4);
25663 if (REGNO (operands[0]) == REGNO (addr))
25665 output_asm_insn ("ldr\t%H0, %2", operands);
25666 output_asm_insn ("ldr\t%0, %1", operands);
25668 else
25670 output_asm_insn ("ldr\t%0, %1", operands);
25671 output_asm_insn ("ldr\t%H0, %2", operands);
25673 break;
25675 case CONST:
25676 /* Compute <address> + 4 for the high order load. */
25677 operands[2] = adjust_address (operands[1], SImode, 4);
25679 output_asm_insn ("ldr\t%0, %1", operands);
25680 output_asm_insn ("ldr\t%H0, %2", operands);
25681 break;
25683 case PLUS:
25684 arg1 = XEXP (addr, 0);
25685 arg2 = XEXP (addr, 1);
25687 if (CONSTANT_P (arg1))
25688 base = arg2, offset = arg1;
25689 else
25690 base = arg1, offset = arg2;
25692 gcc_assert (REG_P (base));
25694 /* Catch the case of <address> = <reg> + <reg> */
25695 if (REG_P (offset))
25697 int reg_offset = REGNO (offset);
25698 int reg_base = REGNO (base);
25699 int reg_dest = REGNO (operands[0]);
25701 /* Add the base and offset registers together into the
25702 higher destination register. */
25703 asm_fprintf (asm_out_file, "\tadd\t%r, %r, %r",
25704 reg_dest + 1, reg_base, reg_offset);
25706 /* Load the lower destination register from the address in
25707 the higher destination register. */
25708 asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #0]",
25709 reg_dest, reg_dest + 1);
25711 /* Load the higher destination register from its own address
25712 plus 4. */
25713 asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #4]",
25714 reg_dest + 1, reg_dest + 1);
25716 else
25718 /* Compute <address> + 4 for the high order load. */
25719 operands[2] = adjust_address (operands[1], SImode, 4);
25721 /* If the computed address is held in the low order register
25722 then load the high order register first, otherwise always
25723 load the low order register first. */
25724 if (REGNO (operands[0]) == REGNO (base))
25726 output_asm_insn ("ldr\t%H0, %2", operands);
25727 output_asm_insn ("ldr\t%0, %1", operands);
25729 else
25731 output_asm_insn ("ldr\t%0, %1", operands);
25732 output_asm_insn ("ldr\t%H0, %2", operands);
25735 break;
25737 case LABEL_REF:
25738 /* With no registers to worry about we can just load the value
25739 directly. */
25740 operands[2] = adjust_address (operands[1], SImode, 4);
25742 output_asm_insn ("ldr\t%H0, %2", operands);
25743 output_asm_insn ("ldr\t%0, %1", operands);
25744 break;
25746 default:
25747 gcc_unreachable ();
25750 return "";
25753 const char *
25754 thumb_output_move_mem_multiple (int n, rtx *operands)
25756 switch (n)
25758 case 2:
25759 if (REGNO (operands[4]) > REGNO (operands[5]))
25760 std::swap (operands[4], operands[5]);
25762 output_asm_insn ("ldmia\t%1!, {%4, %5}", operands);
25763 output_asm_insn ("stmia\t%0!, {%4, %5}", operands);
25764 break;
25766 case 3:
25767 if (REGNO (operands[4]) > REGNO (operands[5]))
25768 std::swap (operands[4], operands[5]);
25769 if (REGNO (operands[5]) > REGNO (operands[6]))
25770 std::swap (operands[5], operands[6]);
25771 if (REGNO (operands[4]) > REGNO (operands[5]))
25772 std::swap (operands[4], operands[5]);
25774 output_asm_insn ("ldmia\t%1!, {%4, %5, %6}", operands);
25775 output_asm_insn ("stmia\t%0!, {%4, %5, %6}", operands);
25776 break;
25778 default:
25779 gcc_unreachable ();
25782 return "";
25785 /* Output a call-via instruction for thumb state. */
25786 const char *
25787 thumb_call_via_reg (rtx reg)
25789 int regno = REGNO (reg);
25790 rtx *labelp;
25792 gcc_assert (regno < LR_REGNUM);
25794 /* If we are in the normal text section we can use a single instance
25795 per compilation unit. If we are doing function sections, then we need
25796 an entry per section, since we can't rely on reachability. */
25797 if (in_section == text_section)
25799 thumb_call_reg_needed = 1;
25801 if (thumb_call_via_label[regno] == NULL)
25802 thumb_call_via_label[regno] = gen_label_rtx ();
25803 labelp = thumb_call_via_label + regno;
25805 else
25807 if (cfun->machine->call_via[regno] == NULL)
25808 cfun->machine->call_via[regno] = gen_label_rtx ();
25809 labelp = cfun->machine->call_via + regno;
25812 output_asm_insn ("bl\t%a0", labelp);
25813 return "";
25816 /* Routines for generating rtl. */
25817 void
25818 thumb_expand_movmemqi (rtx *operands)
25820 rtx out = copy_to_mode_reg (SImode, XEXP (operands[0], 0));
25821 rtx in = copy_to_mode_reg (SImode, XEXP (operands[1], 0));
25822 HOST_WIDE_INT len = INTVAL (operands[2]);
25823 HOST_WIDE_INT offset = 0;
25825 while (len >= 12)
25827 emit_insn (gen_movmem12b (out, in, out, in));
25828 len -= 12;
25831 if (len >= 8)
25833 emit_insn (gen_movmem8b (out, in, out, in));
25834 len -= 8;
25837 if (len >= 4)
25839 rtx reg = gen_reg_rtx (SImode);
25840 emit_insn (gen_movsi (reg, gen_rtx_MEM (SImode, in)));
25841 emit_insn (gen_movsi (gen_rtx_MEM (SImode, out), reg));
25842 len -= 4;
25843 offset += 4;
25846 if (len >= 2)
25848 rtx reg = gen_reg_rtx (HImode);
25849 emit_insn (gen_movhi (reg, gen_rtx_MEM (HImode,
25850 plus_constant (Pmode, in,
25851 offset))));
25852 emit_insn (gen_movhi (gen_rtx_MEM (HImode, plus_constant (Pmode, out,
25853 offset)),
25854 reg));
25855 len -= 2;
25856 offset += 2;
25859 if (len)
25861 rtx reg = gen_reg_rtx (QImode);
25862 emit_insn (gen_movqi (reg, gen_rtx_MEM (QImode,
25863 plus_constant (Pmode, in,
25864 offset))));
25865 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, out,
25866 offset)),
25867 reg));
25871 void
25872 thumb_reload_out_hi (rtx *operands)
25874 emit_insn (gen_thumb_movhi_clobber (operands[0], operands[1], operands[2]));
25877 /* Return the length of a function name prefix
25878 that starts with the character 'c'. */
25879 static int
25880 arm_get_strip_length (int c)
25882 switch (c)
25884 ARM_NAME_ENCODING_LENGTHS
25885 default: return 0;
25889 /* Return a pointer to a function's name with any
25890 and all prefix encodings stripped from it. */
25891 const char *
25892 arm_strip_name_encoding (const char *name)
25894 int skip;
25896 while ((skip = arm_get_strip_length (* name)))
25897 name += skip;
25899 return name;
25902 /* If there is a '*' anywhere in the name's prefix, then
25903 emit the stripped name verbatim, otherwise prepend an
25904 underscore if leading underscores are being used. */
25905 void
25906 arm_asm_output_labelref (FILE *stream, const char *name)
25908 int skip;
25909 int verbatim = 0;
25911 while ((skip = arm_get_strip_length (* name)))
25913 verbatim |= (*name == '*');
25914 name += skip;
25917 if (verbatim)
25918 fputs (name, stream);
25919 else
25920 asm_fprintf (stream, "%U%s", name);
25923 /* This function is used to emit an EABI tag and its associated value.
25924 We emit the numerical value of the tag in case the assembler does not
25925 support textual tags. (Eg gas prior to 2.20). If requested we include
25926 the tag name in a comment so that anyone reading the assembler output
25927 will know which tag is being set.
25929 This function is not static because arm-c.c needs it too. */
25931 void
25932 arm_emit_eabi_attribute (const char *name, int num, int val)
25934 asm_fprintf (asm_out_file, "\t.eabi_attribute %d, %d", num, val);
25935 if (flag_verbose_asm || flag_debug_asm)
25936 asm_fprintf (asm_out_file, "\t%s %s", ASM_COMMENT_START, name);
25937 asm_fprintf (asm_out_file, "\n");
25940 /* This function is used to print CPU tuning information as comment
25941 in assembler file. Pointers are not printed for now. */
25943 void
25944 arm_print_tune_info (void)
25946 asm_fprintf (asm_out_file, "\t@.tune parameters\n");
25947 asm_fprintf (asm_out_file, "\t\t@constant_limit:\t%d\n",
25948 current_tune->constant_limit);
25949 asm_fprintf (asm_out_file, "\t\t@max_insns_skipped:\t%d\n",
25950 current_tune->max_insns_skipped);
25951 asm_fprintf (asm_out_file, "\t\t@prefetch.num_slots:\t%d\n",
25952 current_tune->prefetch.num_slots);
25953 asm_fprintf (asm_out_file, "\t\t@prefetch.l1_cache_size:\t%d\n",
25954 current_tune->prefetch.l1_cache_size);
25955 asm_fprintf (asm_out_file, "\t\t@prefetch.l1_cache_line_size:\t%d\n",
25956 current_tune->prefetch.l1_cache_line_size);
25957 asm_fprintf (asm_out_file, "\t\t@prefer_constant_pool:\t%d\n",
25958 (int) current_tune->prefer_constant_pool);
25959 asm_fprintf (asm_out_file, "\t\t@branch_cost:\t(s:speed, p:predictable)\n");
25960 asm_fprintf (asm_out_file, "\t\t\t\ts&p\tcost\n");
25961 asm_fprintf (asm_out_file, "\t\t\t\t00\t%d\n",
25962 current_tune->branch_cost (false, false));
25963 asm_fprintf (asm_out_file, "\t\t\t\t01\t%d\n",
25964 current_tune->branch_cost (false, true));
25965 asm_fprintf (asm_out_file, "\t\t\t\t10\t%d\n",
25966 current_tune->branch_cost (true, false));
25967 asm_fprintf (asm_out_file, "\t\t\t\t11\t%d\n",
25968 current_tune->branch_cost (true, true));
25969 asm_fprintf (asm_out_file, "\t\t@prefer_ldrd_strd:\t%d\n",
25970 (int) current_tune->prefer_ldrd_strd);
25971 asm_fprintf (asm_out_file, "\t\t@logical_op_non_short_circuit:\t[%d,%d]\n",
25972 (int) current_tune->logical_op_non_short_circuit_thumb,
25973 (int) current_tune->logical_op_non_short_circuit_arm);
25974 asm_fprintf (asm_out_file, "\t\t@prefer_neon_for_64bits:\t%d\n",
25975 (int) current_tune->prefer_neon_for_64bits);
25976 asm_fprintf (asm_out_file,
25977 "\t\t@disparage_flag_setting_t16_encodings:\t%d\n",
25978 (int) current_tune->disparage_flag_setting_t16_encodings);
25979 asm_fprintf (asm_out_file, "\t\t@string_ops_prefer_neon:\t%d\n",
25980 (int) current_tune->string_ops_prefer_neon);
25981 asm_fprintf (asm_out_file, "\t\t@max_insns_inline_memset:\t%d\n",
25982 current_tune->max_insns_inline_memset);
25983 asm_fprintf (asm_out_file, "\t\t@fusible_ops:\t%u\n",
25984 current_tune->fusible_ops);
25985 asm_fprintf (asm_out_file, "\t\t@sched_autopref:\t%d\n",
25986 (int) current_tune->sched_autopref);
25989 static void
25990 arm_file_start (void)
25992 int val;
25994 if (TARGET_BPABI)
25996 /* We don't have a specified CPU. Use the architecture to
25997 generate the tags.
25999 Note: it might be better to do this unconditionally, then the
26000 assembler would not need to know about all new CPU names as
26001 they are added. */
26002 if (!arm_active_target.core_name)
26004 /* armv7ve doesn't support any extensions. */
26005 if (strcmp (arm_active_target.arch_name, "armv7ve") == 0)
26007 /* Keep backward compatability for assemblers
26008 which don't support armv7ve. */
26009 asm_fprintf (asm_out_file, "\t.arch armv7-a\n");
26010 asm_fprintf (asm_out_file, "\t.arch_extension virt\n");
26011 asm_fprintf (asm_out_file, "\t.arch_extension idiv\n");
26012 asm_fprintf (asm_out_file, "\t.arch_extension sec\n");
26013 asm_fprintf (asm_out_file, "\t.arch_extension mp\n");
26015 else
26017 const char* pos = strchr (arm_active_target.arch_name, '+');
26018 if (pos)
26020 char buf[32];
26021 gcc_assert (strlen (arm_active_target.arch_name)
26022 <= sizeof (buf) / sizeof (*pos));
26023 strncpy (buf, arm_active_target.arch_name,
26024 (pos - arm_active_target.arch_name) * sizeof (*pos));
26025 buf[pos - arm_active_target.arch_name] = '\0';
26026 asm_fprintf (asm_out_file, "\t.arch %s\n", buf);
26027 asm_fprintf (asm_out_file, "\t.arch_extension %s\n", pos + 1);
26029 else
26030 asm_fprintf (asm_out_file, "\t.arch %s\n",
26031 arm_active_target.arch_name);
26034 else if (strncmp (arm_active_target.core_name, "generic", 7) == 0)
26035 asm_fprintf (asm_out_file, "\t.arch %s\n",
26036 arm_active_target.core_name + 8);
26037 else
26039 const char* truncated_name
26040 = arm_rewrite_selected_cpu (arm_active_target.core_name);
26041 asm_fprintf (asm_out_file, "\t.cpu %s\n", truncated_name);
26044 if (print_tune_info)
26045 arm_print_tune_info ();
26047 if (! TARGET_SOFT_FLOAT)
26049 if (TARGET_HARD_FLOAT && TARGET_VFP_SINGLE)
26050 arm_emit_eabi_attribute ("Tag_ABI_HardFP_use", 27, 1);
26052 if (TARGET_HARD_FLOAT_ABI)
26053 arm_emit_eabi_attribute ("Tag_ABI_VFP_args", 28, 1);
26056 /* Some of these attributes only apply when the corresponding features
26057 are used. However we don't have any easy way of figuring this out.
26058 Conservatively record the setting that would have been used. */
26060 if (flag_rounding_math)
26061 arm_emit_eabi_attribute ("Tag_ABI_FP_rounding", 19, 1);
26063 if (!flag_unsafe_math_optimizations)
26065 arm_emit_eabi_attribute ("Tag_ABI_FP_denormal", 20, 1);
26066 arm_emit_eabi_attribute ("Tag_ABI_FP_exceptions", 21, 1);
26068 if (flag_signaling_nans)
26069 arm_emit_eabi_attribute ("Tag_ABI_FP_user_exceptions", 22, 1);
26071 arm_emit_eabi_attribute ("Tag_ABI_FP_number_model", 23,
26072 flag_finite_math_only ? 1 : 3);
26074 arm_emit_eabi_attribute ("Tag_ABI_align8_needed", 24, 1);
26075 arm_emit_eabi_attribute ("Tag_ABI_align8_preserved", 25, 1);
26076 arm_emit_eabi_attribute ("Tag_ABI_enum_size", 26,
26077 flag_short_enums ? 1 : 2);
26079 /* Tag_ABI_optimization_goals. */
26080 if (optimize_size)
26081 val = 4;
26082 else if (optimize >= 2)
26083 val = 2;
26084 else if (optimize)
26085 val = 1;
26086 else
26087 val = 6;
26088 arm_emit_eabi_attribute ("Tag_ABI_optimization_goals", 30, val);
26090 arm_emit_eabi_attribute ("Tag_CPU_unaligned_access", 34,
26091 unaligned_access);
26093 if (arm_fp16_format)
26094 arm_emit_eabi_attribute ("Tag_ABI_FP_16bit_format", 38,
26095 (int) arm_fp16_format);
26097 if (arm_lang_output_object_attributes_hook)
26098 arm_lang_output_object_attributes_hook();
26101 default_file_start ();
26104 static void
26105 arm_file_end (void)
26107 int regno;
26109 if (NEED_INDICATE_EXEC_STACK)
26110 /* Add .note.GNU-stack. */
26111 file_end_indicate_exec_stack ();
26113 if (! thumb_call_reg_needed)
26114 return;
26116 switch_to_section (text_section);
26117 asm_fprintf (asm_out_file, "\t.code 16\n");
26118 ASM_OUTPUT_ALIGN (asm_out_file, 1);
26120 for (regno = 0; regno < LR_REGNUM; regno++)
26122 rtx label = thumb_call_via_label[regno];
26124 if (label != 0)
26126 targetm.asm_out.internal_label (asm_out_file, "L",
26127 CODE_LABEL_NUMBER (label));
26128 asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
26133 #ifndef ARM_PE
26134 /* Symbols in the text segment can be accessed without indirecting via the
26135 constant pool; it may take an extra binary operation, but this is still
26136 faster than indirecting via memory. Don't do this when not optimizing,
26137 since we won't be calculating al of the offsets necessary to do this
26138 simplification. */
26140 static void
26141 arm_encode_section_info (tree decl, rtx rtl, int first)
26143 if (optimize > 0 && TREE_CONSTANT (decl))
26144 SYMBOL_REF_FLAG (XEXP (rtl, 0)) = 1;
26146 default_encode_section_info (decl, rtl, first);
26148 #endif /* !ARM_PE */
26150 static void
26151 arm_internal_label (FILE *stream, const char *prefix, unsigned long labelno)
26153 if (arm_ccfsm_state == 3 && (unsigned) arm_target_label == labelno
26154 && !strcmp (prefix, "L"))
26156 arm_ccfsm_state = 0;
26157 arm_target_insn = NULL;
26159 default_internal_label (stream, prefix, labelno);
26162 /* Output code to add DELTA to the first argument, and then jump
26163 to FUNCTION. Used for C++ multiple inheritance. */
26165 static void
26166 arm_thumb1_mi_thunk (FILE *file, tree, HOST_WIDE_INT delta,
26167 HOST_WIDE_INT, tree function)
26169 static int thunk_label = 0;
26170 char label[256];
26171 char labelpc[256];
26172 int mi_delta = delta;
26173 const char *const mi_op = mi_delta < 0 ? "sub" : "add";
26174 int shift = 0;
26175 int this_regno = (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function)
26176 ? 1 : 0);
26177 if (mi_delta < 0)
26178 mi_delta = - mi_delta;
26180 final_start_function (emit_barrier (), file, 1);
26182 if (TARGET_THUMB1)
26184 int labelno = thunk_label++;
26185 ASM_GENERATE_INTERNAL_LABEL (label, "LTHUMBFUNC", labelno);
26186 /* Thunks are entered in arm mode when avaiable. */
26187 if (TARGET_THUMB1_ONLY)
26189 /* push r3 so we can use it as a temporary. */
26190 /* TODO: Omit this save if r3 is not used. */
26191 fputs ("\tpush {r3}\n", file);
26192 fputs ("\tldr\tr3, ", file);
26194 else
26196 fputs ("\tldr\tr12, ", file);
26198 assemble_name (file, label);
26199 fputc ('\n', file);
26200 if (flag_pic)
26202 /* If we are generating PIC, the ldr instruction below loads
26203 "(target - 7) - .LTHUNKPCn" into r12. The pc reads as
26204 the address of the add + 8, so we have:
26206 r12 = (target - 7) - .LTHUNKPCn + (.LTHUNKPCn + 8)
26207 = target + 1.
26209 Note that we have "+ 1" because some versions of GNU ld
26210 don't set the low bit of the result for R_ARM_REL32
26211 relocations against thumb function symbols.
26212 On ARMv6M this is +4, not +8. */
26213 ASM_GENERATE_INTERNAL_LABEL (labelpc, "LTHUNKPC", labelno);
26214 assemble_name (file, labelpc);
26215 fputs (":\n", file);
26216 if (TARGET_THUMB1_ONLY)
26218 /* This is 2 insns after the start of the thunk, so we know it
26219 is 4-byte aligned. */
26220 fputs ("\tadd\tr3, pc, r3\n", file);
26221 fputs ("\tmov r12, r3\n", file);
26223 else
26224 fputs ("\tadd\tr12, pc, r12\n", file);
26226 else if (TARGET_THUMB1_ONLY)
26227 fputs ("\tmov r12, r3\n", file);
26229 if (TARGET_THUMB1_ONLY)
26231 if (mi_delta > 255)
26233 fputs ("\tldr\tr3, ", file);
26234 assemble_name (file, label);
26235 fputs ("+4\n", file);
26236 asm_fprintf (file, "\t%ss\t%r, %r, r3\n",
26237 mi_op, this_regno, this_regno);
26239 else if (mi_delta != 0)
26241 /* Thumb1 unified syntax requires s suffix in instruction name when
26242 one of the operands is immediate. */
26243 asm_fprintf (file, "\t%ss\t%r, %r, #%d\n",
26244 mi_op, this_regno, this_regno,
26245 mi_delta);
26248 else
26250 /* TODO: Use movw/movt for large constants when available. */
26251 while (mi_delta != 0)
26253 if ((mi_delta & (3 << shift)) == 0)
26254 shift += 2;
26255 else
26257 asm_fprintf (file, "\t%s\t%r, %r, #%d\n",
26258 mi_op, this_regno, this_regno,
26259 mi_delta & (0xff << shift));
26260 mi_delta &= ~(0xff << shift);
26261 shift += 8;
26265 if (TARGET_THUMB1)
26267 if (TARGET_THUMB1_ONLY)
26268 fputs ("\tpop\t{r3}\n", file);
26270 fprintf (file, "\tbx\tr12\n");
26271 ASM_OUTPUT_ALIGN (file, 2);
26272 assemble_name (file, label);
26273 fputs (":\n", file);
26274 if (flag_pic)
26276 /* Output ".word .LTHUNKn-[3,7]-.LTHUNKPCn". */
26277 rtx tem = XEXP (DECL_RTL (function), 0);
26278 /* For TARGET_THUMB1_ONLY the thunk is in Thumb mode, so the PC
26279 pipeline offset is four rather than eight. Adjust the offset
26280 accordingly. */
26281 tem = plus_constant (GET_MODE (tem), tem,
26282 TARGET_THUMB1_ONLY ? -3 : -7);
26283 tem = gen_rtx_MINUS (GET_MODE (tem),
26284 tem,
26285 gen_rtx_SYMBOL_REF (Pmode,
26286 ggc_strdup (labelpc)));
26287 assemble_integer (tem, 4, BITS_PER_WORD, 1);
26289 else
26290 /* Output ".word .LTHUNKn". */
26291 assemble_integer (XEXP (DECL_RTL (function), 0), 4, BITS_PER_WORD, 1);
26293 if (TARGET_THUMB1_ONLY && mi_delta > 255)
26294 assemble_integer (GEN_INT(mi_delta), 4, BITS_PER_WORD, 1);
26296 else
26298 fputs ("\tb\t", file);
26299 assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
26300 if (NEED_PLT_RELOC)
26301 fputs ("(PLT)", file);
26302 fputc ('\n', file);
26305 final_end_function ();
26308 /* MI thunk handling for TARGET_32BIT. */
26310 static void
26311 arm32_output_mi_thunk (FILE *file, tree, HOST_WIDE_INT delta,
26312 HOST_WIDE_INT vcall_offset, tree function)
26314 /* On ARM, this_regno is R0 or R1 depending on
26315 whether the function returns an aggregate or not.
26317 int this_regno = (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)),
26318 function)
26319 ? R1_REGNUM : R0_REGNUM);
26321 rtx temp = gen_rtx_REG (Pmode, IP_REGNUM);
26322 rtx this_rtx = gen_rtx_REG (Pmode, this_regno);
26323 reload_completed = 1;
26324 emit_note (NOTE_INSN_PROLOGUE_END);
26326 /* Add DELTA to THIS_RTX. */
26327 if (delta != 0)
26328 arm_split_constant (PLUS, Pmode, NULL_RTX,
26329 delta, this_rtx, this_rtx, false);
26331 /* Add *(*THIS_RTX + VCALL_OFFSET) to THIS_RTX. */
26332 if (vcall_offset != 0)
26334 /* Load *THIS_RTX. */
26335 emit_move_insn (temp, gen_rtx_MEM (Pmode, this_rtx));
26336 /* Compute *THIS_RTX + VCALL_OFFSET. */
26337 arm_split_constant (PLUS, Pmode, NULL_RTX, vcall_offset, temp, temp,
26338 false);
26339 /* Compute *(*THIS_RTX + VCALL_OFFSET). */
26340 emit_move_insn (temp, gen_rtx_MEM (Pmode, temp));
26341 emit_insn (gen_add3_insn (this_rtx, this_rtx, temp));
26344 /* Generate a tail call to the target function. */
26345 if (!TREE_USED (function))
26347 assemble_external (function);
26348 TREE_USED (function) = 1;
26350 rtx funexp = XEXP (DECL_RTL (function), 0);
26351 funexp = gen_rtx_MEM (FUNCTION_MODE, funexp);
26352 rtx_insn * insn = emit_call_insn (gen_sibcall (funexp, const0_rtx, NULL_RTX));
26353 SIBLING_CALL_P (insn) = 1;
26355 insn = get_insns ();
26356 shorten_branches (insn);
26357 final_start_function (insn, file, 1);
26358 final (insn, file, 1);
26359 final_end_function ();
26361 /* Stop pretending this is a post-reload pass. */
26362 reload_completed = 0;
26365 /* Output code to add DELTA to the first argument, and then jump
26366 to FUNCTION. Used for C++ multiple inheritance. */
26368 static void
26369 arm_output_mi_thunk (FILE *file, tree thunk, HOST_WIDE_INT delta,
26370 HOST_WIDE_INT vcall_offset, tree function)
26372 if (TARGET_32BIT)
26373 arm32_output_mi_thunk (file, thunk, delta, vcall_offset, function);
26374 else
26375 arm_thumb1_mi_thunk (file, thunk, delta, vcall_offset, function);
26379 arm_emit_vector_const (FILE *file, rtx x)
26381 int i;
26382 const char * pattern;
26384 gcc_assert (GET_CODE (x) == CONST_VECTOR);
26386 switch (GET_MODE (x))
26388 case V2SImode: pattern = "%08x"; break;
26389 case V4HImode: pattern = "%04x"; break;
26390 case V8QImode: pattern = "%02x"; break;
26391 default: gcc_unreachable ();
26394 fprintf (file, "0x");
26395 for (i = CONST_VECTOR_NUNITS (x); i--;)
26397 rtx element;
26399 element = CONST_VECTOR_ELT (x, i);
26400 fprintf (file, pattern, INTVAL (element));
26403 return 1;
26406 /* Emit a fp16 constant appropriately padded to occupy a 4-byte word.
26407 HFmode constant pool entries are actually loaded with ldr. */
26408 void
26409 arm_emit_fp16_const (rtx c)
26411 long bits;
26413 bits = real_to_target (NULL, CONST_DOUBLE_REAL_VALUE (c), HFmode);
26414 if (WORDS_BIG_ENDIAN)
26415 assemble_zeros (2);
26416 assemble_integer (GEN_INT (bits), 2, BITS_PER_WORD, 1);
26417 if (!WORDS_BIG_ENDIAN)
26418 assemble_zeros (2);
26421 const char *
26422 arm_output_load_gr (rtx *operands)
26424 rtx reg;
26425 rtx offset;
26426 rtx wcgr;
26427 rtx sum;
26429 if (!MEM_P (operands [1])
26430 || GET_CODE (sum = XEXP (operands [1], 0)) != PLUS
26431 || !REG_P (reg = XEXP (sum, 0))
26432 || !CONST_INT_P (offset = XEXP (sum, 1))
26433 || ((INTVAL (offset) < 1024) && (INTVAL (offset) > -1024)))
26434 return "wldrw%?\t%0, %1";
26436 /* Fix up an out-of-range load of a GR register. */
26437 output_asm_insn ("str%?\t%0, [sp, #-4]!\t@ Start of GR load expansion", & reg);
26438 wcgr = operands[0];
26439 operands[0] = reg;
26440 output_asm_insn ("ldr%?\t%0, %1", operands);
26442 operands[0] = wcgr;
26443 operands[1] = reg;
26444 output_asm_insn ("tmcr%?\t%0, %1", operands);
26445 output_asm_insn ("ldr%?\t%0, [sp], #4\t@ End of GR load expansion", & reg);
26447 return "";
26450 /* Worker function for TARGET_SETUP_INCOMING_VARARGS.
26452 On the ARM, PRETEND_SIZE is set in order to have the prologue push the last
26453 named arg and all anonymous args onto the stack.
26454 XXX I know the prologue shouldn't be pushing registers, but it is faster
26455 that way. */
26457 static void
26458 arm_setup_incoming_varargs (cumulative_args_t pcum_v,
26459 machine_mode mode,
26460 tree type,
26461 int *pretend_size,
26462 int second_time ATTRIBUTE_UNUSED)
26464 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
26465 int nregs;
26467 cfun->machine->uses_anonymous_args = 1;
26468 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
26470 nregs = pcum->aapcs_ncrn;
26471 if ((nregs & 1) && arm_needs_doubleword_align (mode, type))
26472 nregs++;
26474 else
26475 nregs = pcum->nregs;
26477 if (nregs < NUM_ARG_REGS)
26478 *pretend_size = (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
26481 /* We can't rely on the caller doing the proper promotion when
26482 using APCS or ATPCS. */
26484 static bool
26485 arm_promote_prototypes (const_tree t ATTRIBUTE_UNUSED)
26487 return !TARGET_AAPCS_BASED;
26490 static machine_mode
26491 arm_promote_function_mode (const_tree type ATTRIBUTE_UNUSED,
26492 machine_mode mode,
26493 int *punsignedp ATTRIBUTE_UNUSED,
26494 const_tree fntype ATTRIBUTE_UNUSED,
26495 int for_return ATTRIBUTE_UNUSED)
26497 if (GET_MODE_CLASS (mode) == MODE_INT
26498 && GET_MODE_SIZE (mode) < 4)
26499 return SImode;
26501 return mode;
26504 /* AAPCS based ABIs use short enums by default. */
26506 static bool
26507 arm_default_short_enums (void)
26509 return TARGET_AAPCS_BASED && arm_abi != ARM_ABI_AAPCS_LINUX;
26513 /* AAPCS requires that anonymous bitfields affect structure alignment. */
26515 static bool
26516 arm_align_anon_bitfield (void)
26518 return TARGET_AAPCS_BASED;
26522 /* The generic C++ ABI says 64-bit (long long). The EABI says 32-bit. */
26524 static tree
26525 arm_cxx_guard_type (void)
26527 return TARGET_AAPCS_BASED ? integer_type_node : long_long_integer_type_node;
26531 /* The EABI says test the least significant bit of a guard variable. */
26533 static bool
26534 arm_cxx_guard_mask_bit (void)
26536 return TARGET_AAPCS_BASED;
26540 /* The EABI specifies that all array cookies are 8 bytes long. */
26542 static tree
26543 arm_get_cookie_size (tree type)
26545 tree size;
26547 if (!TARGET_AAPCS_BASED)
26548 return default_cxx_get_cookie_size (type);
26550 size = build_int_cst (sizetype, 8);
26551 return size;
26555 /* The EABI says that array cookies should also contain the element size. */
26557 static bool
26558 arm_cookie_has_size (void)
26560 return TARGET_AAPCS_BASED;
26564 /* The EABI says constructors and destructors should return a pointer to
26565 the object constructed/destroyed. */
26567 static bool
26568 arm_cxx_cdtor_returns_this (void)
26570 return TARGET_AAPCS_BASED;
26573 /* The EABI says that an inline function may never be the key
26574 method. */
26576 static bool
26577 arm_cxx_key_method_may_be_inline (void)
26579 return !TARGET_AAPCS_BASED;
26582 static void
26583 arm_cxx_determine_class_data_visibility (tree decl)
26585 if (!TARGET_AAPCS_BASED
26586 || !TARGET_DLLIMPORT_DECL_ATTRIBUTES)
26587 return;
26589 /* In general, \S 3.2.5.5 of the ARM EABI requires that class data
26590 is exported. However, on systems without dynamic vague linkage,
26591 \S 3.2.5.6 says that COMDAT class data has hidden linkage. */
26592 if (!TARGET_ARM_DYNAMIC_VAGUE_LINKAGE_P && DECL_COMDAT (decl))
26593 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
26594 else
26595 DECL_VISIBILITY (decl) = VISIBILITY_DEFAULT;
26596 DECL_VISIBILITY_SPECIFIED (decl) = 1;
26599 static bool
26600 arm_cxx_class_data_always_comdat (void)
26602 /* \S 3.2.5.4 of the ARM C++ ABI says that class data only have
26603 vague linkage if the class has no key function. */
26604 return !TARGET_AAPCS_BASED;
26608 /* The EABI says __aeabi_atexit should be used to register static
26609 destructors. */
26611 static bool
26612 arm_cxx_use_aeabi_atexit (void)
26614 return TARGET_AAPCS_BASED;
26618 void
26619 arm_set_return_address (rtx source, rtx scratch)
26621 arm_stack_offsets *offsets;
26622 HOST_WIDE_INT delta;
26623 rtx addr;
26624 unsigned long saved_regs;
26626 offsets = arm_get_frame_offsets ();
26627 saved_regs = offsets->saved_regs_mask;
26629 if ((saved_regs & (1 << LR_REGNUM)) == 0)
26630 emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
26631 else
26633 if (frame_pointer_needed)
26634 addr = plus_constant (Pmode, hard_frame_pointer_rtx, -4);
26635 else
26637 /* LR will be the first saved register. */
26638 delta = offsets->outgoing_args - (offsets->frame + 4);
26641 if (delta >= 4096)
26643 emit_insn (gen_addsi3 (scratch, stack_pointer_rtx,
26644 GEN_INT (delta & ~4095)));
26645 addr = scratch;
26646 delta &= 4095;
26648 else
26649 addr = stack_pointer_rtx;
26651 addr = plus_constant (Pmode, addr, delta);
26653 /* The store needs to be marked as frame related in order to prevent
26654 DSE from deleting it as dead if it is based on fp. */
26655 rtx insn = emit_move_insn (gen_frame_mem (Pmode, addr), source);
26656 RTX_FRAME_RELATED_P (insn) = 1;
26657 add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (Pmode, LR_REGNUM));
26662 void
26663 thumb_set_return_address (rtx source, rtx scratch)
26665 arm_stack_offsets *offsets;
26666 HOST_WIDE_INT delta;
26667 HOST_WIDE_INT limit;
26668 int reg;
26669 rtx addr;
26670 unsigned long mask;
26672 emit_use (source);
26674 offsets = arm_get_frame_offsets ();
26675 mask = offsets->saved_regs_mask;
26676 if (mask & (1 << LR_REGNUM))
26678 limit = 1024;
26679 /* Find the saved regs. */
26680 if (frame_pointer_needed)
26682 delta = offsets->soft_frame - offsets->saved_args;
26683 reg = THUMB_HARD_FRAME_POINTER_REGNUM;
26684 if (TARGET_THUMB1)
26685 limit = 128;
26687 else
26689 delta = offsets->outgoing_args - offsets->saved_args;
26690 reg = SP_REGNUM;
26692 /* Allow for the stack frame. */
26693 if (TARGET_THUMB1 && TARGET_BACKTRACE)
26694 delta -= 16;
26695 /* The link register is always the first saved register. */
26696 delta -= 4;
26698 /* Construct the address. */
26699 addr = gen_rtx_REG (SImode, reg);
26700 if (delta > limit)
26702 emit_insn (gen_movsi (scratch, GEN_INT (delta)));
26703 emit_insn (gen_addsi3 (scratch, scratch, stack_pointer_rtx));
26704 addr = scratch;
26706 else
26707 addr = plus_constant (Pmode, addr, delta);
26709 /* The store needs to be marked as frame related in order to prevent
26710 DSE from deleting it as dead if it is based on fp. */
26711 rtx insn = emit_move_insn (gen_frame_mem (Pmode, addr), source);
26712 RTX_FRAME_RELATED_P (insn) = 1;
26713 add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (Pmode, LR_REGNUM));
26715 else
26716 emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
26719 /* Implements target hook vector_mode_supported_p. */
26720 bool
26721 arm_vector_mode_supported_p (machine_mode mode)
26723 /* Neon also supports V2SImode, etc. listed in the clause below. */
26724 if (TARGET_NEON && (mode == V2SFmode || mode == V4SImode || mode == V8HImode
26725 || mode == V4HFmode || mode == V16QImode || mode == V4SFmode
26726 || mode == V2DImode || mode == V8HFmode))
26727 return true;
26729 if ((TARGET_NEON || TARGET_IWMMXT)
26730 && ((mode == V2SImode)
26731 || (mode == V4HImode)
26732 || (mode == V8QImode)))
26733 return true;
26735 if (TARGET_INT_SIMD && (mode == V4UQQmode || mode == V4QQmode
26736 || mode == V2UHQmode || mode == V2HQmode || mode == V2UHAmode
26737 || mode == V2HAmode))
26738 return true;
26740 return false;
26743 /* Implements target hook array_mode_supported_p. */
26745 static bool
26746 arm_array_mode_supported_p (machine_mode mode,
26747 unsigned HOST_WIDE_INT nelems)
26749 if (TARGET_NEON
26750 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
26751 && (nelems >= 2 && nelems <= 4))
26752 return true;
26754 return false;
26757 /* Use the option -mvectorize-with-neon-double to override the use of quardword
26758 registers when autovectorizing for Neon, at least until multiple vector
26759 widths are supported properly by the middle-end. */
26761 static machine_mode
26762 arm_preferred_simd_mode (machine_mode mode)
26764 if (TARGET_NEON)
26765 switch (mode)
26767 case SFmode:
26768 return TARGET_NEON_VECTORIZE_DOUBLE ? V2SFmode : V4SFmode;
26769 case SImode:
26770 return TARGET_NEON_VECTORIZE_DOUBLE ? V2SImode : V4SImode;
26771 case HImode:
26772 return TARGET_NEON_VECTORIZE_DOUBLE ? V4HImode : V8HImode;
26773 case QImode:
26774 return TARGET_NEON_VECTORIZE_DOUBLE ? V8QImode : V16QImode;
26775 case DImode:
26776 if (!TARGET_NEON_VECTORIZE_DOUBLE)
26777 return V2DImode;
26778 break;
26780 default:;
26783 if (TARGET_REALLY_IWMMXT)
26784 switch (mode)
26786 case SImode:
26787 return V2SImode;
26788 case HImode:
26789 return V4HImode;
26790 case QImode:
26791 return V8QImode;
26793 default:;
26796 return word_mode;
26799 /* Implement TARGET_CLASS_LIKELY_SPILLED_P.
26801 We need to define this for LO_REGS on Thumb-1. Otherwise we can end up
26802 using r0-r4 for function arguments, r7 for the stack frame and don't have
26803 enough left over to do doubleword arithmetic. For Thumb-2 all the
26804 potentially problematic instructions accept high registers so this is not
26805 necessary. Care needs to be taken to avoid adding new Thumb-2 patterns
26806 that require many low registers. */
26807 static bool
26808 arm_class_likely_spilled_p (reg_class_t rclass)
26810 if ((TARGET_THUMB1 && rclass == LO_REGS)
26811 || rclass == CC_REG)
26812 return true;
26814 return false;
26817 /* Implements target hook small_register_classes_for_mode_p. */
26818 bool
26819 arm_small_register_classes_for_mode_p (machine_mode mode ATTRIBUTE_UNUSED)
26821 return TARGET_THUMB1;
26824 /* Implement TARGET_SHIFT_TRUNCATION_MASK. SImode shifts use normal
26825 ARM insns and therefore guarantee that the shift count is modulo 256.
26826 DImode shifts (those implemented by lib1funcs.S or by optabs.c)
26827 guarantee no particular behavior for out-of-range counts. */
26829 static unsigned HOST_WIDE_INT
26830 arm_shift_truncation_mask (machine_mode mode)
26832 return mode == SImode ? 255 : 0;
26836 /* Map internal gcc register numbers to DWARF2 register numbers. */
26838 unsigned int
26839 arm_dbx_register_number (unsigned int regno)
26841 if (regno < 16)
26842 return regno;
26844 if (IS_VFP_REGNUM (regno))
26846 /* See comment in arm_dwarf_register_span. */
26847 if (VFP_REGNO_OK_FOR_SINGLE (regno))
26848 return 64 + regno - FIRST_VFP_REGNUM;
26849 else
26850 return 256 + (regno - FIRST_VFP_REGNUM) / 2;
26853 if (IS_IWMMXT_GR_REGNUM (regno))
26854 return 104 + regno - FIRST_IWMMXT_GR_REGNUM;
26856 if (IS_IWMMXT_REGNUM (regno))
26857 return 112 + regno - FIRST_IWMMXT_REGNUM;
26859 return DWARF_FRAME_REGISTERS;
26862 /* Dwarf models VFPv3 registers as 32 64-bit registers.
26863 GCC models tham as 64 32-bit registers, so we need to describe this to
26864 the DWARF generation code. Other registers can use the default. */
26865 static rtx
26866 arm_dwarf_register_span (rtx rtl)
26868 machine_mode mode;
26869 unsigned regno;
26870 rtx parts[16];
26871 int nregs;
26872 int i;
26874 regno = REGNO (rtl);
26875 if (!IS_VFP_REGNUM (regno))
26876 return NULL_RTX;
26878 /* XXX FIXME: The EABI defines two VFP register ranges:
26879 64-95: Legacy VFPv2 numbering for S0-S31 (obsolescent)
26880 256-287: D0-D31
26881 The recommended encoding for S0-S31 is a DW_OP_bit_piece of the
26882 corresponding D register. Until GDB supports this, we shall use the
26883 legacy encodings. We also use these encodings for D0-D15 for
26884 compatibility with older debuggers. */
26885 mode = GET_MODE (rtl);
26886 if (GET_MODE_SIZE (mode) < 8)
26887 return NULL_RTX;
26889 if (VFP_REGNO_OK_FOR_SINGLE (regno))
26891 nregs = GET_MODE_SIZE (mode) / 4;
26892 for (i = 0; i < nregs; i += 2)
26893 if (TARGET_BIG_END)
26895 parts[i] = gen_rtx_REG (SImode, regno + i + 1);
26896 parts[i + 1] = gen_rtx_REG (SImode, regno + i);
26898 else
26900 parts[i] = gen_rtx_REG (SImode, regno + i);
26901 parts[i + 1] = gen_rtx_REG (SImode, regno + i + 1);
26904 else
26906 nregs = GET_MODE_SIZE (mode) / 8;
26907 for (i = 0; i < nregs; i++)
26908 parts[i] = gen_rtx_REG (DImode, regno + i);
26911 return gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (nregs , parts));
26914 #if ARM_UNWIND_INFO
26915 /* Emit unwind directives for a store-multiple instruction or stack pointer
26916 push during alignment.
26917 These should only ever be generated by the function prologue code, so
26918 expect them to have a particular form.
26919 The store-multiple instruction sometimes pushes pc as the last register,
26920 although it should not be tracked into unwind information, or for -Os
26921 sometimes pushes some dummy registers before first register that needs
26922 to be tracked in unwind information; such dummy registers are there just
26923 to avoid separate stack adjustment, and will not be restored in the
26924 epilogue. */
26926 static void
26927 arm_unwind_emit_sequence (FILE * asm_out_file, rtx p)
26929 int i;
26930 HOST_WIDE_INT offset;
26931 HOST_WIDE_INT nregs;
26932 int reg_size;
26933 unsigned reg;
26934 unsigned lastreg;
26935 unsigned padfirst = 0, padlast = 0;
26936 rtx e;
26938 e = XVECEXP (p, 0, 0);
26939 gcc_assert (GET_CODE (e) == SET);
26941 /* First insn will adjust the stack pointer. */
26942 gcc_assert (GET_CODE (e) == SET
26943 && REG_P (SET_DEST (e))
26944 && REGNO (SET_DEST (e)) == SP_REGNUM
26945 && GET_CODE (SET_SRC (e)) == PLUS);
26947 offset = -INTVAL (XEXP (SET_SRC (e), 1));
26948 nregs = XVECLEN (p, 0) - 1;
26949 gcc_assert (nregs);
26951 reg = REGNO (SET_SRC (XVECEXP (p, 0, 1)));
26952 if (reg < 16)
26954 /* For -Os dummy registers can be pushed at the beginning to
26955 avoid separate stack pointer adjustment. */
26956 e = XVECEXP (p, 0, 1);
26957 e = XEXP (SET_DEST (e), 0);
26958 if (GET_CODE (e) == PLUS)
26959 padfirst = INTVAL (XEXP (e, 1));
26960 gcc_assert (padfirst == 0 || optimize_size);
26961 /* The function prologue may also push pc, but not annotate it as it is
26962 never restored. We turn this into a stack pointer adjustment. */
26963 e = XVECEXP (p, 0, nregs);
26964 e = XEXP (SET_DEST (e), 0);
26965 if (GET_CODE (e) == PLUS)
26966 padlast = offset - INTVAL (XEXP (e, 1)) - 4;
26967 else
26968 padlast = offset - 4;
26969 gcc_assert (padlast == 0 || padlast == 4);
26970 if (padlast == 4)
26971 fprintf (asm_out_file, "\t.pad #4\n");
26972 reg_size = 4;
26973 fprintf (asm_out_file, "\t.save {");
26975 else if (IS_VFP_REGNUM (reg))
26977 reg_size = 8;
26978 fprintf (asm_out_file, "\t.vsave {");
26980 else
26981 /* Unknown register type. */
26982 gcc_unreachable ();
26984 /* If the stack increment doesn't match the size of the saved registers,
26985 something has gone horribly wrong. */
26986 gcc_assert (offset == padfirst + nregs * reg_size + padlast);
26988 offset = padfirst;
26989 lastreg = 0;
26990 /* The remaining insns will describe the stores. */
26991 for (i = 1; i <= nregs; i++)
26993 /* Expect (set (mem <addr>) (reg)).
26994 Where <addr> is (reg:SP) or (plus (reg:SP) (const_int)). */
26995 e = XVECEXP (p, 0, i);
26996 gcc_assert (GET_CODE (e) == SET
26997 && MEM_P (SET_DEST (e))
26998 && REG_P (SET_SRC (e)));
27000 reg = REGNO (SET_SRC (e));
27001 gcc_assert (reg >= lastreg);
27003 if (i != 1)
27004 fprintf (asm_out_file, ", ");
27005 /* We can't use %r for vfp because we need to use the
27006 double precision register names. */
27007 if (IS_VFP_REGNUM (reg))
27008 asm_fprintf (asm_out_file, "d%d", (reg - FIRST_VFP_REGNUM) / 2);
27009 else
27010 asm_fprintf (asm_out_file, "%r", reg);
27012 if (flag_checking)
27014 /* Check that the addresses are consecutive. */
27015 e = XEXP (SET_DEST (e), 0);
27016 if (GET_CODE (e) == PLUS)
27017 gcc_assert (REG_P (XEXP (e, 0))
27018 && REGNO (XEXP (e, 0)) == SP_REGNUM
27019 && CONST_INT_P (XEXP (e, 1))
27020 && offset == INTVAL (XEXP (e, 1)));
27021 else
27022 gcc_assert (i == 1
27023 && REG_P (e)
27024 && REGNO (e) == SP_REGNUM);
27025 offset += reg_size;
27028 fprintf (asm_out_file, "}\n");
27029 if (padfirst)
27030 fprintf (asm_out_file, "\t.pad #%d\n", padfirst);
27033 /* Emit unwind directives for a SET. */
27035 static void
27036 arm_unwind_emit_set (FILE * asm_out_file, rtx p)
27038 rtx e0;
27039 rtx e1;
27040 unsigned reg;
27042 e0 = XEXP (p, 0);
27043 e1 = XEXP (p, 1);
27044 switch (GET_CODE (e0))
27046 case MEM:
27047 /* Pushing a single register. */
27048 if (GET_CODE (XEXP (e0, 0)) != PRE_DEC
27049 || !REG_P (XEXP (XEXP (e0, 0), 0))
27050 || REGNO (XEXP (XEXP (e0, 0), 0)) != SP_REGNUM)
27051 abort ();
27053 asm_fprintf (asm_out_file, "\t.save ");
27054 if (IS_VFP_REGNUM (REGNO (e1)))
27055 asm_fprintf(asm_out_file, "{d%d}\n",
27056 (REGNO (e1) - FIRST_VFP_REGNUM) / 2);
27057 else
27058 asm_fprintf(asm_out_file, "{%r}\n", REGNO (e1));
27059 break;
27061 case REG:
27062 if (REGNO (e0) == SP_REGNUM)
27064 /* A stack increment. */
27065 if (GET_CODE (e1) != PLUS
27066 || !REG_P (XEXP (e1, 0))
27067 || REGNO (XEXP (e1, 0)) != SP_REGNUM
27068 || !CONST_INT_P (XEXP (e1, 1)))
27069 abort ();
27071 asm_fprintf (asm_out_file, "\t.pad #%wd\n",
27072 -INTVAL (XEXP (e1, 1)));
27074 else if (REGNO (e0) == HARD_FRAME_POINTER_REGNUM)
27076 HOST_WIDE_INT offset;
27078 if (GET_CODE (e1) == PLUS)
27080 if (!REG_P (XEXP (e1, 0))
27081 || !CONST_INT_P (XEXP (e1, 1)))
27082 abort ();
27083 reg = REGNO (XEXP (e1, 0));
27084 offset = INTVAL (XEXP (e1, 1));
27085 asm_fprintf (asm_out_file, "\t.setfp %r, %r, #%wd\n",
27086 HARD_FRAME_POINTER_REGNUM, reg,
27087 offset);
27089 else if (REG_P (e1))
27091 reg = REGNO (e1);
27092 asm_fprintf (asm_out_file, "\t.setfp %r, %r\n",
27093 HARD_FRAME_POINTER_REGNUM, reg);
27095 else
27096 abort ();
27098 else if (REG_P (e1) && REGNO (e1) == SP_REGNUM)
27100 /* Move from sp to reg. */
27101 asm_fprintf (asm_out_file, "\t.movsp %r\n", REGNO (e0));
27103 else if (GET_CODE (e1) == PLUS
27104 && REG_P (XEXP (e1, 0))
27105 && REGNO (XEXP (e1, 0)) == SP_REGNUM
27106 && CONST_INT_P (XEXP (e1, 1)))
27108 /* Set reg to offset from sp. */
27109 asm_fprintf (asm_out_file, "\t.movsp %r, #%d\n",
27110 REGNO (e0), (int)INTVAL(XEXP (e1, 1)));
27112 else
27113 abort ();
27114 break;
27116 default:
27117 abort ();
27122 /* Emit unwind directives for the given insn. */
27124 static void
27125 arm_unwind_emit (FILE * asm_out_file, rtx_insn *insn)
27127 rtx note, pat;
27128 bool handled_one = false;
27130 if (arm_except_unwind_info (&global_options) != UI_TARGET)
27131 return;
27133 if (!(flag_unwind_tables || crtl->uses_eh_lsda)
27134 && (TREE_NOTHROW (current_function_decl)
27135 || crtl->all_throwers_are_sibcalls))
27136 return;
27138 if (NOTE_P (insn) || !RTX_FRAME_RELATED_P (insn))
27139 return;
27141 for (note = REG_NOTES (insn); note ; note = XEXP (note, 1))
27143 switch (REG_NOTE_KIND (note))
27145 case REG_FRAME_RELATED_EXPR:
27146 pat = XEXP (note, 0);
27147 goto found;
27149 case REG_CFA_REGISTER:
27150 pat = XEXP (note, 0);
27151 if (pat == NULL)
27153 pat = PATTERN (insn);
27154 if (GET_CODE (pat) == PARALLEL)
27155 pat = XVECEXP (pat, 0, 0);
27158 /* Only emitted for IS_STACKALIGN re-alignment. */
27160 rtx dest, src;
27161 unsigned reg;
27163 src = SET_SRC (pat);
27164 dest = SET_DEST (pat);
27166 gcc_assert (src == stack_pointer_rtx);
27167 reg = REGNO (dest);
27168 asm_fprintf (asm_out_file, "\t.unwind_raw 0, 0x%x @ vsp = r%d\n",
27169 reg + 0x90, reg);
27171 handled_one = true;
27172 break;
27174 /* The INSN is generated in epilogue. It is set as RTX_FRAME_RELATED_P
27175 to get correct dwarf information for shrink-wrap. We should not
27176 emit unwind information for it because these are used either for
27177 pretend arguments or notes to adjust sp and restore registers from
27178 stack. */
27179 case REG_CFA_DEF_CFA:
27180 case REG_CFA_ADJUST_CFA:
27181 case REG_CFA_RESTORE:
27182 return;
27184 case REG_CFA_EXPRESSION:
27185 case REG_CFA_OFFSET:
27186 /* ??? Only handling here what we actually emit. */
27187 gcc_unreachable ();
27189 default:
27190 break;
27193 if (handled_one)
27194 return;
27195 pat = PATTERN (insn);
27196 found:
27198 switch (GET_CODE (pat))
27200 case SET:
27201 arm_unwind_emit_set (asm_out_file, pat);
27202 break;
27204 case SEQUENCE:
27205 /* Store multiple. */
27206 arm_unwind_emit_sequence (asm_out_file, pat);
27207 break;
27209 default:
27210 abort();
27215 /* Output a reference from a function exception table to the type_info
27216 object X. The EABI specifies that the symbol should be relocated by
27217 an R_ARM_TARGET2 relocation. */
27219 static bool
27220 arm_output_ttype (rtx x)
27222 fputs ("\t.word\t", asm_out_file);
27223 output_addr_const (asm_out_file, x);
27224 /* Use special relocations for symbol references. */
27225 if (!CONST_INT_P (x))
27226 fputs ("(TARGET2)", asm_out_file);
27227 fputc ('\n', asm_out_file);
27229 return TRUE;
27232 /* Implement TARGET_ASM_EMIT_EXCEPT_PERSONALITY. */
27234 static void
27235 arm_asm_emit_except_personality (rtx personality)
27237 fputs ("\t.personality\t", asm_out_file);
27238 output_addr_const (asm_out_file, personality);
27239 fputc ('\n', asm_out_file);
27241 #endif /* ARM_UNWIND_INFO */
27243 /* Implement TARGET_ASM_INITIALIZE_SECTIONS. */
27245 static void
27246 arm_asm_init_sections (void)
27248 #if ARM_UNWIND_INFO
27249 exception_section = get_unnamed_section (0, output_section_asm_op,
27250 "\t.handlerdata");
27251 #endif /* ARM_UNWIND_INFO */
27253 #ifdef OBJECT_FORMAT_ELF
27254 if (target_pure_code)
27255 text_section->unnamed.data = "\t.section .text,\"0x20000006\",%progbits";
27256 #endif
27259 /* Output unwind directives for the start/end of a function. */
27261 void
27262 arm_output_fn_unwind (FILE * f, bool prologue)
27264 if (arm_except_unwind_info (&global_options) != UI_TARGET)
27265 return;
27267 if (prologue)
27268 fputs ("\t.fnstart\n", f);
27269 else
27271 /* If this function will never be unwound, then mark it as such.
27272 The came condition is used in arm_unwind_emit to suppress
27273 the frame annotations. */
27274 if (!(flag_unwind_tables || crtl->uses_eh_lsda)
27275 && (TREE_NOTHROW (current_function_decl)
27276 || crtl->all_throwers_are_sibcalls))
27277 fputs("\t.cantunwind\n", f);
27279 fputs ("\t.fnend\n", f);
27283 static bool
27284 arm_emit_tls_decoration (FILE *fp, rtx x)
27286 enum tls_reloc reloc;
27287 rtx val;
27289 val = XVECEXP (x, 0, 0);
27290 reloc = (enum tls_reloc) INTVAL (XVECEXP (x, 0, 1));
27292 output_addr_const (fp, val);
27294 switch (reloc)
27296 case TLS_GD32:
27297 fputs ("(tlsgd)", fp);
27298 break;
27299 case TLS_LDM32:
27300 fputs ("(tlsldm)", fp);
27301 break;
27302 case TLS_LDO32:
27303 fputs ("(tlsldo)", fp);
27304 break;
27305 case TLS_IE32:
27306 fputs ("(gottpoff)", fp);
27307 break;
27308 case TLS_LE32:
27309 fputs ("(tpoff)", fp);
27310 break;
27311 case TLS_DESCSEQ:
27312 fputs ("(tlsdesc)", fp);
27313 break;
27314 default:
27315 gcc_unreachable ();
27318 switch (reloc)
27320 case TLS_GD32:
27321 case TLS_LDM32:
27322 case TLS_IE32:
27323 case TLS_DESCSEQ:
27324 fputs (" + (. - ", fp);
27325 output_addr_const (fp, XVECEXP (x, 0, 2));
27326 /* For DESCSEQ the 3rd operand encodes thumbness, and is added */
27327 fputs (reloc == TLS_DESCSEQ ? " + " : " - ", fp);
27328 output_addr_const (fp, XVECEXP (x, 0, 3));
27329 fputc (')', fp);
27330 break;
27331 default:
27332 break;
27335 return TRUE;
27338 /* ARM implementation of TARGET_ASM_OUTPUT_DWARF_DTPREL. */
27340 static void
27341 arm_output_dwarf_dtprel (FILE *file, int size, rtx x)
27343 gcc_assert (size == 4);
27344 fputs ("\t.word\t", file);
27345 output_addr_const (file, x);
27346 fputs ("(tlsldo)", file);
27349 /* Implement TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
27351 static bool
27352 arm_output_addr_const_extra (FILE *fp, rtx x)
27354 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
27355 return arm_emit_tls_decoration (fp, x);
27356 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_PIC_LABEL)
27358 char label[256];
27359 int labelno = INTVAL (XVECEXP (x, 0, 0));
27361 ASM_GENERATE_INTERNAL_LABEL (label, "LPIC", labelno);
27362 assemble_name_raw (fp, label);
27364 return TRUE;
27366 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_GOTSYM_OFF)
27368 assemble_name (fp, "_GLOBAL_OFFSET_TABLE_");
27369 if (GOT_PCREL)
27370 fputs ("+.", fp);
27371 fputs ("-(", fp);
27372 output_addr_const (fp, XVECEXP (x, 0, 0));
27373 fputc (')', fp);
27374 return TRUE;
27376 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_SYMBOL_OFFSET)
27378 output_addr_const (fp, XVECEXP (x, 0, 0));
27379 if (GOT_PCREL)
27380 fputs ("+.", fp);
27381 fputs ("-(", fp);
27382 output_addr_const (fp, XVECEXP (x, 0, 1));
27383 fputc (')', fp);
27384 return TRUE;
27386 else if (GET_CODE (x) == CONST_VECTOR)
27387 return arm_emit_vector_const (fp, x);
27389 return FALSE;
27392 /* Output assembly for a shift instruction.
27393 SET_FLAGS determines how the instruction modifies the condition codes.
27394 0 - Do not set condition codes.
27395 1 - Set condition codes.
27396 2 - Use smallest instruction. */
27397 const char *
27398 arm_output_shift(rtx * operands, int set_flags)
27400 char pattern[100];
27401 static const char flag_chars[3] = {'?', '.', '!'};
27402 const char *shift;
27403 HOST_WIDE_INT val;
27404 char c;
27406 c = flag_chars[set_flags];
27407 shift = shift_op(operands[3], &val);
27408 if (shift)
27410 if (val != -1)
27411 operands[2] = GEN_INT(val);
27412 sprintf (pattern, "%s%%%c\t%%0, %%1, %%2", shift, c);
27414 else
27415 sprintf (pattern, "mov%%%c\t%%0, %%1", c);
27417 output_asm_insn (pattern, operands);
27418 return "";
27421 /* Output assembly for a WMMX immediate shift instruction. */
27422 const char *
27423 arm_output_iwmmxt_shift_immediate (const char *insn_name, rtx *operands, bool wror_or_wsra)
27425 int shift = INTVAL (operands[2]);
27426 char templ[50];
27427 machine_mode opmode = GET_MODE (operands[0]);
27429 gcc_assert (shift >= 0);
27431 /* If the shift value in the register versions is > 63 (for D qualifier),
27432 31 (for W qualifier) or 15 (for H qualifier). */
27433 if (((opmode == V4HImode) && (shift > 15))
27434 || ((opmode == V2SImode) && (shift > 31))
27435 || ((opmode == DImode) && (shift > 63)))
27437 if (wror_or_wsra)
27439 sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, 32);
27440 output_asm_insn (templ, operands);
27441 if (opmode == DImode)
27443 sprintf (templ, "%s\t%%0, %%0, #%d", insn_name, 32);
27444 output_asm_insn (templ, operands);
27447 else
27449 /* The destination register will contain all zeros. */
27450 sprintf (templ, "wzero\t%%0");
27451 output_asm_insn (templ, operands);
27453 return "";
27456 if ((opmode == DImode) && (shift > 32))
27458 sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, 32);
27459 output_asm_insn (templ, operands);
27460 sprintf (templ, "%s\t%%0, %%0, #%d", insn_name, shift - 32);
27461 output_asm_insn (templ, operands);
27463 else
27465 sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, shift);
27466 output_asm_insn (templ, operands);
27468 return "";
27471 /* Output assembly for a WMMX tinsr instruction. */
27472 const char *
27473 arm_output_iwmmxt_tinsr (rtx *operands)
27475 int mask = INTVAL (operands[3]);
27476 int i;
27477 char templ[50];
27478 int units = mode_nunits[GET_MODE (operands[0])];
27479 gcc_assert ((mask & (mask - 1)) == 0);
27480 for (i = 0; i < units; ++i)
27482 if ((mask & 0x01) == 1)
27484 break;
27486 mask >>= 1;
27488 gcc_assert (i < units);
27490 switch (GET_MODE (operands[0]))
27492 case V8QImode:
27493 sprintf (templ, "tinsrb%%?\t%%0, %%2, #%d", i);
27494 break;
27495 case V4HImode:
27496 sprintf (templ, "tinsrh%%?\t%%0, %%2, #%d", i);
27497 break;
27498 case V2SImode:
27499 sprintf (templ, "tinsrw%%?\t%%0, %%2, #%d", i);
27500 break;
27501 default:
27502 gcc_unreachable ();
27503 break;
27505 output_asm_insn (templ, operands);
27507 return "";
27510 /* Output a Thumb-1 casesi dispatch sequence. */
27511 const char *
27512 thumb1_output_casesi (rtx *operands)
27514 rtx diff_vec = PATTERN (NEXT_INSN (as_a <rtx_insn *> (operands[0])));
27516 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
27518 switch (GET_MODE(diff_vec))
27520 case QImode:
27521 return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ?
27522 "bl\t%___gnu_thumb1_case_uqi" : "bl\t%___gnu_thumb1_case_sqi");
27523 case HImode:
27524 return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ?
27525 "bl\t%___gnu_thumb1_case_uhi" : "bl\t%___gnu_thumb1_case_shi");
27526 case SImode:
27527 return "bl\t%___gnu_thumb1_case_si";
27528 default:
27529 gcc_unreachable ();
27533 /* Output a Thumb-2 casesi instruction. */
27534 const char *
27535 thumb2_output_casesi (rtx *operands)
27537 rtx diff_vec = PATTERN (NEXT_INSN (as_a <rtx_insn *> (operands[2])));
27539 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
27541 output_asm_insn ("cmp\t%0, %1", operands);
27542 output_asm_insn ("bhi\t%l3", operands);
27543 switch (GET_MODE(diff_vec))
27545 case QImode:
27546 return "tbb\t[%|pc, %0]";
27547 case HImode:
27548 return "tbh\t[%|pc, %0, lsl #1]";
27549 case SImode:
27550 if (flag_pic)
27552 output_asm_insn ("adr\t%4, %l2", operands);
27553 output_asm_insn ("ldr\t%5, [%4, %0, lsl #2]", operands);
27554 output_asm_insn ("add\t%4, %4, %5", operands);
27555 return "bx\t%4";
27557 else
27559 output_asm_insn ("adr\t%4, %l2", operands);
27560 return "ldr\t%|pc, [%4, %0, lsl #2]";
27562 default:
27563 gcc_unreachable ();
27567 /* Implement TARGET_SCHED_ISSUE_RATE. Lookup the issue rate in the
27568 per-core tuning structs. */
27569 static int
27570 arm_issue_rate (void)
27572 return current_tune->issue_rate;
27575 /* Return how many instructions should scheduler lookahead to choose the
27576 best one. */
27577 static int
27578 arm_first_cycle_multipass_dfa_lookahead (void)
27580 int issue_rate = arm_issue_rate ();
27582 return issue_rate > 1 && !sched_fusion ? issue_rate : 0;
27585 /* Enable modeling of L2 auto-prefetcher. */
27586 static int
27587 arm_first_cycle_multipass_dfa_lookahead_guard (rtx_insn *insn, int ready_index)
27589 return autopref_multipass_dfa_lookahead_guard (insn, ready_index);
27592 const char *
27593 arm_mangle_type (const_tree type)
27595 /* The ARM ABI documents (10th October 2008) say that "__va_list"
27596 has to be managled as if it is in the "std" namespace. */
27597 if (TARGET_AAPCS_BASED
27598 && lang_hooks.types_compatible_p (CONST_CAST_TREE (type), va_list_type))
27599 return "St9__va_list";
27601 /* Half-precision float. */
27602 if (TREE_CODE (type) == REAL_TYPE && TYPE_PRECISION (type) == 16)
27603 return "Dh";
27605 /* Try mangling as a Neon type, TYPE_NAME is non-NULL if this is a
27606 builtin type. */
27607 if (TYPE_NAME (type) != NULL)
27608 return arm_mangle_builtin_type (type);
27610 /* Use the default mangling. */
27611 return NULL;
27614 /* Order of allocation of core registers for Thumb: this allocation is
27615 written over the corresponding initial entries of the array
27616 initialized with REG_ALLOC_ORDER. We allocate all low registers
27617 first. Saving and restoring a low register is usually cheaper than
27618 using a call-clobbered high register. */
27620 static const int thumb_core_reg_alloc_order[] =
27622 3, 2, 1, 0, 4, 5, 6, 7,
27623 12, 14, 8, 9, 10, 11
27626 /* Adjust register allocation order when compiling for Thumb. */
27628 void
27629 arm_order_regs_for_local_alloc (void)
27631 const int arm_reg_alloc_order[] = REG_ALLOC_ORDER;
27632 memcpy(reg_alloc_order, arm_reg_alloc_order, sizeof (reg_alloc_order));
27633 if (TARGET_THUMB)
27634 memcpy (reg_alloc_order, thumb_core_reg_alloc_order,
27635 sizeof (thumb_core_reg_alloc_order));
27638 /* Implement TARGET_FRAME_POINTER_REQUIRED. */
27640 bool
27641 arm_frame_pointer_required (void)
27643 if (SUBTARGET_FRAME_POINTER_REQUIRED)
27644 return true;
27646 /* If the function receives nonlocal gotos, it needs to save the frame
27647 pointer in the nonlocal_goto_save_area object. */
27648 if (cfun->has_nonlocal_label)
27649 return true;
27651 /* The frame pointer is required for non-leaf APCS frames. */
27652 if (TARGET_ARM && TARGET_APCS_FRAME && !crtl->is_leaf)
27653 return true;
27655 /* If we are probing the stack in the prologue, we will have a faulting
27656 instruction prior to the stack adjustment and this requires a frame
27657 pointer if we want to catch the exception using the EABI unwinder. */
27658 if (!IS_INTERRUPT (arm_current_func_type ())
27659 && flag_stack_check == STATIC_BUILTIN_STACK_CHECK
27660 && arm_except_unwind_info (&global_options) == UI_TARGET
27661 && cfun->can_throw_non_call_exceptions)
27663 HOST_WIDE_INT size = get_frame_size ();
27665 /* That's irrelevant if there is no stack adjustment. */
27666 if (size <= 0)
27667 return false;
27669 /* That's relevant only if there is a stack probe. */
27670 if (crtl->is_leaf && !cfun->calls_alloca)
27672 /* We don't have the final size of the frame so adjust. */
27673 size += 32 * UNITS_PER_WORD;
27674 if (size > PROBE_INTERVAL && size > STACK_CHECK_PROTECT)
27675 return true;
27677 else
27678 return true;
27681 return false;
27684 /* Only thumb1 can't support conditional execution, so return true if
27685 the target is not thumb1. */
27686 static bool
27687 arm_have_conditional_execution (void)
27689 return !TARGET_THUMB1;
27692 /* The AAPCS sets the maximum alignment of a vector to 64 bits. */
27693 static HOST_WIDE_INT
27694 arm_vector_alignment (const_tree type)
27696 HOST_WIDE_INT align = tree_to_shwi (TYPE_SIZE (type));
27698 if (TARGET_AAPCS_BASED)
27699 align = MIN (align, 64);
27701 return align;
27704 static unsigned int
27705 arm_autovectorize_vector_sizes (void)
27707 return TARGET_NEON_VECTORIZE_DOUBLE ? 0 : (16 | 8);
27710 static bool
27711 arm_vector_alignment_reachable (const_tree type, bool is_packed)
27713 /* Vectors which aren't in packed structures will not be less aligned than
27714 the natural alignment of their element type, so this is safe. */
27715 if (TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access)
27716 return !is_packed;
27718 return default_builtin_vector_alignment_reachable (type, is_packed);
27721 static bool
27722 arm_builtin_support_vector_misalignment (machine_mode mode,
27723 const_tree type, int misalignment,
27724 bool is_packed)
27726 if (TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access)
27728 HOST_WIDE_INT align = TYPE_ALIGN_UNIT (type);
27730 if (is_packed)
27731 return align == 1;
27733 /* If the misalignment is unknown, we should be able to handle the access
27734 so long as it is not to a member of a packed data structure. */
27735 if (misalignment == -1)
27736 return true;
27738 /* Return true if the misalignment is a multiple of the natural alignment
27739 of the vector's element type. This is probably always going to be
27740 true in practice, since we've already established that this isn't a
27741 packed access. */
27742 return ((misalignment % align) == 0);
27745 return default_builtin_support_vector_misalignment (mode, type, misalignment,
27746 is_packed);
27749 static void
27750 arm_conditional_register_usage (void)
27752 int regno;
27754 if (TARGET_THUMB1 && optimize_size)
27756 /* When optimizing for size on Thumb-1, it's better not
27757 to use the HI regs, because of the overhead of
27758 stacking them. */
27759 for (regno = FIRST_HI_REGNUM; regno <= LAST_HI_REGNUM; ++regno)
27760 fixed_regs[regno] = call_used_regs[regno] = 1;
27763 /* The link register can be clobbered by any branch insn,
27764 but we have no way to track that at present, so mark
27765 it as unavailable. */
27766 if (TARGET_THUMB1)
27767 fixed_regs[LR_REGNUM] = call_used_regs[LR_REGNUM] = 1;
27769 if (TARGET_32BIT && TARGET_HARD_FLOAT)
27771 /* VFPv3 registers are disabled when earlier VFP
27772 versions are selected due to the definition of
27773 LAST_VFP_REGNUM. */
27774 for (regno = FIRST_VFP_REGNUM;
27775 regno <= LAST_VFP_REGNUM; ++ regno)
27777 fixed_regs[regno] = 0;
27778 call_used_regs[regno] = regno < FIRST_VFP_REGNUM + 16
27779 || regno >= FIRST_VFP_REGNUM + 32;
27783 if (TARGET_REALLY_IWMMXT)
27785 regno = FIRST_IWMMXT_GR_REGNUM;
27786 /* The 2002/10/09 revision of the XScale ABI has wCG0
27787 and wCG1 as call-preserved registers. The 2002/11/21
27788 revision changed this so that all wCG registers are
27789 scratch registers. */
27790 for (regno = FIRST_IWMMXT_GR_REGNUM;
27791 regno <= LAST_IWMMXT_GR_REGNUM; ++ regno)
27792 fixed_regs[regno] = 0;
27793 /* The XScale ABI has wR0 - wR9 as scratch registers,
27794 the rest as call-preserved registers. */
27795 for (regno = FIRST_IWMMXT_REGNUM;
27796 regno <= LAST_IWMMXT_REGNUM; ++ regno)
27798 fixed_regs[regno] = 0;
27799 call_used_regs[regno] = regno < FIRST_IWMMXT_REGNUM + 10;
27803 if ((unsigned) PIC_OFFSET_TABLE_REGNUM != INVALID_REGNUM)
27805 fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
27806 call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
27808 else if (TARGET_APCS_STACK)
27810 fixed_regs[10] = 1;
27811 call_used_regs[10] = 1;
27813 /* -mcaller-super-interworking reserves r11 for calls to
27814 _interwork_r11_call_via_rN(). Making the register global
27815 is an easy way of ensuring that it remains valid for all
27816 calls. */
27817 if (TARGET_APCS_FRAME || TARGET_CALLER_INTERWORKING
27818 || TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME)
27820 fixed_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
27821 call_used_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
27822 if (TARGET_CALLER_INTERWORKING)
27823 global_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
27825 SUBTARGET_CONDITIONAL_REGISTER_USAGE
27828 static reg_class_t
27829 arm_preferred_rename_class (reg_class_t rclass)
27831 /* Thumb-2 instructions using LO_REGS may be smaller than instructions
27832 using GENERIC_REGS. During register rename pass, we prefer LO_REGS,
27833 and code size can be reduced. */
27834 if (TARGET_THUMB2 && rclass == GENERAL_REGS)
27835 return LO_REGS;
27836 else
27837 return NO_REGS;
27840 /* Compute the attribute "length" of insn "*push_multi".
27841 So this function MUST be kept in sync with that insn pattern. */
27843 arm_attr_length_push_multi(rtx parallel_op, rtx first_op)
27845 int i, regno, hi_reg;
27846 int num_saves = XVECLEN (parallel_op, 0);
27848 /* ARM mode. */
27849 if (TARGET_ARM)
27850 return 4;
27851 /* Thumb1 mode. */
27852 if (TARGET_THUMB1)
27853 return 2;
27855 /* Thumb2 mode. */
27856 regno = REGNO (first_op);
27857 /* For PUSH/STM under Thumb2 mode, we can use 16-bit encodings if the register
27858 list is 8-bit. Normally this means all registers in the list must be
27859 LO_REGS, that is (R0 -R7). If any HI_REGS used, then we must use 32-bit
27860 encodings. There is one exception for PUSH that LR in HI_REGS can be used
27861 with 16-bit encoding. */
27862 hi_reg = (REGNO_REG_CLASS (regno) == HI_REGS) && (regno != LR_REGNUM);
27863 for (i = 1; i < num_saves && !hi_reg; i++)
27865 regno = REGNO (XEXP (XVECEXP (parallel_op, 0, i), 0));
27866 hi_reg |= (REGNO_REG_CLASS (regno) == HI_REGS) && (regno != LR_REGNUM);
27869 if (!hi_reg)
27870 return 2;
27871 return 4;
27874 /* Compute the attribute "length" of insn. Currently, this function is used
27875 for "*load_multiple_with_writeback", "*pop_multiple_with_return" and
27876 "*pop_multiple_with_writeback_and_return". OPERANDS is the toplevel PARALLEL
27877 rtx, RETURN_PC is true if OPERANDS contains return insn. WRITE_BACK_P is
27878 true if OPERANDS contains insn which explicit updates base register. */
27881 arm_attr_length_pop_multi (rtx *operands, bool return_pc, bool write_back_p)
27883 /* ARM mode. */
27884 if (TARGET_ARM)
27885 return 4;
27886 /* Thumb1 mode. */
27887 if (TARGET_THUMB1)
27888 return 2;
27890 rtx parallel_op = operands[0];
27891 /* Initialize to elements number of PARALLEL. */
27892 unsigned indx = XVECLEN (parallel_op, 0) - 1;
27893 /* Initialize the value to base register. */
27894 unsigned regno = REGNO (operands[1]);
27895 /* Skip return and write back pattern.
27896 We only need register pop pattern for later analysis. */
27897 unsigned first_indx = 0;
27898 first_indx += return_pc ? 1 : 0;
27899 first_indx += write_back_p ? 1 : 0;
27901 /* A pop operation can be done through LDM or POP. If the base register is SP
27902 and if it's with write back, then a LDM will be alias of POP. */
27903 bool pop_p = (regno == SP_REGNUM && write_back_p);
27904 bool ldm_p = !pop_p;
27906 /* Check base register for LDM. */
27907 if (ldm_p && REGNO_REG_CLASS (regno) == HI_REGS)
27908 return 4;
27910 /* Check each register in the list. */
27911 for (; indx >= first_indx; indx--)
27913 regno = REGNO (XEXP (XVECEXP (parallel_op, 0, indx), 0));
27914 /* For POP, PC in HI_REGS can be used with 16-bit encoding. See similar
27915 comment in arm_attr_length_push_multi. */
27916 if (REGNO_REG_CLASS (regno) == HI_REGS
27917 && (regno != PC_REGNUM || ldm_p))
27918 return 4;
27921 return 2;
27924 /* Compute the number of instructions emitted by output_move_double. */
27926 arm_count_output_move_double_insns (rtx *operands)
27928 int count;
27929 rtx ops[2];
27930 /* output_move_double may modify the operands array, so call it
27931 here on a copy of the array. */
27932 ops[0] = operands[0];
27933 ops[1] = operands[1];
27934 output_move_double (ops, false, &count);
27935 return count;
27939 vfp3_const_double_for_fract_bits (rtx operand)
27941 REAL_VALUE_TYPE r0;
27943 if (!CONST_DOUBLE_P (operand))
27944 return 0;
27946 r0 = *CONST_DOUBLE_REAL_VALUE (operand);
27947 if (exact_real_inverse (DFmode, &r0)
27948 && !REAL_VALUE_NEGATIVE (r0))
27950 if (exact_real_truncate (DFmode, &r0))
27952 HOST_WIDE_INT value = real_to_integer (&r0);
27953 value = value & 0xffffffff;
27954 if ((value != 0) && ( (value & (value - 1)) == 0))
27956 int ret = exact_log2 (value);
27957 gcc_assert (IN_RANGE (ret, 0, 31));
27958 return ret;
27962 return 0;
27965 /* If X is a CONST_DOUBLE with a value that is a power of 2 whose
27966 log2 is in [1, 32], return that log2. Otherwise return -1.
27967 This is used in the patterns for vcvt.s32.f32 floating-point to
27968 fixed-point conversions. */
27971 vfp3_const_double_for_bits (rtx x)
27973 const REAL_VALUE_TYPE *r;
27975 if (!CONST_DOUBLE_P (x))
27976 return -1;
27978 r = CONST_DOUBLE_REAL_VALUE (x);
27980 if (REAL_VALUE_NEGATIVE (*r)
27981 || REAL_VALUE_ISNAN (*r)
27982 || REAL_VALUE_ISINF (*r)
27983 || !real_isinteger (r, SFmode))
27984 return -1;
27986 HOST_WIDE_INT hwint = exact_log2 (real_to_integer (r));
27988 /* The exact_log2 above will have returned -1 if this is
27989 not an exact log2. */
27990 if (!IN_RANGE (hwint, 1, 32))
27991 return -1;
27993 return hwint;
27997 /* Emit a memory barrier around an atomic sequence according to MODEL. */
27999 static void
28000 arm_pre_atomic_barrier (enum memmodel model)
28002 if (need_atomic_barrier_p (model, true))
28003 emit_insn (gen_memory_barrier ());
28006 static void
28007 arm_post_atomic_barrier (enum memmodel model)
28009 if (need_atomic_barrier_p (model, false))
28010 emit_insn (gen_memory_barrier ());
28013 /* Emit the load-exclusive and store-exclusive instructions.
28014 Use acquire and release versions if necessary. */
28016 static void
28017 arm_emit_load_exclusive (machine_mode mode, rtx rval, rtx mem, bool acq)
28019 rtx (*gen) (rtx, rtx);
28021 if (acq)
28023 switch (mode)
28025 case QImode: gen = gen_arm_load_acquire_exclusiveqi; break;
28026 case HImode: gen = gen_arm_load_acquire_exclusivehi; break;
28027 case SImode: gen = gen_arm_load_acquire_exclusivesi; break;
28028 case DImode: gen = gen_arm_load_acquire_exclusivedi; break;
28029 default:
28030 gcc_unreachable ();
28033 else
28035 switch (mode)
28037 case QImode: gen = gen_arm_load_exclusiveqi; break;
28038 case HImode: gen = gen_arm_load_exclusivehi; break;
28039 case SImode: gen = gen_arm_load_exclusivesi; break;
28040 case DImode: gen = gen_arm_load_exclusivedi; break;
28041 default:
28042 gcc_unreachable ();
28046 emit_insn (gen (rval, mem));
28049 static void
28050 arm_emit_store_exclusive (machine_mode mode, rtx bval, rtx rval,
28051 rtx mem, bool rel)
28053 rtx (*gen) (rtx, rtx, rtx);
28055 if (rel)
28057 switch (mode)
28059 case QImode: gen = gen_arm_store_release_exclusiveqi; break;
28060 case HImode: gen = gen_arm_store_release_exclusivehi; break;
28061 case SImode: gen = gen_arm_store_release_exclusivesi; break;
28062 case DImode: gen = gen_arm_store_release_exclusivedi; break;
28063 default:
28064 gcc_unreachable ();
28067 else
28069 switch (mode)
28071 case QImode: gen = gen_arm_store_exclusiveqi; break;
28072 case HImode: gen = gen_arm_store_exclusivehi; break;
28073 case SImode: gen = gen_arm_store_exclusivesi; break;
28074 case DImode: gen = gen_arm_store_exclusivedi; break;
28075 default:
28076 gcc_unreachable ();
28080 emit_insn (gen (bval, rval, mem));
28083 /* Mark the previous jump instruction as unlikely. */
28085 static void
28086 emit_unlikely_jump (rtx insn)
28088 int very_unlikely = REG_BR_PROB_BASE / 100 - 1;
28090 rtx_insn *jump = emit_jump_insn (insn);
28091 add_int_reg_note (jump, REG_BR_PROB, very_unlikely);
28094 /* Expand a compare and swap pattern. */
28096 void
28097 arm_expand_compare_and_swap (rtx operands[])
28099 rtx bval, bdst, rval, mem, oldval, newval, is_weak, mod_s, mod_f, x;
28100 machine_mode mode;
28101 rtx (*gen) (rtx, rtx, rtx, rtx, rtx, rtx, rtx, rtx);
28103 bval = operands[0];
28104 rval = operands[1];
28105 mem = operands[2];
28106 oldval = operands[3];
28107 newval = operands[4];
28108 is_weak = operands[5];
28109 mod_s = operands[6];
28110 mod_f = operands[7];
28111 mode = GET_MODE (mem);
28113 /* Normally the succ memory model must be stronger than fail, but in the
28114 unlikely event of fail being ACQUIRE and succ being RELEASE we need to
28115 promote succ to ACQ_REL so that we don't lose the acquire semantics. */
28117 if (TARGET_HAVE_LDACQ
28118 && is_mm_acquire (memmodel_from_int (INTVAL (mod_f)))
28119 && is_mm_release (memmodel_from_int (INTVAL (mod_s))))
28120 mod_s = GEN_INT (MEMMODEL_ACQ_REL);
28122 switch (mode)
28124 case QImode:
28125 case HImode:
28126 /* For narrow modes, we're going to perform the comparison in SImode,
28127 so do the zero-extension now. */
28128 rval = gen_reg_rtx (SImode);
28129 oldval = convert_modes (SImode, mode, oldval, true);
28130 /* FALLTHRU */
28132 case SImode:
28133 /* Force the value into a register if needed. We waited until after
28134 the zero-extension above to do this properly. */
28135 if (!arm_add_operand (oldval, SImode))
28136 oldval = force_reg (SImode, oldval);
28137 break;
28139 case DImode:
28140 if (!cmpdi_operand (oldval, mode))
28141 oldval = force_reg (mode, oldval);
28142 break;
28144 default:
28145 gcc_unreachable ();
28148 switch (mode)
28150 case QImode: gen = gen_atomic_compare_and_swapqi_1; break;
28151 case HImode: gen = gen_atomic_compare_and_swaphi_1; break;
28152 case SImode: gen = gen_atomic_compare_and_swapsi_1; break;
28153 case DImode: gen = gen_atomic_compare_and_swapdi_1; break;
28154 default:
28155 gcc_unreachable ();
28158 bdst = TARGET_THUMB1 ? bval : gen_rtx_REG (CCmode, CC_REGNUM);
28159 emit_insn (gen (bdst, rval, mem, oldval, newval, is_weak, mod_s, mod_f));
28161 if (mode == QImode || mode == HImode)
28162 emit_move_insn (operands[1], gen_lowpart (mode, rval));
28164 /* In all cases, we arrange for success to be signaled by Z set.
28165 This arrangement allows for the boolean result to be used directly
28166 in a subsequent branch, post optimization. For Thumb-1 targets, the
28167 boolean negation of the result is also stored in bval because Thumb-1
28168 backend lacks dependency tracking for CC flag due to flag-setting not
28169 being represented at RTL level. */
28170 if (TARGET_THUMB1)
28171 emit_insn (gen_cstoresi_eq0_thumb1 (bval, bdst));
28172 else
28174 x = gen_rtx_EQ (SImode, bdst, const0_rtx);
28175 emit_insn (gen_rtx_SET (bval, x));
28179 /* Split a compare and swap pattern. It is IMPLEMENTATION DEFINED whether
28180 another memory store between the load-exclusive and store-exclusive can
28181 reset the monitor from Exclusive to Open state. This means we must wait
28182 until after reload to split the pattern, lest we get a register spill in
28183 the middle of the atomic sequence. Success of the compare and swap is
28184 indicated by the Z flag set for 32bit targets and by neg_bval being zero
28185 for Thumb-1 targets (ie. negation of the boolean value returned by
28186 atomic_compare_and_swapmode standard pattern in operand 0). */
28188 void
28189 arm_split_compare_and_swap (rtx operands[])
28191 rtx rval, mem, oldval, newval, neg_bval;
28192 machine_mode mode;
28193 enum memmodel mod_s, mod_f;
28194 bool is_weak;
28195 rtx_code_label *label1, *label2;
28196 rtx x, cond;
28198 rval = operands[1];
28199 mem = operands[2];
28200 oldval = operands[3];
28201 newval = operands[4];
28202 is_weak = (operands[5] != const0_rtx);
28203 mod_s = memmodel_from_int (INTVAL (operands[6]));
28204 mod_f = memmodel_from_int (INTVAL (operands[7]));
28205 neg_bval = TARGET_THUMB1 ? operands[0] : operands[8];
28206 mode = GET_MODE (mem);
28208 bool is_armv8_sync = arm_arch8 && is_mm_sync (mod_s);
28210 bool use_acquire = TARGET_HAVE_LDACQ
28211 && !(is_mm_relaxed (mod_s) || is_mm_consume (mod_s)
28212 || is_mm_release (mod_s));
28214 bool use_release = TARGET_HAVE_LDACQ
28215 && !(is_mm_relaxed (mod_s) || is_mm_consume (mod_s)
28216 || is_mm_acquire (mod_s));
28218 /* For ARMv8, the load-acquire is too weak for __sync memory orders. Instead,
28219 a full barrier is emitted after the store-release. */
28220 if (is_armv8_sync)
28221 use_acquire = false;
28223 /* Checks whether a barrier is needed and emits one accordingly. */
28224 if (!(use_acquire || use_release))
28225 arm_pre_atomic_barrier (mod_s);
28227 label1 = NULL;
28228 if (!is_weak)
28230 label1 = gen_label_rtx ();
28231 emit_label (label1);
28233 label2 = gen_label_rtx ();
28235 arm_emit_load_exclusive (mode, rval, mem, use_acquire);
28237 /* Z is set to 0 for 32bit targets (resp. rval set to 1) if oldval != rval,
28238 as required to communicate with arm_expand_compare_and_swap. */
28239 if (TARGET_32BIT)
28241 cond = arm_gen_compare_reg (NE, rval, oldval, neg_bval);
28242 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
28243 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
28244 gen_rtx_LABEL_REF (Pmode, label2), pc_rtx);
28245 emit_unlikely_jump (gen_rtx_SET (pc_rtx, x));
28247 else
28249 emit_move_insn (neg_bval, const1_rtx);
28250 cond = gen_rtx_NE (VOIDmode, rval, oldval);
28251 if (thumb1_cmpneg_operand (oldval, SImode))
28252 emit_unlikely_jump (gen_cbranchsi4_scratch (neg_bval, rval, oldval,
28253 label2, cond));
28254 else
28255 emit_unlikely_jump (gen_cbranchsi4_insn (cond, rval, oldval, label2));
28258 arm_emit_store_exclusive (mode, neg_bval, mem, newval, use_release);
28260 /* Weak or strong, we want EQ to be true for success, so that we
28261 match the flags that we got from the compare above. */
28262 if (TARGET_32BIT)
28264 cond = gen_rtx_REG (CCmode, CC_REGNUM);
28265 x = gen_rtx_COMPARE (CCmode, neg_bval, const0_rtx);
28266 emit_insn (gen_rtx_SET (cond, x));
28269 if (!is_weak)
28271 /* Z is set to boolean value of !neg_bval, as required to communicate
28272 with arm_expand_compare_and_swap. */
28273 x = gen_rtx_NE (VOIDmode, neg_bval, const0_rtx);
28274 emit_unlikely_jump (gen_cbranchsi4 (x, neg_bval, const0_rtx, label1));
28277 if (!is_mm_relaxed (mod_f))
28278 emit_label (label2);
28280 /* Checks whether a barrier is needed and emits one accordingly. */
28281 if (is_armv8_sync
28282 || !(use_acquire || use_release))
28283 arm_post_atomic_barrier (mod_s);
28285 if (is_mm_relaxed (mod_f))
28286 emit_label (label2);
28289 /* Split an atomic operation pattern. Operation is given by CODE and is one
28290 of PLUS, MINUS, IOR, XOR, SET (for an exchange operation) or NOT (for a nand
28291 operation). Operation is performed on the content at MEM and on VALUE
28292 following the memory model MODEL_RTX. The content at MEM before and after
28293 the operation is returned in OLD_OUT and NEW_OUT respectively while the
28294 success of the operation is returned in COND. Using a scratch register or
28295 an operand register for these determines what result is returned for that
28296 pattern. */
28298 void
28299 arm_split_atomic_op (enum rtx_code code, rtx old_out, rtx new_out, rtx mem,
28300 rtx value, rtx model_rtx, rtx cond)
28302 enum memmodel model = memmodel_from_int (INTVAL (model_rtx));
28303 machine_mode mode = GET_MODE (mem);
28304 machine_mode wmode = (mode == DImode ? DImode : SImode);
28305 rtx_code_label *label;
28306 bool all_low_regs, bind_old_new;
28307 rtx x;
28309 bool is_armv8_sync = arm_arch8 && is_mm_sync (model);
28311 bool use_acquire = TARGET_HAVE_LDACQ
28312 && !(is_mm_relaxed (model) || is_mm_consume (model)
28313 || is_mm_release (model));
28315 bool use_release = TARGET_HAVE_LDACQ
28316 && !(is_mm_relaxed (model) || is_mm_consume (model)
28317 || is_mm_acquire (model));
28319 /* For ARMv8, a load-acquire is too weak for __sync memory orders. Instead,
28320 a full barrier is emitted after the store-release. */
28321 if (is_armv8_sync)
28322 use_acquire = false;
28324 /* Checks whether a barrier is needed and emits one accordingly. */
28325 if (!(use_acquire || use_release))
28326 arm_pre_atomic_barrier (model);
28328 label = gen_label_rtx ();
28329 emit_label (label);
28331 if (new_out)
28332 new_out = gen_lowpart (wmode, new_out);
28333 if (old_out)
28334 old_out = gen_lowpart (wmode, old_out);
28335 else
28336 old_out = new_out;
28337 value = simplify_gen_subreg (wmode, value, mode, 0);
28339 arm_emit_load_exclusive (mode, old_out, mem, use_acquire);
28341 /* Does the operation require destination and first operand to use the same
28342 register? This is decided by register constraints of relevant insn
28343 patterns in thumb1.md. */
28344 gcc_assert (!new_out || REG_P (new_out));
28345 all_low_regs = REG_P (value) && REGNO_REG_CLASS (REGNO (value)) == LO_REGS
28346 && new_out && REGNO_REG_CLASS (REGNO (new_out)) == LO_REGS
28347 && REGNO_REG_CLASS (REGNO (old_out)) == LO_REGS;
28348 bind_old_new =
28349 (TARGET_THUMB1
28350 && code != SET
28351 && code != MINUS
28352 && (code != PLUS || (!all_low_regs && !satisfies_constraint_L (value))));
28354 /* We want to return the old value while putting the result of the operation
28355 in the same register as the old value so copy the old value over to the
28356 destination register and use that register for the operation. */
28357 if (old_out && bind_old_new)
28359 emit_move_insn (new_out, old_out);
28360 old_out = new_out;
28363 switch (code)
28365 case SET:
28366 new_out = value;
28367 break;
28369 case NOT:
28370 x = gen_rtx_AND (wmode, old_out, value);
28371 emit_insn (gen_rtx_SET (new_out, x));
28372 x = gen_rtx_NOT (wmode, new_out);
28373 emit_insn (gen_rtx_SET (new_out, x));
28374 break;
28376 case MINUS:
28377 if (CONST_INT_P (value))
28379 value = GEN_INT (-INTVAL (value));
28380 code = PLUS;
28382 /* FALLTHRU */
28384 case PLUS:
28385 if (mode == DImode)
28387 /* DImode plus/minus need to clobber flags. */
28388 /* The adddi3 and subdi3 patterns are incorrectly written so that
28389 they require matching operands, even when we could easily support
28390 three operands. Thankfully, this can be fixed up post-splitting,
28391 as the individual add+adc patterns do accept three operands and
28392 post-reload cprop can make these moves go away. */
28393 emit_move_insn (new_out, old_out);
28394 if (code == PLUS)
28395 x = gen_adddi3 (new_out, new_out, value);
28396 else
28397 x = gen_subdi3 (new_out, new_out, value);
28398 emit_insn (x);
28399 break;
28401 /* FALLTHRU */
28403 default:
28404 x = gen_rtx_fmt_ee (code, wmode, old_out, value);
28405 emit_insn (gen_rtx_SET (new_out, x));
28406 break;
28409 arm_emit_store_exclusive (mode, cond, mem, gen_lowpart (mode, new_out),
28410 use_release);
28412 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
28413 emit_unlikely_jump (gen_cbranchsi4 (x, cond, const0_rtx, label));
28415 /* Checks whether a barrier is needed and emits one accordingly. */
28416 if (is_armv8_sync
28417 || !(use_acquire || use_release))
28418 arm_post_atomic_barrier (model);
28421 #define MAX_VECT_LEN 16
28423 struct expand_vec_perm_d
28425 rtx target, op0, op1;
28426 unsigned char perm[MAX_VECT_LEN];
28427 machine_mode vmode;
28428 unsigned char nelt;
28429 bool one_vector_p;
28430 bool testing_p;
28433 /* Generate a variable permutation. */
28435 static void
28436 arm_expand_vec_perm_1 (rtx target, rtx op0, rtx op1, rtx sel)
28438 machine_mode vmode = GET_MODE (target);
28439 bool one_vector_p = rtx_equal_p (op0, op1);
28441 gcc_checking_assert (vmode == V8QImode || vmode == V16QImode);
28442 gcc_checking_assert (GET_MODE (op0) == vmode);
28443 gcc_checking_assert (GET_MODE (op1) == vmode);
28444 gcc_checking_assert (GET_MODE (sel) == vmode);
28445 gcc_checking_assert (TARGET_NEON);
28447 if (one_vector_p)
28449 if (vmode == V8QImode)
28450 emit_insn (gen_neon_vtbl1v8qi (target, op0, sel));
28451 else
28452 emit_insn (gen_neon_vtbl1v16qi (target, op0, sel));
28454 else
28456 rtx pair;
28458 if (vmode == V8QImode)
28460 pair = gen_reg_rtx (V16QImode);
28461 emit_insn (gen_neon_vcombinev8qi (pair, op0, op1));
28462 pair = gen_lowpart (TImode, pair);
28463 emit_insn (gen_neon_vtbl2v8qi (target, pair, sel));
28465 else
28467 pair = gen_reg_rtx (OImode);
28468 emit_insn (gen_neon_vcombinev16qi (pair, op0, op1));
28469 emit_insn (gen_neon_vtbl2v16qi (target, pair, sel));
28474 void
28475 arm_expand_vec_perm (rtx target, rtx op0, rtx op1, rtx sel)
28477 machine_mode vmode = GET_MODE (target);
28478 unsigned int i, nelt = GET_MODE_NUNITS (vmode);
28479 bool one_vector_p = rtx_equal_p (op0, op1);
28480 rtx rmask[MAX_VECT_LEN], mask;
28482 /* TODO: ARM's VTBL indexing is little-endian. In order to handle GCC's
28483 numbering of elements for big-endian, we must reverse the order. */
28484 gcc_checking_assert (!BYTES_BIG_ENDIAN);
28486 /* The VTBL instruction does not use a modulo index, so we must take care
28487 of that ourselves. */
28488 mask = GEN_INT (one_vector_p ? nelt - 1 : 2 * nelt - 1);
28489 for (i = 0; i < nelt; ++i)
28490 rmask[i] = mask;
28491 mask = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rmask));
28492 sel = expand_simple_binop (vmode, AND, sel, mask, NULL, 0, OPTAB_LIB_WIDEN);
28494 arm_expand_vec_perm_1 (target, op0, op1, sel);
28497 /* Map lane ordering between architectural lane order, and GCC lane order,
28498 taking into account ABI. See comment above output_move_neon for details. */
28500 static int
28501 neon_endian_lane_map (machine_mode mode, int lane)
28503 if (BYTES_BIG_ENDIAN)
28505 int nelems = GET_MODE_NUNITS (mode);
28506 /* Reverse lane order. */
28507 lane = (nelems - 1 - lane);
28508 /* Reverse D register order, to match ABI. */
28509 if (GET_MODE_SIZE (mode) == 16)
28510 lane = lane ^ (nelems / 2);
28512 return lane;
28515 /* Some permutations index into pairs of vectors, this is a helper function
28516 to map indexes into those pairs of vectors. */
28518 static int
28519 neon_pair_endian_lane_map (machine_mode mode, int lane)
28521 int nelem = GET_MODE_NUNITS (mode);
28522 if (BYTES_BIG_ENDIAN)
28523 lane =
28524 neon_endian_lane_map (mode, lane & (nelem - 1)) + (lane & nelem);
28525 return lane;
28528 /* Generate or test for an insn that supports a constant permutation. */
28530 /* Recognize patterns for the VUZP insns. */
28532 static bool
28533 arm_evpc_neon_vuzp (struct expand_vec_perm_d *d)
28535 unsigned int i, odd, mask, nelt = d->nelt;
28536 rtx out0, out1, in0, in1;
28537 rtx (*gen)(rtx, rtx, rtx, rtx);
28538 int first_elem;
28539 int swap_nelt;
28541 if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
28542 return false;
28544 /* arm_expand_vec_perm_const_1 () helpfully swaps the operands for the
28545 big endian pattern on 64 bit vectors, so we correct for that. */
28546 swap_nelt = BYTES_BIG_ENDIAN && !d->one_vector_p
28547 && GET_MODE_SIZE (d->vmode) == 8 ? d->nelt : 0;
28549 first_elem = d->perm[neon_endian_lane_map (d->vmode, 0)] ^ swap_nelt;
28551 if (first_elem == neon_endian_lane_map (d->vmode, 0))
28552 odd = 0;
28553 else if (first_elem == neon_endian_lane_map (d->vmode, 1))
28554 odd = 1;
28555 else
28556 return false;
28557 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
28559 for (i = 0; i < nelt; i++)
28561 unsigned elt =
28562 (neon_pair_endian_lane_map (d->vmode, i) * 2 + odd) & mask;
28563 if ((d->perm[i] ^ swap_nelt) != neon_pair_endian_lane_map (d->vmode, elt))
28564 return false;
28567 /* Success! */
28568 if (d->testing_p)
28569 return true;
28571 switch (d->vmode)
28573 case V16QImode: gen = gen_neon_vuzpv16qi_internal; break;
28574 case V8QImode: gen = gen_neon_vuzpv8qi_internal; break;
28575 case V8HImode: gen = gen_neon_vuzpv8hi_internal; break;
28576 case V4HImode: gen = gen_neon_vuzpv4hi_internal; break;
28577 case V8HFmode: gen = gen_neon_vuzpv8hf_internal; break;
28578 case V4HFmode: gen = gen_neon_vuzpv4hf_internal; break;
28579 case V4SImode: gen = gen_neon_vuzpv4si_internal; break;
28580 case V2SImode: gen = gen_neon_vuzpv2si_internal; break;
28581 case V2SFmode: gen = gen_neon_vuzpv2sf_internal; break;
28582 case V4SFmode: gen = gen_neon_vuzpv4sf_internal; break;
28583 default:
28584 gcc_unreachable ();
28587 in0 = d->op0;
28588 in1 = d->op1;
28589 if (swap_nelt != 0)
28590 std::swap (in0, in1);
28592 out0 = d->target;
28593 out1 = gen_reg_rtx (d->vmode);
28594 if (odd)
28595 std::swap (out0, out1);
28597 emit_insn (gen (out0, in0, in1, out1));
28598 return true;
28601 /* Recognize patterns for the VZIP insns. */
28603 static bool
28604 arm_evpc_neon_vzip (struct expand_vec_perm_d *d)
28606 unsigned int i, high, mask, nelt = d->nelt;
28607 rtx out0, out1, in0, in1;
28608 rtx (*gen)(rtx, rtx, rtx, rtx);
28609 int first_elem;
28610 bool is_swapped;
28612 if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
28613 return false;
28615 is_swapped = BYTES_BIG_ENDIAN;
28617 first_elem = d->perm[neon_endian_lane_map (d->vmode, 0) ^ is_swapped];
28619 high = nelt / 2;
28620 if (first_elem == neon_endian_lane_map (d->vmode, high))
28622 else if (first_elem == neon_endian_lane_map (d->vmode, 0))
28623 high = 0;
28624 else
28625 return false;
28626 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
28628 for (i = 0; i < nelt / 2; i++)
28630 unsigned elt =
28631 neon_pair_endian_lane_map (d->vmode, i + high) & mask;
28632 if (d->perm[neon_pair_endian_lane_map (d->vmode, 2 * i + is_swapped)]
28633 != elt)
28634 return false;
28635 elt =
28636 neon_pair_endian_lane_map (d->vmode, i + nelt + high) & mask;
28637 if (d->perm[neon_pair_endian_lane_map (d->vmode, 2 * i + !is_swapped)]
28638 != elt)
28639 return false;
28642 /* Success! */
28643 if (d->testing_p)
28644 return true;
28646 switch (d->vmode)
28648 case V16QImode: gen = gen_neon_vzipv16qi_internal; break;
28649 case V8QImode: gen = gen_neon_vzipv8qi_internal; break;
28650 case V8HImode: gen = gen_neon_vzipv8hi_internal; break;
28651 case V4HImode: gen = gen_neon_vzipv4hi_internal; break;
28652 case V8HFmode: gen = gen_neon_vzipv8hf_internal; break;
28653 case V4HFmode: gen = gen_neon_vzipv4hf_internal; break;
28654 case V4SImode: gen = gen_neon_vzipv4si_internal; break;
28655 case V2SImode: gen = gen_neon_vzipv2si_internal; break;
28656 case V2SFmode: gen = gen_neon_vzipv2sf_internal; break;
28657 case V4SFmode: gen = gen_neon_vzipv4sf_internal; break;
28658 default:
28659 gcc_unreachable ();
28662 in0 = d->op0;
28663 in1 = d->op1;
28664 if (is_swapped)
28665 std::swap (in0, in1);
28667 out0 = d->target;
28668 out1 = gen_reg_rtx (d->vmode);
28669 if (high)
28670 std::swap (out0, out1);
28672 emit_insn (gen (out0, in0, in1, out1));
28673 return true;
28676 /* Recognize patterns for the VREV insns. */
28678 static bool
28679 arm_evpc_neon_vrev (struct expand_vec_perm_d *d)
28681 unsigned int i, j, diff, nelt = d->nelt;
28682 rtx (*gen)(rtx, rtx);
28684 if (!d->one_vector_p)
28685 return false;
28687 diff = d->perm[0];
28688 switch (diff)
28690 case 7:
28691 switch (d->vmode)
28693 case V16QImode: gen = gen_neon_vrev64v16qi; break;
28694 case V8QImode: gen = gen_neon_vrev64v8qi; break;
28695 default:
28696 return false;
28698 break;
28699 case 3:
28700 switch (d->vmode)
28702 case V16QImode: gen = gen_neon_vrev32v16qi; break;
28703 case V8QImode: gen = gen_neon_vrev32v8qi; break;
28704 case V8HImode: gen = gen_neon_vrev64v8hi; break;
28705 case V4HImode: gen = gen_neon_vrev64v4hi; break;
28706 case V8HFmode: gen = gen_neon_vrev64v8hf; break;
28707 case V4HFmode: gen = gen_neon_vrev64v4hf; break;
28708 default:
28709 return false;
28711 break;
28712 case 1:
28713 switch (d->vmode)
28715 case V16QImode: gen = gen_neon_vrev16v16qi; break;
28716 case V8QImode: gen = gen_neon_vrev16v8qi; break;
28717 case V8HImode: gen = gen_neon_vrev32v8hi; break;
28718 case V4HImode: gen = gen_neon_vrev32v4hi; break;
28719 case V4SImode: gen = gen_neon_vrev64v4si; break;
28720 case V2SImode: gen = gen_neon_vrev64v2si; break;
28721 case V4SFmode: gen = gen_neon_vrev64v4sf; break;
28722 case V2SFmode: gen = gen_neon_vrev64v2sf; break;
28723 default:
28724 return false;
28726 break;
28727 default:
28728 return false;
28731 for (i = 0; i < nelt ; i += diff + 1)
28732 for (j = 0; j <= diff; j += 1)
28734 /* This is guaranteed to be true as the value of diff
28735 is 7, 3, 1 and we should have enough elements in the
28736 queue to generate this. Getting a vector mask with a
28737 value of diff other than these values implies that
28738 something is wrong by the time we get here. */
28739 gcc_assert (i + j < nelt);
28740 if (d->perm[i + j] != i + diff - j)
28741 return false;
28744 /* Success! */
28745 if (d->testing_p)
28746 return true;
28748 emit_insn (gen (d->target, d->op0));
28749 return true;
28752 /* Recognize patterns for the VTRN insns. */
28754 static bool
28755 arm_evpc_neon_vtrn (struct expand_vec_perm_d *d)
28757 unsigned int i, odd, mask, nelt = d->nelt;
28758 rtx out0, out1, in0, in1;
28759 rtx (*gen)(rtx, rtx, rtx, rtx);
28761 if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
28762 return false;
28764 /* Note that these are little-endian tests. Adjust for big-endian later. */
28765 if (d->perm[0] == 0)
28766 odd = 0;
28767 else if (d->perm[0] == 1)
28768 odd = 1;
28769 else
28770 return false;
28771 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
28773 for (i = 0; i < nelt; i += 2)
28775 if (d->perm[i] != i + odd)
28776 return false;
28777 if (d->perm[i + 1] != ((i + nelt + odd) & mask))
28778 return false;
28781 /* Success! */
28782 if (d->testing_p)
28783 return true;
28785 switch (d->vmode)
28787 case V16QImode: gen = gen_neon_vtrnv16qi_internal; break;
28788 case V8QImode: gen = gen_neon_vtrnv8qi_internal; break;
28789 case V8HImode: gen = gen_neon_vtrnv8hi_internal; break;
28790 case V4HImode: gen = gen_neon_vtrnv4hi_internal; break;
28791 case V8HFmode: gen = gen_neon_vtrnv8hf_internal; break;
28792 case V4HFmode: gen = gen_neon_vtrnv4hf_internal; break;
28793 case V4SImode: gen = gen_neon_vtrnv4si_internal; break;
28794 case V2SImode: gen = gen_neon_vtrnv2si_internal; break;
28795 case V2SFmode: gen = gen_neon_vtrnv2sf_internal; break;
28796 case V4SFmode: gen = gen_neon_vtrnv4sf_internal; break;
28797 default:
28798 gcc_unreachable ();
28801 in0 = d->op0;
28802 in1 = d->op1;
28803 if (BYTES_BIG_ENDIAN)
28805 std::swap (in0, in1);
28806 odd = !odd;
28809 out0 = d->target;
28810 out1 = gen_reg_rtx (d->vmode);
28811 if (odd)
28812 std::swap (out0, out1);
28814 emit_insn (gen (out0, in0, in1, out1));
28815 return true;
28818 /* Recognize patterns for the VEXT insns. */
28820 static bool
28821 arm_evpc_neon_vext (struct expand_vec_perm_d *d)
28823 unsigned int i, nelt = d->nelt;
28824 rtx (*gen) (rtx, rtx, rtx, rtx);
28825 rtx offset;
28827 unsigned int location;
28829 unsigned int next = d->perm[0] + 1;
28831 /* TODO: Handle GCC's numbering of elements for big-endian. */
28832 if (BYTES_BIG_ENDIAN)
28833 return false;
28835 /* Check if the extracted indexes are increasing by one. */
28836 for (i = 1; i < nelt; next++, i++)
28838 /* If we hit the most significant element of the 2nd vector in
28839 the previous iteration, no need to test further. */
28840 if (next == 2 * nelt)
28841 return false;
28843 /* If we are operating on only one vector: it could be a
28844 rotation. If there are only two elements of size < 64, let
28845 arm_evpc_neon_vrev catch it. */
28846 if (d->one_vector_p && (next == nelt))
28848 if ((nelt == 2) && (d->vmode != V2DImode))
28849 return false;
28850 else
28851 next = 0;
28854 if (d->perm[i] != next)
28855 return false;
28858 location = d->perm[0];
28860 switch (d->vmode)
28862 case V16QImode: gen = gen_neon_vextv16qi; break;
28863 case V8QImode: gen = gen_neon_vextv8qi; break;
28864 case V4HImode: gen = gen_neon_vextv4hi; break;
28865 case V8HImode: gen = gen_neon_vextv8hi; break;
28866 case V2SImode: gen = gen_neon_vextv2si; break;
28867 case V4SImode: gen = gen_neon_vextv4si; break;
28868 case V4HFmode: gen = gen_neon_vextv4hf; break;
28869 case V8HFmode: gen = gen_neon_vextv8hf; break;
28870 case V2SFmode: gen = gen_neon_vextv2sf; break;
28871 case V4SFmode: gen = gen_neon_vextv4sf; break;
28872 case V2DImode: gen = gen_neon_vextv2di; break;
28873 default:
28874 return false;
28877 /* Success! */
28878 if (d->testing_p)
28879 return true;
28881 offset = GEN_INT (location);
28882 emit_insn (gen (d->target, d->op0, d->op1, offset));
28883 return true;
28886 /* The NEON VTBL instruction is a fully variable permuation that's even
28887 stronger than what we expose via VEC_PERM_EXPR. What it doesn't do
28888 is mask the index operand as VEC_PERM_EXPR requires. Therefore we
28889 can do slightly better by expanding this as a constant where we don't
28890 have to apply a mask. */
28892 static bool
28893 arm_evpc_neon_vtbl (struct expand_vec_perm_d *d)
28895 rtx rperm[MAX_VECT_LEN], sel;
28896 machine_mode vmode = d->vmode;
28897 unsigned int i, nelt = d->nelt;
28899 /* TODO: ARM's VTBL indexing is little-endian. In order to handle GCC's
28900 numbering of elements for big-endian, we must reverse the order. */
28901 if (BYTES_BIG_ENDIAN)
28902 return false;
28904 if (d->testing_p)
28905 return true;
28907 /* Generic code will try constant permutation twice. Once with the
28908 original mode and again with the elements lowered to QImode.
28909 So wait and don't do the selector expansion ourselves. */
28910 if (vmode != V8QImode && vmode != V16QImode)
28911 return false;
28913 for (i = 0; i < nelt; ++i)
28914 rperm[i] = GEN_INT (d->perm[i]);
28915 sel = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rperm));
28916 sel = force_reg (vmode, sel);
28918 arm_expand_vec_perm_1 (d->target, d->op0, d->op1, sel);
28919 return true;
28922 static bool
28923 arm_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
28925 /* Check if the input mask matches vext before reordering the
28926 operands. */
28927 if (TARGET_NEON)
28928 if (arm_evpc_neon_vext (d))
28929 return true;
28931 /* The pattern matching functions above are written to look for a small
28932 number to begin the sequence (0, 1, N/2). If we begin with an index
28933 from the second operand, we can swap the operands. */
28934 if (d->perm[0] >= d->nelt)
28936 unsigned i, nelt = d->nelt;
28938 for (i = 0; i < nelt; ++i)
28939 d->perm[i] = (d->perm[i] + nelt) & (2 * nelt - 1);
28941 std::swap (d->op0, d->op1);
28944 if (TARGET_NEON)
28946 if (arm_evpc_neon_vuzp (d))
28947 return true;
28948 if (arm_evpc_neon_vzip (d))
28949 return true;
28950 if (arm_evpc_neon_vrev (d))
28951 return true;
28952 if (arm_evpc_neon_vtrn (d))
28953 return true;
28954 return arm_evpc_neon_vtbl (d);
28956 return false;
28959 /* Expand a vec_perm_const pattern. */
28961 bool
28962 arm_expand_vec_perm_const (rtx target, rtx op0, rtx op1, rtx sel)
28964 struct expand_vec_perm_d d;
28965 int i, nelt, which;
28967 d.target = target;
28968 d.op0 = op0;
28969 d.op1 = op1;
28971 d.vmode = GET_MODE (target);
28972 gcc_assert (VECTOR_MODE_P (d.vmode));
28973 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
28974 d.testing_p = false;
28976 for (i = which = 0; i < nelt; ++i)
28978 rtx e = XVECEXP (sel, 0, i);
28979 int ei = INTVAL (e) & (2 * nelt - 1);
28980 which |= (ei < nelt ? 1 : 2);
28981 d.perm[i] = ei;
28984 switch (which)
28986 default:
28987 gcc_unreachable();
28989 case 3:
28990 d.one_vector_p = false;
28991 if (!rtx_equal_p (op0, op1))
28992 break;
28994 /* The elements of PERM do not suggest that only the first operand
28995 is used, but both operands are identical. Allow easier matching
28996 of the permutation by folding the permutation into the single
28997 input vector. */
28998 /* FALLTHRU */
28999 case 2:
29000 for (i = 0; i < nelt; ++i)
29001 d.perm[i] &= nelt - 1;
29002 d.op0 = op1;
29003 d.one_vector_p = true;
29004 break;
29006 case 1:
29007 d.op1 = op0;
29008 d.one_vector_p = true;
29009 break;
29012 return arm_expand_vec_perm_const_1 (&d);
29015 /* Implement TARGET_VECTORIZE_VEC_PERM_CONST_OK. */
29017 static bool
29018 arm_vectorize_vec_perm_const_ok (machine_mode vmode,
29019 const unsigned char *sel)
29021 struct expand_vec_perm_d d;
29022 unsigned int i, nelt, which;
29023 bool ret;
29025 d.vmode = vmode;
29026 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
29027 d.testing_p = true;
29028 memcpy (d.perm, sel, nelt);
29030 /* Categorize the set of elements in the selector. */
29031 for (i = which = 0; i < nelt; ++i)
29033 unsigned char e = d.perm[i];
29034 gcc_assert (e < 2 * nelt);
29035 which |= (e < nelt ? 1 : 2);
29038 /* For all elements from second vector, fold the elements to first. */
29039 if (which == 2)
29040 for (i = 0; i < nelt; ++i)
29041 d.perm[i] -= nelt;
29043 /* Check whether the mask can be applied to the vector type. */
29044 d.one_vector_p = (which != 3);
29046 d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
29047 d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
29048 if (!d.one_vector_p)
29049 d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);
29051 start_sequence ();
29052 ret = arm_expand_vec_perm_const_1 (&d);
29053 end_sequence ();
29055 return ret;
29058 bool
29059 arm_autoinc_modes_ok_p (machine_mode mode, enum arm_auto_incmodes code)
29061 /* If we are soft float and we do not have ldrd
29062 then all auto increment forms are ok. */
29063 if (TARGET_SOFT_FLOAT && (TARGET_LDRD || GET_MODE_SIZE (mode) <= 4))
29064 return true;
29066 switch (code)
29068 /* Post increment and Pre Decrement are supported for all
29069 instruction forms except for vector forms. */
29070 case ARM_POST_INC:
29071 case ARM_PRE_DEC:
29072 if (VECTOR_MODE_P (mode))
29074 if (code != ARM_PRE_DEC)
29075 return true;
29076 else
29077 return false;
29080 return true;
29082 case ARM_POST_DEC:
29083 case ARM_PRE_INC:
29084 /* Without LDRD and mode size greater than
29085 word size, there is no point in auto-incrementing
29086 because ldm and stm will not have these forms. */
29087 if (!TARGET_LDRD && GET_MODE_SIZE (mode) > 4)
29088 return false;
29090 /* Vector and floating point modes do not support
29091 these auto increment forms. */
29092 if (FLOAT_MODE_P (mode) || VECTOR_MODE_P (mode))
29093 return false;
29095 return true;
29097 default:
29098 return false;
29102 return false;
29105 /* The default expansion of general 64-bit shifts in core-regs is suboptimal,
29106 on ARM, since we know that shifts by negative amounts are no-ops.
29107 Additionally, the default expansion code is not available or suitable
29108 for post-reload insn splits (this can occur when the register allocator
29109 chooses not to do a shift in NEON).
29111 This function is used in both initial expand and post-reload splits, and
29112 handles all kinds of 64-bit shifts.
29114 Input requirements:
29115 - It is safe for the input and output to be the same register, but
29116 early-clobber rules apply for the shift amount and scratch registers.
29117 - Shift by register requires both scratch registers. In all other cases
29118 the scratch registers may be NULL.
29119 - Ashiftrt by a register also clobbers the CC register. */
29120 void
29121 arm_emit_coreregs_64bit_shift (enum rtx_code code, rtx out, rtx in,
29122 rtx amount, rtx scratch1, rtx scratch2)
29124 rtx out_high = gen_highpart (SImode, out);
29125 rtx out_low = gen_lowpart (SImode, out);
29126 rtx in_high = gen_highpart (SImode, in);
29127 rtx in_low = gen_lowpart (SImode, in);
29129 /* Terminology:
29130 in = the register pair containing the input value.
29131 out = the destination register pair.
29132 up = the high- or low-part of each pair.
29133 down = the opposite part to "up".
29134 In a shift, we can consider bits to shift from "up"-stream to
29135 "down"-stream, so in a left-shift "up" is the low-part and "down"
29136 is the high-part of each register pair. */
29138 rtx out_up = code == ASHIFT ? out_low : out_high;
29139 rtx out_down = code == ASHIFT ? out_high : out_low;
29140 rtx in_up = code == ASHIFT ? in_low : in_high;
29141 rtx in_down = code == ASHIFT ? in_high : in_low;
29143 gcc_assert (code == ASHIFT || code == ASHIFTRT || code == LSHIFTRT);
29144 gcc_assert (out
29145 && (REG_P (out) || GET_CODE (out) == SUBREG)
29146 && GET_MODE (out) == DImode);
29147 gcc_assert (in
29148 && (REG_P (in) || GET_CODE (in) == SUBREG)
29149 && GET_MODE (in) == DImode);
29150 gcc_assert (amount
29151 && (((REG_P (amount) || GET_CODE (amount) == SUBREG)
29152 && GET_MODE (amount) == SImode)
29153 || CONST_INT_P (amount)));
29154 gcc_assert (scratch1 == NULL
29155 || (GET_CODE (scratch1) == SCRATCH)
29156 || (GET_MODE (scratch1) == SImode
29157 && REG_P (scratch1)));
29158 gcc_assert (scratch2 == NULL
29159 || (GET_CODE (scratch2) == SCRATCH)
29160 || (GET_MODE (scratch2) == SImode
29161 && REG_P (scratch2)));
29162 gcc_assert (!REG_P (out) || !REG_P (amount)
29163 || !HARD_REGISTER_P (out)
29164 || (REGNO (out) != REGNO (amount)
29165 && REGNO (out) + 1 != REGNO (amount)));
29167 /* Macros to make following code more readable. */
29168 #define SUB_32(DEST,SRC) \
29169 gen_addsi3 ((DEST), (SRC), GEN_INT (-32))
29170 #define RSB_32(DEST,SRC) \
29171 gen_subsi3 ((DEST), GEN_INT (32), (SRC))
29172 #define SUB_S_32(DEST,SRC) \
29173 gen_addsi3_compare0 ((DEST), (SRC), \
29174 GEN_INT (-32))
29175 #define SET(DEST,SRC) \
29176 gen_rtx_SET ((DEST), (SRC))
29177 #define SHIFT(CODE,SRC,AMOUNT) \
29178 gen_rtx_fmt_ee ((CODE), SImode, (SRC), (AMOUNT))
29179 #define LSHIFT(CODE,SRC,AMOUNT) \
29180 gen_rtx_fmt_ee ((CODE) == ASHIFT ? ASHIFT : LSHIFTRT, \
29181 SImode, (SRC), (AMOUNT))
29182 #define REV_LSHIFT(CODE,SRC,AMOUNT) \
29183 gen_rtx_fmt_ee ((CODE) == ASHIFT ? LSHIFTRT : ASHIFT, \
29184 SImode, (SRC), (AMOUNT))
29185 #define ORR(A,B) \
29186 gen_rtx_IOR (SImode, (A), (B))
29187 #define BRANCH(COND,LABEL) \
29188 gen_arm_cond_branch ((LABEL), \
29189 gen_rtx_ ## COND (CCmode, cc_reg, \
29190 const0_rtx), \
29191 cc_reg)
29193 /* Shifts by register and shifts by constant are handled separately. */
29194 if (CONST_INT_P (amount))
29196 /* We have a shift-by-constant. */
29198 /* First, handle out-of-range shift amounts.
29199 In both cases we try to match the result an ARM instruction in a
29200 shift-by-register would give. This helps reduce execution
29201 differences between optimization levels, but it won't stop other
29202 parts of the compiler doing different things. This is "undefined
29203 behavior, in any case. */
29204 if (INTVAL (amount) <= 0)
29205 emit_insn (gen_movdi (out, in));
29206 else if (INTVAL (amount) >= 64)
29208 if (code == ASHIFTRT)
29210 rtx const31_rtx = GEN_INT (31);
29211 emit_insn (SET (out_down, SHIFT (code, in_up, const31_rtx)));
29212 emit_insn (SET (out_up, SHIFT (code, in_up, const31_rtx)));
29214 else
29215 emit_insn (gen_movdi (out, const0_rtx));
29218 /* Now handle valid shifts. */
29219 else if (INTVAL (amount) < 32)
29221 /* Shifts by a constant less than 32. */
29222 rtx reverse_amount = GEN_INT (32 - INTVAL (amount));
29224 /* Clearing the out register in DImode first avoids lots
29225 of spilling and results in less stack usage.
29226 Later this redundant insn is completely removed.
29227 Do that only if "in" and "out" are different registers. */
29228 if (REG_P (out) && REG_P (in) && REGNO (out) != REGNO (in))
29229 emit_insn (SET (out, const0_rtx));
29230 emit_insn (SET (out_down, LSHIFT (code, in_down, amount)));
29231 emit_insn (SET (out_down,
29232 ORR (REV_LSHIFT (code, in_up, reverse_amount),
29233 out_down)));
29234 emit_insn (SET (out_up, SHIFT (code, in_up, amount)));
29236 else
29238 /* Shifts by a constant greater than 31. */
29239 rtx adj_amount = GEN_INT (INTVAL (amount) - 32);
29241 if (REG_P (out) && REG_P (in) && REGNO (out) != REGNO (in))
29242 emit_insn (SET (out, const0_rtx));
29243 emit_insn (SET (out_down, SHIFT (code, in_up, adj_amount)));
29244 if (code == ASHIFTRT)
29245 emit_insn (gen_ashrsi3 (out_up, in_up,
29246 GEN_INT (31)));
29247 else
29248 emit_insn (SET (out_up, const0_rtx));
29251 else
29253 /* We have a shift-by-register. */
29254 rtx cc_reg = gen_rtx_REG (CC_NOOVmode, CC_REGNUM);
29256 /* This alternative requires the scratch registers. */
29257 gcc_assert (scratch1 && REG_P (scratch1));
29258 gcc_assert (scratch2 && REG_P (scratch2));
29260 /* We will need the values "amount-32" and "32-amount" later.
29261 Swapping them around now allows the later code to be more general. */
29262 switch (code)
29264 case ASHIFT:
29265 emit_insn (SUB_32 (scratch1, amount));
29266 emit_insn (RSB_32 (scratch2, amount));
29267 break;
29268 case ASHIFTRT:
29269 emit_insn (RSB_32 (scratch1, amount));
29270 /* Also set CC = amount > 32. */
29271 emit_insn (SUB_S_32 (scratch2, amount));
29272 break;
29273 case LSHIFTRT:
29274 emit_insn (RSB_32 (scratch1, amount));
29275 emit_insn (SUB_32 (scratch2, amount));
29276 break;
29277 default:
29278 gcc_unreachable ();
29281 /* Emit code like this:
29283 arithmetic-left:
29284 out_down = in_down << amount;
29285 out_down = (in_up << (amount - 32)) | out_down;
29286 out_down = ((unsigned)in_up >> (32 - amount)) | out_down;
29287 out_up = in_up << amount;
29289 arithmetic-right:
29290 out_down = in_down >> amount;
29291 out_down = (in_up << (32 - amount)) | out_down;
29292 if (amount < 32)
29293 out_down = ((signed)in_up >> (amount - 32)) | out_down;
29294 out_up = in_up << amount;
29296 logical-right:
29297 out_down = in_down >> amount;
29298 out_down = (in_up << (32 - amount)) | out_down;
29299 if (amount < 32)
29300 out_down = ((unsigned)in_up >> (amount - 32)) | out_down;
29301 out_up = in_up << amount;
29303 The ARM and Thumb2 variants are the same but implemented slightly
29304 differently. If this were only called during expand we could just
29305 use the Thumb2 case and let combine do the right thing, but this
29306 can also be called from post-reload splitters. */
29308 emit_insn (SET (out_down, LSHIFT (code, in_down, amount)));
29310 if (!TARGET_THUMB2)
29312 /* Emit code for ARM mode. */
29313 emit_insn (SET (out_down,
29314 ORR (SHIFT (ASHIFT, in_up, scratch1), out_down)));
29315 if (code == ASHIFTRT)
29317 rtx_code_label *done_label = gen_label_rtx ();
29318 emit_jump_insn (BRANCH (LT, done_label));
29319 emit_insn (SET (out_down, ORR (SHIFT (ASHIFTRT, in_up, scratch2),
29320 out_down)));
29321 emit_label (done_label);
29323 else
29324 emit_insn (SET (out_down, ORR (SHIFT (LSHIFTRT, in_up, scratch2),
29325 out_down)));
29327 else
29329 /* Emit code for Thumb2 mode.
29330 Thumb2 can't do shift and or in one insn. */
29331 emit_insn (SET (scratch1, SHIFT (ASHIFT, in_up, scratch1)));
29332 emit_insn (gen_iorsi3 (out_down, out_down, scratch1));
29334 if (code == ASHIFTRT)
29336 rtx_code_label *done_label = gen_label_rtx ();
29337 emit_jump_insn (BRANCH (LT, done_label));
29338 emit_insn (SET (scratch2, SHIFT (ASHIFTRT, in_up, scratch2)));
29339 emit_insn (SET (out_down, ORR (out_down, scratch2)));
29340 emit_label (done_label);
29342 else
29344 emit_insn (SET (scratch2, SHIFT (LSHIFTRT, in_up, scratch2)));
29345 emit_insn (gen_iorsi3 (out_down, out_down, scratch2));
29349 emit_insn (SET (out_up, SHIFT (code, in_up, amount)));
29352 #undef SUB_32
29353 #undef RSB_32
29354 #undef SUB_S_32
29355 #undef SET
29356 #undef SHIFT
29357 #undef LSHIFT
29358 #undef REV_LSHIFT
29359 #undef ORR
29360 #undef BRANCH
29363 /* Returns true if the pattern is a valid symbolic address, which is either a
29364 symbol_ref or (symbol_ref + addend).
29366 According to the ARM ELF ABI, the initial addend of REL-type relocations
29367 processing MOVW and MOVT instructions is formed by interpreting the 16-bit
29368 literal field of the instruction as a 16-bit signed value in the range
29369 -32768 <= A < 32768. */
29371 bool
29372 arm_valid_symbolic_address_p (rtx addr)
29374 rtx xop0, xop1 = NULL_RTX;
29375 rtx tmp = addr;
29377 if (GET_CODE (tmp) == SYMBOL_REF || GET_CODE (tmp) == LABEL_REF)
29378 return true;
29380 /* (const (plus: symbol_ref const_int)) */
29381 if (GET_CODE (addr) == CONST)
29382 tmp = XEXP (addr, 0);
29384 if (GET_CODE (tmp) == PLUS)
29386 xop0 = XEXP (tmp, 0);
29387 xop1 = XEXP (tmp, 1);
29389 if (GET_CODE (xop0) == SYMBOL_REF && CONST_INT_P (xop1))
29390 return IN_RANGE (INTVAL (xop1), -0x8000, 0x7fff);
29393 return false;
29396 /* Returns true if a valid comparison operation and makes
29397 the operands in a form that is valid. */
29398 bool
29399 arm_validize_comparison (rtx *comparison, rtx * op1, rtx * op2)
29401 enum rtx_code code = GET_CODE (*comparison);
29402 int code_int;
29403 machine_mode mode = (GET_MODE (*op1) == VOIDmode)
29404 ? GET_MODE (*op2) : GET_MODE (*op1);
29406 gcc_assert (GET_MODE (*op1) != VOIDmode || GET_MODE (*op2) != VOIDmode);
29408 if (code == UNEQ || code == LTGT)
29409 return false;
29411 code_int = (int)code;
29412 arm_canonicalize_comparison (&code_int, op1, op2, 0);
29413 PUT_CODE (*comparison, (enum rtx_code)code_int);
29415 switch (mode)
29417 case SImode:
29418 if (!arm_add_operand (*op1, mode))
29419 *op1 = force_reg (mode, *op1);
29420 if (!arm_add_operand (*op2, mode))
29421 *op2 = force_reg (mode, *op2);
29422 return true;
29424 case DImode:
29425 if (!cmpdi_operand (*op1, mode))
29426 *op1 = force_reg (mode, *op1);
29427 if (!cmpdi_operand (*op2, mode))
29428 *op2 = force_reg (mode, *op2);
29429 return true;
29431 case HFmode:
29432 if (!TARGET_VFP_FP16INST)
29433 break;
29434 /* FP16 comparisons are done in SF mode. */
29435 mode = SFmode;
29436 *op1 = convert_to_mode (mode, *op1, 1);
29437 *op2 = convert_to_mode (mode, *op2, 1);
29438 /* Fall through. */
29439 case SFmode:
29440 case DFmode:
29441 if (!vfp_compare_operand (*op1, mode))
29442 *op1 = force_reg (mode, *op1);
29443 if (!vfp_compare_operand (*op2, mode))
29444 *op2 = force_reg (mode, *op2);
29445 return true;
29446 default:
29447 break;
29450 return false;
29454 /* Maximum number of instructions to set block of memory. */
29455 static int
29456 arm_block_set_max_insns (void)
29458 if (optimize_function_for_size_p (cfun))
29459 return 4;
29460 else
29461 return current_tune->max_insns_inline_memset;
29464 /* Return TRUE if it's profitable to set block of memory for
29465 non-vectorized case. VAL is the value to set the memory
29466 with. LENGTH is the number of bytes to set. ALIGN is the
29467 alignment of the destination memory in bytes. UNALIGNED_P
29468 is TRUE if we can only set the memory with instructions
29469 meeting alignment requirements. USE_STRD_P is TRUE if we
29470 can use strd to set the memory. */
29471 static bool
29472 arm_block_set_non_vect_profit_p (rtx val,
29473 unsigned HOST_WIDE_INT length,
29474 unsigned HOST_WIDE_INT align,
29475 bool unaligned_p, bool use_strd_p)
29477 int num = 0;
29478 /* For leftovers in bytes of 0-7, we can set the memory block using
29479 strb/strh/str with minimum instruction number. */
29480 const int leftover[8] = {0, 1, 1, 2, 1, 2, 2, 3};
29482 if (unaligned_p)
29484 num = arm_const_inline_cost (SET, val);
29485 num += length / align + length % align;
29487 else if (use_strd_p)
29489 num = arm_const_double_inline_cost (val);
29490 num += (length >> 3) + leftover[length & 7];
29492 else
29494 num = arm_const_inline_cost (SET, val);
29495 num += (length >> 2) + leftover[length & 3];
29498 /* We may be able to combine last pair STRH/STRB into a single STR
29499 by shifting one byte back. */
29500 if (unaligned_access && length > 3 && (length & 3) == 3)
29501 num--;
29503 return (num <= arm_block_set_max_insns ());
29506 /* Return TRUE if it's profitable to set block of memory for
29507 vectorized case. LENGTH is the number of bytes to set.
29508 ALIGN is the alignment of destination memory in bytes.
29509 MODE is the vector mode used to set the memory. */
29510 static bool
29511 arm_block_set_vect_profit_p (unsigned HOST_WIDE_INT length,
29512 unsigned HOST_WIDE_INT align,
29513 machine_mode mode)
29515 int num;
29516 bool unaligned_p = ((align & 3) != 0);
29517 unsigned int nelt = GET_MODE_NUNITS (mode);
29519 /* Instruction loading constant value. */
29520 num = 1;
29521 /* Instructions storing the memory. */
29522 num += (length + nelt - 1) / nelt;
29523 /* Instructions adjusting the address expression. Only need to
29524 adjust address expression if it's 4 bytes aligned and bytes
29525 leftover can only be stored by mis-aligned store instruction. */
29526 if (!unaligned_p && (length & 3) != 0)
29527 num++;
29529 /* Store the first 16 bytes using vst1:v16qi for the aligned case. */
29530 if (!unaligned_p && mode == V16QImode)
29531 num--;
29533 return (num <= arm_block_set_max_insns ());
29536 /* Set a block of memory using vectorization instructions for the
29537 unaligned case. We fill the first LENGTH bytes of the memory
29538 area starting from DSTBASE with byte constant VALUE. ALIGN is
29539 the alignment requirement of memory. Return TRUE if succeeded. */
29540 static bool
29541 arm_block_set_unaligned_vect (rtx dstbase,
29542 unsigned HOST_WIDE_INT length,
29543 unsigned HOST_WIDE_INT value,
29544 unsigned HOST_WIDE_INT align)
29546 unsigned int i, j, nelt_v16, nelt_v8, nelt_mode;
29547 rtx dst, mem;
29548 rtx val_elt, val_vec, reg;
29549 rtx rval[MAX_VECT_LEN];
29550 rtx (*gen_func) (rtx, rtx);
29551 machine_mode mode;
29552 unsigned HOST_WIDE_INT v = value;
29553 unsigned int offset = 0;
29554 gcc_assert ((align & 0x3) != 0);
29555 nelt_v8 = GET_MODE_NUNITS (V8QImode);
29556 nelt_v16 = GET_MODE_NUNITS (V16QImode);
29557 if (length >= nelt_v16)
29559 mode = V16QImode;
29560 gen_func = gen_movmisalignv16qi;
29562 else
29564 mode = V8QImode;
29565 gen_func = gen_movmisalignv8qi;
29567 nelt_mode = GET_MODE_NUNITS (mode);
29568 gcc_assert (length >= nelt_mode);
29569 /* Skip if it isn't profitable. */
29570 if (!arm_block_set_vect_profit_p (length, align, mode))
29571 return false;
29573 dst = copy_addr_to_reg (XEXP (dstbase, 0));
29574 mem = adjust_automodify_address (dstbase, mode, dst, offset);
29576 v = sext_hwi (v, BITS_PER_WORD);
29577 val_elt = GEN_INT (v);
29578 for (j = 0; j < nelt_mode; j++)
29579 rval[j] = val_elt;
29581 reg = gen_reg_rtx (mode);
29582 val_vec = gen_rtx_CONST_VECTOR (mode, gen_rtvec_v (nelt_mode, rval));
29583 /* Emit instruction loading the constant value. */
29584 emit_move_insn (reg, val_vec);
29586 /* Handle nelt_mode bytes in a vector. */
29587 for (i = 0; (i + nelt_mode <= length); i += nelt_mode)
29589 emit_insn ((*gen_func) (mem, reg));
29590 if (i + 2 * nelt_mode <= length)
29592 emit_insn (gen_add2_insn (dst, GEN_INT (nelt_mode)));
29593 offset += nelt_mode;
29594 mem = adjust_automodify_address (dstbase, mode, dst, offset);
29598 /* If there are not less than nelt_v8 bytes leftover, we must be in
29599 V16QI mode. */
29600 gcc_assert ((i + nelt_v8) > length || mode == V16QImode);
29602 /* Handle (8, 16) bytes leftover. */
29603 if (i + nelt_v8 < length)
29605 emit_insn (gen_add2_insn (dst, GEN_INT (length - i)));
29606 offset += length - i;
29607 mem = adjust_automodify_address (dstbase, mode, dst, offset);
29609 /* We are shifting bytes back, set the alignment accordingly. */
29610 if ((length & 1) != 0 && align >= 2)
29611 set_mem_align (mem, BITS_PER_UNIT);
29613 emit_insn (gen_movmisalignv16qi (mem, reg));
29615 /* Handle (0, 8] bytes leftover. */
29616 else if (i < length && i + nelt_v8 >= length)
29618 if (mode == V16QImode)
29619 reg = gen_lowpart (V8QImode, reg);
29621 emit_insn (gen_add2_insn (dst, GEN_INT ((length - i)
29622 + (nelt_mode - nelt_v8))));
29623 offset += (length - i) + (nelt_mode - nelt_v8);
29624 mem = adjust_automodify_address (dstbase, V8QImode, dst, offset);
29626 /* We are shifting bytes back, set the alignment accordingly. */
29627 if ((length & 1) != 0 && align >= 2)
29628 set_mem_align (mem, BITS_PER_UNIT);
29630 emit_insn (gen_movmisalignv8qi (mem, reg));
29633 return true;
29636 /* Set a block of memory using vectorization instructions for the
29637 aligned case. We fill the first LENGTH bytes of the memory area
29638 starting from DSTBASE with byte constant VALUE. ALIGN is the
29639 alignment requirement of memory. Return TRUE if succeeded. */
29640 static bool
29641 arm_block_set_aligned_vect (rtx dstbase,
29642 unsigned HOST_WIDE_INT length,
29643 unsigned HOST_WIDE_INT value,
29644 unsigned HOST_WIDE_INT align)
29646 unsigned int i, j, nelt_v8, nelt_v16, nelt_mode;
29647 rtx dst, addr, mem;
29648 rtx val_elt, val_vec, reg;
29649 rtx rval[MAX_VECT_LEN];
29650 machine_mode mode;
29651 unsigned HOST_WIDE_INT v = value;
29652 unsigned int offset = 0;
29654 gcc_assert ((align & 0x3) == 0);
29655 nelt_v8 = GET_MODE_NUNITS (V8QImode);
29656 nelt_v16 = GET_MODE_NUNITS (V16QImode);
29657 if (length >= nelt_v16 && unaligned_access && !BYTES_BIG_ENDIAN)
29658 mode = V16QImode;
29659 else
29660 mode = V8QImode;
29662 nelt_mode = GET_MODE_NUNITS (mode);
29663 gcc_assert (length >= nelt_mode);
29664 /* Skip if it isn't profitable. */
29665 if (!arm_block_set_vect_profit_p (length, align, mode))
29666 return false;
29668 dst = copy_addr_to_reg (XEXP (dstbase, 0));
29670 v = sext_hwi (v, BITS_PER_WORD);
29671 val_elt = GEN_INT (v);
29672 for (j = 0; j < nelt_mode; j++)
29673 rval[j] = val_elt;
29675 reg = gen_reg_rtx (mode);
29676 val_vec = gen_rtx_CONST_VECTOR (mode, gen_rtvec_v (nelt_mode, rval));
29677 /* Emit instruction loading the constant value. */
29678 emit_move_insn (reg, val_vec);
29680 i = 0;
29681 /* Handle first 16 bytes specially using vst1:v16qi instruction. */
29682 if (mode == V16QImode)
29684 mem = adjust_automodify_address (dstbase, mode, dst, offset);
29685 emit_insn (gen_movmisalignv16qi (mem, reg));
29686 i += nelt_mode;
29687 /* Handle (8, 16) bytes leftover using vst1:v16qi again. */
29688 if (i + nelt_v8 < length && i + nelt_v16 > length)
29690 emit_insn (gen_add2_insn (dst, GEN_INT (length - nelt_mode)));
29691 offset += length - nelt_mode;
29692 mem = adjust_automodify_address (dstbase, mode, dst, offset);
29693 /* We are shifting bytes back, set the alignment accordingly. */
29694 if ((length & 0x3) == 0)
29695 set_mem_align (mem, BITS_PER_UNIT * 4);
29696 else if ((length & 0x1) == 0)
29697 set_mem_align (mem, BITS_PER_UNIT * 2);
29698 else
29699 set_mem_align (mem, BITS_PER_UNIT);
29701 emit_insn (gen_movmisalignv16qi (mem, reg));
29702 return true;
29704 /* Fall through for bytes leftover. */
29705 mode = V8QImode;
29706 nelt_mode = GET_MODE_NUNITS (mode);
29707 reg = gen_lowpart (V8QImode, reg);
29710 /* Handle 8 bytes in a vector. */
29711 for (; (i + nelt_mode <= length); i += nelt_mode)
29713 addr = plus_constant (Pmode, dst, i);
29714 mem = adjust_automodify_address (dstbase, mode, addr, offset + i);
29715 emit_move_insn (mem, reg);
29718 /* Handle single word leftover by shifting 4 bytes back. We can
29719 use aligned access for this case. */
29720 if (i + UNITS_PER_WORD == length)
29722 addr = plus_constant (Pmode, dst, i - UNITS_PER_WORD);
29723 offset += i - UNITS_PER_WORD;
29724 mem = adjust_automodify_address (dstbase, mode, addr, offset);
29725 /* We are shifting 4 bytes back, set the alignment accordingly. */
29726 if (align > UNITS_PER_WORD)
29727 set_mem_align (mem, BITS_PER_UNIT * UNITS_PER_WORD);
29729 emit_move_insn (mem, reg);
29731 /* Handle (0, 4), (4, 8) bytes leftover by shifting bytes back.
29732 We have to use unaligned access for this case. */
29733 else if (i < length)
29735 emit_insn (gen_add2_insn (dst, GEN_INT (length - nelt_mode)));
29736 offset += length - nelt_mode;
29737 mem = adjust_automodify_address (dstbase, mode, dst, offset);
29738 /* We are shifting bytes back, set the alignment accordingly. */
29739 if ((length & 1) == 0)
29740 set_mem_align (mem, BITS_PER_UNIT * 2);
29741 else
29742 set_mem_align (mem, BITS_PER_UNIT);
29744 emit_insn (gen_movmisalignv8qi (mem, reg));
29747 return true;
29750 /* Set a block of memory using plain strh/strb instructions, only
29751 using instructions allowed by ALIGN on processor. We fill the
29752 first LENGTH bytes of the memory area starting from DSTBASE
29753 with byte constant VALUE. ALIGN is the alignment requirement
29754 of memory. */
29755 static bool
29756 arm_block_set_unaligned_non_vect (rtx dstbase,
29757 unsigned HOST_WIDE_INT length,
29758 unsigned HOST_WIDE_INT value,
29759 unsigned HOST_WIDE_INT align)
29761 unsigned int i;
29762 rtx dst, addr, mem;
29763 rtx val_exp, val_reg, reg;
29764 machine_mode mode;
29765 HOST_WIDE_INT v = value;
29767 gcc_assert (align == 1 || align == 2);
29769 if (align == 2)
29770 v |= (value << BITS_PER_UNIT);
29772 v = sext_hwi (v, BITS_PER_WORD);
29773 val_exp = GEN_INT (v);
29774 /* Skip if it isn't profitable. */
29775 if (!arm_block_set_non_vect_profit_p (val_exp, length,
29776 align, true, false))
29777 return false;
29779 dst = copy_addr_to_reg (XEXP (dstbase, 0));
29780 mode = (align == 2 ? HImode : QImode);
29781 val_reg = force_reg (SImode, val_exp);
29782 reg = gen_lowpart (mode, val_reg);
29784 for (i = 0; (i + GET_MODE_SIZE (mode) <= length); i += GET_MODE_SIZE (mode))
29786 addr = plus_constant (Pmode, dst, i);
29787 mem = adjust_automodify_address (dstbase, mode, addr, i);
29788 emit_move_insn (mem, reg);
29791 /* Handle single byte leftover. */
29792 if (i + 1 == length)
29794 reg = gen_lowpart (QImode, val_reg);
29795 addr = plus_constant (Pmode, dst, i);
29796 mem = adjust_automodify_address (dstbase, QImode, addr, i);
29797 emit_move_insn (mem, reg);
29798 i++;
29801 gcc_assert (i == length);
29802 return true;
29805 /* Set a block of memory using plain strd/str/strh/strb instructions,
29806 to permit unaligned copies on processors which support unaligned
29807 semantics for those instructions. We fill the first LENGTH bytes
29808 of the memory area starting from DSTBASE with byte constant VALUE.
29809 ALIGN is the alignment requirement of memory. */
29810 static bool
29811 arm_block_set_aligned_non_vect (rtx dstbase,
29812 unsigned HOST_WIDE_INT length,
29813 unsigned HOST_WIDE_INT value,
29814 unsigned HOST_WIDE_INT align)
29816 unsigned int i;
29817 rtx dst, addr, mem;
29818 rtx val_exp, val_reg, reg;
29819 unsigned HOST_WIDE_INT v;
29820 bool use_strd_p;
29822 use_strd_p = (length >= 2 * UNITS_PER_WORD && (align & 3) == 0
29823 && TARGET_LDRD && current_tune->prefer_ldrd_strd);
29825 v = (value | (value << 8) | (value << 16) | (value << 24));
29826 if (length < UNITS_PER_WORD)
29827 v &= (0xFFFFFFFF >> (UNITS_PER_WORD - length) * BITS_PER_UNIT);
29829 if (use_strd_p)
29830 v |= (v << BITS_PER_WORD);
29831 else
29832 v = sext_hwi (v, BITS_PER_WORD);
29834 val_exp = GEN_INT (v);
29835 /* Skip if it isn't profitable. */
29836 if (!arm_block_set_non_vect_profit_p (val_exp, length,
29837 align, false, use_strd_p))
29839 if (!use_strd_p)
29840 return false;
29842 /* Try without strd. */
29843 v = (v >> BITS_PER_WORD);
29844 v = sext_hwi (v, BITS_PER_WORD);
29845 val_exp = GEN_INT (v);
29846 use_strd_p = false;
29847 if (!arm_block_set_non_vect_profit_p (val_exp, length,
29848 align, false, use_strd_p))
29849 return false;
29852 i = 0;
29853 dst = copy_addr_to_reg (XEXP (dstbase, 0));
29854 /* Handle double words using strd if possible. */
29855 if (use_strd_p)
29857 val_reg = force_reg (DImode, val_exp);
29858 reg = val_reg;
29859 for (; (i + 8 <= length); i += 8)
29861 addr = plus_constant (Pmode, dst, i);
29862 mem = adjust_automodify_address (dstbase, DImode, addr, i);
29863 emit_move_insn (mem, reg);
29866 else
29867 val_reg = force_reg (SImode, val_exp);
29869 /* Handle words. */
29870 reg = (use_strd_p ? gen_lowpart (SImode, val_reg) : val_reg);
29871 for (; (i + 4 <= length); i += 4)
29873 addr = plus_constant (Pmode, dst, i);
29874 mem = adjust_automodify_address (dstbase, SImode, addr, i);
29875 if ((align & 3) == 0)
29876 emit_move_insn (mem, reg);
29877 else
29878 emit_insn (gen_unaligned_storesi (mem, reg));
29881 /* Merge last pair of STRH and STRB into a STR if possible. */
29882 if (unaligned_access && i > 0 && (i + 3) == length)
29884 addr = plus_constant (Pmode, dst, i - 1);
29885 mem = adjust_automodify_address (dstbase, SImode, addr, i - 1);
29886 /* We are shifting one byte back, set the alignment accordingly. */
29887 if ((align & 1) == 0)
29888 set_mem_align (mem, BITS_PER_UNIT);
29890 /* Most likely this is an unaligned access, and we can't tell at
29891 compilation time. */
29892 emit_insn (gen_unaligned_storesi (mem, reg));
29893 return true;
29896 /* Handle half word leftover. */
29897 if (i + 2 <= length)
29899 reg = gen_lowpart (HImode, val_reg);
29900 addr = plus_constant (Pmode, dst, i);
29901 mem = adjust_automodify_address (dstbase, HImode, addr, i);
29902 if ((align & 1) == 0)
29903 emit_move_insn (mem, reg);
29904 else
29905 emit_insn (gen_unaligned_storehi (mem, reg));
29907 i += 2;
29910 /* Handle single byte leftover. */
29911 if (i + 1 == length)
29913 reg = gen_lowpart (QImode, val_reg);
29914 addr = plus_constant (Pmode, dst, i);
29915 mem = adjust_automodify_address (dstbase, QImode, addr, i);
29916 emit_move_insn (mem, reg);
29919 return true;
29922 /* Set a block of memory using vectorization instructions for both
29923 aligned and unaligned cases. We fill the first LENGTH bytes of
29924 the memory area starting from DSTBASE with byte constant VALUE.
29925 ALIGN is the alignment requirement of memory. */
29926 static bool
29927 arm_block_set_vect (rtx dstbase,
29928 unsigned HOST_WIDE_INT length,
29929 unsigned HOST_WIDE_INT value,
29930 unsigned HOST_WIDE_INT align)
29932 /* Check whether we need to use unaligned store instruction. */
29933 if (((align & 3) != 0 || (length & 3) != 0)
29934 /* Check whether unaligned store instruction is available. */
29935 && (!unaligned_access || BYTES_BIG_ENDIAN))
29936 return false;
29938 if ((align & 3) == 0)
29939 return arm_block_set_aligned_vect (dstbase, length, value, align);
29940 else
29941 return arm_block_set_unaligned_vect (dstbase, length, value, align);
29944 /* Expand string store operation. Firstly we try to do that by using
29945 vectorization instructions, then try with ARM unaligned access and
29946 double-word store if profitable. OPERANDS[0] is the destination,
29947 OPERANDS[1] is the number of bytes, operands[2] is the value to
29948 initialize the memory, OPERANDS[3] is the known alignment of the
29949 destination. */
29950 bool
29951 arm_gen_setmem (rtx *operands)
29953 rtx dstbase = operands[0];
29954 unsigned HOST_WIDE_INT length;
29955 unsigned HOST_WIDE_INT value;
29956 unsigned HOST_WIDE_INT align;
29958 if (!CONST_INT_P (operands[2]) || !CONST_INT_P (operands[1]))
29959 return false;
29961 length = UINTVAL (operands[1]);
29962 if (length > 64)
29963 return false;
29965 value = (UINTVAL (operands[2]) & 0xFF);
29966 align = UINTVAL (operands[3]);
29967 if (TARGET_NEON && length >= 8
29968 && current_tune->string_ops_prefer_neon
29969 && arm_block_set_vect (dstbase, length, value, align))
29970 return true;
29972 if (!unaligned_access && (align & 3) != 0)
29973 return arm_block_set_unaligned_non_vect (dstbase, length, value, align);
29975 return arm_block_set_aligned_non_vect (dstbase, length, value, align);
29979 static bool
29980 arm_macro_fusion_p (void)
29982 return current_tune->fusible_ops != tune_params::FUSE_NOTHING;
29985 /* Return true if the two back-to-back sets PREV_SET, CURR_SET are suitable
29986 for MOVW / MOVT macro fusion. */
29988 static bool
29989 arm_sets_movw_movt_fusible_p (rtx prev_set, rtx curr_set)
29991 /* We are trying to fuse
29992 movw imm / movt imm
29993 instructions as a group that gets scheduled together. */
29995 rtx set_dest = SET_DEST (curr_set);
29997 if (GET_MODE (set_dest) != SImode)
29998 return false;
30000 /* We are trying to match:
30001 prev (movw) == (set (reg r0) (const_int imm16))
30002 curr (movt) == (set (zero_extract (reg r0)
30003 (const_int 16)
30004 (const_int 16))
30005 (const_int imm16_1))
30007 prev (movw) == (set (reg r1)
30008 (high (symbol_ref ("SYM"))))
30009 curr (movt) == (set (reg r0)
30010 (lo_sum (reg r1)
30011 (symbol_ref ("SYM")))) */
30013 if (GET_CODE (set_dest) == ZERO_EXTRACT)
30015 if (CONST_INT_P (SET_SRC (curr_set))
30016 && CONST_INT_P (SET_SRC (prev_set))
30017 && REG_P (XEXP (set_dest, 0))
30018 && REG_P (SET_DEST (prev_set))
30019 && REGNO (XEXP (set_dest, 0)) == REGNO (SET_DEST (prev_set)))
30020 return true;
30023 else if (GET_CODE (SET_SRC (curr_set)) == LO_SUM
30024 && REG_P (SET_DEST (curr_set))
30025 && REG_P (SET_DEST (prev_set))
30026 && GET_CODE (SET_SRC (prev_set)) == HIGH
30027 && REGNO (SET_DEST (curr_set)) == REGNO (SET_DEST (prev_set)))
30028 return true;
30030 return false;
30033 static bool
30034 aarch_macro_fusion_pair_p (rtx_insn* prev, rtx_insn* curr)
30036 rtx prev_set = single_set (prev);
30037 rtx curr_set = single_set (curr);
30039 if (!prev_set
30040 || !curr_set)
30041 return false;
30043 if (any_condjump_p (curr))
30044 return false;
30046 if (!arm_macro_fusion_p ())
30047 return false;
30049 if (current_tune->fusible_ops & tune_params::FUSE_AES_AESMC
30050 && aarch_crypto_can_dual_issue (prev, curr))
30051 return true;
30053 if (current_tune->fusible_ops & tune_params::FUSE_MOVW_MOVT
30054 && arm_sets_movw_movt_fusible_p (prev_set, curr_set))
30055 return true;
30057 return false;
30060 /* Return true iff the instruction fusion described by OP is enabled. */
30061 bool
30062 arm_fusion_enabled_p (tune_params::fuse_ops op)
30064 return current_tune->fusible_ops & op;
30067 /* Implement the TARGET_ASAN_SHADOW_OFFSET hook. */
30069 static unsigned HOST_WIDE_INT
30070 arm_asan_shadow_offset (void)
30072 return HOST_WIDE_INT_1U << 29;
30076 /* This is a temporary fix for PR60655. Ideally we need
30077 to handle most of these cases in the generic part but
30078 currently we reject minus (..) (sym_ref). We try to
30079 ameliorate the case with minus (sym_ref1) (sym_ref2)
30080 where they are in the same section. */
30082 static bool
30083 arm_const_not_ok_for_debug_p (rtx p)
30085 tree decl_op0 = NULL;
30086 tree decl_op1 = NULL;
30088 if (GET_CODE (p) == MINUS)
30090 if (GET_CODE (XEXP (p, 1)) == SYMBOL_REF)
30092 decl_op1 = SYMBOL_REF_DECL (XEXP (p, 1));
30093 if (decl_op1
30094 && GET_CODE (XEXP (p, 0)) == SYMBOL_REF
30095 && (decl_op0 = SYMBOL_REF_DECL (XEXP (p, 0))))
30097 if ((VAR_P (decl_op1)
30098 || TREE_CODE (decl_op1) == CONST_DECL)
30099 && (VAR_P (decl_op0)
30100 || TREE_CODE (decl_op0) == CONST_DECL))
30101 return (get_variable_section (decl_op1, false)
30102 != get_variable_section (decl_op0, false));
30104 if (TREE_CODE (decl_op1) == LABEL_DECL
30105 && TREE_CODE (decl_op0) == LABEL_DECL)
30106 return (DECL_CONTEXT (decl_op1)
30107 != DECL_CONTEXT (decl_op0));
30110 return true;
30114 return false;
30117 /* return TRUE if x is a reference to a value in a constant pool */
30118 extern bool
30119 arm_is_constant_pool_ref (rtx x)
30121 return (MEM_P (x)
30122 && GET_CODE (XEXP (x, 0)) == SYMBOL_REF
30123 && CONSTANT_POOL_ADDRESS_P (XEXP (x, 0)));
30126 /* Remember the last target of arm_set_current_function. */
30127 static GTY(()) tree arm_previous_fndecl;
30129 /* Restore or save the TREE_TARGET_GLOBALS from or to NEW_TREE. */
30131 void
30132 save_restore_target_globals (tree new_tree)
30134 /* If we have a previous state, use it. */
30135 if (TREE_TARGET_GLOBALS (new_tree))
30136 restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
30137 else if (new_tree == target_option_default_node)
30138 restore_target_globals (&default_target_globals);
30139 else
30141 /* Call target_reinit and save the state for TARGET_GLOBALS. */
30142 TREE_TARGET_GLOBALS (new_tree) = save_target_globals_default_opts ();
30145 arm_option_params_internal ();
30148 /* Invalidate arm_previous_fndecl. */
30150 void
30151 arm_reset_previous_fndecl (void)
30153 arm_previous_fndecl = NULL_TREE;
30156 /* Establish appropriate back-end context for processing the function
30157 FNDECL. The argument might be NULL to indicate processing at top
30158 level, outside of any function scope. */
30160 static void
30161 arm_set_current_function (tree fndecl)
30163 if (!fndecl || fndecl == arm_previous_fndecl)
30164 return;
30166 tree old_tree = (arm_previous_fndecl
30167 ? DECL_FUNCTION_SPECIFIC_TARGET (arm_previous_fndecl)
30168 : NULL_TREE);
30170 tree new_tree = DECL_FUNCTION_SPECIFIC_TARGET (fndecl);
30172 /* If current function has no attributes but previous one did,
30173 use the default node. */
30174 if (! new_tree && old_tree)
30175 new_tree = target_option_default_node;
30177 /* If nothing to do return. #pragma GCC reset or #pragma GCC pop to
30178 the default have been handled by save_restore_target_globals from
30179 arm_pragma_target_parse. */
30180 if (old_tree == new_tree)
30181 return;
30183 arm_previous_fndecl = fndecl;
30185 /* First set the target options. */
30186 cl_target_option_restore (&global_options, TREE_TARGET_OPTION (new_tree));
30188 save_restore_target_globals (new_tree);
30191 /* Implement TARGET_OPTION_PRINT. */
30193 static void
30194 arm_option_print (FILE *file, int indent, struct cl_target_option *ptr)
30196 int flags = ptr->x_target_flags;
30197 const char *fpu_name;
30199 fpu_name = (ptr->x_arm_fpu_index == TARGET_FPU_auto
30200 ? "auto" : all_fpus[ptr->x_arm_fpu_index].name);
30202 fprintf (file, "%*sselected arch %s\n", indent, "",
30203 TARGET_THUMB2_P (flags) ? "thumb2" :
30204 TARGET_THUMB_P (flags) ? "thumb1" :
30205 "arm");
30207 fprintf (file, "%*sselected fpu %s\n", indent, "", fpu_name);
30210 /* Hook to determine if one function can safely inline another. */
30212 static bool
30213 arm_can_inline_p (tree caller, tree callee)
30215 tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
30216 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);
30217 bool can_inline = true;
30219 struct cl_target_option *caller_opts
30220 = TREE_TARGET_OPTION (caller_tree ? caller_tree
30221 : target_option_default_node);
30223 struct cl_target_option *callee_opts
30224 = TREE_TARGET_OPTION (callee_tree ? callee_tree
30225 : target_option_default_node);
30227 if (callee_opts == caller_opts)
30228 return true;
30230 /* Callee's ISA features should be a subset of the caller's. */
30231 struct arm_build_target caller_target;
30232 struct arm_build_target callee_target;
30233 caller_target.isa = sbitmap_alloc (isa_num_bits);
30234 callee_target.isa = sbitmap_alloc (isa_num_bits);
30236 arm_configure_build_target (&caller_target, caller_opts, &global_options_set,
30237 false);
30238 arm_configure_build_target (&callee_target, callee_opts, &global_options_set,
30239 false);
30240 if (!bitmap_subset_p (callee_target.isa, caller_target.isa))
30241 can_inline = false;
30243 sbitmap_free (caller_target.isa);
30244 sbitmap_free (callee_target.isa);
30246 /* OK to inline between different modes.
30247 Function with mode specific instructions, e.g using asm,
30248 must be explicitly protected with noinline. */
30249 return can_inline;
30252 /* Hook to fix function's alignment affected by target attribute. */
30254 static void
30255 arm_relayout_function (tree fndecl)
30257 if (DECL_USER_ALIGN (fndecl))
30258 return;
30260 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (fndecl);
30262 if (!callee_tree)
30263 callee_tree = target_option_default_node;
30265 struct cl_target_option *opts = TREE_TARGET_OPTION (callee_tree);
30266 SET_DECL_ALIGN (fndecl, FUNCTION_BOUNDARY_P (opts->x_target_flags));
30269 /* Inner function to process the attribute((target(...))), take an argument and
30270 set the current options from the argument. If we have a list, recursively
30271 go over the list. */
30273 static bool
30274 arm_valid_target_attribute_rec (tree args, struct gcc_options *opts)
30276 if (TREE_CODE (args) == TREE_LIST)
30278 bool ret = true;
30280 for (; args; args = TREE_CHAIN (args))
30281 if (TREE_VALUE (args)
30282 && !arm_valid_target_attribute_rec (TREE_VALUE (args), opts))
30283 ret = false;
30284 return ret;
30287 else if (TREE_CODE (args) != STRING_CST)
30289 error ("attribute %<target%> argument not a string");
30290 return false;
30293 char *argstr = ASTRDUP (TREE_STRING_POINTER (args));
30294 char *q;
30296 while ((q = strtok (argstr, ",")) != NULL)
30298 while (ISSPACE (*q)) ++q;
30300 argstr = NULL;
30301 if (!strncmp (q, "thumb", 5))
30302 opts->x_target_flags |= MASK_THUMB;
30304 else if (!strncmp (q, "arm", 3))
30305 opts->x_target_flags &= ~MASK_THUMB;
30307 else if (!strncmp (q, "fpu=", 4))
30309 int fpu_index;
30310 if (! opt_enum_arg_to_value (OPT_mfpu_, q+4,
30311 &fpu_index, CL_TARGET))
30313 error ("invalid fpu for attribute(target(\"%s\"))", q);
30314 return false;
30316 if (fpu_index == TARGET_FPU_auto)
30318 /* This doesn't really make sense until we support
30319 general dynamic selection of the architecture and all
30320 sub-features. */
30321 sorry ("auto fpu selection not currently permitted here");
30322 return false;
30324 opts->x_arm_fpu_index = (enum fpu_type) fpu_index;
30326 else
30328 error ("attribute(target(\"%s\")) is unknown", q);
30329 return false;
30333 return true;
30336 /* Return a TARGET_OPTION_NODE tree of the target options listed or NULL. */
30338 tree
30339 arm_valid_target_attribute_tree (tree args, struct gcc_options *opts,
30340 struct gcc_options *opts_set)
30342 tree t;
30344 if (!arm_valid_target_attribute_rec (args, opts))
30345 return NULL_TREE;
30347 t = build_target_option_node (opts);
30348 arm_configure_build_target (&arm_active_target, TREE_TARGET_OPTION (t),
30349 opts_set, false);
30350 arm_option_check_internal (opts);
30351 /* Do any overrides, such as global options arch=xxx. */
30352 arm_option_override_internal (opts, opts_set);
30354 /* Resynchronize the saved target options. */
30355 cl_target_option_save (TREE_TARGET_OPTION (t), opts);
30357 return t;
30360 static void
30361 add_attribute (const char * mode, tree *attributes)
30363 size_t len = strlen (mode);
30364 tree value = build_string (len, mode);
30366 TREE_TYPE (value) = build_array_type (char_type_node,
30367 build_index_type (size_int (len)));
30369 *attributes = tree_cons (get_identifier ("target"),
30370 build_tree_list (NULL_TREE, value),
30371 *attributes);
30374 /* For testing. Insert thumb or arm modes alternatively on functions. */
30376 static void
30377 arm_insert_attributes (tree fndecl, tree * attributes)
30379 const char *mode;
30381 if (! TARGET_FLIP_THUMB)
30382 return;
30384 if (TREE_CODE (fndecl) != FUNCTION_DECL || DECL_EXTERNAL(fndecl)
30385 || DECL_BUILT_IN (fndecl) || DECL_ARTIFICIAL (fndecl))
30386 return;
30388 /* Nested definitions must inherit mode. */
30389 if (current_function_decl)
30391 mode = TARGET_THUMB ? "thumb" : "arm";
30392 add_attribute (mode, attributes);
30393 return;
30396 /* If there is already a setting don't change it. */
30397 if (lookup_attribute ("target", *attributes) != NULL)
30398 return;
30400 mode = thumb_flipper ? "thumb" : "arm";
30401 add_attribute (mode, attributes);
30403 thumb_flipper = !thumb_flipper;
30406 /* Hook to validate attribute((target("string"))). */
30408 static bool
30409 arm_valid_target_attribute_p (tree fndecl, tree ARG_UNUSED (name),
30410 tree args, int ARG_UNUSED (flags))
30412 bool ret = true;
30413 struct gcc_options func_options;
30414 tree cur_tree, new_optimize;
30415 gcc_assert ((fndecl != NULL_TREE) && (args != NULL_TREE));
30417 /* Get the optimization options of the current function. */
30418 tree func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);
30420 /* If the function changed the optimization levels as well as setting target
30421 options, start with the optimizations specified. */
30422 if (!func_optimize)
30423 func_optimize = optimization_default_node;
30425 /* Init func_options. */
30426 memset (&func_options, 0, sizeof (func_options));
30427 init_options_struct (&func_options, NULL);
30428 lang_hooks.init_options_struct (&func_options);
30430 /* Initialize func_options to the defaults. */
30431 cl_optimization_restore (&func_options,
30432 TREE_OPTIMIZATION (func_optimize));
30434 cl_target_option_restore (&func_options,
30435 TREE_TARGET_OPTION (target_option_default_node));
30437 /* Set func_options flags with new target mode. */
30438 cur_tree = arm_valid_target_attribute_tree (args, &func_options,
30439 &global_options_set);
30441 if (cur_tree == NULL_TREE)
30442 ret = false;
30444 new_optimize = build_optimization_node (&func_options);
30446 DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = cur_tree;
30448 DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize;
30450 finalize_options_struct (&func_options);
30452 return ret;
30455 /* Match an ISA feature bitmap to a named FPU. We always use the
30456 first entry that exactly matches the feature set, so that we
30457 effectively canonicalize the FPU name for the assembler. */
30458 static const char*
30459 arm_identify_fpu_from_isa (sbitmap isa)
30461 auto_sbitmap fpubits (isa_num_bits);
30462 auto_sbitmap cand_fpubits (isa_num_bits);
30464 bitmap_and (fpubits, isa, isa_all_fpubits);
30466 /* If there are no ISA feature bits relating to the FPU, we must be
30467 doing soft-float. */
30468 if (bitmap_empty_p (fpubits))
30469 return "softvfp";
30471 for (unsigned int i = 0; i < ARRAY_SIZE (all_fpus); i++)
30473 arm_initialize_isa (cand_fpubits, all_fpus[i].isa_bits);
30474 if (bitmap_equal_p (fpubits, cand_fpubits))
30475 return all_fpus[i].name;
30477 /* We must find an entry, or things have gone wrong. */
30478 gcc_unreachable ();
30481 void
30482 arm_declare_function_name (FILE *stream, const char *name, tree decl)
30485 fprintf (stream, "\t.syntax unified\n");
30487 if (TARGET_THUMB)
30489 if (is_called_in_ARM_mode (decl)
30490 || (TARGET_THUMB1 && !TARGET_THUMB1_ONLY
30491 && cfun->is_thunk))
30492 fprintf (stream, "\t.code 32\n");
30493 else if (TARGET_THUMB1)
30494 fprintf (stream, "\t.code\t16\n\t.thumb_func\n");
30495 else
30496 fprintf (stream, "\t.thumb\n\t.thumb_func\n");
30498 else
30499 fprintf (stream, "\t.arm\n");
30501 asm_fprintf (asm_out_file, "\t.fpu %s\n",
30502 (TARGET_SOFT_FLOAT
30503 ? "softvfp"
30504 : arm_identify_fpu_from_isa (arm_active_target.isa)));
30506 if (TARGET_POKE_FUNCTION_NAME)
30507 arm_poke_function_name (stream, (const char *) name);
30510 /* If MEM is in the form of [base+offset], extract the two parts
30511 of address and set to BASE and OFFSET, otherwise return false
30512 after clearing BASE and OFFSET. */
30514 static bool
30515 extract_base_offset_in_addr (rtx mem, rtx *base, rtx *offset)
30517 rtx addr;
30519 gcc_assert (MEM_P (mem));
30521 addr = XEXP (mem, 0);
30523 /* Strip off const from addresses like (const (addr)). */
30524 if (GET_CODE (addr) == CONST)
30525 addr = XEXP (addr, 0);
30527 if (GET_CODE (addr) == REG)
30529 *base = addr;
30530 *offset = const0_rtx;
30531 return true;
30534 if (GET_CODE (addr) == PLUS
30535 && GET_CODE (XEXP (addr, 0)) == REG
30536 && CONST_INT_P (XEXP (addr, 1)))
30538 *base = XEXP (addr, 0);
30539 *offset = XEXP (addr, 1);
30540 return true;
30543 *base = NULL_RTX;
30544 *offset = NULL_RTX;
30546 return false;
30549 /* If INSN is a load or store of address in the form of [base+offset],
30550 extract the two parts and set to BASE and OFFSET. IS_LOAD is set
30551 to TRUE if it's a load. Return TRUE if INSN is such an instruction,
30552 otherwise return FALSE. */
30554 static bool
30555 fusion_load_store (rtx_insn *insn, rtx *base, rtx *offset, bool *is_load)
30557 rtx x, dest, src;
30559 gcc_assert (INSN_P (insn));
30560 x = PATTERN (insn);
30561 if (GET_CODE (x) != SET)
30562 return false;
30564 src = SET_SRC (x);
30565 dest = SET_DEST (x);
30566 if (GET_CODE (src) == REG && GET_CODE (dest) == MEM)
30568 *is_load = false;
30569 extract_base_offset_in_addr (dest, base, offset);
30571 else if (GET_CODE (src) == MEM && GET_CODE (dest) == REG)
30573 *is_load = true;
30574 extract_base_offset_in_addr (src, base, offset);
30576 else
30577 return false;
30579 return (*base != NULL_RTX && *offset != NULL_RTX);
30582 /* Implement the TARGET_SCHED_FUSION_PRIORITY hook.
30584 Currently we only support to fuse ldr or str instructions, so FUSION_PRI
30585 and PRI are only calculated for these instructions. For other instruction,
30586 FUSION_PRI and PRI are simply set to MAX_PRI. In the future, other kind
30587 instruction fusion can be supported by returning different priorities.
30589 It's important that irrelevant instructions get the largest FUSION_PRI. */
30591 static void
30592 arm_sched_fusion_priority (rtx_insn *insn, int max_pri,
30593 int *fusion_pri, int *pri)
30595 int tmp, off_val;
30596 bool is_load;
30597 rtx base, offset;
30599 gcc_assert (INSN_P (insn));
30601 tmp = max_pri - 1;
30602 if (!fusion_load_store (insn, &base, &offset, &is_load))
30604 *pri = tmp;
30605 *fusion_pri = tmp;
30606 return;
30609 /* Load goes first. */
30610 if (is_load)
30611 *fusion_pri = tmp - 1;
30612 else
30613 *fusion_pri = tmp - 2;
30615 tmp /= 2;
30617 /* INSN with smaller base register goes first. */
30618 tmp -= ((REGNO (base) & 0xff) << 20);
30620 /* INSN with smaller offset goes first. */
30621 off_val = (int)(INTVAL (offset));
30622 if (off_val >= 0)
30623 tmp -= (off_val & 0xfffff);
30624 else
30625 tmp += ((- off_val) & 0xfffff);
30627 *pri = tmp;
30628 return;
30632 /* Construct and return a PARALLEL RTX vector with elements numbering the
30633 lanes of either the high (HIGH == TRUE) or low (HIGH == FALSE) half of
30634 the vector - from the perspective of the architecture. This does not
30635 line up with GCC's perspective on lane numbers, so we end up with
30636 different masks depending on our target endian-ness. The diagram
30637 below may help. We must draw the distinction when building masks
30638 which select one half of the vector. An instruction selecting
30639 architectural low-lanes for a big-endian target, must be described using
30640 a mask selecting GCC high-lanes.
30642 Big-Endian Little-Endian
30644 GCC 0 1 2 3 3 2 1 0
30645 | x | x | x | x | | x | x | x | x |
30646 Architecture 3 2 1 0 3 2 1 0
30648 Low Mask: { 2, 3 } { 0, 1 }
30649 High Mask: { 0, 1 } { 2, 3 }
30653 arm_simd_vect_par_cnst_half (machine_mode mode, bool high)
30655 int nunits = GET_MODE_NUNITS (mode);
30656 rtvec v = rtvec_alloc (nunits / 2);
30657 int high_base = nunits / 2;
30658 int low_base = 0;
30659 int base;
30660 rtx t1;
30661 int i;
30663 if (BYTES_BIG_ENDIAN)
30664 base = high ? low_base : high_base;
30665 else
30666 base = high ? high_base : low_base;
30668 for (i = 0; i < nunits / 2; i++)
30669 RTVEC_ELT (v, i) = GEN_INT (base + i);
30671 t1 = gen_rtx_PARALLEL (mode, v);
30672 return t1;
30675 /* Check OP for validity as a PARALLEL RTX vector with elements
30676 numbering the lanes of either the high (HIGH == TRUE) or low lanes,
30677 from the perspective of the architecture. See the diagram above
30678 arm_simd_vect_par_cnst_half_p for more details. */
30680 bool
30681 arm_simd_check_vect_par_cnst_half_p (rtx op, machine_mode mode,
30682 bool high)
30684 rtx ideal = arm_simd_vect_par_cnst_half (mode, high);
30685 HOST_WIDE_INT count_op = XVECLEN (op, 0);
30686 HOST_WIDE_INT count_ideal = XVECLEN (ideal, 0);
30687 int i = 0;
30689 if (!VECTOR_MODE_P (mode))
30690 return false;
30692 if (count_op != count_ideal)
30693 return false;
30695 for (i = 0; i < count_ideal; i++)
30697 rtx elt_op = XVECEXP (op, 0, i);
30698 rtx elt_ideal = XVECEXP (ideal, 0, i);
30700 if (!CONST_INT_P (elt_op)
30701 || INTVAL (elt_ideal) != INTVAL (elt_op))
30702 return false;
30704 return true;
30707 /* Can output mi_thunk for all cases except for non-zero vcall_offset
30708 in Thumb1. */
30709 static bool
30710 arm_can_output_mi_thunk (const_tree, HOST_WIDE_INT, HOST_WIDE_INT vcall_offset,
30711 const_tree)
30713 /* For now, we punt and not handle this for TARGET_THUMB1. */
30714 if (vcall_offset && TARGET_THUMB1)
30715 return false;
30717 /* Otherwise ok. */
30718 return true;
30721 /* Generate RTL for a conditional branch with rtx comparison CODE in
30722 mode CC_MODE. The destination of the unlikely conditional branch
30723 is LABEL_REF. */
30725 void
30726 arm_gen_unlikely_cbranch (enum rtx_code code, machine_mode cc_mode,
30727 rtx label_ref)
30729 rtx x;
30730 x = gen_rtx_fmt_ee (code, VOIDmode,
30731 gen_rtx_REG (cc_mode, CC_REGNUM),
30732 const0_rtx);
30734 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
30735 gen_rtx_LABEL_REF (VOIDmode, label_ref),
30736 pc_rtx);
30737 emit_unlikely_jump (gen_rtx_SET (pc_rtx, x));
30740 /* Implement the TARGET_ASM_ELF_FLAGS_NUMERIC hook.
30742 For pure-code sections there is no letter code for this attribute, so
30743 output all the section flags numerically when this is needed. */
30745 static bool
30746 arm_asm_elf_flags_numeric (unsigned int flags, unsigned int *num)
30749 if (flags & SECTION_ARM_PURECODE)
30751 *num = 0x20000000;
30753 if (!(flags & SECTION_DEBUG))
30754 *num |= 0x2;
30755 if (flags & SECTION_EXCLUDE)
30756 *num |= 0x80000000;
30757 if (flags & SECTION_WRITE)
30758 *num |= 0x1;
30759 if (flags & SECTION_CODE)
30760 *num |= 0x4;
30761 if (flags & SECTION_MERGE)
30762 *num |= 0x10;
30763 if (flags & SECTION_STRINGS)
30764 *num |= 0x20;
30765 if (flags & SECTION_TLS)
30766 *num |= 0x400;
30767 if (HAVE_COMDAT_GROUP && (flags & SECTION_LINKONCE))
30768 *num |= 0x200;
30770 return true;
30773 return false;
30776 /* Implement the TARGET_ASM_FUNCTION_SECTION hook.
30778 If pure-code is passed as an option, make sure all functions are in
30779 sections that have the SHF_ARM_PURECODE attribute. */
30781 static section *
30782 arm_function_section (tree decl, enum node_frequency freq,
30783 bool startup, bool exit)
30785 const char * section_name;
30786 section * sec;
30788 if (!decl || TREE_CODE (decl) != FUNCTION_DECL)
30789 return default_function_section (decl, freq, startup, exit);
30791 if (!target_pure_code)
30792 return default_function_section (decl, freq, startup, exit);
30795 section_name = DECL_SECTION_NAME (decl);
30797 /* If a function is not in a named section then it falls under the 'default'
30798 text section, also known as '.text'. We can preserve previous behavior as
30799 the default text section already has the SHF_ARM_PURECODE section
30800 attribute. */
30801 if (!section_name)
30803 section *default_sec = default_function_section (decl, freq, startup,
30804 exit);
30806 /* If default_sec is not null, then it must be a special section like for
30807 example .text.startup. We set the pure-code attribute and return the
30808 same section to preserve existing behavior. */
30809 if (default_sec)
30810 default_sec->common.flags |= SECTION_ARM_PURECODE;
30811 return default_sec;
30814 /* Otherwise look whether a section has already been created with
30815 'section_name'. */
30816 sec = get_named_section (decl, section_name, 0);
30817 if (!sec)
30818 /* If that is not the case passing NULL as the section's name to
30819 'get_named_section' will create a section with the declaration's
30820 section name. */
30821 sec = get_named_section (decl, NULL, 0);
30823 /* Set the SHF_ARM_PURECODE attribute. */
30824 sec->common.flags |= SECTION_ARM_PURECODE;
30826 return sec;
30829 /* Implements the TARGET_SECTION_FLAGS hook.
30831 If DECL is a function declaration and pure-code is passed as an option
30832 then add the SFH_ARM_PURECODE attribute to the section flags. NAME is the
30833 section's name and RELOC indicates whether the declarations initializer may
30834 contain runtime relocations. */
30836 static unsigned int
30837 arm_elf_section_type_flags (tree decl, const char *name, int reloc)
30839 unsigned int flags = default_section_type_flags (decl, name, reloc);
30841 if (decl && TREE_CODE (decl) == FUNCTION_DECL && target_pure_code)
30842 flags |= SECTION_ARM_PURECODE;
30844 return flags;
30847 /* Generate call to __aeabi_[mode]divmod (op0, op1). */
30849 static void
30850 arm_expand_divmod_libfunc (rtx libfunc, machine_mode mode,
30851 rtx op0, rtx op1,
30852 rtx *quot_p, rtx *rem_p)
30854 if (mode == SImode)
30855 gcc_assert (!TARGET_IDIV);
30857 machine_mode libval_mode = smallest_mode_for_size (2 * GET_MODE_BITSIZE (mode),
30858 MODE_INT);
30860 rtx libval = emit_library_call_value (libfunc, NULL_RTX, LCT_CONST,
30861 libval_mode, 2,
30862 op0, GET_MODE (op0),
30863 op1, GET_MODE (op1));
30865 rtx quotient = simplify_gen_subreg (mode, libval, libval_mode, 0);
30866 rtx remainder = simplify_gen_subreg (mode, libval, libval_mode,
30867 GET_MODE_SIZE (mode));
30869 gcc_assert (quotient);
30870 gcc_assert (remainder);
30872 *quot_p = quotient;
30873 *rem_p = remainder;
30876 /* This function checks for the availability of the coprocessor builtin passed
30877 in BUILTIN for the current target. Returns true if it is available and
30878 false otherwise. If a BUILTIN is passed for which this function has not
30879 been implemented it will cause an exception. */
30881 bool
30882 arm_coproc_builtin_available (enum unspecv builtin)
30884 /* None of these builtins are available in Thumb mode if the target only
30885 supports Thumb-1. */
30886 if (TARGET_THUMB1)
30887 return false;
30889 switch (builtin)
30891 case VUNSPEC_CDP:
30892 case VUNSPEC_LDC:
30893 case VUNSPEC_LDCL:
30894 case VUNSPEC_STC:
30895 case VUNSPEC_STCL:
30896 case VUNSPEC_MCR:
30897 case VUNSPEC_MRC:
30898 if (arm_arch4)
30899 return true;
30900 break;
30901 case VUNSPEC_CDP2:
30902 case VUNSPEC_LDC2:
30903 case VUNSPEC_LDC2L:
30904 case VUNSPEC_STC2:
30905 case VUNSPEC_STC2L:
30906 case VUNSPEC_MCR2:
30907 case VUNSPEC_MRC2:
30908 /* Only present in ARMv5*, ARMv6 (but not ARMv6-M), ARMv7* and
30909 ARMv8-{A,M}. */
30910 if (arm_arch5)
30911 return true;
30912 break;
30913 case VUNSPEC_MCRR:
30914 case VUNSPEC_MRRC:
30915 /* Only present in ARMv5TE, ARMv6 (but not ARMv6-M), ARMv7* and
30916 ARMv8-{A,M}. */
30917 if (arm_arch6 || arm_arch5te)
30918 return true;
30919 break;
30920 case VUNSPEC_MCRR2:
30921 case VUNSPEC_MRRC2:
30922 if (arm_arch6)
30923 return true;
30924 break;
30925 default:
30926 gcc_unreachable ();
30928 return false;
30931 /* This function returns true if OP is a valid memory operand for the ldc and
30932 stc coprocessor instructions and false otherwise. */
30934 bool
30935 arm_coproc_ldc_stc_legitimate_address (rtx op)
30937 HOST_WIDE_INT range;
30938 /* Has to be a memory operand. */
30939 if (!MEM_P (op))
30940 return false;
30942 op = XEXP (op, 0);
30944 /* We accept registers. */
30945 if (REG_P (op))
30946 return true;
30948 switch GET_CODE (op)
30950 case PLUS:
30952 /* Or registers with an offset. */
30953 if (!REG_P (XEXP (op, 0)))
30954 return false;
30956 op = XEXP (op, 1);
30958 /* The offset must be an immediate though. */
30959 if (!CONST_INT_P (op))
30960 return false;
30962 range = INTVAL (op);
30964 /* Within the range of [-1020,1020]. */
30965 if (!IN_RANGE (range, -1020, 1020))
30966 return false;
30968 /* And a multiple of 4. */
30969 return (range % 4) == 0;
30971 case PRE_INC:
30972 case POST_INC:
30973 case PRE_DEC:
30974 case POST_DEC:
30975 return REG_P (XEXP (op, 0));
30976 default:
30977 gcc_unreachable ();
30979 return false;
30981 #include "gt-arm.h"