PR82045: Avoid passing machine modes through "..."
[official-gcc.git] / gcc / config / arm / arm.c
blob b27853464a55dcf22339298da30431aba900180e
1 /* Output routines for GCC for ARM.
2 Copyright (C) 1991-2017 Free Software Foundation, Inc.
3 Contributed by Pieter `Tiggr' Schoenmakers (rcpieter@win.tue.nl)
4 and Martin Simmons (@harleqn.co.uk).
5 More major hacks by Richard Earnshaw (rearnsha@arm.com).
7 This file is part of GCC.
9 GCC is free software; you can redistribute it and/or modify it
10 under the terms of the GNU General Public License as published
11 by the Free Software Foundation; either version 3, or (at your
12 option) any later version.
14 GCC is distributed in the hope that it will be useful, but WITHOUT
15 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
16 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
17 License for more details.
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3. If not see
21 <http://www.gnu.org/licenses/>. */
23 #include "config.h"
24 #include "system.h"
25 #include "coretypes.h"
26 #include "backend.h"
27 #include "target.h"
28 #include "rtl.h"
29 #include "tree.h"
30 #include "memmodel.h"
31 #include "cfghooks.h"
32 #include "df.h"
33 #include "tm_p.h"
34 #include "stringpool.h"
35 #include "attribs.h"
36 #include "optabs.h"
37 #include "regs.h"
38 #include "emit-rtl.h"
39 #include "recog.h"
40 #include "cgraph.h"
41 #include "diagnostic-core.h"
42 #include "alias.h"
43 #include "fold-const.h"
44 #include "stor-layout.h"
45 #include "calls.h"
46 #include "varasm.h"
47 #include "output.h"
48 #include "insn-attr.h"
49 #include "flags.h"
50 #include "reload.h"
51 #include "explow.h"
52 #include "expr.h"
53 #include "cfgrtl.h"
54 #include "sched-int.h"
55 #include "common/common-target.h"
56 #include "langhooks.h"
57 #include "intl.h"
58 #include "libfuncs.h"
59 #include "params.h"
60 #include "opts.h"
61 #include "dumpfile.h"
62 #include "target-globals.h"
63 #include "builtins.h"
64 #include "tm-constrs.h"
65 #include "rtl-iter.h"
66 #include "optabs-libfuncs.h"
67 #include "gimplify.h"
68 #include "gimple.h"
69 #include "selftest.h"
71 /* This file should be included last. */
72 #include "target-def.h"
74 /* Forward definitions of types. */
75 typedef struct minipool_node Mnode;
76 typedef struct minipool_fixup Mfix;
78 void (*arm_lang_output_object_attributes_hook)(void);
80 struct four_ints
81 {
82 int i[4];
83 };
85 /* Forward function declarations. */
86 static bool arm_const_not_ok_for_debug_p (rtx);
87 static int arm_needs_doubleword_align (machine_mode, const_tree);
88 static int arm_compute_static_chain_stack_bytes (void);
89 static arm_stack_offsets *arm_get_frame_offsets (void);
90 static void arm_compute_frame_layout (void);
91 static void arm_add_gc_roots (void);
92 static int arm_gen_constant (enum rtx_code, machine_mode, rtx,
93 unsigned HOST_WIDE_INT, rtx, rtx, int, int);
94 static unsigned bit_count (unsigned long);
95 static unsigned bitmap_popcount (const sbitmap);
96 static int arm_address_register_rtx_p (rtx, int);
97 static int arm_legitimate_index_p (machine_mode, rtx, RTX_CODE, int);
98 static bool is_called_in_ARM_mode (tree);
99 static int thumb2_legitimate_index_p (machine_mode, rtx, int);
100 static int thumb1_base_register_rtx_p (rtx, machine_mode, int);
101 static rtx arm_legitimize_address (rtx, rtx, machine_mode);
102 static reg_class_t arm_preferred_reload_class (rtx, reg_class_t);
103 static rtx thumb_legitimize_address (rtx, rtx, machine_mode);
104 inline static int thumb1_index_register_rtx_p (rtx, int);
105 static int thumb_far_jump_used_p (void);
106 static bool thumb_force_lr_save (void);
107 static unsigned arm_size_return_regs (void);
108 static bool arm_assemble_integer (rtx, unsigned int, int);
109 static void arm_print_operand (FILE *, rtx, int);
110 static void arm_print_operand_address (FILE *, machine_mode, rtx);
111 static bool arm_print_operand_punct_valid_p (unsigned char code);
112 static const char *fp_const_from_val (REAL_VALUE_TYPE *);
113 static arm_cc get_arm_condition_code (rtx);
114 static bool arm_fixed_condition_code_regs (unsigned int *, unsigned int *);
115 static const char *output_multi_immediate (rtx *, const char *, const char *,
116 int, HOST_WIDE_INT);
117 static const char *shift_op (rtx, HOST_WIDE_INT *);
118 static struct machine_function *arm_init_machine_status (void);
119 static void thumb_exit (FILE *, int);
120 static HOST_WIDE_INT get_jump_table_size (rtx_jump_table_data *);
121 static Mnode *move_minipool_fix_forward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
122 static Mnode *add_minipool_forward_ref (Mfix *);
123 static Mnode *move_minipool_fix_backward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
124 static Mnode *add_minipool_backward_ref (Mfix *);
125 static void assign_minipool_offsets (Mfix *);
126 static void arm_print_value (FILE *, rtx);
127 static void dump_minipool (rtx_insn *);
128 static int arm_barrier_cost (rtx_insn *);
129 static Mfix *create_fix_barrier (Mfix *, HOST_WIDE_INT);
130 static void push_minipool_barrier (rtx_insn *, HOST_WIDE_INT);
131 static void push_minipool_fix (rtx_insn *, HOST_WIDE_INT, rtx *,
132 machine_mode, rtx);
133 static void arm_reorg (void);
134 static void note_invalid_constants (rtx_insn *, HOST_WIDE_INT, int);
135 static unsigned long arm_compute_save_reg0_reg12_mask (void);
136 static unsigned long arm_compute_save_core_reg_mask (void);
137 static unsigned long arm_isr_value (tree);
138 static unsigned long arm_compute_func_type (void);
139 static tree arm_handle_fndecl_attribute (tree *, tree, tree, int, bool *);
140 static tree arm_handle_pcs_attribute (tree *, tree, tree, int, bool *);
141 static tree arm_handle_isr_attribute (tree *, tree, tree, int, bool *);
142 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
143 static tree arm_handle_notshared_attribute (tree *, tree, tree, int, bool *);
144 #endif
145 static tree arm_handle_cmse_nonsecure_entry (tree *, tree, tree, int, bool *);
146 static tree arm_handle_cmse_nonsecure_call (tree *, tree, tree, int, bool *);
147 static void arm_output_function_epilogue (FILE *);
148 static void arm_output_function_prologue (FILE *);
149 static int arm_comp_type_attributes (const_tree, const_tree);
150 static void arm_set_default_type_attributes (tree);
151 static int arm_adjust_cost (rtx_insn *, int, rtx_insn *, int, unsigned int);
152 static int arm_sched_reorder (FILE *, int, rtx_insn **, int *, int);
153 static int optimal_immediate_sequence (enum rtx_code code,
154 unsigned HOST_WIDE_INT val,
155 struct four_ints *return_sequence);
156 static int optimal_immediate_sequence_1 (enum rtx_code code,
157 unsigned HOST_WIDE_INT val,
158 struct four_ints *return_sequence,
159 int i);
160 static int arm_get_strip_length (int);
161 static bool arm_function_ok_for_sibcall (tree, tree);
162 static machine_mode arm_promote_function_mode (const_tree,
163 machine_mode, int *,
164 const_tree, int);
165 static bool arm_return_in_memory (const_tree, const_tree);
166 static rtx arm_function_value (const_tree, const_tree, bool);
167 static rtx arm_libcall_value_1 (machine_mode);
168 static rtx arm_libcall_value (machine_mode, const_rtx);
169 static bool arm_function_value_regno_p (const unsigned int);
170 static void arm_internal_label (FILE *, const char *, unsigned long);
171 static void arm_output_mi_thunk (FILE *, tree, HOST_WIDE_INT, HOST_WIDE_INT,
172 tree);
173 static bool arm_have_conditional_execution (void);
174 static bool arm_cannot_force_const_mem (machine_mode, rtx);
175 static bool arm_legitimate_constant_p (machine_mode, rtx);
176 static bool arm_rtx_costs (rtx, machine_mode, int, int, int *, bool);
177 static int arm_address_cost (rtx, machine_mode, addr_space_t, bool);
178 static int arm_register_move_cost (machine_mode, reg_class_t, reg_class_t);
179 static int arm_memory_move_cost (machine_mode, reg_class_t, bool);
180 static void emit_constant_insn (rtx cond, rtx pattern);
181 static rtx_insn *emit_set_insn (rtx, rtx);
182 static rtx emit_multi_reg_push (unsigned long, unsigned long);
183 static int arm_arg_partial_bytes (cumulative_args_t, machine_mode,
184 tree, bool);
185 static rtx arm_function_arg (cumulative_args_t, machine_mode,
186 const_tree, bool);
187 static void arm_function_arg_advance (cumulative_args_t, machine_mode,
188 const_tree, bool);
189 static unsigned int arm_function_arg_boundary (machine_mode, const_tree);
190 static rtx aapcs_allocate_return_reg (machine_mode, const_tree,
191 const_tree);
192 static rtx aapcs_libcall_value (machine_mode);
193 static int aapcs_select_return_coproc (const_tree, const_tree);
195 #ifdef OBJECT_FORMAT_ELF
196 static void arm_elf_asm_constructor (rtx, int) ATTRIBUTE_UNUSED;
197 static void arm_elf_asm_destructor (rtx, int) ATTRIBUTE_UNUSED;
198 #endif
199 #ifndef ARM_PE
200 static void arm_encode_section_info (tree, rtx, int);
201 #endif
203 static void arm_file_end (void);
204 static void arm_file_start (void);
205 static void arm_insert_attributes (tree, tree *);
207 static void arm_setup_incoming_varargs (cumulative_args_t, machine_mode,
208 tree, int *, int);
209 static bool arm_pass_by_reference (cumulative_args_t,
210 machine_mode, const_tree, bool);
211 static bool arm_promote_prototypes (const_tree);
212 static bool arm_default_short_enums (void);
213 static bool arm_align_anon_bitfield (void);
214 static bool arm_return_in_msb (const_tree);
215 static bool arm_must_pass_in_stack (machine_mode, const_tree);
216 static bool arm_return_in_memory (const_tree, const_tree);
217 #if ARM_UNWIND_INFO
218 static void arm_unwind_emit (FILE *, rtx_insn *);
219 static bool arm_output_ttype (rtx);
220 static void arm_asm_emit_except_personality (rtx);
221 #endif
222 static void arm_asm_init_sections (void);
223 static rtx arm_dwarf_register_span (rtx);
225 static tree arm_cxx_guard_type (void);
226 static bool arm_cxx_guard_mask_bit (void);
227 static tree arm_get_cookie_size (tree);
228 static bool arm_cookie_has_size (void);
229 static bool arm_cxx_cdtor_returns_this (void);
230 static bool arm_cxx_key_method_may_be_inline (void);
231 static void arm_cxx_determine_class_data_visibility (tree);
232 static bool arm_cxx_class_data_always_comdat (void);
233 static bool arm_cxx_use_aeabi_atexit (void);
234 static void arm_init_libfuncs (void);
235 static tree arm_build_builtin_va_list (void);
236 static void arm_expand_builtin_va_start (tree, rtx);
237 static tree arm_gimplify_va_arg_expr (tree, tree, gimple_seq *, gimple_seq *);
238 static void arm_option_override (void);
239 static void arm_option_save (struct cl_target_option *, struct gcc_options *);
240 static void arm_option_restore (struct gcc_options *,
241 struct cl_target_option *);
242 static void arm_override_options_after_change (void);
243 static void arm_option_print (FILE *, int, struct cl_target_option *);
244 static void arm_set_current_function (tree);
245 static bool arm_can_inline_p (tree, tree);
246 static void arm_relayout_function (tree);
247 static bool arm_valid_target_attribute_p (tree, tree, tree, int);
248 static unsigned HOST_WIDE_INT arm_shift_truncation_mask (machine_mode);
249 static bool arm_sched_can_speculate_insn (rtx_insn *);
250 static bool arm_macro_fusion_p (void);
251 static bool arm_cannot_copy_insn_p (rtx_insn *);
252 static int arm_issue_rate (void);
253 static int arm_first_cycle_multipass_dfa_lookahead (void);
254 static int arm_first_cycle_multipass_dfa_lookahead_guard (rtx_insn *, int);
255 static void arm_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
256 static bool arm_output_addr_const_extra (FILE *, rtx);
257 static bool arm_allocate_stack_slots_for_args (void);
258 static bool arm_warn_func_return (tree);
259 static tree arm_promoted_type (const_tree t);
260 static bool arm_scalar_mode_supported_p (scalar_mode);
261 static bool arm_frame_pointer_required (void);
262 static bool arm_can_eliminate (const int, const int);
263 static void arm_asm_trampoline_template (FILE *);
264 static void arm_trampoline_init (rtx, tree, rtx);
265 static rtx arm_trampoline_adjust_address (rtx);
266 static rtx_insn *arm_pic_static_addr (rtx orig, rtx reg);
267 static bool cortex_a9_sched_adjust_cost (rtx_insn *, int, rtx_insn *, int *);
268 static bool xscale_sched_adjust_cost (rtx_insn *, int, rtx_insn *, int *);
269 static bool fa726te_sched_adjust_cost (rtx_insn *, int, rtx_insn *, int *);
270 static bool arm_array_mode_supported_p (machine_mode,
271 unsigned HOST_WIDE_INT);
272 static machine_mode arm_preferred_simd_mode (scalar_mode);
273 static bool arm_class_likely_spilled_p (reg_class_t);
274 static HOST_WIDE_INT arm_vector_alignment (const_tree type);
275 static bool arm_vector_alignment_reachable (const_tree type, bool is_packed);
276 static bool arm_builtin_support_vector_misalignment (machine_mode mode,
277 const_tree type,
278 int misalignment,
279 bool is_packed);
280 static void arm_conditional_register_usage (void);
281 static enum flt_eval_method arm_excess_precision (enum excess_precision_type);
282 static reg_class_t arm_preferred_rename_class (reg_class_t rclass);
283 static unsigned int arm_autovectorize_vector_sizes (void);
284 static int arm_default_branch_cost (bool, bool);
285 static int arm_cortex_a5_branch_cost (bool, bool);
286 static int arm_cortex_m_branch_cost (bool, bool);
287 static int arm_cortex_m7_branch_cost (bool, bool);
289 static bool arm_vectorize_vec_perm_const_ok (machine_mode vmode,
290 const unsigned char *sel);
292 static bool aarch_macro_fusion_pair_p (rtx_insn*, rtx_insn*);
294 static int arm_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
295 tree vectype,
296 int misalign ATTRIBUTE_UNUSED);
297 static unsigned arm_add_stmt_cost (void *data, int count,
298 enum vect_cost_for_stmt kind,
299 struct _stmt_vec_info *stmt_info,
300 int misalign,
301 enum vect_cost_model_location where);
303 static void arm_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
304 bool op0_preserve_value);
305 static unsigned HOST_WIDE_INT arm_asan_shadow_offset (void);
307 static void arm_sched_fusion_priority (rtx_insn *, int, int *, int*);
308 static bool arm_can_output_mi_thunk (const_tree, HOST_WIDE_INT, HOST_WIDE_INT,
309 const_tree);
310 static section *arm_function_section (tree, enum node_frequency, bool, bool);
311 static bool arm_asm_elf_flags_numeric (unsigned int flags, unsigned int *num);
312 static unsigned int arm_elf_section_type_flags (tree decl, const char *name,
313 int reloc);
314 static void arm_expand_divmod_libfunc (rtx, machine_mode, rtx, rtx, rtx *, rtx *);
315 static opt_scalar_float_mode arm_floatn_mode (int, bool);
317 /* Table of machine attributes. */
318 static const struct attribute_spec arm_attribute_table[] =
319 {
320 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
321 affects_type_identity } */
322 /* Function calls made to this symbol must be done indirectly, because
323 it may lie outside of the 26 bit addressing range of a normal function
324 call. */
325 { "long_call", 0, 0, false, true, true, NULL, false },
326 /* Whereas these functions are always known to reside within the 26 bit
327 addressing range. */
328 { "short_call", 0, 0, false, true, true, NULL, false },
329 /* Specify the procedure call conventions for a function. */
330 { "pcs", 1, 1, false, true, true, arm_handle_pcs_attribute,
331 false },
332 /* Interrupt Service Routines have special prologue and epilogue requirements. */
333 { "isr", 0, 1, false, false, false, arm_handle_isr_attribute,
334 false },
335 { "interrupt", 0, 1, false, false, false, arm_handle_isr_attribute,
336 false },
337 { "naked", 0, 0, true, false, false, arm_handle_fndecl_attribute,
338 false },
339 #ifdef ARM_PE
340 /* ARM/PE has three new attributes:
341 interfacearm - ?
342 dllexport - for exporting a function/variable that will live in a dll
343 dllimport - for importing a function/variable from a dll
345 Microsoft allows multiple declspecs in one __declspec, separating
346 them with spaces. We do NOT support this. Instead, use __declspec
347 multiple times. */
349 { "dllimport", 0, 0, true, false, false, NULL, false },
350 { "dllexport", 0, 0, true, false, false, NULL, false },
351 { "interfacearm", 0, 0, true, false, false, arm_handle_fndecl_attribute,
352 false },
353 #elif TARGET_DLLIMPORT_DECL_ATTRIBUTES
354 { "dllimport", 0, 0, false, false, false, handle_dll_attribute, false },
355 { "dllexport", 0, 0, false, false, false, handle_dll_attribute, false },
356 { "notshared", 0, 0, false, true, false, arm_handle_notshared_attribute,
357 false },
358 #endif
359 /* ARMv8-M Security Extensions support. */
360 { "cmse_nonsecure_entry", 0, 0, true, false, false,
361 arm_handle_cmse_nonsecure_entry, false },
362 { "cmse_nonsecure_call", 0, 0, true, false, false,
363 arm_handle_cmse_nonsecure_call, true },
364 { NULL, 0, 0, false, false, false, NULL, false }
365 };
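/* Illustrative sketch of how user code might apply some of the attributes
   registered above; the function names are hypothetical and the block is
   guarded with "#if 0" so it has no effect on compilation.  */
#if 0
/* Force an indirect call sequence: the callee may lie outside the 26-bit
   branch range.  */
extern void far_away_handler (void) __attribute__ ((long_call));

/* Interrupt service routine: prologue and epilogue follow ISR conventions.  */
void uart_irq_handler (void) __attribute__ ((interrupt ("IRQ")));

/* ARMv8-M Security Extensions: an entry point callable from non-secure
   state (requires -mcmse).  */
int secure_service (int request) __attribute__ ((cmse_nonsecure_entry));
#endif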
367 /* Initialize the GCC target structure. */
368 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
369 #undef TARGET_MERGE_DECL_ATTRIBUTES
370 #define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
371 #endif
373 #undef TARGET_LEGITIMIZE_ADDRESS
374 #define TARGET_LEGITIMIZE_ADDRESS arm_legitimize_address
376 #undef TARGET_ATTRIBUTE_TABLE
377 #define TARGET_ATTRIBUTE_TABLE arm_attribute_table
379 #undef TARGET_INSERT_ATTRIBUTES
380 #define TARGET_INSERT_ATTRIBUTES arm_insert_attributes
382 #undef TARGET_ASM_FILE_START
383 #define TARGET_ASM_FILE_START arm_file_start
384 #undef TARGET_ASM_FILE_END
385 #define TARGET_ASM_FILE_END arm_file_end
387 #undef TARGET_ASM_ALIGNED_SI_OP
388 #define TARGET_ASM_ALIGNED_SI_OP NULL
389 #undef TARGET_ASM_INTEGER
390 #define TARGET_ASM_INTEGER arm_assemble_integer
392 #undef TARGET_PRINT_OPERAND
393 #define TARGET_PRINT_OPERAND arm_print_operand
394 #undef TARGET_PRINT_OPERAND_ADDRESS
395 #define TARGET_PRINT_OPERAND_ADDRESS arm_print_operand_address
396 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
397 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P arm_print_operand_punct_valid_p
399 #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
400 #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA arm_output_addr_const_extra
402 #undef TARGET_ASM_FUNCTION_PROLOGUE
403 #define TARGET_ASM_FUNCTION_PROLOGUE arm_output_function_prologue
405 #undef TARGET_ASM_FUNCTION_EPILOGUE
406 #define TARGET_ASM_FUNCTION_EPILOGUE arm_output_function_epilogue
408 #undef TARGET_CAN_INLINE_P
409 #define TARGET_CAN_INLINE_P arm_can_inline_p
411 #undef TARGET_RELAYOUT_FUNCTION
412 #define TARGET_RELAYOUT_FUNCTION arm_relayout_function
414 #undef TARGET_OPTION_OVERRIDE
415 #define TARGET_OPTION_OVERRIDE arm_option_override
417 #undef TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE
418 #define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE arm_override_options_after_change
420 #undef TARGET_OPTION_SAVE
421 #define TARGET_OPTION_SAVE arm_option_save
423 #undef TARGET_OPTION_RESTORE
424 #define TARGET_OPTION_RESTORE arm_option_restore
426 #undef TARGET_OPTION_PRINT
427 #define TARGET_OPTION_PRINT arm_option_print
429 #undef TARGET_COMP_TYPE_ATTRIBUTES
430 #define TARGET_COMP_TYPE_ATTRIBUTES arm_comp_type_attributes
432 #undef TARGET_SCHED_CAN_SPECULATE_INSN
433 #define TARGET_SCHED_CAN_SPECULATE_INSN arm_sched_can_speculate_insn
435 #undef TARGET_SCHED_MACRO_FUSION_P
436 #define TARGET_SCHED_MACRO_FUSION_P arm_macro_fusion_p
438 #undef TARGET_SCHED_MACRO_FUSION_PAIR_P
439 #define TARGET_SCHED_MACRO_FUSION_PAIR_P aarch_macro_fusion_pair_p
441 #undef TARGET_SET_DEFAULT_TYPE_ATTRIBUTES
442 #define TARGET_SET_DEFAULT_TYPE_ATTRIBUTES arm_set_default_type_attributes
444 #undef TARGET_SCHED_ADJUST_COST
445 #define TARGET_SCHED_ADJUST_COST arm_adjust_cost
447 #undef TARGET_SET_CURRENT_FUNCTION
448 #define TARGET_SET_CURRENT_FUNCTION arm_set_current_function
450 #undef TARGET_OPTION_VALID_ATTRIBUTE_P
451 #define TARGET_OPTION_VALID_ATTRIBUTE_P arm_valid_target_attribute_p
453 #undef TARGET_SCHED_REORDER
454 #define TARGET_SCHED_REORDER arm_sched_reorder
456 #undef TARGET_REGISTER_MOVE_COST
457 #define TARGET_REGISTER_MOVE_COST arm_register_move_cost
459 #undef TARGET_MEMORY_MOVE_COST
460 #define TARGET_MEMORY_MOVE_COST arm_memory_move_cost
462 #undef TARGET_ENCODE_SECTION_INFO
463 #ifdef ARM_PE
464 #define TARGET_ENCODE_SECTION_INFO arm_pe_encode_section_info
465 #else
466 #define TARGET_ENCODE_SECTION_INFO arm_encode_section_info
467 #endif
469 #undef TARGET_STRIP_NAME_ENCODING
470 #define TARGET_STRIP_NAME_ENCODING arm_strip_name_encoding
472 #undef TARGET_ASM_INTERNAL_LABEL
473 #define TARGET_ASM_INTERNAL_LABEL arm_internal_label
475 #undef TARGET_FLOATN_MODE
476 #define TARGET_FLOATN_MODE arm_floatn_mode
478 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
479 #define TARGET_FUNCTION_OK_FOR_SIBCALL arm_function_ok_for_sibcall
481 #undef TARGET_FUNCTION_VALUE
482 #define TARGET_FUNCTION_VALUE arm_function_value
484 #undef TARGET_LIBCALL_VALUE
485 #define TARGET_LIBCALL_VALUE arm_libcall_value
487 #undef TARGET_FUNCTION_VALUE_REGNO_P
488 #define TARGET_FUNCTION_VALUE_REGNO_P arm_function_value_regno_p
490 #undef TARGET_ASM_OUTPUT_MI_THUNK
491 #define TARGET_ASM_OUTPUT_MI_THUNK arm_output_mi_thunk
492 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
493 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK arm_can_output_mi_thunk
495 #undef TARGET_RTX_COSTS
496 #define TARGET_RTX_COSTS arm_rtx_costs
497 #undef TARGET_ADDRESS_COST
498 #define TARGET_ADDRESS_COST arm_address_cost
500 #undef TARGET_SHIFT_TRUNCATION_MASK
501 #define TARGET_SHIFT_TRUNCATION_MASK arm_shift_truncation_mask
502 #undef TARGET_VECTOR_MODE_SUPPORTED_P
503 #define TARGET_VECTOR_MODE_SUPPORTED_P arm_vector_mode_supported_p
504 #undef TARGET_ARRAY_MODE_SUPPORTED_P
505 #define TARGET_ARRAY_MODE_SUPPORTED_P arm_array_mode_supported_p
506 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
507 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE arm_preferred_simd_mode
508 #undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
509 #define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
510 arm_autovectorize_vector_sizes
512 #undef TARGET_MACHINE_DEPENDENT_REORG
513 #define TARGET_MACHINE_DEPENDENT_REORG arm_reorg
515 #undef TARGET_INIT_BUILTINS
516 #define TARGET_INIT_BUILTINS arm_init_builtins
517 #undef TARGET_EXPAND_BUILTIN
518 #define TARGET_EXPAND_BUILTIN arm_expand_builtin
519 #undef TARGET_BUILTIN_DECL
520 #define TARGET_BUILTIN_DECL arm_builtin_decl
522 #undef TARGET_INIT_LIBFUNCS
523 #define TARGET_INIT_LIBFUNCS arm_init_libfuncs
525 #undef TARGET_PROMOTE_FUNCTION_MODE
526 #define TARGET_PROMOTE_FUNCTION_MODE arm_promote_function_mode
527 #undef TARGET_PROMOTE_PROTOTYPES
528 #define TARGET_PROMOTE_PROTOTYPES arm_promote_prototypes
529 #undef TARGET_PASS_BY_REFERENCE
530 #define TARGET_PASS_BY_REFERENCE arm_pass_by_reference
531 #undef TARGET_ARG_PARTIAL_BYTES
532 #define TARGET_ARG_PARTIAL_BYTES arm_arg_partial_bytes
533 #undef TARGET_FUNCTION_ARG
534 #define TARGET_FUNCTION_ARG arm_function_arg
535 #undef TARGET_FUNCTION_ARG_ADVANCE
536 #define TARGET_FUNCTION_ARG_ADVANCE arm_function_arg_advance
537 #undef TARGET_FUNCTION_ARG_BOUNDARY
538 #define TARGET_FUNCTION_ARG_BOUNDARY arm_function_arg_boundary
540 #undef TARGET_SETUP_INCOMING_VARARGS
541 #define TARGET_SETUP_INCOMING_VARARGS arm_setup_incoming_varargs
543 #undef TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS
544 #define TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS arm_allocate_stack_slots_for_args
546 #undef TARGET_ASM_TRAMPOLINE_TEMPLATE
547 #define TARGET_ASM_TRAMPOLINE_TEMPLATE arm_asm_trampoline_template
548 #undef TARGET_TRAMPOLINE_INIT
549 #define TARGET_TRAMPOLINE_INIT arm_trampoline_init
550 #undef TARGET_TRAMPOLINE_ADJUST_ADDRESS
551 #define TARGET_TRAMPOLINE_ADJUST_ADDRESS arm_trampoline_adjust_address
553 #undef TARGET_WARN_FUNC_RETURN
554 #define TARGET_WARN_FUNC_RETURN arm_warn_func_return
556 #undef TARGET_DEFAULT_SHORT_ENUMS
557 #define TARGET_DEFAULT_SHORT_ENUMS arm_default_short_enums
559 #undef TARGET_ALIGN_ANON_BITFIELD
560 #define TARGET_ALIGN_ANON_BITFIELD arm_align_anon_bitfield
562 #undef TARGET_NARROW_VOLATILE_BITFIELD
563 #define TARGET_NARROW_VOLATILE_BITFIELD hook_bool_void_false
565 #undef TARGET_CXX_GUARD_TYPE
566 #define TARGET_CXX_GUARD_TYPE arm_cxx_guard_type
568 #undef TARGET_CXX_GUARD_MASK_BIT
569 #define TARGET_CXX_GUARD_MASK_BIT arm_cxx_guard_mask_bit
571 #undef TARGET_CXX_GET_COOKIE_SIZE
572 #define TARGET_CXX_GET_COOKIE_SIZE arm_get_cookie_size
574 #undef TARGET_CXX_COOKIE_HAS_SIZE
575 #define TARGET_CXX_COOKIE_HAS_SIZE arm_cookie_has_size
577 #undef TARGET_CXX_CDTOR_RETURNS_THIS
578 #define TARGET_CXX_CDTOR_RETURNS_THIS arm_cxx_cdtor_returns_this
580 #undef TARGET_CXX_KEY_METHOD_MAY_BE_INLINE
581 #define TARGET_CXX_KEY_METHOD_MAY_BE_INLINE arm_cxx_key_method_may_be_inline
583 #undef TARGET_CXX_USE_AEABI_ATEXIT
584 #define TARGET_CXX_USE_AEABI_ATEXIT arm_cxx_use_aeabi_atexit
586 #undef TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY
587 #define TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY \
588 arm_cxx_determine_class_data_visibility
590 #undef TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT
591 #define TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT arm_cxx_class_data_always_comdat
593 #undef TARGET_RETURN_IN_MSB
594 #define TARGET_RETURN_IN_MSB arm_return_in_msb
596 #undef TARGET_RETURN_IN_MEMORY
597 #define TARGET_RETURN_IN_MEMORY arm_return_in_memory
599 #undef TARGET_MUST_PASS_IN_STACK
600 #define TARGET_MUST_PASS_IN_STACK arm_must_pass_in_stack
602 #if ARM_UNWIND_INFO
603 #undef TARGET_ASM_UNWIND_EMIT
604 #define TARGET_ASM_UNWIND_EMIT arm_unwind_emit
606 /* EABI unwinding tables use a different format for the typeinfo tables. */
607 #undef TARGET_ASM_TTYPE
608 #define TARGET_ASM_TTYPE arm_output_ttype
610 #undef TARGET_ARM_EABI_UNWINDER
611 #define TARGET_ARM_EABI_UNWINDER true
613 #undef TARGET_ASM_EMIT_EXCEPT_PERSONALITY
614 #define TARGET_ASM_EMIT_EXCEPT_PERSONALITY arm_asm_emit_except_personality
616 #endif /* ARM_UNWIND_INFO */
618 #undef TARGET_ASM_INIT_SECTIONS
619 #define TARGET_ASM_INIT_SECTIONS arm_asm_init_sections
621 #undef TARGET_DWARF_REGISTER_SPAN
622 #define TARGET_DWARF_REGISTER_SPAN arm_dwarf_register_span
624 #undef TARGET_CANNOT_COPY_INSN_P
625 #define TARGET_CANNOT_COPY_INSN_P arm_cannot_copy_insn_p
627 #ifdef HAVE_AS_TLS
628 #undef TARGET_HAVE_TLS
629 #define TARGET_HAVE_TLS true
630 #endif
632 #undef TARGET_HAVE_CONDITIONAL_EXECUTION
633 #define TARGET_HAVE_CONDITIONAL_EXECUTION arm_have_conditional_execution
635 #undef TARGET_LEGITIMATE_CONSTANT_P
636 #define TARGET_LEGITIMATE_CONSTANT_P arm_legitimate_constant_p
638 #undef TARGET_CANNOT_FORCE_CONST_MEM
639 #define TARGET_CANNOT_FORCE_CONST_MEM arm_cannot_force_const_mem
641 #undef TARGET_MAX_ANCHOR_OFFSET
642 #define TARGET_MAX_ANCHOR_OFFSET 4095
644 /* The minimum is set such that the total size of the block
645 for a particular anchor is -4088 + 1 + 4095 bytes, which is
646 divisible by eight, ensuring natural spacing of anchors. */
647 #undef TARGET_MIN_ANCHOR_OFFSET
648 #define TARGET_MIN_ANCHOR_OFFSET -4088
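/* For reference: with offsets in [-4088, 4095], the block addressable from
   one anchor spans 4088 + 1 + 4095 = 8184 bytes, and 8184 = 8 * 1023.  */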
650 #undef TARGET_SCHED_ISSUE_RATE
651 #define TARGET_SCHED_ISSUE_RATE arm_issue_rate
653 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
654 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
655 arm_first_cycle_multipass_dfa_lookahead
657 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD
658 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD \
659 arm_first_cycle_multipass_dfa_lookahead_guard
661 #undef TARGET_MANGLE_TYPE
662 #define TARGET_MANGLE_TYPE arm_mangle_type
664 #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
665 #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV arm_atomic_assign_expand_fenv
667 #undef TARGET_BUILD_BUILTIN_VA_LIST
668 #define TARGET_BUILD_BUILTIN_VA_LIST arm_build_builtin_va_list
669 #undef TARGET_EXPAND_BUILTIN_VA_START
670 #define TARGET_EXPAND_BUILTIN_VA_START arm_expand_builtin_va_start
671 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
672 #define TARGET_GIMPLIFY_VA_ARG_EXPR arm_gimplify_va_arg_expr
674 #ifdef HAVE_AS_TLS
675 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
676 #define TARGET_ASM_OUTPUT_DWARF_DTPREL arm_output_dwarf_dtprel
677 #endif
679 #undef TARGET_LEGITIMATE_ADDRESS_P
680 #define TARGET_LEGITIMATE_ADDRESS_P arm_legitimate_address_p
682 #undef TARGET_PREFERRED_RELOAD_CLASS
683 #define TARGET_PREFERRED_RELOAD_CLASS arm_preferred_reload_class
685 #undef TARGET_PROMOTED_TYPE
686 #define TARGET_PROMOTED_TYPE arm_promoted_type
688 #undef TARGET_SCALAR_MODE_SUPPORTED_P
689 #define TARGET_SCALAR_MODE_SUPPORTED_P arm_scalar_mode_supported_p
691 #undef TARGET_COMPUTE_FRAME_LAYOUT
692 #define TARGET_COMPUTE_FRAME_LAYOUT arm_compute_frame_layout
694 #undef TARGET_FRAME_POINTER_REQUIRED
695 #define TARGET_FRAME_POINTER_REQUIRED arm_frame_pointer_required
697 #undef TARGET_CAN_ELIMINATE
698 #define TARGET_CAN_ELIMINATE arm_can_eliminate
700 #undef TARGET_CONDITIONAL_REGISTER_USAGE
701 #define TARGET_CONDITIONAL_REGISTER_USAGE arm_conditional_register_usage
703 #undef TARGET_CLASS_LIKELY_SPILLED_P
704 #define TARGET_CLASS_LIKELY_SPILLED_P arm_class_likely_spilled_p
706 #undef TARGET_VECTORIZE_BUILTINS
707 #define TARGET_VECTORIZE_BUILTINS
709 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
710 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
711 arm_builtin_vectorized_function
713 #undef TARGET_VECTOR_ALIGNMENT
714 #define TARGET_VECTOR_ALIGNMENT arm_vector_alignment
716 #undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
717 #define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE \
718 arm_vector_alignment_reachable
720 #undef TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT
721 #define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT \
722 arm_builtin_support_vector_misalignment
724 #undef TARGET_PREFERRED_RENAME_CLASS
725 #define TARGET_PREFERRED_RENAME_CLASS \
726 arm_preferred_rename_class
728 #undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
729 #define TARGET_VECTORIZE_VEC_PERM_CONST_OK \
730 arm_vectorize_vec_perm_const_ok
732 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
733 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
734 arm_builtin_vectorization_cost
735 #undef TARGET_VECTORIZE_ADD_STMT_COST
736 #define TARGET_VECTORIZE_ADD_STMT_COST arm_add_stmt_cost
738 #undef TARGET_CANONICALIZE_COMPARISON
739 #define TARGET_CANONICALIZE_COMPARISON \
740 arm_canonicalize_comparison
742 #undef TARGET_ASAN_SHADOW_OFFSET
743 #define TARGET_ASAN_SHADOW_OFFSET arm_asan_shadow_offset
745 #undef MAX_INSN_PER_IT_BLOCK
746 #define MAX_INSN_PER_IT_BLOCK (arm_restrict_it ? 1 : 4)
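/* arm_restrict_it tracks the -mrestrict-it option: when it is in effect each
   Thumb-2 IT block predicates only a single instruction, in line with ARMv8's
   deprecation of more complex IT blocks; otherwise up to four instructions
   may share one IT block.  */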
748 #undef TARGET_CAN_USE_DOLOOP_P
749 #define TARGET_CAN_USE_DOLOOP_P can_use_doloop_if_innermost
751 #undef TARGET_CONST_NOT_OK_FOR_DEBUG_P
752 #define TARGET_CONST_NOT_OK_FOR_DEBUG_P arm_const_not_ok_for_debug_p
754 #undef TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS
755 #define TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS true
757 #undef TARGET_SCHED_FUSION_PRIORITY
758 #define TARGET_SCHED_FUSION_PRIORITY arm_sched_fusion_priority
760 #undef TARGET_ASM_FUNCTION_SECTION
761 #define TARGET_ASM_FUNCTION_SECTION arm_function_section
763 #undef TARGET_ASM_ELF_FLAGS_NUMERIC
764 #define TARGET_ASM_ELF_FLAGS_NUMERIC arm_asm_elf_flags_numeric
766 #undef TARGET_SECTION_TYPE_FLAGS
767 #define TARGET_SECTION_TYPE_FLAGS arm_elf_section_type_flags
769 #undef TARGET_EXPAND_DIVMOD_LIBFUNC
770 #define TARGET_EXPAND_DIVMOD_LIBFUNC arm_expand_divmod_libfunc
772 #undef TARGET_C_EXCESS_PRECISION
773 #define TARGET_C_EXCESS_PRECISION arm_excess_precision
775 /* Although the architecture reserves bits 0 and 1, only the former is
776 used for ARM/Thumb ISA selection in v7 and earlier versions. */
777 #undef TARGET_CUSTOM_FUNCTION_DESCRIPTORS
778 #define TARGET_CUSTOM_FUNCTION_DESCRIPTORS 2
780 #undef TARGET_FIXED_CONDITION_CODE_REGS
781 #define TARGET_FIXED_CONDITION_CODE_REGS arm_fixed_condition_code_regs
784 /* Obstack for minipool constant handling. */
785 static struct obstack minipool_obstack;
786 static char * minipool_startobj;
788 /* The maximum number of insns skipped which
789 will be conditionalised if possible. */
790 static int max_insns_skipped = 5;
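/* Note: during option processing this default is typically replaced by the
   selected core's tune_params value for the maximum number of conditionalised
   insns, and for Thumb-2 it may be capped at MAX_INSN_PER_IT_BLOCK, so the
   value 5 mostly serves as a fallback.  */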
792 extern FILE * asm_out_file;
794 /* True if we are currently building a constant table. */
795 int making_const_table;
797 /* The processor for which instructions should be scheduled. */
798 enum processor_type arm_tune = TARGET_CPU_arm_none;
800 /* The current tuning set. */
801 const struct tune_params *current_tune;
803 /* Which floating point hardware to schedule for. */
804 int arm_fpu_attr;
806 /* Used for Thumb call_via trampolines. */
807 rtx thumb_call_via_label[14];
808 static int thumb_call_reg_needed;
810 /* The bits in this mask specify which instruction scheduling options should
811 be used. */
812 unsigned int tune_flags = 0;
814 /* The highest ARM architecture version supported by the
815 target. */
816 enum base_architecture arm_base_arch = BASE_ARCH_0;
818 /* Active target architecture and tuning. */
820 struct arm_build_target arm_active_target;
822 /* The following are used in the arm.md file as equivalents to bits
823 in the above two flag variables. */
825 /* Nonzero if this chip supports the ARM Architecture 3M extensions. */
826 int arm_arch3m = 0;
828 /* Nonzero if this chip supports the ARM Architecture 4 extensions. */
829 int arm_arch4 = 0;
831 /* Nonzero if this chip supports the ARM Architecture 4t extensions. */
832 int arm_arch4t = 0;
834 /* Nonzero if this chip supports the ARM Architecture 5 extensions. */
835 int arm_arch5 = 0;
837 /* Nonzero if this chip supports the ARM Architecture 5E extensions. */
838 int arm_arch5e = 0;
840 /* Nonzero if this chip supports the ARM Architecture 5TE extensions. */
841 int arm_arch5te = 0;
843 /* Nonzero if this chip supports the ARM Architecture 6 extensions. */
844 int arm_arch6 = 0;
846 /* Nonzero if this chip supports the ARM 6K extensions. */
847 int arm_arch6k = 0;
849 /* Nonzero if this chip supports the ARM 6KZ extensions. */
850 int arm_arch6kz = 0;
852 /* Nonzero if instructions present in ARMv6-M can be used. */
853 int arm_arch6m = 0;
855 /* Nonzero if this chip supports the ARM 7 extensions. */
856 int arm_arch7 = 0;
858 /* Nonzero if this chip supports the Large Physical Address Extension. */
859 int arm_arch_lpae = 0;
861 /* Nonzero if instructions not present in the 'M' profile can be used. */
862 int arm_arch_notm = 0;
864 /* Nonzero if instructions present in ARMv7E-M can be used. */
865 int arm_arch7em = 0;
867 /* Nonzero if instructions present in ARMv8 can be used. */
868 int arm_arch8 = 0;
870 /* Nonzero if this chip supports the ARMv8.1 extensions. */
871 int arm_arch8_1 = 0;
873 /* Nonzero if this chip supports the ARM Architecture 8.2 extensions. */
874 int arm_arch8_2 = 0;
876 /* Nonzero if this chip supports the FP16 instructions extension of ARM
877 Architecture 8.2. */
878 int arm_fp16_inst = 0;
880 /* Nonzero if this chip can benefit from load scheduling. */
881 int arm_ld_sched = 0;
883 /* Nonzero if this chip is a StrongARM. */
884 int arm_tune_strongarm = 0;
886 /* Nonzero if this chip supports Intel Wireless MMX technology. */
887 int arm_arch_iwmmxt = 0;
889 /* Nonzero if this chip supports Intel Wireless MMX2 technology. */
890 int arm_arch_iwmmxt2 = 0;
892 /* Nonzero if this chip is an XScale. */
893 int arm_arch_xscale = 0;
895 /* Nonzero if tuning for XScale */
896 int arm_tune_xscale = 0;
898 /* Nonzero if we want to tune for stores that access the write-buffer.
899 This typically means an ARM6 or ARM7 with MMU or MPU. */
900 int arm_tune_wbuf = 0;
902 /* Nonzero if tuning for Cortex-A9. */
903 int arm_tune_cortex_a9 = 0;
905 /* Nonzero if we should define __THUMB_INTERWORK__ in the
906 preprocessor.
907 XXX This is a bit of a hack, it's intended to help work around
908 problems in GLD which doesn't understand that armv5t code is
909 interworking clean. */
910 int arm_cpp_interwork = 0;
912 /* Nonzero if chip supports Thumb 1. */
913 int arm_arch_thumb1;
915 /* Nonzero if chip supports Thumb 2. */
916 int arm_arch_thumb2;
918 /* Nonzero if chip supports integer division instruction. */
919 int arm_arch_arm_hwdiv;
920 int arm_arch_thumb_hwdiv;
922 /* Nonzero if chip disallows volatile memory access in IT block. */
923 int arm_arch_no_volatile_ce;
925 /* Nonzero if we should use Neon to handle 64-bits operations rather
926 than core registers. */
927 int prefer_neon_for_64bits = 0;
929 /* Nonzero if we shouldn't use literal pools. */
930 bool arm_disable_literal_pool = false;
932 /* The register number to be used for the PIC offset register. */
933 unsigned arm_pic_register = INVALID_REGNUM;
935 enum arm_pcs arm_pcs_default;
937 /* For an explanation of these variables, see final_prescan_insn below. */
938 int arm_ccfsm_state;
939 /* arm_current_cc is also used for Thumb-2 cond_exec blocks. */
940 enum arm_cond_code arm_current_cc;
942 rtx arm_target_insn;
943 int arm_target_label;
944 /* The number of conditionally executed insns, including the current insn. */
945 int arm_condexec_count = 0;
946 /* A bitmask specifying the patterns for the IT block.
947 Zero means do not output an IT block before this insn. */
948 int arm_condexec_mask = 0;
949 /* The number of bits used in arm_condexec_mask. */
950 int arm_condexec_masklen = 0;
952 /* Nonzero if chip supports the ARMv8 CRC instructions. */
953 int arm_arch_crc = 0;
955 /* Nonzero if chip supports the ARMv8-M security extensions. */
956 int arm_arch_cmse = 0;
958 /* Nonzero if the core has a very small, high-latency, multiply unit. */
959 int arm_m_profile_small_mul = 0;
961 /* The condition codes of the ARM, and the inverse function. */
962 static const char * const arm_condition_codes[] =
963 {
964 "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
965 "hi", "ls", "ge", "lt", "gt", "le", "al", "nv"
966 };
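/* The table above is arranged in complementary pairs (eq/ne, cs/cc, mi/pl,
   vs/vc, hi/ls, ge/lt, gt/le, al/nv), so the inverse of a condition code is
   obtained by flipping the least-significant bit of its index (code ^ 1).  */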
968 /* The register numbers in sequence, for passing to arm_gen_load_multiple. */
969 int arm_regs_in_sequence[] =
970 {
971 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
972 };
974 #define ARM_LSL_NAME "lsl"
975 #define streq(string1, string2) (strcmp (string1, string2) == 0)
977 #define THUMB2_WORK_REGS (0xff & ~( (1 << THUMB_HARD_FRAME_POINTER_REGNUM) \
978 | (1 << SP_REGNUM) | (1 << PC_REGNUM) \
979 | (1 << PIC_OFFSET_TABLE_REGNUM)))
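/* In other words, the Thumb-2 work registers are drawn from the low registers
   r0-r7, with the Thumb hard frame pointer (r7) removed and, defensively, the
   stack pointer, program counter and PIC register cleared as well.  */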
981 /* Initialization code. */
983 struct cpu_tune
984 {
985 enum processor_type scheduler;
986 unsigned int tune_flags;
987 const struct tune_params *tune;
988 };
990 #define ARM_PREFETCH_NOT_BENEFICIAL { 0, -1, -1 }
991 #define ARM_PREFETCH_BENEFICIAL(num_slots,l1_size,l1_line_size) \
992 { \
993 num_slots, \
994 l1_size, \
995 l1_line_size \
996 }
998 /* arm generic vectorizer costs. */
999 static const
1000 struct cpu_vec_costs arm_default_vec_cost = {
1001 1, /* scalar_stmt_cost. */
1002 1, /* scalar load_cost. */
1003 1, /* scalar_store_cost. */
1004 1, /* vec_stmt_cost. */
1005 1, /* vec_to_scalar_cost. */
1006 1, /* scalar_to_vec_cost. */
1007 1, /* vec_align_load_cost. */
1008 1, /* vec_unalign_load_cost. */
1009 1, /* vec_unalign_store_cost. */
1010 1, /* vec_store_cost. */
1011 3, /* cond_taken_branch_cost. */
1012 1, /* cond_not_taken_branch_cost. */
1013 };
1015 /* Cost tables for AArch32 + AArch64 cores should go in aarch-cost-tables.h */
1016 #include "aarch-cost-tables.h"
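/* When reading the cost tables below, note that COSTS_N_INSNS (N) expresses
   the cost of N typical instructions (rtl.h scales it by a fixed factor), so
   plain 0 entries mark operations folded into another instruction's cost,
   while fields such as ldm_regs_per_insn_1st are raw register counts rather
   than costs.  */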
1020 const struct cpu_cost_table cortexa9_extra_costs =
1022 /* ALU */
1024 0, /* arith. */
1025 0, /* logical. */
1026 0, /* shift. */
1027 COSTS_N_INSNS (1), /* shift_reg. */
1028 COSTS_N_INSNS (1), /* arith_shift. */
1029 COSTS_N_INSNS (2), /* arith_shift_reg. */
1030 0, /* log_shift. */
1031 COSTS_N_INSNS (1), /* log_shift_reg. */
1032 COSTS_N_INSNS (1), /* extend. */
1033 COSTS_N_INSNS (2), /* extend_arith. */
1034 COSTS_N_INSNS (1), /* bfi. */
1035 COSTS_N_INSNS (1), /* bfx. */
1036 0, /* clz. */
1037 0, /* rev. */
1038 0, /* non_exec. */
1039 true /* non_exec_costs_exec. */
1042 /* MULT SImode */
1044 COSTS_N_INSNS (3), /* simple. */
1045 COSTS_N_INSNS (3), /* flag_setting. */
1046 COSTS_N_INSNS (2), /* extend. */
1047 COSTS_N_INSNS (3), /* add. */
1048 COSTS_N_INSNS (2), /* extend_add. */
1049 COSTS_N_INSNS (30) /* idiv. No HW div on Cortex A9. */
1051 /* MULT DImode */
1053 0, /* simple (N/A). */
1054 0, /* flag_setting (N/A). */
1055 COSTS_N_INSNS (4), /* extend. */
1056 0, /* add (N/A). */
1057 COSTS_N_INSNS (4), /* extend_add. */
1058 0 /* idiv (N/A). */
1061 /* LD/ST */
1063 COSTS_N_INSNS (2), /* load. */
1064 COSTS_N_INSNS (2), /* load_sign_extend. */
1065 COSTS_N_INSNS (2), /* ldrd. */
1066 COSTS_N_INSNS (2), /* ldm_1st. */
1067 1, /* ldm_regs_per_insn_1st. */
1068 2, /* ldm_regs_per_insn_subsequent. */
1069 COSTS_N_INSNS (5), /* loadf. */
1070 COSTS_N_INSNS (5), /* loadd. */
1071 COSTS_N_INSNS (1), /* load_unaligned. */
1072 COSTS_N_INSNS (2), /* store. */
1073 COSTS_N_INSNS (2), /* strd. */
1074 COSTS_N_INSNS (2), /* stm_1st. */
1075 1, /* stm_regs_per_insn_1st. */
1076 2, /* stm_regs_per_insn_subsequent. */
1077 COSTS_N_INSNS (1), /* storef. */
1078 COSTS_N_INSNS (1), /* stored. */
1079 COSTS_N_INSNS (1), /* store_unaligned. */
1080 COSTS_N_INSNS (1), /* loadv. */
1081 COSTS_N_INSNS (1) /* storev. */
1084 /* FP SFmode */
1086 COSTS_N_INSNS (14), /* div. */
1087 COSTS_N_INSNS (4), /* mult. */
1088 COSTS_N_INSNS (7), /* mult_addsub. */
1089 COSTS_N_INSNS (30), /* fma. */
1090 COSTS_N_INSNS (3), /* addsub. */
1091 COSTS_N_INSNS (1), /* fpconst. */
1092 COSTS_N_INSNS (1), /* neg. */
1093 COSTS_N_INSNS (3), /* compare. */
1094 COSTS_N_INSNS (3), /* widen. */
1095 COSTS_N_INSNS (3), /* narrow. */
1096 COSTS_N_INSNS (3), /* toint. */
1097 COSTS_N_INSNS (3), /* fromint. */
1098 COSTS_N_INSNS (3) /* roundint. */
1100 /* FP DFmode */
1102 COSTS_N_INSNS (24), /* div. */
1103 COSTS_N_INSNS (5), /* mult. */
1104 COSTS_N_INSNS (8), /* mult_addsub. */
1105 COSTS_N_INSNS (30), /* fma. */
1106 COSTS_N_INSNS (3), /* addsub. */
1107 COSTS_N_INSNS (1), /* fpconst. */
1108 COSTS_N_INSNS (1), /* neg. */
1109 COSTS_N_INSNS (3), /* compare. */
1110 COSTS_N_INSNS (3), /* widen. */
1111 COSTS_N_INSNS (3), /* narrow. */
1112 COSTS_N_INSNS (3), /* toint. */
1113 COSTS_N_INSNS (3), /* fromint. */
1114 COSTS_N_INSNS (3) /* roundint. */
1117 /* Vector */
1119 COSTS_N_INSNS (1) /* alu. */
1123 const struct cpu_cost_table cortexa8_extra_costs =
1125 /* ALU */
1127 0, /* arith. */
1128 0, /* logical. */
1129 COSTS_N_INSNS (1), /* shift. */
1130 0, /* shift_reg. */
1131 COSTS_N_INSNS (1), /* arith_shift. */
1132 0, /* arith_shift_reg. */
1133 COSTS_N_INSNS (1), /* log_shift. */
1134 0, /* log_shift_reg. */
1135 0, /* extend. */
1136 0, /* extend_arith. */
1137 0, /* bfi. */
1138 0, /* bfx. */
1139 0, /* clz. */
1140 0, /* rev. */
1141 0, /* non_exec. */
1142 true /* non_exec_costs_exec. */
1145 /* MULT SImode */
1147 COSTS_N_INSNS (1), /* simple. */
1148 COSTS_N_INSNS (1), /* flag_setting. */
1149 COSTS_N_INSNS (1), /* extend. */
1150 COSTS_N_INSNS (1), /* add. */
1151 COSTS_N_INSNS (1), /* extend_add. */
1152 COSTS_N_INSNS (30) /* idiv. No HW div on Cortex A8. */
1154 /* MULT DImode */
1156 0, /* simple (N/A). */
1157 0, /* flag_setting (N/A). */
1158 COSTS_N_INSNS (2), /* extend. */
1159 0, /* add (N/A). */
1160 COSTS_N_INSNS (2), /* extend_add. */
1161 0 /* idiv (N/A). */
1164 /* LD/ST */
1166 COSTS_N_INSNS (1), /* load. */
1167 COSTS_N_INSNS (1), /* load_sign_extend. */
1168 COSTS_N_INSNS (1), /* ldrd. */
1169 COSTS_N_INSNS (1), /* ldm_1st. */
1170 1, /* ldm_regs_per_insn_1st. */
1171 2, /* ldm_regs_per_insn_subsequent. */
1172 COSTS_N_INSNS (1), /* loadf. */
1173 COSTS_N_INSNS (1), /* loadd. */
1174 COSTS_N_INSNS (1), /* load_unaligned. */
1175 COSTS_N_INSNS (1), /* store. */
1176 COSTS_N_INSNS (1), /* strd. */
1177 COSTS_N_INSNS (1), /* stm_1st. */
1178 1, /* stm_regs_per_insn_1st. */
1179 2, /* stm_regs_per_insn_subsequent. */
1180 COSTS_N_INSNS (1), /* storef. */
1181 COSTS_N_INSNS (1), /* stored. */
1182 COSTS_N_INSNS (1), /* store_unaligned. */
1183 COSTS_N_INSNS (1), /* loadv. */
1184 COSTS_N_INSNS (1) /* storev. */
1187 /* FP SFmode */
1189 COSTS_N_INSNS (36), /* div. */
1190 COSTS_N_INSNS (11), /* mult. */
1191 COSTS_N_INSNS (20), /* mult_addsub. */
1192 COSTS_N_INSNS (30), /* fma. */
1193 COSTS_N_INSNS (9), /* addsub. */
1194 COSTS_N_INSNS (3), /* fpconst. */
1195 COSTS_N_INSNS (3), /* neg. */
1196 COSTS_N_INSNS (6), /* compare. */
1197 COSTS_N_INSNS (4), /* widen. */
1198 COSTS_N_INSNS (4), /* narrow. */
1199 COSTS_N_INSNS (8), /* toint. */
1200 COSTS_N_INSNS (8), /* fromint. */
1201 COSTS_N_INSNS (8) /* roundint. */
1203 /* FP DFmode */
1205 COSTS_N_INSNS (64), /* div. */
1206 COSTS_N_INSNS (16), /* mult. */
1207 COSTS_N_INSNS (25), /* mult_addsub. */
1208 COSTS_N_INSNS (30), /* fma. */
1209 COSTS_N_INSNS (9), /* addsub. */
1210 COSTS_N_INSNS (3), /* fpconst. */
1211 COSTS_N_INSNS (3), /* neg. */
1212 COSTS_N_INSNS (6), /* compare. */
1213 COSTS_N_INSNS (6), /* widen. */
1214 COSTS_N_INSNS (6), /* narrow. */
1215 COSTS_N_INSNS (8), /* toint. */
1216 COSTS_N_INSNS (8), /* fromint. */
1217 COSTS_N_INSNS (8) /* roundint. */
1220 /* Vector */
1222 COSTS_N_INSNS (1) /* alu. */
1226 const struct cpu_cost_table cortexa5_extra_costs =
1228 /* ALU */
1230 0, /* arith. */
1231 0, /* logical. */
1232 COSTS_N_INSNS (1), /* shift. */
1233 COSTS_N_INSNS (1), /* shift_reg. */
1234 COSTS_N_INSNS (1), /* arith_shift. */
1235 COSTS_N_INSNS (1), /* arith_shift_reg. */
1236 COSTS_N_INSNS (1), /* log_shift. */
1237 COSTS_N_INSNS (1), /* log_shift_reg. */
1238 COSTS_N_INSNS (1), /* extend. */
1239 COSTS_N_INSNS (1), /* extend_arith. */
1240 COSTS_N_INSNS (1), /* bfi. */
1241 COSTS_N_INSNS (1), /* bfx. */
1242 COSTS_N_INSNS (1), /* clz. */
1243 COSTS_N_INSNS (1), /* rev. */
1244 0, /* non_exec. */
1245 true /* non_exec_costs_exec. */
1249 /* MULT SImode */
1251 0, /* simple. */
1252 COSTS_N_INSNS (1), /* flag_setting. */
1253 COSTS_N_INSNS (1), /* extend. */
1254 COSTS_N_INSNS (1), /* add. */
1255 COSTS_N_INSNS (1), /* extend_add. */
1256 COSTS_N_INSNS (7) /* idiv. */
1258 /* MULT DImode */
1260 0, /* simple (N/A). */
1261 0, /* flag_setting (N/A). */
1262 COSTS_N_INSNS (1), /* extend. */
1263 0, /* add. */
1264 COSTS_N_INSNS (2), /* extend_add. */
1265 0 /* idiv (N/A). */
1268 /* LD/ST */
1270 COSTS_N_INSNS (1), /* load. */
1271 COSTS_N_INSNS (1), /* load_sign_extend. */
1272 COSTS_N_INSNS (6), /* ldrd. */
1273 COSTS_N_INSNS (1), /* ldm_1st. */
1274 1, /* ldm_regs_per_insn_1st. */
1275 2, /* ldm_regs_per_insn_subsequent. */
1276 COSTS_N_INSNS (2), /* loadf. */
1277 COSTS_N_INSNS (4), /* loadd. */
1278 COSTS_N_INSNS (1), /* load_unaligned. */
1279 COSTS_N_INSNS (1), /* store. */
1280 COSTS_N_INSNS (3), /* strd. */
1281 COSTS_N_INSNS (1), /* stm_1st. */
1282 1, /* stm_regs_per_insn_1st. */
1283 2, /* stm_regs_per_insn_subsequent. */
1284 COSTS_N_INSNS (2), /* storef. */
1285 COSTS_N_INSNS (2), /* stored. */
1286 COSTS_N_INSNS (1), /* store_unaligned. */
1287 COSTS_N_INSNS (1), /* loadv. */
1288 COSTS_N_INSNS (1) /* storev. */
1291 /* FP SFmode */
1293 COSTS_N_INSNS (15), /* div. */
1294 COSTS_N_INSNS (3), /* mult. */
1295 COSTS_N_INSNS (7), /* mult_addsub. */
1296 COSTS_N_INSNS (7), /* fma. */
1297 COSTS_N_INSNS (3), /* addsub. */
1298 COSTS_N_INSNS (3), /* fpconst. */
1299 COSTS_N_INSNS (3), /* neg. */
1300 COSTS_N_INSNS (3), /* compare. */
1301 COSTS_N_INSNS (3), /* widen. */
1302 COSTS_N_INSNS (3), /* narrow. */
1303 COSTS_N_INSNS (3), /* toint. */
1304 COSTS_N_INSNS (3), /* fromint. */
1305 COSTS_N_INSNS (3) /* roundint. */
1307 /* FP DFmode */
1309 COSTS_N_INSNS (30), /* div. */
1310 COSTS_N_INSNS (6), /* mult. */
1311 COSTS_N_INSNS (10), /* mult_addsub. */
1312 COSTS_N_INSNS (7), /* fma. */
1313 COSTS_N_INSNS (3), /* addsub. */
1314 COSTS_N_INSNS (3), /* fpconst. */
1315 COSTS_N_INSNS (3), /* neg. */
1316 COSTS_N_INSNS (3), /* compare. */
1317 COSTS_N_INSNS (3), /* widen. */
1318 COSTS_N_INSNS (3), /* narrow. */
1319 COSTS_N_INSNS (3), /* toint. */
1320 COSTS_N_INSNS (3), /* fromint. */
1321 COSTS_N_INSNS (3) /* roundint. */
1324 /* Vector */
1326 COSTS_N_INSNS (1) /* alu. */
1331 const struct cpu_cost_table cortexa7_extra_costs =
1333 /* ALU */
1335 0, /* arith. */
1336 0, /* logical. */
1337 COSTS_N_INSNS (1), /* shift. */
1338 COSTS_N_INSNS (1), /* shift_reg. */
1339 COSTS_N_INSNS (1), /* arith_shift. */
1340 COSTS_N_INSNS (1), /* arith_shift_reg. */
1341 COSTS_N_INSNS (1), /* log_shift. */
1342 COSTS_N_INSNS (1), /* log_shift_reg. */
1343 COSTS_N_INSNS (1), /* extend. */
1344 COSTS_N_INSNS (1), /* extend_arith. */
1345 COSTS_N_INSNS (1), /* bfi. */
1346 COSTS_N_INSNS (1), /* bfx. */
1347 COSTS_N_INSNS (1), /* clz. */
1348 COSTS_N_INSNS (1), /* rev. */
1349 0, /* non_exec. */
1350 true /* non_exec_costs_exec. */
1354 /* MULT SImode */
1356 0, /* simple. */
1357 COSTS_N_INSNS (1), /* flag_setting. */
1358 COSTS_N_INSNS (1), /* extend. */
1359 COSTS_N_INSNS (1), /* add. */
1360 COSTS_N_INSNS (1), /* extend_add. */
1361 COSTS_N_INSNS (7) /* idiv. */
1363 /* MULT DImode */
1365 0, /* simple (N/A). */
1366 0, /* flag_setting (N/A). */
1367 COSTS_N_INSNS (1), /* extend. */
1368 0, /* add. */
1369 COSTS_N_INSNS (2), /* extend_add. */
1370 0 /* idiv (N/A). */
1373 /* LD/ST */
1375 COSTS_N_INSNS (1), /* load. */
1376 COSTS_N_INSNS (1), /* load_sign_extend. */
1377 COSTS_N_INSNS (3), /* ldrd. */
1378 COSTS_N_INSNS (1), /* ldm_1st. */
1379 1, /* ldm_regs_per_insn_1st. */
1380 2, /* ldm_regs_per_insn_subsequent. */
1381 COSTS_N_INSNS (2), /* loadf. */
1382 COSTS_N_INSNS (2), /* loadd. */
1383 COSTS_N_INSNS (1), /* load_unaligned. */
1384 COSTS_N_INSNS (1), /* store. */
1385 COSTS_N_INSNS (3), /* strd. */
1386 COSTS_N_INSNS (1), /* stm_1st. */
1387 1, /* stm_regs_per_insn_1st. */
1388 2, /* stm_regs_per_insn_subsequent. */
1389 COSTS_N_INSNS (2), /* storef. */
1390 COSTS_N_INSNS (2), /* stored. */
1391 COSTS_N_INSNS (1), /* store_unaligned. */
1392 COSTS_N_INSNS (1), /* loadv. */
1393 COSTS_N_INSNS (1) /* storev. */
1396 /* FP SFmode */
1398 COSTS_N_INSNS (15), /* div. */
1399 COSTS_N_INSNS (3), /* mult. */
1400 COSTS_N_INSNS (7), /* mult_addsub. */
1401 COSTS_N_INSNS (7), /* fma. */
1402 COSTS_N_INSNS (3), /* addsub. */
1403 COSTS_N_INSNS (3), /* fpconst. */
1404 COSTS_N_INSNS (3), /* neg. */
1405 COSTS_N_INSNS (3), /* compare. */
1406 COSTS_N_INSNS (3), /* widen. */
1407 COSTS_N_INSNS (3), /* narrow. */
1408 COSTS_N_INSNS (3), /* toint. */
1409 COSTS_N_INSNS (3), /* fromint. */
1410 COSTS_N_INSNS (3) /* roundint. */
1412 /* FP DFmode */
1414 COSTS_N_INSNS (30), /* div. */
1415 COSTS_N_INSNS (6), /* mult. */
1416 COSTS_N_INSNS (10), /* mult_addsub. */
1417 COSTS_N_INSNS (7), /* fma. */
1418 COSTS_N_INSNS (3), /* addsub. */
1419 COSTS_N_INSNS (3), /* fpconst. */
1420 COSTS_N_INSNS (3), /* neg. */
1421 COSTS_N_INSNS (3), /* compare. */
1422 COSTS_N_INSNS (3), /* widen. */
1423 COSTS_N_INSNS (3), /* narrow. */
1424 COSTS_N_INSNS (3), /* toint. */
1425 COSTS_N_INSNS (3), /* fromint. */
1426 COSTS_N_INSNS (3) /* roundint. */
1429 /* Vector */
1431 COSTS_N_INSNS (1) /* alu. */
1435 const struct cpu_cost_table cortexa12_extra_costs =
1437 /* ALU */
1439 0, /* arith. */
1440 0, /* logical. */
1441 0, /* shift. */
1442 COSTS_N_INSNS (1), /* shift_reg. */
1443 COSTS_N_INSNS (1), /* arith_shift. */
1444 COSTS_N_INSNS (1), /* arith_shift_reg. */
1445 COSTS_N_INSNS (1), /* log_shift. */
1446 COSTS_N_INSNS (1), /* log_shift_reg. */
1447 0, /* extend. */
1448 COSTS_N_INSNS (1), /* extend_arith. */
1449 0, /* bfi. */
1450 COSTS_N_INSNS (1), /* bfx. */
1451 COSTS_N_INSNS (1), /* clz. */
1452 COSTS_N_INSNS (1), /* rev. */
1453 0, /* non_exec. */
1454 true /* non_exec_costs_exec. */
1456 /* MULT SImode */
1459 COSTS_N_INSNS (2), /* simple. */
1460 COSTS_N_INSNS (3), /* flag_setting. */
1461 COSTS_N_INSNS (2), /* extend. */
1462 COSTS_N_INSNS (3), /* add. */
1463 COSTS_N_INSNS (2), /* extend_add. */
1464 COSTS_N_INSNS (18) /* idiv. */
1466 /* MULT DImode */
1468 0, /* simple (N/A). */
1469 0, /* flag_setting (N/A). */
1470 COSTS_N_INSNS (3), /* extend. */
1471 0, /* add (N/A). */
1472 COSTS_N_INSNS (3), /* extend_add. */
1473 0 /* idiv (N/A). */
1476 /* LD/ST */
1478 COSTS_N_INSNS (3), /* load. */
1479 COSTS_N_INSNS (3), /* load_sign_extend. */
1480 COSTS_N_INSNS (3), /* ldrd. */
1481 COSTS_N_INSNS (3), /* ldm_1st. */
1482 1, /* ldm_regs_per_insn_1st. */
1483 2, /* ldm_regs_per_insn_subsequent. */
1484 COSTS_N_INSNS (3), /* loadf. */
1485 COSTS_N_INSNS (3), /* loadd. */
1486 0, /* load_unaligned. */
1487 0, /* store. */
1488 0, /* strd. */
1489 0, /* stm_1st. */
1490 1, /* stm_regs_per_insn_1st. */
1491 2, /* stm_regs_per_insn_subsequent. */
1492 COSTS_N_INSNS (2), /* storef. */
1493 COSTS_N_INSNS (2), /* stored. */
1494 0, /* store_unaligned. */
1495 COSTS_N_INSNS (1), /* loadv. */
1496 COSTS_N_INSNS (1) /* storev. */
1499 /* FP SFmode */
1501 COSTS_N_INSNS (17), /* div. */
1502 COSTS_N_INSNS (4), /* mult. */
1503 COSTS_N_INSNS (8), /* mult_addsub. */
1504 COSTS_N_INSNS (8), /* fma. */
1505 COSTS_N_INSNS (4), /* addsub. */
1506 COSTS_N_INSNS (2), /* fpconst. */
1507 COSTS_N_INSNS (2), /* neg. */
1508 COSTS_N_INSNS (2), /* compare. */
1509 COSTS_N_INSNS (4), /* widen. */
1510 COSTS_N_INSNS (4), /* narrow. */
1511 COSTS_N_INSNS (4), /* toint. */
1512 COSTS_N_INSNS (4), /* fromint. */
1513 COSTS_N_INSNS (4) /* roundint. */
1515 /* FP DFmode */
1517 COSTS_N_INSNS (31), /* div. */
1518 COSTS_N_INSNS (4), /* mult. */
1519 COSTS_N_INSNS (8), /* mult_addsub. */
1520 COSTS_N_INSNS (8), /* fma. */
1521 COSTS_N_INSNS (4), /* addsub. */
1522 COSTS_N_INSNS (2), /* fpconst. */
1523 COSTS_N_INSNS (2), /* neg. */
1524 COSTS_N_INSNS (2), /* compare. */
1525 COSTS_N_INSNS (4), /* widen. */
1526 COSTS_N_INSNS (4), /* narrow. */
1527 COSTS_N_INSNS (4), /* toint. */
1528 COSTS_N_INSNS (4), /* fromint. */
1529 COSTS_N_INSNS (4) /* roundint. */
1532 /* Vector */
1534 COSTS_N_INSNS (1) /* alu. */
1538 const struct cpu_cost_table cortexa15_extra_costs =
1540 /* ALU */
1542 0, /* arith. */
1543 0, /* logical. */
1544 0, /* shift. */
1545 0, /* shift_reg. */
1546 COSTS_N_INSNS (1), /* arith_shift. */
1547 COSTS_N_INSNS (1), /* arith_shift_reg. */
1548 COSTS_N_INSNS (1), /* log_shift. */
1549 COSTS_N_INSNS (1), /* log_shift_reg. */
1550 0, /* extend. */
1551 COSTS_N_INSNS (1), /* extend_arith. */
1552 COSTS_N_INSNS (1), /* bfi. */
1553 0, /* bfx. */
1554 0, /* clz. */
1555 0, /* rev. */
1556 0, /* non_exec. */
1557 true /* non_exec_costs_exec. */
1559 /* MULT SImode */
1562 COSTS_N_INSNS (2), /* simple. */
1563 COSTS_N_INSNS (3), /* flag_setting. */
1564 COSTS_N_INSNS (2), /* extend. */
1565 COSTS_N_INSNS (2), /* add. */
1566 COSTS_N_INSNS (2), /* extend_add. */
1567 COSTS_N_INSNS (18) /* idiv. */
1569 /* MULT DImode */
1571 0, /* simple (N/A). */
1572 0, /* flag_setting (N/A). */
1573 COSTS_N_INSNS (3), /* extend. */
1574 0, /* add (N/A). */
1575 COSTS_N_INSNS (3), /* extend_add. */
1576 0 /* idiv (N/A). */
1579 /* LD/ST */
1581 COSTS_N_INSNS (3), /* load. */
1582 COSTS_N_INSNS (3), /* load_sign_extend. */
1583 COSTS_N_INSNS (3), /* ldrd. */
1584 COSTS_N_INSNS (4), /* ldm_1st. */
1585 1, /* ldm_regs_per_insn_1st. */
1586 2, /* ldm_regs_per_insn_subsequent. */
1587 COSTS_N_INSNS (4), /* loadf. */
1588 COSTS_N_INSNS (4), /* loadd. */
1589 0, /* load_unaligned. */
1590 0, /* store. */
1591 0, /* strd. */
1592 COSTS_N_INSNS (1), /* stm_1st. */
1593 1, /* stm_regs_per_insn_1st. */
1594 2, /* stm_regs_per_insn_subsequent. */
1595 0, /* storef. */
1596 0, /* stored. */
1597 0, /* store_unaligned. */
1598 COSTS_N_INSNS (1), /* loadv. */
1599 COSTS_N_INSNS (1) /* storev. */
1602 /* FP SFmode */
1604 COSTS_N_INSNS (17), /* div. */
1605 COSTS_N_INSNS (4), /* mult. */
1606 COSTS_N_INSNS (8), /* mult_addsub. */
1607 COSTS_N_INSNS (8), /* fma. */
1608 COSTS_N_INSNS (4), /* addsub. */
1609 COSTS_N_INSNS (2), /* fpconst. */
1610 COSTS_N_INSNS (2), /* neg. */
1611 COSTS_N_INSNS (5), /* compare. */
1612 COSTS_N_INSNS (4), /* widen. */
1613 COSTS_N_INSNS (4), /* narrow. */
1614 COSTS_N_INSNS (4), /* toint. */
1615 COSTS_N_INSNS (4), /* fromint. */
1616 COSTS_N_INSNS (4) /* roundint. */
1618 /* FP DFmode */
1620 COSTS_N_INSNS (31), /* div. */
1621 COSTS_N_INSNS (4), /* mult. */
1622 COSTS_N_INSNS (8), /* mult_addsub. */
1623 COSTS_N_INSNS (8), /* fma. */
1624 COSTS_N_INSNS (4), /* addsub. */
1625 COSTS_N_INSNS (2), /* fpconst. */
1626 COSTS_N_INSNS (2), /* neg. */
1627 COSTS_N_INSNS (2), /* compare. */
1628 COSTS_N_INSNS (4), /* widen. */
1629 COSTS_N_INSNS (4), /* narrow. */
1630 COSTS_N_INSNS (4), /* toint. */
1631 COSTS_N_INSNS (4), /* fromint. */
1632 COSTS_N_INSNS (4) /* roundint. */
1635 /* Vector */
1637 COSTS_N_INSNS (1) /* alu. */
1641 const struct cpu_cost_table v7m_extra_costs =
1643 /* ALU */
1645 0, /* arith. */
1646 0, /* logical. */
1647 0, /* shift. */
1648 0, /* shift_reg. */
1649 0, /* arith_shift. */
1650 COSTS_N_INSNS (1), /* arith_shift_reg. */
1651 0, /* log_shift. */
1652 COSTS_N_INSNS (1), /* log_shift_reg. */
1653 0, /* extend. */
1654 COSTS_N_INSNS (1), /* extend_arith. */
1655 0, /* bfi. */
1656 0, /* bfx. */
1657 0, /* clz. */
1658 0, /* rev. */
1659 COSTS_N_INSNS (1), /* non_exec. */
1660 false /* non_exec_costs_exec. */
1663 /* MULT SImode */
1665 COSTS_N_INSNS (1), /* simple. */
1666 COSTS_N_INSNS (1), /* flag_setting. */
1667 COSTS_N_INSNS (2), /* extend. */
1668 COSTS_N_INSNS (1), /* add. */
1669 COSTS_N_INSNS (3), /* extend_add. */
1670 COSTS_N_INSNS (8) /* idiv. */
1672 /* MULT DImode */
1674 0, /* simple (N/A). */
1675 0, /* flag_setting (N/A). */
1676 COSTS_N_INSNS (2), /* extend. */
1677 0, /* add (N/A). */
1678 COSTS_N_INSNS (3), /* extend_add. */
1679 0 /* idiv (N/A). */
1682 /* LD/ST */
1684 COSTS_N_INSNS (2), /* load. */
1685 0, /* load_sign_extend. */
1686 COSTS_N_INSNS (3), /* ldrd. */
1687 COSTS_N_INSNS (2), /* ldm_1st. */
1688 1, /* ldm_regs_per_insn_1st. */
1689 1, /* ldm_regs_per_insn_subsequent. */
1690 COSTS_N_INSNS (2), /* loadf. */
1691 COSTS_N_INSNS (3), /* loadd. */
1692 COSTS_N_INSNS (1), /* load_unaligned. */
1693 COSTS_N_INSNS (2), /* store. */
1694 COSTS_N_INSNS (3), /* strd. */
1695 COSTS_N_INSNS (2), /* stm_1st. */
1696 1, /* stm_regs_per_insn_1st. */
1697 1, /* stm_regs_per_insn_subsequent. */
1698 COSTS_N_INSNS (2), /* storef. */
1699 COSTS_N_INSNS (3), /* stored. */
1700 COSTS_N_INSNS (1), /* store_unaligned. */
1701 COSTS_N_INSNS (1), /* loadv. */
1702 COSTS_N_INSNS (1) /* storev. */
1705 /* FP SFmode */
1707 COSTS_N_INSNS (7), /* div. */
1708 COSTS_N_INSNS (2), /* mult. */
1709 COSTS_N_INSNS (5), /* mult_addsub. */
1710 COSTS_N_INSNS (3), /* fma. */
1711 COSTS_N_INSNS (1), /* addsub. */
1712 0, /* fpconst. */
1713 0, /* neg. */
1714 0, /* compare. */
1715 0, /* widen. */
1716 0, /* narrow. */
1717 0, /* toint. */
1718 0, /* fromint. */
1719 0 /* roundint. */
1721 /* FP DFmode */
1723 COSTS_N_INSNS (15), /* div. */
1724 COSTS_N_INSNS (5), /* mult. */
1725 COSTS_N_INSNS (7), /* mult_addsub. */
1726 COSTS_N_INSNS (7), /* fma. */
1727 COSTS_N_INSNS (3), /* addsub. */
1728 0, /* fpconst. */
1729 0, /* neg. */
1730 0, /* compare. */
1731 0, /* widen. */
1732 0, /* narrow. */
1733 0, /* toint. */
1734 0, /* fromint. */
1735 0 /* roundint. */
1738 /* Vector */
1740 COSTS_N_INSNS (1) /* alu. */
1744 const struct tune_params arm_slowmul_tune =
1746 &generic_extra_costs, /* Insn extra costs. */
1747 NULL, /* Sched adj cost. */
1748 arm_default_branch_cost,
1749 &arm_default_vec_cost,
1750 3, /* Constant limit. */
1751 5, /* Max cond insns. */
1752 8, /* Memset max inline. */
1753 1, /* Issue rate. */
1754 ARM_PREFETCH_NOT_BENEFICIAL,
1755 tune_params::PREF_CONST_POOL_TRUE,
1756 tune_params::PREF_LDRD_FALSE,
1757 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1758 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1759 tune_params::DISPARAGE_FLAGS_NEITHER,
1760 tune_params::PREF_NEON_64_FALSE,
1761 tune_params::PREF_NEON_STRINGOPS_FALSE,
1762 tune_params::FUSE_NOTHING,
1763 tune_params::SCHED_AUTOPREF_OFF
1766 const struct tune_params arm_fastmul_tune =
1768 &generic_extra_costs, /* Insn extra costs. */
1769 NULL, /* Sched adj cost. */
1770 arm_default_branch_cost,
1771 &arm_default_vec_cost,
1772 1, /* Constant limit. */
1773 5, /* Max cond insns. */
1774 8, /* Memset max inline. */
1775 1, /* Issue rate. */
1776 ARM_PREFETCH_NOT_BENEFICIAL,
1777 tune_params::PREF_CONST_POOL_TRUE,
1778 tune_params::PREF_LDRD_FALSE,
1779 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1780 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1781 tune_params::DISPARAGE_FLAGS_NEITHER,
1782 tune_params::PREF_NEON_64_FALSE,
1783 tune_params::PREF_NEON_STRINGOPS_FALSE,
1784 tune_params::FUSE_NOTHING,
1785 tune_params::SCHED_AUTOPREF_OFF
1788 /* StrongARM has early execution of branches, so a sequence that is worth
1789 skipping is shorter. Set max_insns_skipped to a lower value. */
1791 const struct tune_params arm_strongarm_tune =
1793 &generic_extra_costs, /* Insn extra costs. */
1794 NULL, /* Sched adj cost. */
1795 arm_default_branch_cost,
1796 &arm_default_vec_cost,
1797 1, /* Constant limit. */
1798 3, /* Max cond insns. */
1799 8, /* Memset max inline. */
1800 1, /* Issue rate. */
1801 ARM_PREFETCH_NOT_BENEFICIAL,
1802 tune_params::PREF_CONST_POOL_TRUE,
1803 tune_params::PREF_LDRD_FALSE,
1804 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1805 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1806 tune_params::DISPARAGE_FLAGS_NEITHER,
1807 tune_params::PREF_NEON_64_FALSE,
1808 tune_params::PREF_NEON_STRINGOPS_FALSE,
1809 tune_params::FUSE_NOTHING,
1810 tune_params::SCHED_AUTOPREF_OFF
1813 const struct tune_params arm_xscale_tune =
1815 &generic_extra_costs, /* Insn extra costs. */
1816 xscale_sched_adjust_cost,
1817 arm_default_branch_cost,
1818 &arm_default_vec_cost,
1819 2, /* Constant limit. */
1820 3, /* Max cond insns. */
1821 8, /* Memset max inline. */
1822 1, /* Issue rate. */
1823 ARM_PREFETCH_NOT_BENEFICIAL,
1824 tune_params::PREF_CONST_POOL_TRUE,
1825 tune_params::PREF_LDRD_FALSE,
1826 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1827 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1828 tune_params::DISPARAGE_FLAGS_NEITHER,
1829 tune_params::PREF_NEON_64_FALSE,
1830 tune_params::PREF_NEON_STRINGOPS_FALSE,
1831 tune_params::FUSE_NOTHING,
1832 tune_params::SCHED_AUTOPREF_OFF
1835 const struct tune_params arm_9e_tune =
1837 &generic_extra_costs, /* Insn extra costs. */
1838 NULL, /* Sched adj cost. */
1839 arm_default_branch_cost,
1840 &arm_default_vec_cost,
1841 1, /* Constant limit. */
1842 5, /* Max cond insns. */
1843 8, /* Memset max inline. */
1844 1, /* Issue rate. */
1845 ARM_PREFETCH_NOT_BENEFICIAL,
1846 tune_params::PREF_CONST_POOL_TRUE,
1847 tune_params::PREF_LDRD_FALSE,
1848 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1849 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1850 tune_params::DISPARAGE_FLAGS_NEITHER,
1851 tune_params::PREF_NEON_64_FALSE,
1852 tune_params::PREF_NEON_STRINGOPS_FALSE,
1853 tune_params::FUSE_NOTHING,
1854 tune_params::SCHED_AUTOPREF_OFF
1857 const struct tune_params arm_marvell_pj4_tune =
1859 &generic_extra_costs, /* Insn extra costs. */
1860 NULL, /* Sched adj cost. */
1861 arm_default_branch_cost,
1862 &arm_default_vec_cost,
1863 1, /* Constant limit. */
1864 5, /* Max cond insns. */
1865 8, /* Memset max inline. */
1866 2, /* Issue rate. */
1867 ARM_PREFETCH_NOT_BENEFICIAL,
1868 tune_params::PREF_CONST_POOL_TRUE,
1869 tune_params::PREF_LDRD_FALSE,
1870 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1871 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1872 tune_params::DISPARAGE_FLAGS_NEITHER,
1873 tune_params::PREF_NEON_64_FALSE,
1874 tune_params::PREF_NEON_STRINGOPS_FALSE,
1875 tune_params::FUSE_NOTHING,
1876 tune_params::SCHED_AUTOPREF_OFF
1879 const struct tune_params arm_v6t2_tune =
1881 &generic_extra_costs, /* Insn extra costs. */
1882 NULL, /* Sched adj cost. */
1883 arm_default_branch_cost,
1884 &arm_default_vec_cost,
1885 1, /* Constant limit. */
1886 5, /* Max cond insns. */
1887 8, /* Memset max inline. */
1888 1, /* Issue rate. */
1889 ARM_PREFETCH_NOT_BENEFICIAL,
1890 tune_params::PREF_CONST_POOL_FALSE,
1891 tune_params::PREF_LDRD_FALSE,
1892 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1893 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1894 tune_params::DISPARAGE_FLAGS_NEITHER,
1895 tune_params::PREF_NEON_64_FALSE,
1896 tune_params::PREF_NEON_STRINGOPS_FALSE,
1897 tune_params::FUSE_NOTHING,
1898 tune_params::SCHED_AUTOPREF_OFF
1902 /* Generic Cortex tuning. Use more specific tunings if appropriate. */
1903 const struct tune_params arm_cortex_tune =
1905 &generic_extra_costs,
1906 NULL, /* Sched adj cost. */
1907 arm_default_branch_cost,
1908 &arm_default_vec_cost,
1909 1, /* Constant limit. */
1910 5, /* Max cond insns. */
1911 8, /* Memset max inline. */
1912 2, /* Issue rate. */
1913 ARM_PREFETCH_NOT_BENEFICIAL,
1914 tune_params::PREF_CONST_POOL_FALSE,
1915 tune_params::PREF_LDRD_FALSE,
1916 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1917 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1918 tune_params::DISPARAGE_FLAGS_NEITHER,
1919 tune_params::PREF_NEON_64_FALSE,
1920 tune_params::PREF_NEON_STRINGOPS_FALSE,
1921 tune_params::FUSE_NOTHING,
1922 tune_params::SCHED_AUTOPREF_OFF
1925 const struct tune_params arm_cortex_a8_tune =
1927 &cortexa8_extra_costs,
1928 NULL, /* Sched adj cost. */
1929 arm_default_branch_cost,
1930 &arm_default_vec_cost,
1931 1, /* Constant limit. */
1932 5, /* Max cond insns. */
1933 8, /* Memset max inline. */
1934 2, /* Issue rate. */
1935 ARM_PREFETCH_NOT_BENEFICIAL,
1936 tune_params::PREF_CONST_POOL_FALSE,
1937 tune_params::PREF_LDRD_FALSE,
1938 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1939 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1940 tune_params::DISPARAGE_FLAGS_NEITHER,
1941 tune_params::PREF_NEON_64_FALSE,
1942 tune_params::PREF_NEON_STRINGOPS_TRUE,
1943 tune_params::FUSE_NOTHING,
1944 tune_params::SCHED_AUTOPREF_OFF
1947 const struct tune_params arm_cortex_a7_tune =
1949 &cortexa7_extra_costs,
1950 NULL, /* Sched adj cost. */
1951 arm_default_branch_cost,
1952 &arm_default_vec_cost,
1953 1, /* Constant limit. */
1954 5, /* Max cond insns. */
1955 8, /* Memset max inline. */
1956 2, /* Issue rate. */
1957 ARM_PREFETCH_NOT_BENEFICIAL,
1958 tune_params::PREF_CONST_POOL_FALSE,
1959 tune_params::PREF_LDRD_FALSE,
1960 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1961 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1962 tune_params::DISPARAGE_FLAGS_NEITHER,
1963 tune_params::PREF_NEON_64_FALSE,
1964 tune_params::PREF_NEON_STRINGOPS_TRUE,
1965 tune_params::FUSE_NOTHING,
1966 tune_params::SCHED_AUTOPREF_OFF
1969 const struct tune_params arm_cortex_a15_tune =
1971 &cortexa15_extra_costs,
1972 NULL, /* Sched adj cost. */
1973 arm_default_branch_cost,
1974 &arm_default_vec_cost,
1975 1, /* Constant limit. */
1976 2, /* Max cond insns. */
1977 8, /* Memset max inline. */
1978 3, /* Issue rate. */
1979 ARM_PREFETCH_NOT_BENEFICIAL,
1980 tune_params::PREF_CONST_POOL_FALSE,
1981 tune_params::PREF_LDRD_TRUE,
1982 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1983 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1984 tune_params::DISPARAGE_FLAGS_ALL,
1985 tune_params::PREF_NEON_64_FALSE,
1986 tune_params::PREF_NEON_STRINGOPS_TRUE,
1987 tune_params::FUSE_NOTHING,
1988 tune_params::SCHED_AUTOPREF_FULL
1991 const struct tune_params arm_cortex_a35_tune =
1993 &cortexa53_extra_costs,
1994 NULL, /* Sched adj cost. */
1995 arm_default_branch_cost,
1996 &arm_default_vec_cost,
1997 1, /* Constant limit. */
1998 5, /* Max cond insns. */
1999 8, /* Memset max inline. */
2000 1, /* Issue rate. */
2001 ARM_PREFETCH_NOT_BENEFICIAL,
2002 tune_params::PREF_CONST_POOL_FALSE,
2003 tune_params::PREF_LDRD_FALSE,
2004 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2005 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2006 tune_params::DISPARAGE_FLAGS_NEITHER,
2007 tune_params::PREF_NEON_64_FALSE,
2008 tune_params::PREF_NEON_STRINGOPS_TRUE,
2009 FUSE_OPS (tune_params::FUSE_MOVW_MOVT),
2010 tune_params::SCHED_AUTOPREF_OFF
2013 const struct tune_params arm_cortex_a53_tune =
2015 &cortexa53_extra_costs,
2016 NULL, /* Sched adj cost. */
2017 arm_default_branch_cost,
2018 &arm_default_vec_cost,
2019 1, /* Constant limit. */
2020 5, /* Max cond insns. */
2021 8, /* Memset max inline. */
2022 2, /* Issue rate. */
2023 ARM_PREFETCH_NOT_BENEFICIAL,
2024 tune_params::PREF_CONST_POOL_FALSE,
2025 tune_params::PREF_LDRD_FALSE,
2026 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2027 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2028 tune_params::DISPARAGE_FLAGS_NEITHER,
2029 tune_params::PREF_NEON_64_FALSE,
2030 tune_params::PREF_NEON_STRINGOPS_TRUE,
2031 FUSE_OPS (tune_params::FUSE_MOVW_MOVT | tune_params::FUSE_AES_AESMC),
2032 tune_params::SCHED_AUTOPREF_OFF
2035 const struct tune_params arm_cortex_a57_tune =
2037 &cortexa57_extra_costs,
2038 NULL, /* Sched adj cost. */
2039 arm_default_branch_cost,
2040 &arm_default_vec_cost,
2041 1, /* Constant limit. */
2042 2, /* Max cond insns. */
2043 8, /* Memset max inline. */
2044 3, /* Issue rate. */
2045 ARM_PREFETCH_NOT_BENEFICIAL,
2046 tune_params::PREF_CONST_POOL_FALSE,
2047 tune_params::PREF_LDRD_TRUE,
2048 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2049 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2050 tune_params::DISPARAGE_FLAGS_ALL,
2051 tune_params::PREF_NEON_64_FALSE,
2052 tune_params::PREF_NEON_STRINGOPS_TRUE,
2053 FUSE_OPS (tune_params::FUSE_MOVW_MOVT | tune_params::FUSE_AES_AESMC),
2054 tune_params::SCHED_AUTOPREF_FULL
2057 const struct tune_params arm_exynosm1_tune =
2059 &exynosm1_extra_costs,
2060 NULL, /* Sched adj cost. */
2061 arm_default_branch_cost,
2062 &arm_default_vec_cost,
2063 1, /* Constant limit. */
2064 2, /* Max cond insns. */
2065 8, /* Memset max inline. */
2066 3, /* Issue rate. */
2067 ARM_PREFETCH_NOT_BENEFICIAL,
2068 tune_params::PREF_CONST_POOL_FALSE,
2069 tune_params::PREF_LDRD_TRUE,
2070 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* Thumb. */
2071 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* ARM. */
2072 tune_params::DISPARAGE_FLAGS_ALL,
2073 tune_params::PREF_NEON_64_FALSE,
2074 tune_params::PREF_NEON_STRINGOPS_TRUE,
2075 tune_params::FUSE_NOTHING,
2076 tune_params::SCHED_AUTOPREF_OFF
2079 const struct tune_params arm_xgene1_tune =
2081 &xgene1_extra_costs,
2082 NULL, /* Sched adj cost. */
2083 arm_default_branch_cost,
2084 &arm_default_vec_cost,
2085 1, /* Constant limit. */
2086 2, /* Max cond insns. */
2087 32, /* Memset max inline. */
2088 4, /* Issue rate. */
2089 ARM_PREFETCH_NOT_BENEFICIAL,
2090 tune_params::PREF_CONST_POOL_FALSE,
2091 tune_params::PREF_LDRD_TRUE,
2092 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2093 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2094 tune_params::DISPARAGE_FLAGS_ALL,
2095 tune_params::PREF_NEON_64_FALSE,
2096 tune_params::PREF_NEON_STRINGOPS_FALSE,
2097 tune_params::FUSE_NOTHING,
2098 tune_params::SCHED_AUTOPREF_OFF
2101 /* Branches can be dual-issued on Cortex-A5, so conditional execution is
2102 less appealing. Set max_insns_skipped to a low value. */
2104 const struct tune_params arm_cortex_a5_tune =
2106 &cortexa5_extra_costs,
2107 NULL, /* Sched adj cost. */
2108 arm_cortex_a5_branch_cost,
2109 &arm_default_vec_cost,
2110 1, /* Constant limit. */
2111 1, /* Max cond insns. */
2112 8, /* Memset max inline. */
2113 2, /* Issue rate. */
2114 ARM_PREFETCH_NOT_BENEFICIAL,
2115 tune_params::PREF_CONST_POOL_FALSE,
2116 tune_params::PREF_LDRD_FALSE,
2117 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* Thumb. */
2118 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* ARM. */
2119 tune_params::DISPARAGE_FLAGS_NEITHER,
2120 tune_params::PREF_NEON_64_FALSE,
2121 tune_params::PREF_NEON_STRINGOPS_TRUE,
2122 tune_params::FUSE_NOTHING,
2123 tune_params::SCHED_AUTOPREF_OFF
2126 const struct tune_params arm_cortex_a9_tune =
2128 &cortexa9_extra_costs,
2129 cortex_a9_sched_adjust_cost,
2130 arm_default_branch_cost,
2131 &arm_default_vec_cost,
2132 1, /* Constant limit. */
2133 5, /* Max cond insns. */
2134 8, /* Memset max inline. */
2135 2, /* Issue rate. */
2136 ARM_PREFETCH_BENEFICIAL(4,32,32),
2137 tune_params::PREF_CONST_POOL_FALSE,
2138 tune_params::PREF_LDRD_FALSE,
2139 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2140 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2141 tune_params::DISPARAGE_FLAGS_NEITHER,
2142 tune_params::PREF_NEON_64_FALSE,
2143 tune_params::PREF_NEON_STRINGOPS_FALSE,
2144 tune_params::FUSE_NOTHING,
2145 tune_params::SCHED_AUTOPREF_OFF
2148 const struct tune_params arm_cortex_a12_tune =
2150 &cortexa12_extra_costs,
2151 NULL, /* Sched adj cost. */
2152 arm_default_branch_cost,
2153 &arm_default_vec_cost, /* Vectorizer costs. */
2154 1, /* Constant limit. */
2155 2, /* Max cond insns. */
2156 8, /* Memset max inline. */
2157 2, /* Issue rate. */
2158 ARM_PREFETCH_NOT_BENEFICIAL,
2159 tune_params::PREF_CONST_POOL_FALSE,
2160 tune_params::PREF_LDRD_TRUE,
2161 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2162 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2163 tune_params::DISPARAGE_FLAGS_ALL,
2164 tune_params::PREF_NEON_64_FALSE,
2165 tune_params::PREF_NEON_STRINGOPS_TRUE,
2166 FUSE_OPS (tune_params::FUSE_MOVW_MOVT),
2167 tune_params::SCHED_AUTOPREF_OFF
2170 const struct tune_params arm_cortex_a73_tune =
2172 &cortexa57_extra_costs,
2173 NULL, /* Sched adj cost. */
2174 arm_default_branch_cost,
2175 &arm_default_vec_cost, /* Vectorizer costs. */
2176 1, /* Constant limit. */
2177 2, /* Max cond insns. */
2178 8, /* Memset max inline. */
2179 2, /* Issue rate. */
2180 ARM_PREFETCH_NOT_BENEFICIAL,
2181 tune_params::PREF_CONST_POOL_FALSE,
2182 tune_params::PREF_LDRD_TRUE,
2183 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2184 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2185 tune_params::DISPARAGE_FLAGS_ALL,
2186 tune_params::PREF_NEON_64_FALSE,
2187 tune_params::PREF_NEON_STRINGOPS_TRUE,
2188 FUSE_OPS (tune_params::FUSE_AES_AESMC | tune_params::FUSE_MOVW_MOVT),
2189 tune_params::SCHED_AUTOPREF_FULL
2192 /* armv7m tuning.  On Cortex-M4 cores, for example, MOVW and MOVT each take a
2193 single cycle to execute, so materialising a constant with the pair costs two
2194 cycles.  An LDR from the constant pool likewise takes two cycles, but mildly
2195 increases pipelining opportunity (consecutive loads/stores can be pipelined
2196 together, saving one cycle), and may also improve icache utilisation.  Hence
2197 we prefer the constant pool for such processors. */
2199 const struct tune_params arm_v7m_tune =
2201 &v7m_extra_costs,
2202 NULL, /* Sched adj cost. */
2203 arm_cortex_m_branch_cost,
2204 &arm_default_vec_cost,
2205 1, /* Constant limit. */
2206 2, /* Max cond insns. */
2207 8, /* Memset max inline. */
2208 1, /* Issue rate. */
2209 ARM_PREFETCH_NOT_BENEFICIAL,
2210 tune_params::PREF_CONST_POOL_TRUE,
2211 tune_params::PREF_LDRD_FALSE,
2212 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* Thumb. */
2213 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* ARM. */
2214 tune_params::DISPARAGE_FLAGS_NEITHER,
2215 tune_params::PREF_NEON_64_FALSE,
2216 tune_params::PREF_NEON_STRINGOPS_FALSE,
2217 tune_params::FUSE_NOTHING,
2218 tune_params::SCHED_AUTOPREF_OFF
2221 /* Cortex-M7 tuning. */
2223 const struct tune_params arm_cortex_m7_tune =
2225 &v7m_extra_costs,
2226 NULL, /* Sched adj cost. */
2227 arm_cortex_m7_branch_cost,
2228 &arm_default_vec_cost,
2229 0, /* Constant limit. */
2230 1, /* Max cond insns. */
2231 8, /* Memset max inline. */
2232 2, /* Issue rate. */
2233 ARM_PREFETCH_NOT_BENEFICIAL,
2234 tune_params::PREF_CONST_POOL_TRUE,
2235 tune_params::PREF_LDRD_FALSE,
2236 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2237 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2238 tune_params::DISPARAGE_FLAGS_NEITHER,
2239 tune_params::PREF_NEON_64_FALSE,
2240 tune_params::PREF_NEON_STRINGOPS_FALSE,
2241 tune_params::FUSE_NOTHING,
2242 tune_params::SCHED_AUTOPREF_OFF
2245 /* The arm_v6m_tune is duplicated from arm_cortex_tune, rather than
2246 arm_v6t2_tune. It is used for cortex-m0, cortex-m1, cortex-m0plus and
2247 cortex-m23. */
2248 const struct tune_params arm_v6m_tune =
2250 &generic_extra_costs, /* Insn extra costs. */
2251 NULL, /* Sched adj cost. */
2252 arm_default_branch_cost,
2253 &arm_default_vec_cost, /* Vectorizer costs. */
2254 1, /* Constant limit. */
2255 5, /* Max cond insns. */
2256 8, /* Memset max inline. */
2257 1, /* Issue rate. */
2258 ARM_PREFETCH_NOT_BENEFICIAL,
2259 tune_params::PREF_CONST_POOL_FALSE,
2260 tune_params::PREF_LDRD_FALSE,
2261 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* Thumb. */
2262 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* ARM. */
2263 tune_params::DISPARAGE_FLAGS_NEITHER,
2264 tune_params::PREF_NEON_64_FALSE,
2265 tune_params::PREF_NEON_STRINGOPS_FALSE,
2266 tune_params::FUSE_NOTHING,
2267 tune_params::SCHED_AUTOPREF_OFF
2270 const struct tune_params arm_fa726te_tune =
2272 &generic_extra_costs, /* Insn extra costs. */
2273 fa726te_sched_adjust_cost,
2274 arm_default_branch_cost,
2275 &arm_default_vec_cost,
2276 1, /* Constant limit. */
2277 5, /* Max cond insns. */
2278 8, /* Memset max inline. */
2279 2, /* Issue rate. */
2280 ARM_PREFETCH_NOT_BENEFICIAL,
2281 tune_params::PREF_CONST_POOL_TRUE,
2282 tune_params::PREF_LDRD_FALSE,
2283 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2284 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2285 tune_params::DISPARAGE_FLAGS_NEITHER,
2286 tune_params::PREF_NEON_64_FALSE,
2287 tune_params::PREF_NEON_STRINGOPS_FALSE,
2288 tune_params::FUSE_NOTHING,
2289 tune_params::SCHED_AUTOPREF_OFF
2292 /* Auto-generated CPU, FPU and architecture tables. */
2293 #include "arm-cpu-data.h"
2295 /* The name of the preprocessor macro to define for this architecture. PROFILE
2296 is replaced by the architecture name (e.g. 8A) in arm_option_override () and
2297 is thus chosen to be big enough to hold the longest architecture name. */
2299 char arm_arch_name[] = "__ARM_ARCH_PROFILE__";
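/* arm_arch_name ends up holding, e.g., "__ARM_ARCH_8A__" once
   arm_option_override () has substituted the real architecture name.  */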
2301 /* Supported TLS relocations. */
2303 enum tls_reloc {
2304 TLS_GD32,
2305 TLS_LDM32,
2306 TLS_LDO32,
2307 TLS_IE32,
2308 TLS_LE32,
2309 TLS_DESCSEQ /* GNU scheme */
2312 /* The maximum number of insns to be used when loading a constant. */
2313 inline static int
2314 arm_constant_limit (bool size_p)
2316 return size_p ? 1 : current_tune->constant_limit;
2319 /* Emit an insn that's a simple single-set. Both the operands must be known
2320 to be valid. */
2321 inline static rtx_insn *
2322 emit_set_insn (rtx x, rtx y)
2324 return emit_insn (gen_rtx_SET (x, y));
2327 /* Return the number of bits set in VALUE. */
2328 static unsigned
2329 bit_count (unsigned long value)
2331 unsigned long count = 0;
2333 while (value)
2335 count++;
2336 value &= value - 1; /* Clear the least-significant set bit. */
2339 return count;
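/* For example, bit_count (0x29) clears one set bit per iteration
   (0x29 -> 0x28 -> 0x20 -> 0) and returns 3.  */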
2342 /* Return the number of bits set in BMAP. */
2343 static unsigned
2344 bitmap_popcount (const sbitmap bmap)
2346 unsigned int count = 0;
2347 unsigned int n = 0;
2348 sbitmap_iterator sbi;
2350 EXECUTE_IF_SET_IN_BITMAP (bmap, 0, n, sbi)
2351 count++;
2352 return count;
2355 typedef struct
2357 machine_mode mode;
2358 const char *name;
2359 } arm_fixed_mode_set;
2361 /* A small helper for setting fixed-point library libfuncs. */
2363 static void
2364 arm_set_fixed_optab_libfunc (optab optable, machine_mode mode,
2365 const char *funcname, const char *modename,
2366 int num_suffix)
2368 char buffer[50];
2370 if (num_suffix == 0)
2371 sprintf (buffer, "__gnu_%s%s", funcname, modename);
2372 else
2373 sprintf (buffer, "__gnu_%s%s%d", funcname, modename, num_suffix);
2375 set_optab_libfunc (optable, mode, buffer);
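/* For example, arm_set_fixed_optab_libfunc (add_optab, QQmode, "add", "qq", 3)
   builds the name "__gnu_addqq3" and registers it as the QQmode addition
   libfunc.  */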
2378 static void
2379 arm_set_fixed_conv_libfunc (convert_optab optable, machine_mode to,
2380 machine_mode from, const char *funcname,
2381 const char *toname, const char *fromname)
2383 char buffer[50];
2384 const char *maybe_suffix_2 = "";
2386 /* Follow the logic for selecting a "2" suffix in fixed-bit.h. */
2387 if (ALL_FIXED_POINT_MODE_P (from) && ALL_FIXED_POINT_MODE_P (to)
2388 && UNSIGNED_FIXED_POINT_MODE_P (from) == UNSIGNED_FIXED_POINT_MODE_P (to)
2389 && ALL_FRACT_MODE_P (from) == ALL_FRACT_MODE_P (to))
2390 maybe_suffix_2 = "2";
2392 sprintf (buffer, "__gnu_%s%s%s%s", funcname, fromname, toname,
2393 maybe_suffix_2);
2395 set_conv_libfunc (optable, to, from, buffer);
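/* For example, converting FROM QQmode TO HQmode with fract_optab builds the
   name "__gnu_fractqqhq2"; the trailing "2" is added because the two modes
   agree in signedness and are both fractional rather than accumulator
   modes.  */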
2398 /* Set up library functions unique to ARM. */
2400 static void
2401 arm_init_libfuncs (void)
2403 /* For Linux, we have access to kernel support for atomic operations. */
2404 if (arm_abi == ARM_ABI_AAPCS_LINUX)
2405 init_sync_libfuncs (MAX_SYNC_LIBFUNC_SIZE);
2407 /* There are no special library functions unless we are using the
2408 ARM BPABI. */
2409 if (!TARGET_BPABI)
2410 return;
2412 /* The functions below are described in Section 4 of the "Run-Time
2413 ABI for the ARM architecture", Version 1.0. */
2415 /* Double-precision floating-point arithmetic. Table 2. */
2416 set_optab_libfunc (add_optab, DFmode, "__aeabi_dadd");
2417 set_optab_libfunc (sdiv_optab, DFmode, "__aeabi_ddiv");
2418 set_optab_libfunc (smul_optab, DFmode, "__aeabi_dmul");
2419 set_optab_libfunc (neg_optab, DFmode, "__aeabi_dneg");
2420 set_optab_libfunc (sub_optab, DFmode, "__aeabi_dsub");
2422 /* Double-precision comparisons. Table 3. */
2423 set_optab_libfunc (eq_optab, DFmode, "__aeabi_dcmpeq");
2424 set_optab_libfunc (ne_optab, DFmode, NULL);
2425 set_optab_libfunc (lt_optab, DFmode, "__aeabi_dcmplt");
2426 set_optab_libfunc (le_optab, DFmode, "__aeabi_dcmple");
2427 set_optab_libfunc (ge_optab, DFmode, "__aeabi_dcmpge");
2428 set_optab_libfunc (gt_optab, DFmode, "__aeabi_dcmpgt");
2429 set_optab_libfunc (unord_optab, DFmode, "__aeabi_dcmpun");
2431 /* Single-precision floating-point arithmetic. Table 4. */
2432 set_optab_libfunc (add_optab, SFmode, "__aeabi_fadd");
2433 set_optab_libfunc (sdiv_optab, SFmode, "__aeabi_fdiv");
2434 set_optab_libfunc (smul_optab, SFmode, "__aeabi_fmul");
2435 set_optab_libfunc (neg_optab, SFmode, "__aeabi_fneg");
2436 set_optab_libfunc (sub_optab, SFmode, "__aeabi_fsub");
2438 /* Single-precision comparisons. Table 5. */
2439 set_optab_libfunc (eq_optab, SFmode, "__aeabi_fcmpeq");
2440 set_optab_libfunc (ne_optab, SFmode, NULL);
2441 set_optab_libfunc (lt_optab, SFmode, "__aeabi_fcmplt");
2442 set_optab_libfunc (le_optab, SFmode, "__aeabi_fcmple");
2443 set_optab_libfunc (ge_optab, SFmode, "__aeabi_fcmpge");
2444 set_optab_libfunc (gt_optab, SFmode, "__aeabi_fcmpgt");
2445 set_optab_libfunc (unord_optab, SFmode, "__aeabi_fcmpun");
2447 /* Floating-point to integer conversions. Table 6. */
2448 set_conv_libfunc (sfix_optab, SImode, DFmode, "__aeabi_d2iz");
2449 set_conv_libfunc (ufix_optab, SImode, DFmode, "__aeabi_d2uiz");
2450 set_conv_libfunc (sfix_optab, DImode, DFmode, "__aeabi_d2lz");
2451 set_conv_libfunc (ufix_optab, DImode, DFmode, "__aeabi_d2ulz");
2452 set_conv_libfunc (sfix_optab, SImode, SFmode, "__aeabi_f2iz");
2453 set_conv_libfunc (ufix_optab, SImode, SFmode, "__aeabi_f2uiz");
2454 set_conv_libfunc (sfix_optab, DImode, SFmode, "__aeabi_f2lz");
2455 set_conv_libfunc (ufix_optab, DImode, SFmode, "__aeabi_f2ulz");
2457 /* Conversions between floating types. Table 7. */
2458 set_conv_libfunc (trunc_optab, SFmode, DFmode, "__aeabi_d2f");
2459 set_conv_libfunc (sext_optab, DFmode, SFmode, "__aeabi_f2d");
2461 /* Integer to floating-point conversions. Table 8. */
2462 set_conv_libfunc (sfloat_optab, DFmode, SImode, "__aeabi_i2d");
2463 set_conv_libfunc (ufloat_optab, DFmode, SImode, "__aeabi_ui2d");
2464 set_conv_libfunc (sfloat_optab, DFmode, DImode, "__aeabi_l2d");
2465 set_conv_libfunc (ufloat_optab, DFmode, DImode, "__aeabi_ul2d");
2466 set_conv_libfunc (sfloat_optab, SFmode, SImode, "__aeabi_i2f");
2467 set_conv_libfunc (ufloat_optab, SFmode, SImode, "__aeabi_ui2f");
2468 set_conv_libfunc (sfloat_optab, SFmode, DImode, "__aeabi_l2f");
2469 set_conv_libfunc (ufloat_optab, SFmode, DImode, "__aeabi_ul2f");
2471 /* Long long. Table 9. */
2472 set_optab_libfunc (smul_optab, DImode, "__aeabi_lmul");
2473 set_optab_libfunc (sdivmod_optab, DImode, "__aeabi_ldivmod");
2474 set_optab_libfunc (udivmod_optab, DImode, "__aeabi_uldivmod");
2475 set_optab_libfunc (ashl_optab, DImode, "__aeabi_llsl");
2476 set_optab_libfunc (lshr_optab, DImode, "__aeabi_llsr");
2477 set_optab_libfunc (ashr_optab, DImode, "__aeabi_lasr");
2478 set_optab_libfunc (cmp_optab, DImode, "__aeabi_lcmp");
2479 set_optab_libfunc (ucmp_optab, DImode, "__aeabi_ulcmp");
2481 /* Integer (32/32->32) division. \S 4.3.1. */
2482 set_optab_libfunc (sdivmod_optab, SImode, "__aeabi_idivmod");
2483 set_optab_libfunc (udivmod_optab, SImode, "__aeabi_uidivmod");
2485 /* The divmod functions are designed so that they can be used for
2486 plain division, even though they return both the quotient and the
2487 remainder. The quotient is returned in the usual location (i.e.,
2488 r0 for SImode, {r0, r1} for DImode), just as would be expected
2489 for an ordinary division routine. Because the AAPCS calling
2490 conventions specify that all of { r0, r1, r2, r3 } are
2491 call-clobbered registers, there is no need to tell the compiler
2492 explicitly that those registers are clobbered by these
2493 routines. */
2494 set_optab_libfunc (sdiv_optab, DImode, "__aeabi_ldivmod");
2495 set_optab_libfunc (udiv_optab, DImode, "__aeabi_uldivmod");
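/* For example, per the run-time ABI a call to __aeabi_idivmod leaves the
   quotient in r0 and the remainder in r1, so when it stands in for a plain
   division routine the value in r1 is simply ignored.  */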
2497 /* For SImode division the ABI provides div-without-mod routines,
2498 which are faster. */
2499 set_optab_libfunc (sdiv_optab, SImode, "__aeabi_idiv");
2500 set_optab_libfunc (udiv_optab, SImode, "__aeabi_uidiv");
2502 /* We don't have mod libcalls. Fortunately gcc knows how to use the
2503 divmod libcalls instead. */
2504 set_optab_libfunc (smod_optab, DImode, NULL);
2505 set_optab_libfunc (umod_optab, DImode, NULL);
2506 set_optab_libfunc (smod_optab, SImode, NULL);
2507 set_optab_libfunc (umod_optab, SImode, NULL);
2509 /* Half-precision float operations. The compiler handles all operations
2510 with NULL libfuncs by converting to SFmode. */
2511 switch (arm_fp16_format)
2513 case ARM_FP16_FORMAT_IEEE:
2514 case ARM_FP16_FORMAT_ALTERNATIVE:
2516 /* Conversions. */
2517 set_conv_libfunc (trunc_optab, HFmode, SFmode,
2518 (arm_fp16_format == ARM_FP16_FORMAT_IEEE
2519 ? "__gnu_f2h_ieee"
2520 : "__gnu_f2h_alternative"));
2521 set_conv_libfunc (sext_optab, SFmode, HFmode,
2522 (arm_fp16_format == ARM_FP16_FORMAT_IEEE
2523 ? "__gnu_h2f_ieee"
2524 : "__gnu_h2f_alternative"));
2526 set_conv_libfunc (trunc_optab, HFmode, DFmode,
2527 (arm_fp16_format == ARM_FP16_FORMAT_IEEE
2528 ? "__gnu_d2h_ieee"
2529 : "__gnu_d2h_alternative"));
2531 /* Arithmetic. */
2532 set_optab_libfunc (add_optab, HFmode, NULL);
2533 set_optab_libfunc (sdiv_optab, HFmode, NULL);
2534 set_optab_libfunc (smul_optab, HFmode, NULL);
2535 set_optab_libfunc (neg_optab, HFmode, NULL);
2536 set_optab_libfunc (sub_optab, HFmode, NULL);
2538 /* Comparisons. */
2539 set_optab_libfunc (eq_optab, HFmode, NULL);
2540 set_optab_libfunc (ne_optab, HFmode, NULL);
2541 set_optab_libfunc (lt_optab, HFmode, NULL);
2542 set_optab_libfunc (le_optab, HFmode, NULL);
2543 set_optab_libfunc (ge_optab, HFmode, NULL);
2544 set_optab_libfunc (gt_optab, HFmode, NULL);
2545 set_optab_libfunc (unord_optab, HFmode, NULL);
2546 break;
2548 default:
2549 break;
2552 /* Use names prefixed with __gnu_ for fixed-point helper functions. */
2554 const arm_fixed_mode_set fixed_arith_modes[] =
2556 { E_QQmode, "qq" },
2557 { E_UQQmode, "uqq" },
2558 { E_HQmode, "hq" },
2559 { E_UHQmode, "uhq" },
2560 { E_SQmode, "sq" },
2561 { E_USQmode, "usq" },
2562 { E_DQmode, "dq" },
2563 { E_UDQmode, "udq" },
2564 { E_TQmode, "tq" },
2565 { E_UTQmode, "utq" },
2566 { E_HAmode, "ha" },
2567 { E_UHAmode, "uha" },
2568 { E_SAmode, "sa" },
2569 { E_USAmode, "usa" },
2570 { E_DAmode, "da" },
2571 { E_UDAmode, "uda" },
2572 { E_TAmode, "ta" },
2573 { E_UTAmode, "uta" }
2575 const arm_fixed_mode_set fixed_conv_modes[] =
2577 { E_QQmode, "qq" },
2578 { E_UQQmode, "uqq" },
2579 { E_HQmode, "hq" },
2580 { E_UHQmode, "uhq" },
2581 { E_SQmode, "sq" },
2582 { E_USQmode, "usq" },
2583 { E_DQmode, "dq" },
2584 { E_UDQmode, "udq" },
2585 { E_TQmode, "tq" },
2586 { E_UTQmode, "utq" },
2587 { E_HAmode, "ha" },
2588 { E_UHAmode, "uha" },
2589 { E_SAmode, "sa" },
2590 { E_USAmode, "usa" },
2591 { E_DAmode, "da" },
2592 { E_UDAmode, "uda" },
2593 { E_TAmode, "ta" },
2594 { E_UTAmode, "uta" },
2595 { E_QImode, "qi" },
2596 { E_HImode, "hi" },
2597 { E_SImode, "si" },
2598 { E_DImode, "di" },
2599 { E_TImode, "ti" },
2600 { E_SFmode, "sf" },
2601 { E_DFmode, "df" }
2603 unsigned int i, j;
2605 for (i = 0; i < ARRAY_SIZE (fixed_arith_modes); i++)
2607 arm_set_fixed_optab_libfunc (add_optab, fixed_arith_modes[i].mode,
2608 "add", fixed_arith_modes[i].name, 3);
2609 arm_set_fixed_optab_libfunc (ssadd_optab, fixed_arith_modes[i].mode,
2610 "ssadd", fixed_arith_modes[i].name, 3);
2611 arm_set_fixed_optab_libfunc (usadd_optab, fixed_arith_modes[i].mode,
2612 "usadd", fixed_arith_modes[i].name, 3);
2613 arm_set_fixed_optab_libfunc (sub_optab, fixed_arith_modes[i].mode,
2614 "sub", fixed_arith_modes[i].name, 3);
2615 arm_set_fixed_optab_libfunc (sssub_optab, fixed_arith_modes[i].mode,
2616 "sssub", fixed_arith_modes[i].name, 3);
2617 arm_set_fixed_optab_libfunc (ussub_optab, fixed_arith_modes[i].mode,
2618 "ussub", fixed_arith_modes[i].name, 3);
2619 arm_set_fixed_optab_libfunc (smul_optab, fixed_arith_modes[i].mode,
2620 "mul", fixed_arith_modes[i].name, 3);
2621 arm_set_fixed_optab_libfunc (ssmul_optab, fixed_arith_modes[i].mode,
2622 "ssmul", fixed_arith_modes[i].name, 3);
2623 arm_set_fixed_optab_libfunc (usmul_optab, fixed_arith_modes[i].mode,
2624 "usmul", fixed_arith_modes[i].name, 3);
2625 arm_set_fixed_optab_libfunc (sdiv_optab, fixed_arith_modes[i].mode,
2626 "div", fixed_arith_modes[i].name, 3);
2627 arm_set_fixed_optab_libfunc (udiv_optab, fixed_arith_modes[i].mode,
2628 "udiv", fixed_arith_modes[i].name, 3);
2629 arm_set_fixed_optab_libfunc (ssdiv_optab, fixed_arith_modes[i].mode,
2630 "ssdiv", fixed_arith_modes[i].name, 3);
2631 arm_set_fixed_optab_libfunc (usdiv_optab, fixed_arith_modes[i].mode,
2632 "usdiv", fixed_arith_modes[i].name, 3);
2633 arm_set_fixed_optab_libfunc (neg_optab, fixed_arith_modes[i].mode,
2634 "neg", fixed_arith_modes[i].name, 2);
2635 arm_set_fixed_optab_libfunc (ssneg_optab, fixed_arith_modes[i].mode,
2636 "ssneg", fixed_arith_modes[i].name, 2);
2637 arm_set_fixed_optab_libfunc (usneg_optab, fixed_arith_modes[i].mode,
2638 "usneg", fixed_arith_modes[i].name, 2);
2639 arm_set_fixed_optab_libfunc (ashl_optab, fixed_arith_modes[i].mode,
2640 "ashl", fixed_arith_modes[i].name, 3);
2641 arm_set_fixed_optab_libfunc (ashr_optab, fixed_arith_modes[i].mode,
2642 "ashr", fixed_arith_modes[i].name, 3);
2643 arm_set_fixed_optab_libfunc (lshr_optab, fixed_arith_modes[i].mode,
2644 "lshr", fixed_arith_modes[i].name, 3);
2645 arm_set_fixed_optab_libfunc (ssashl_optab, fixed_arith_modes[i].mode,
2646 "ssashl", fixed_arith_modes[i].name, 3);
2647 arm_set_fixed_optab_libfunc (usashl_optab, fixed_arith_modes[i].mode,
2648 "usashl", fixed_arith_modes[i].name, 3);
2649 arm_set_fixed_optab_libfunc (cmp_optab, fixed_arith_modes[i].mode,
2650 "cmp", fixed_arith_modes[i].name, 2);
2653 for (i = 0; i < ARRAY_SIZE (fixed_conv_modes); i++)
2654 for (j = 0; j < ARRAY_SIZE (fixed_conv_modes); j++)
2656 if (i == j
2657 || (!ALL_FIXED_POINT_MODE_P (fixed_conv_modes[i].mode)
2658 && !ALL_FIXED_POINT_MODE_P (fixed_conv_modes[j].mode)))
2659 continue;
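/* Every conversion registered below therefore involves at least one
   fixed-point mode and is never a conversion from a mode to itself.  */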
2661 arm_set_fixed_conv_libfunc (fract_optab, fixed_conv_modes[i].mode,
2662 fixed_conv_modes[j].mode, "fract",
2663 fixed_conv_modes[i].name,
2664 fixed_conv_modes[j].name);
2665 arm_set_fixed_conv_libfunc (satfract_optab,
2666 fixed_conv_modes[i].mode,
2667 fixed_conv_modes[j].mode, "satfract",
2668 fixed_conv_modes[i].name,
2669 fixed_conv_modes[j].name);
2670 arm_set_fixed_conv_libfunc (fractuns_optab,
2671 fixed_conv_modes[i].mode,
2672 fixed_conv_modes[j].mode, "fractuns",
2673 fixed_conv_modes[i].name,
2674 fixed_conv_modes[j].name);
2675 arm_set_fixed_conv_libfunc (satfractuns_optab,
2676 fixed_conv_modes[i].mode,
2677 fixed_conv_modes[j].mode, "satfractuns",
2678 fixed_conv_modes[i].name,
2679 fixed_conv_modes[j].name);
2683 if (TARGET_AAPCS_BASED)
2684 synchronize_libfunc = init_one_libfunc ("__sync_synchronize");
2687 /* On AAPCS systems, this is the "struct __va_list". */
2688 static GTY(()) tree va_list_type;
2690 /* Return the type to use as __builtin_va_list. */
2691 static tree
2692 arm_build_builtin_va_list (void)
2694 tree va_list_name;
2695 tree ap_field;
2697 if (!TARGET_AAPCS_BASED)
2698 return std_build_builtin_va_list ();
2700 /* AAPCS \S 7.1.4 requires that va_list be a typedef for a type
2701 defined as:
2703 struct __va_list
2705 void *__ap;
2708 The C Library ABI further reinforces this definition in \S
2709 4.1.
2711 We must follow this definition exactly. The structure tag
2712 name is visible in C++ mangled names, and thus forms a part
2713 of the ABI. The field name may be used by people who
2714 #include <stdarg.h>. */
2715 /* Create the type. */
2716 va_list_type = lang_hooks.types.make_type (RECORD_TYPE);
2717 /* Give it the required name. */
2718 va_list_name = build_decl (BUILTINS_LOCATION,
2719 TYPE_DECL,
2720 get_identifier ("__va_list"),
2721 va_list_type);
2722 DECL_ARTIFICIAL (va_list_name) = 1;
2723 TYPE_NAME (va_list_type) = va_list_name;
2724 TYPE_STUB_DECL (va_list_type) = va_list_name;
2725 /* Create the __ap field. */
2726 ap_field = build_decl (BUILTINS_LOCATION,
2727 FIELD_DECL,
2728 get_identifier ("__ap"),
2729 ptr_type_node);
2730 DECL_ARTIFICIAL (ap_field) = 1;
2731 DECL_FIELD_CONTEXT (ap_field) = va_list_type;
2732 TYPE_FIELDS (va_list_type) = ap_field;
2733 /* Compute its layout. */
2734 layout_type (va_list_type);
2736 return va_list_type;
2739 /* Return an expression of type "void *" pointing to the next
2740 available argument in a variable-argument list. VALIST is the
2741 user-level va_list object, of type __builtin_va_list. */
2742 static tree
2743 arm_extract_valist_ptr (tree valist)
2745 if (TREE_TYPE (valist) == error_mark_node)
2746 return error_mark_node;
2748 /* On an AAPCS target, the pointer is stored within "struct
2749 va_list". */
2750 if (TARGET_AAPCS_BASED)
2752 tree ap_field = TYPE_FIELDS (TREE_TYPE (valist));
2753 valist = build3 (COMPONENT_REF, TREE_TYPE (ap_field),
2754 valist, ap_field, NULL_TREE);
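/* I.e., rewrite VALIST into the equivalent of the C expression
   valist.__ap before handing it to the generic code.  */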
2757 return valist;
2760 /* Implement TARGET_EXPAND_BUILTIN_VA_START. */
2761 static void
2762 arm_expand_builtin_va_start (tree valist, rtx nextarg)
2764 valist = arm_extract_valist_ptr (valist);
2765 std_expand_builtin_va_start (valist, nextarg);
2768 /* Implement TARGET_GIMPLIFY_VA_ARG_EXPR. */
2769 static tree
2770 arm_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
2771 gimple_seq *post_p)
2773 valist = arm_extract_valist_ptr (valist);
2774 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
2777 /* Check any incompatible options that the user has specified. */
2778 static void
2779 arm_option_check_internal (struct gcc_options *opts)
2781 int flags = opts->x_target_flags;
2783 /* iWMMXt and NEON are incompatible. */
2784 if (TARGET_IWMMXT
2785 && bitmap_bit_p (arm_active_target.isa, isa_bit_neon))
2786 error ("iWMMXt and NEON are incompatible");
2788 /* Make sure that the processor choice does not conflict with any of the
2789 other command line choices. */
2790 if (TARGET_ARM_P (flags)
2791 && !bitmap_bit_p (arm_active_target.isa, isa_bit_notm))
2792 error ("target CPU does not support ARM mode");
2794 /* TARGET_BACKTRACE cannot be used here as crtl->is_leaf is not set yet. */
2795 if ((TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME) && TARGET_ARM_P (flags))
2796 warning (0, "enabling backtrace support is only meaningful when compiling for the Thumb");
2798 if (TARGET_ARM_P (flags) && TARGET_CALLEE_INTERWORKING)
2799 warning (0, "enabling callee interworking support is only meaningful when compiling for the Thumb");
2801 /* If this target is normally configured to use APCS frames, warn if they
2802 are turned off and debugging is turned on. */
2803 if (TARGET_ARM_P (flags)
2804 && write_symbols != NO_DEBUG
2805 && !TARGET_APCS_FRAME
2806 && (TARGET_DEFAULT & MASK_APCS_FRAME))
2807 warning (0, "-g with -mno-apcs-frame may not give sensible debugging");
2809 /* iWMMXt unsupported under Thumb mode. */
2810 if (TARGET_THUMB_P (flags) && TARGET_IWMMXT)
2811 error ("iWMMXt unsupported under Thumb mode");
2813 if (TARGET_HARD_TP && TARGET_THUMB1_P (flags))
2814 error ("can not use -mtp=cp15 with 16-bit Thumb");
2816 if (TARGET_THUMB_P (flags) && TARGET_VXWORKS_RTP && flag_pic)
2818 error ("RTP PIC is incompatible with Thumb");
2819 flag_pic = 0;
2822 /* We only support -mpure-code and -mslow-flash-data on M-profile targets
2823 with MOVT. */
2824 if ((target_pure_code || target_slow_flash_data)
2825 && (!TARGET_HAVE_MOVT || arm_arch_notm || flag_pic || TARGET_NEON))
2827 const char *flag = (target_pure_code ? "-mpure-code" :
2828 "-mslow-flash-data");
2829 error ("%s only supports non-pic code on M-profile targets with the "
2830 "MOVT instruction", flag);
2835 /* Recompute the global settings depending on target attribute options. */
2837 static void
2838 arm_option_params_internal (void)
2840 /* If we are not using the default (ARM mode) section anchor offset
2841 ranges, then set the correct ranges now. */
2842 if (TARGET_THUMB1)
2844 /* Thumb-1 LDR instructions cannot have negative offsets.
2845 Permissible positive offset ranges are 5-bit (for byte loads),
2846 6-bit (for halfword loads), or 7-bit (for word loads).
2847 Empirical results suggest a 7-bit anchor range gives the best
2848 overall code size. */
2849 targetm.min_anchor_offset = 0;
2850 targetm.max_anchor_offset = 127;
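/* (Those ranges follow from the 5-bit immediate in the Thumb-1 load
   encodings being scaled by the access size: byte offsets reach 31,
   halfword offsets 62 and word offsets 124.)  */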
2852 else if (TARGET_THUMB2)
2854 /* The minimum is set such that the total size of the block
2855 for a particular anchor is 248 + 1 + 4095 bytes, which is
2856 divisible by eight, ensuring natural spacing of anchors. */
2857 targetm.min_anchor_offset = -248;
2858 targetm.max_anchor_offset = 4095;
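/* (248 + 1 + 4095 == 4344 == 8 * 543.)  */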
2860 else
2862 targetm.min_anchor_offset = TARGET_MIN_ANCHOR_OFFSET;
2863 targetm.max_anchor_offset = TARGET_MAX_ANCHOR_OFFSET;
2866 if (optimize_size)
2868 /* If optimizing for size, bump the number of instructions that we
2869 are prepared to conditionally execute (even on a StrongARM). */
2870 max_insns_skipped = 6;
2872 /* For THUMB2, we limit the conditional sequence to one IT block. */
2873 if (TARGET_THUMB2)
2874 max_insns_skipped = arm_restrict_it ? 1 : 4;
2876 else
2877 /* When -mrestrict-it is in use, tone down the if-conversion. */
2878 max_insns_skipped = (TARGET_THUMB2 && arm_restrict_it)
2879 ? 1 : current_tune->max_insns_skipped;
2882 /* True if -mflip-thumb should next add an attribute for the default
2883 mode, false if it should next add an attribute for the opposite mode. */
2884 static GTY(()) bool thumb_flipper;
2886 /* Options after initial target override. */
2887 static GTY(()) tree init_optimize;
2889 static void
2890 arm_override_options_after_change_1 (struct gcc_options *opts)
2892 if (opts->x_align_functions <= 0)
2893 opts->x_align_functions = TARGET_THUMB_P (opts->x_target_flags)
2894 && opts->x_optimize_size ? 2 : 4;
2897 /* Implement targetm.override_options_after_change. */
2899 static void
2900 arm_override_options_after_change (void)
2902 arm_configure_build_target (&arm_active_target,
2903 TREE_TARGET_OPTION (target_option_default_node),
2904 &global_options_set, false);
2906 arm_override_options_after_change_1 (&global_options);
2909 /* Implement TARGET_OPTION_SAVE. */
2910 static void
2911 arm_option_save (struct cl_target_option *ptr, struct gcc_options *opts)
2913 ptr->x_arm_arch_string = opts->x_arm_arch_string;
2914 ptr->x_arm_cpu_string = opts->x_arm_cpu_string;
2915 ptr->x_arm_tune_string = opts->x_arm_tune_string;
2918 /* Implement TARGET_OPTION_RESTORE. */
2919 static void
2920 arm_option_restore (struct gcc_options *opts, struct cl_target_option *ptr)
2922 opts->x_arm_arch_string = ptr->x_arm_arch_string;
2923 opts->x_arm_cpu_string = ptr->x_arm_cpu_string;
2924 opts->x_arm_tune_string = ptr->x_arm_tune_string;
2925 arm_configure_build_target (&arm_active_target, ptr, &global_options_set,
2926 false);
2929 /* Reset options between modes that the user has specified. */
2930 static void
2931 arm_option_override_internal (struct gcc_options *opts,
2932 struct gcc_options *opts_set)
2934 arm_override_options_after_change_1 (opts);
2936 if (TARGET_INTERWORK && !bitmap_bit_p (arm_active_target.isa, isa_bit_thumb))
2938 /* The default is to enable interworking, so this warning message would
2939 be confusing to users who have just compiled with, e.g., -march=armv3. */
2940 /* warning (0, "ignoring -minterwork because target CPU does not support THUMB"); */
2941 opts->x_target_flags &= ~MASK_INTERWORK;
2944 if (TARGET_THUMB_P (opts->x_target_flags)
2945 && !bitmap_bit_p (arm_active_target.isa, isa_bit_thumb))
2947 warning (0, "target CPU does not support THUMB instructions");
2948 opts->x_target_flags &= ~MASK_THUMB;
2951 if (TARGET_APCS_FRAME && TARGET_THUMB_P (opts->x_target_flags))
2953 /* warning (0, "ignoring -mapcs-frame because -mthumb was used"); */
2954 opts->x_target_flags &= ~MASK_APCS_FRAME;
2957 /* Callee super interworking implies thumb interworking. Adding
2958 this to the flags here simplifies the logic elsewhere. */
2959 if (TARGET_THUMB_P (opts->x_target_flags) && TARGET_CALLEE_INTERWORKING)
2960 opts->x_target_flags |= MASK_INTERWORK;
2962 /* Need to remember initial values so combinations of options like
2963 -mflip-thumb -mthumb -fno-schedule-insns work for any attribute. */
2964 cl_optimization *to = TREE_OPTIMIZATION (init_optimize);
2966 if (! opts_set->x_arm_restrict_it)
2967 opts->x_arm_restrict_it = arm_arch8;
2969 /* ARM execution state and M profile don't have [restrict] IT. */
2970 if (!TARGET_THUMB2_P (opts->x_target_flags) || !arm_arch_notm)
2971 opts->x_arm_restrict_it = 0;
2973 /* Enable -munaligned-access by default for
2974 - all ARMv6 architecture-based processors when compiling for a 32-bit ISA
2975 i.e. Thumb2 and ARM state only.
2976 - ARMv7-A, ARMv7-R, and ARMv7-M architecture-based processors.
2977 - ARMv8 architecture-based processors.
2979 Disable -munaligned-access by default for
2980 - all pre-ARMv6 architecture-based processors
2981 - ARMv6-M architecture-based processors
2982 - ARMv8-M Baseline processors. */
2984 if (! opts_set->x_unaligned_access)
2986 opts->x_unaligned_access = (TARGET_32BIT_P (opts->x_target_flags)
2987 && arm_arch6 && (arm_arch_notm || arm_arch7));
2989 else if (opts->x_unaligned_access == 1
2990 && !(arm_arch6 && (arm_arch_notm || arm_arch7)))
2992 warning (0, "target CPU does not support unaligned accesses");
2993 opts->x_unaligned_access = 0;
2996 /* Don't warn since it's on by default in -O2. */
2997 if (TARGET_THUMB1_P (opts->x_target_flags))
2998 opts->x_flag_schedule_insns = 0;
2999 else
3000 opts->x_flag_schedule_insns = to->x_flag_schedule_insns;
3002 /* Disable shrink-wrap when optimizing function for size, since it tends to
3003 generate additional returns. */
3004 if (optimize_function_for_size_p (cfun)
3005 && TARGET_THUMB2_P (opts->x_target_flags))
3006 opts->x_flag_shrink_wrap = false;
3007 else
3008 opts->x_flag_shrink_wrap = to->x_flag_shrink_wrap;
3010 /* In Thumb1 mode, we emit the epilogue in RTL, but the last insn
3011 - epilogue_insns - does not accurately model the corresponding insns
3012 emitted in the asm file. In particular, see the comment in thumb_exit
3013 'Find out how many of the (return) argument registers we can corrupt'.
3014 As a consequence, the epilogue may clobber registers without fipa-ra
3015 finding out about it. Therefore, disable fipa-ra in Thumb1 mode.
3016 TODO: Accurately model clobbers for epilogue_insns and reenable
3017 fipa-ra. */
3018 if (TARGET_THUMB1_P (opts->x_target_flags))
3019 opts->x_flag_ipa_ra = 0;
3020 else
3021 opts->x_flag_ipa_ra = to->x_flag_ipa_ra;
3023 /* Thumb2 inline assembly code should always use unified syntax.
3024 This will apply to ARM and Thumb1 eventually. */
3025 opts->x_inline_asm_unified = TARGET_THUMB2_P (opts->x_target_flags);
3027 #ifdef SUBTARGET_OVERRIDE_INTERNAL_OPTIONS
3028 SUBTARGET_OVERRIDE_INTERNAL_OPTIONS;
3029 #endif
3032 static sbitmap isa_all_fpubits;
3033 static sbitmap isa_quirkbits;
3035 /* Configure a build target TARGET from the user-specified options OPTS and
3036 OPTS_SET. If WARN_COMPATIBLE, emit a diagnostic if both the CPU and
3037 architecture have been specified, but the two are not identical. */
3038 void
3039 arm_configure_build_target (struct arm_build_target *target,
3040 struct cl_target_option *opts,
3041 struct gcc_options *opts_set,
3042 bool warn_compatible)
3044 const cpu_option *arm_selected_tune = NULL;
3045 const arch_option *arm_selected_arch = NULL;
3046 const cpu_option *arm_selected_cpu = NULL;
3047 const arm_fpu_desc *arm_selected_fpu = NULL;
3048 const char *tune_opts = NULL;
3049 const char *arch_opts = NULL;
3050 const char *cpu_opts = NULL;
3052 bitmap_clear (target->isa);
3053 target->core_name = NULL;
3054 target->arch_name = NULL;
3056 if (opts_set->x_arm_arch_string)
3058 arm_selected_arch = arm_parse_arch_option_name (all_architectures,
3059 "-march",
3060 opts->x_arm_arch_string);
3061 arch_opts = strchr (opts->x_arm_arch_string, '+');
3064 if (opts_set->x_arm_cpu_string)
3066 arm_selected_cpu = arm_parse_cpu_option_name (all_cores, "-mcpu",
3067 opts->x_arm_cpu_string);
3068 cpu_opts = strchr (opts->x_arm_cpu_string, '+');
3069 arm_selected_tune = arm_selected_cpu;
3070 /* If taking the tuning from -mcpu, we don't need to rescan the
3071 options for tuning. */
3074 if (opts_set->x_arm_tune_string)
3076 arm_selected_tune = arm_parse_cpu_option_name (all_cores, "-mtune",
3077 opts->x_arm_tune_string);
3078 tune_opts = strchr (opts->x_arm_tune_string, '+');
3081 if (arm_selected_arch)
3083 arm_initialize_isa (target->isa, arm_selected_arch->common.isa_bits);
3084 arm_parse_option_features (target->isa, &arm_selected_arch->common,
3085 arch_opts);
3087 if (arm_selected_cpu)
3089 auto_sbitmap cpu_isa (isa_num_bits);
3090 auto_sbitmap isa_delta (isa_num_bits);
3092 arm_initialize_isa (cpu_isa, arm_selected_cpu->common.isa_bits);
3093 arm_parse_option_features (cpu_isa, &arm_selected_cpu->common,
3094 cpu_opts);
3095 bitmap_xor (isa_delta, cpu_isa, target->isa);
3096 /* Ignore any bits that are quirk bits. */
3097 bitmap_and_compl (isa_delta, isa_delta, isa_quirkbits);
3098 /* Ignore (for now) any bits that might be set by -mfpu. */
3099 bitmap_and_compl (isa_delta, isa_delta, isa_all_fpubits);
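/* What remains in isa_delta is the set of capabilities on which the CPU
   and the architecture genuinely disagree, ignoring quirk and FPU bits.  */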
3101 if (!bitmap_empty_p (isa_delta))
3103 if (warn_compatible)
3104 warning (0, "switch -mcpu=%s conflicts with -march=%s switch",
3105 arm_selected_cpu->common.name,
3106 arm_selected_arch->common.name);
3107 /* -march wins for code generation.
3108 -mcpu wins for default tuning. */
3109 if (!arm_selected_tune)
3110 arm_selected_tune = arm_selected_cpu;
3112 arm_selected_cpu = all_cores + arm_selected_arch->tune_id;
3113 target->arch_name = arm_selected_arch->common.name;
3115 else
3117 /* Architecture and CPU are essentially the same.
3118 Prefer the CPU setting. */
3119 arm_selected_arch = all_architectures + arm_selected_cpu->arch;
3120 target->core_name = arm_selected_cpu->common.name;
3121 /* Copy the CPU's capabilities, so that we inherit the
3122 appropriate extensions and quirks. */
3123 bitmap_copy (target->isa, cpu_isa);
3126 else
3128 /* Pick a CPU based on the architecture. */
3129 arm_selected_cpu = all_cores + arm_selected_arch->tune_id;
3130 target->arch_name = arm_selected_arch->common.name;
3131 /* Note: target->core_name is left unset in this path. */
3134 else if (arm_selected_cpu)
3136 target->core_name = arm_selected_cpu->common.name;
3137 arm_initialize_isa (target->isa, arm_selected_cpu->common.isa_bits);
3138 arm_parse_option_features (target->isa, &arm_selected_cpu->common,
3139 cpu_opts);
3140 arm_selected_arch = all_architectures + arm_selected_cpu->arch;
3142 /* If the user did not specify a processor or architecture, choose
3143 one for them. */
3144 else
3146 const cpu_option *sel;
3147 auto_sbitmap sought_isa (isa_num_bits);
3148 bitmap_clear (sought_isa);
3149 auto_sbitmap default_isa (isa_num_bits);
3151 arm_selected_cpu = arm_parse_cpu_option_name (all_cores, "default CPU",
3152 TARGET_CPU_DEFAULT);
3153 cpu_opts = strchr (TARGET_CPU_DEFAULT, '+');
3154 gcc_assert (arm_selected_cpu->common.name);
3156 /* RWE: All of the selection logic below (to the end of this
3157 'if' clause) looks somewhat suspect. It appears to be mostly
3158 there to support forcing thumb support when the default CPU
3159 does not have thumb (somewhat dubious in terms of what the
3160 user might be expecting). I think it should be removed once
3161 support for the pre-thumb era cores is removed. */
3162 sel = arm_selected_cpu;
3163 arm_initialize_isa (default_isa, sel->common.isa_bits);
3164 arm_parse_option_features (default_isa, &arm_selected_cpu->common,
3165 cpu_opts);
3167 /* Now check to see if the user has specified any command line
3168 switches that require certain abilities from the cpu. */
3170 if (TARGET_INTERWORK || TARGET_THUMB)
3172 bitmap_set_bit (sought_isa, isa_bit_thumb);
3173 bitmap_set_bit (sought_isa, isa_bit_mode32);
3175 /* There are no ARM processors that support both APCS-26 and
3176 interworking. Therefore we forcibly remove MODE26 from
3177 the isa features here (if it was set), so that the
3178 search below will always be able to find a compatible
3179 processor. */
3180 bitmap_clear_bit (default_isa, isa_bit_mode26);
3183 /* If there are such requirements and the default CPU does not
3184 satisfy them, we need to run over the complete list of
3185 cores looking for one that is satisfactory. */
3186 if (!bitmap_empty_p (sought_isa)
3187 && !bitmap_subset_p (sought_isa, default_isa))
3189 auto_sbitmap candidate_isa (isa_num_bits);
3190 /* We're only interested in a CPU with at least the
3191 capabilities of the default CPU and the required
3192 additional features. */
3193 bitmap_ior (default_isa, default_isa, sought_isa);
3195 /* Try to locate a CPU type that supports all of the abilities
3196 of the default CPU, plus the extra abilities requested by
3197 the user. */
3198 for (sel = all_cores; sel->common.name != NULL; sel++)
3200 arm_initialize_isa (candidate_isa, sel->common.isa_bits);
3201 /* An exact match? */
3202 if (bitmap_equal_p (default_isa, candidate_isa))
3203 break;
3206 if (sel->common.name == NULL)
3208 unsigned current_bit_count = isa_num_bits;
3209 const cpu_option *best_fit = NULL;
3211 /* Ideally we would like to issue an error message here
3212 saying that it was not possible to find a CPU compatible
3213 with the default CPU, but which also supports the command
3214 line options specified by the programmer, and so they
3215 ought to use the -mcpu=<name> command line option to
3216 override the default CPU type.
3218 If we cannot find a CPU that has exactly the
3219 characteristics of the default CPU and the given
3220 command line options we scan the array again looking
3221 for a best match. The best match must have at least
3222 the capabilities of the perfect match. */
3223 for (sel = all_cores; sel->common.name != NULL; sel++)
3225 arm_initialize_isa (candidate_isa, sel->common.isa_bits);
3227 if (bitmap_subset_p (default_isa, candidate_isa))
3229 unsigned count;
3231 bitmap_and_compl (candidate_isa, candidate_isa,
3232 default_isa);
3233 count = bitmap_popcount (candidate_isa);
3235 if (count < current_bit_count)
3237 best_fit = sel;
3238 current_bit_count = count;
3242 gcc_assert (best_fit);
3243 sel = best_fit;
3246 arm_selected_cpu = sel;
3249 /* Now we know the CPU, we can finally initialize the target
3250 structure. */
3251 target->core_name = arm_selected_cpu->common.name;
3252 arm_initialize_isa (target->isa, arm_selected_cpu->common.isa_bits);
3253 arm_parse_option_features (target->isa, &arm_selected_cpu->common,
3254 cpu_opts);
3255 arm_selected_arch = all_architectures + arm_selected_cpu->arch;
3258 gcc_assert (arm_selected_cpu);
3259 gcc_assert (arm_selected_arch);
3261 if (opts->x_arm_fpu_index != TARGET_FPU_auto)
3263 arm_selected_fpu = &all_fpus[opts->x_arm_fpu_index];
3264 auto_sbitmap fpu_bits (isa_num_bits);
3266 arm_initialize_isa (fpu_bits, arm_selected_fpu->isa_bits);
3267 bitmap_and_compl (target->isa, target->isa, isa_all_fpubits);
3268 bitmap_ior (target->isa, target->isa, fpu_bits);
3271 if (!arm_selected_tune)
3272 arm_selected_tune = arm_selected_cpu;
3273 else /* Validate the features passed to -mtune. */
3274 arm_parse_option_features (NULL, &arm_selected_tune->common, tune_opts);
3276 const cpu_tune *tune_data = &all_tunes[arm_selected_tune - all_cores];
3278 /* Finish initializing the target structure. */
3279 target->arch_pp_name = arm_selected_arch->arch;
3280 target->base_arch = arm_selected_arch->base_arch;
3281 target->profile = arm_selected_arch->profile;
3283 target->tune_flags = tune_data->tune_flags;
3284 target->tune = tune_data->tune;
3285 target->tune_core = tune_data->scheduler;
3288 /* Fix up any incompatible options that the user has specified. */
3289 static void
3290 arm_option_override (void)
3292 static const enum isa_feature fpu_bitlist[]
3293 = { ISA_ALL_FPU_INTERNAL, isa_nobit };
3294 static const enum isa_feature quirk_bitlist[] = { ISA_ALL_QUIRKS, isa_nobit};
3295 cl_target_option opts;
3297 isa_quirkbits = sbitmap_alloc (isa_num_bits);
3298 arm_initialize_isa (isa_quirkbits, quirk_bitlist);
3300 isa_all_fpubits = sbitmap_alloc (isa_num_bits);
3301 arm_initialize_isa (isa_all_fpubits, fpu_bitlist);
3303 arm_active_target.isa = sbitmap_alloc (isa_num_bits);
3305 if (!global_options_set.x_arm_fpu_index)
3307 bool ok;
3308 int fpu_index;
3310 ok = opt_enum_arg_to_value (OPT_mfpu_, FPUTYPE_AUTO, &fpu_index,
3311 CL_TARGET);
3312 gcc_assert (ok);
3313 arm_fpu_index = (enum fpu_type) fpu_index;
3316 cl_target_option_save (&opts, &global_options);
3317 arm_configure_build_target (&arm_active_target, &opts, &global_options_set,
3318 true);
3320 #ifdef SUBTARGET_OVERRIDE_OPTIONS
3321 SUBTARGET_OVERRIDE_OPTIONS;
3322 #endif
3324 sprintf (arm_arch_name, "__ARM_ARCH_%s__", arm_active_target.arch_pp_name);
3325 arm_base_arch = arm_active_target.base_arch;
3327 arm_tune = arm_active_target.tune_core;
3328 tune_flags = arm_active_target.tune_flags;
3329 current_tune = arm_active_target.tune;
3331 /* TBD: Dwarf info for apcs frame is not handled yet. */
3332 if (TARGET_APCS_FRAME)
3333 flag_shrink_wrap = false;
3335 /* BPABI targets use linker tricks to allow interworking on cores
3336 without thumb support. */
3337 if (TARGET_INTERWORK
3338 && !TARGET_BPABI
3339 && !bitmap_bit_p (arm_active_target.isa, isa_bit_thumb))
3341 warning (0, "target CPU does not support interworking" );
3342 target_flags &= ~MASK_INTERWORK;
3345 if (TARGET_APCS_STACK && !TARGET_APCS_FRAME)
3347 warning (0, "-mapcs-stack-check incompatible with -mno-apcs-frame");
3348 target_flags |= MASK_APCS_FRAME;
3351 if (TARGET_POKE_FUNCTION_NAME)
3352 target_flags |= MASK_APCS_FRAME;
3354 if (TARGET_APCS_REENT && flag_pic)
3355 error ("-fpic and -mapcs-reent are incompatible");
3357 if (TARGET_APCS_REENT)
3358 warning (0, "APCS reentrant code not supported. Ignored");
3360 /* Initialize boolean versions of the architectural flags, for use
3361 in the arm.md file. */
3362 arm_arch3m = bitmap_bit_p (arm_active_target.isa, isa_bit_ARMv3m);
3363 arm_arch4 = bitmap_bit_p (arm_active_target.isa, isa_bit_ARMv4);
3364 arm_arch4t = arm_arch4 && bitmap_bit_p (arm_active_target.isa, isa_bit_thumb);
3365 arm_arch5 = bitmap_bit_p (arm_active_target.isa, isa_bit_ARMv5);
3366 arm_arch5e = bitmap_bit_p (arm_active_target.isa, isa_bit_ARMv5e);
3367 arm_arch5te = arm_arch5e
3368 && bitmap_bit_p (arm_active_target.isa, isa_bit_thumb);
3369 arm_arch6 = bitmap_bit_p (arm_active_target.isa, isa_bit_ARMv6);
3370 arm_arch6k = bitmap_bit_p (arm_active_target.isa, isa_bit_ARMv6k);
3371 arm_arch_notm = bitmap_bit_p (arm_active_target.isa, isa_bit_notm);
3372 arm_arch6m = arm_arch6 && !arm_arch_notm;
3373 arm_arch7 = bitmap_bit_p (arm_active_target.isa, isa_bit_ARMv7);
3374 arm_arch7em = bitmap_bit_p (arm_active_target.isa, isa_bit_ARMv7em);
3375 arm_arch8 = bitmap_bit_p (arm_active_target.isa, isa_bit_ARMv8);
3376 arm_arch8_1 = bitmap_bit_p (arm_active_target.isa, isa_bit_ARMv8_1);
3377 arm_arch8_2 = bitmap_bit_p (arm_active_target.isa, isa_bit_ARMv8_2);
3378 arm_arch_thumb1 = bitmap_bit_p (arm_active_target.isa, isa_bit_thumb);
3379 arm_arch_thumb2 = bitmap_bit_p (arm_active_target.isa, isa_bit_thumb2);
3380 arm_arch_xscale = bitmap_bit_p (arm_active_target.isa, isa_bit_xscale);
3381 arm_arch_iwmmxt = bitmap_bit_p (arm_active_target.isa, isa_bit_iwmmxt);
3382 arm_arch_iwmmxt2 = bitmap_bit_p (arm_active_target.isa, isa_bit_iwmmxt2);
3383 arm_arch_thumb_hwdiv = bitmap_bit_p (arm_active_target.isa, isa_bit_tdiv);
3384 arm_arch_arm_hwdiv = bitmap_bit_p (arm_active_target.isa, isa_bit_adiv);
3385 arm_arch_crc = bitmap_bit_p (arm_active_target.isa, isa_bit_crc32);
3386 arm_arch_cmse = bitmap_bit_p (arm_active_target.isa, isa_bit_cmse);
3387 arm_fp16_inst = bitmap_bit_p (arm_active_target.isa, isa_bit_fp16);
3388 arm_arch_lpae = bitmap_bit_p (arm_active_target.isa, isa_bit_lpae);
3389 if (arm_fp16_inst)
3391 if (arm_fp16_format == ARM_FP16_FORMAT_ALTERNATIVE)
3392 error ("selected fp16 options are incompatible");
3393 arm_fp16_format = ARM_FP16_FORMAT_IEEE;
3397 /* Set up some tuning parameters. */
3398 arm_ld_sched = (tune_flags & TF_LDSCHED) != 0;
3399 arm_tune_strongarm = (tune_flags & TF_STRONG) != 0;
3400 arm_tune_wbuf = (tune_flags & TF_WBUF) != 0;
3401 arm_tune_xscale = (tune_flags & TF_XSCALE) != 0;
3402 arm_tune_cortex_a9 = (arm_tune == TARGET_CPU_cortexa9) != 0;
3403 arm_m_profile_small_mul = (tune_flags & TF_SMALLMUL) != 0;
3405 /* And finally, set up some quirks. */
3406 arm_arch_no_volatile_ce
3407 = bitmap_bit_p (arm_active_target.isa, isa_quirk_no_volatile_ce);
3408 arm_arch6kz
3409 = arm_arch6k && bitmap_bit_p (arm_active_target.isa, isa_quirk_ARMv6kz);
3411 /* V5 code we generate is completely interworking capable, so we turn off
3412 TARGET_INTERWORK here to avoid many tests later on. */
3414 /* XXX However, we must pass the right pre-processor defines to CPP
3415 or GLD can get confused. This is a hack. */
3416 if (TARGET_INTERWORK)
3417 arm_cpp_interwork = 1;
3419 if (arm_arch5)
3420 target_flags &= ~MASK_INTERWORK;
3422 if (TARGET_IWMMXT && !ARM_DOUBLEWORD_ALIGN)
3423 error ("iwmmxt requires an AAPCS compatible ABI for proper operation");
3425 if (TARGET_IWMMXT_ABI && !TARGET_IWMMXT)
3426 error ("iwmmxt abi requires an iwmmxt capable cpu");
3428 /* If soft-float is specified then don't use FPU. */
3429 if (TARGET_SOFT_FLOAT)
3430 arm_fpu_attr = FPU_NONE;
3431 else
3432 arm_fpu_attr = FPU_VFP;
3434 if (TARGET_AAPCS_BASED)
3436 if (TARGET_CALLER_INTERWORKING)
3437 error ("AAPCS does not support -mcaller-super-interworking");
3438 else
3439 if (TARGET_CALLEE_INTERWORKING)
3440 error ("AAPCS does not support -mcallee-super-interworking");
3443 /* __fp16 support currently assumes the core has ldrh. */
3444 if (!arm_arch4 && arm_fp16_format != ARM_FP16_FORMAT_NONE)
3445 sorry ("__fp16 and no ldrh");
3447 if (TARGET_AAPCS_BASED)
3449 if (arm_abi == ARM_ABI_IWMMXT)
3450 arm_pcs_default = ARM_PCS_AAPCS_IWMMXT;
3451 else if (TARGET_HARD_FLOAT_ABI)
3453 arm_pcs_default = ARM_PCS_AAPCS_VFP;
3454 if (!bitmap_bit_p (arm_active_target.isa, isa_bit_VFPv2))
3455 error ("-mfloat-abi=hard: selected processor lacks an FPU");
3457 else
3458 arm_pcs_default = ARM_PCS_AAPCS;
3460 else
3462 if (arm_float_abi == ARM_FLOAT_ABI_HARD)
3463 sorry ("-mfloat-abi=hard and VFP");
3465 if (arm_abi == ARM_ABI_APCS)
3466 arm_pcs_default = ARM_PCS_APCS;
3467 else
3468 arm_pcs_default = ARM_PCS_ATPCS;
3471 /* For arm2/3 there is no need to do any scheduling if we are doing
3472 software floating-point. */
3473 if (TARGET_SOFT_FLOAT && (tune_flags & TF_NO_MODE32))
3474 flag_schedule_insns = flag_schedule_insns_after_reload = 0;
3476 /* Use the cp15 method if it is available. */
3477 if (target_thread_pointer == TP_AUTO)
3479 if (arm_arch6k && !TARGET_THUMB1)
3480 target_thread_pointer = TP_CP15;
3481 else
3482 target_thread_pointer = TP_SOFT;
3485 /* Override the default structure alignment for AAPCS ABI. */
3486 if (!global_options_set.x_arm_structure_size_boundary)
3488 if (TARGET_AAPCS_BASED)
3489 arm_structure_size_boundary = 8;
3491 else
3493 warning (0, "option %<-mstructure-size-boundary%> is deprecated");
3495 if (arm_structure_size_boundary != 8
3496 && arm_structure_size_boundary != 32
3497 && !(ARM_DOUBLEWORD_ALIGN && arm_structure_size_boundary == 64))
3499 if (ARM_DOUBLEWORD_ALIGN)
3500 warning (0,
3501 "structure size boundary can only be set to 8, 32 or 64");
3502 else
3503 warning (0, "structure size boundary can only be set to 8 or 32");
3504 arm_structure_size_boundary
3505 = (TARGET_AAPCS_BASED ? 8 : DEFAULT_STRUCTURE_SIZE_BOUNDARY);
3509 if (TARGET_VXWORKS_RTP)
3511 if (!global_options_set.x_arm_pic_data_is_text_relative)
3512 arm_pic_data_is_text_relative = 0;
3514 else if (flag_pic
3515 && !arm_pic_data_is_text_relative
3516 && !(global_options_set.x_target_flags & MASK_SINGLE_PIC_BASE))
3517 /* When text & data segments don't have a fixed displacement, the
3518 intended use is with a single, read-only PIC base register.
3519 Unless the user explicitly requested not to do that, set
3520 it. */
3521 target_flags |= MASK_SINGLE_PIC_BASE;
3523 /* If stack checking is disabled, we can use r10 as the PIC register,
3524 which keeps r9 available. The EABI specifies r9 as the PIC register. */
3525 if (flag_pic && TARGET_SINGLE_PIC_BASE)
3527 if (TARGET_VXWORKS_RTP)
3528 warning (0, "RTP PIC is incompatible with -msingle-pic-base");
3529 arm_pic_register = (TARGET_APCS_STACK || TARGET_AAPCS_BASED) ? 9 : 10;
3532 if (flag_pic && TARGET_VXWORKS_RTP)
3533 arm_pic_register = 9;
3535 if (arm_pic_register_string != NULL)
3537 int pic_register = decode_reg_name (arm_pic_register_string);
3539 if (!flag_pic)
3540 warning (0, "-mpic-register= is useless without -fpic");
3542 /* Prevent the user from choosing an obviously stupid PIC register. */
3543 else if (pic_register < 0 || call_used_regs[pic_register]
3544 || pic_register == HARD_FRAME_POINTER_REGNUM
3545 || pic_register == STACK_POINTER_REGNUM
3546 || pic_register >= PC_REGNUM
3547 || (TARGET_VXWORKS_RTP
3548 && (unsigned int) pic_register != arm_pic_register))
3549 error ("unable to use '%s' for PIC register", arm_pic_register_string);
3550 else
3551 arm_pic_register = pic_register;
3554 /* Enable -mfix-cortex-m3-ldrd by default for Cortex-M3 cores. */
3555 if (fix_cm3_ldrd == 2)
3557 if (bitmap_bit_p (arm_active_target.isa, isa_quirk_cm3_ldrd))
3558 fix_cm3_ldrd = 1;
3559 else
3560 fix_cm3_ldrd = 0;
3563 /* Hot/Cold partitioning is not currently supported, since we can't
3564 handle literal pool placement in that case. */
3565 if (flag_reorder_blocks_and_partition)
3567 inform (input_location,
3568 "-freorder-blocks-and-partition not supported on this architecture");
3569 flag_reorder_blocks_and_partition = 0;
3570 flag_reorder_blocks = 1;
3573 if (flag_pic)
3574 /* Hoisting PIC address calculations more aggressively provides a small,
3575 but measurable, size reduction for PIC code. Therefore, we decrease
3576 the bar for unrestricted expression hoisting to the cost of PIC address
3577 calculation, which is 2 instructions. */
3578 maybe_set_param_value (PARAM_GCSE_UNRESTRICTED_COST, 2,
3579 global_options.x_param_values,
3580 global_options_set.x_param_values);
3582 /* ARM EABI defaults to strict volatile bitfields. */
3583 if (TARGET_AAPCS_BASED && flag_strict_volatile_bitfields < 0
3584 && abi_version_at_least(2))
3585 flag_strict_volatile_bitfields = 1;
3587 /* Enable software prefetching at -O3 for CPUs that have a prefetcher and
3588 for which we have deemed it beneficial (signified by setting
3589 prefetch.num_slots to 1 or more). */
3590 if (flag_prefetch_loop_arrays < 0
3591 && HAVE_prefetch
3592 && optimize >= 3
3593 && current_tune->prefetch.num_slots > 0)
3594 flag_prefetch_loop_arrays = 1;
3596 /* Set up parameters to be used in the prefetching algorithm. Do not
3597 override the defaults unless we are tuning for a core we have
3598 researched values for. */
3599 if (current_tune->prefetch.num_slots > 0)
3600 maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES,
3601 current_tune->prefetch.num_slots,
3602 global_options.x_param_values,
3603 global_options_set.x_param_values);
3604 if (current_tune->prefetch.l1_cache_line_size >= 0)
3605 maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE,
3606 current_tune->prefetch.l1_cache_line_size,
3607 global_options.x_param_values,
3608 global_options_set.x_param_values);
3609 if (current_tune->prefetch.l1_cache_size >= 0)
3610 maybe_set_param_value (PARAM_L1_CACHE_SIZE,
3611 current_tune->prefetch.l1_cache_size,
3612 global_options.x_param_values,
3613 global_options_set.x_param_values);
3615 /* Use Neon to perform 64-bit operations rather than core
3616 registers. */
3617 prefer_neon_for_64bits = current_tune->prefer_neon_for_64bits;
3618 if (use_neon_for_64bits == 1)
3619 prefer_neon_for_64bits = true;
3621 /* Use the alternative scheduling-pressure algorithm by default. */
3622 maybe_set_param_value (PARAM_SCHED_PRESSURE_ALGORITHM, SCHED_PRESSURE_MODEL,
3623 global_options.x_param_values,
3624 global_options_set.x_param_values);
3626 /* Look through the ready list and all of the queue for instructions
3627 relevant to the L2 auto-prefetcher. */
3628 int param_sched_autopref_queue_depth;
3630 switch (current_tune->sched_autopref)
3632 case tune_params::SCHED_AUTOPREF_OFF:
3633 param_sched_autopref_queue_depth = -1;
3634 break;
3636 case tune_params::SCHED_AUTOPREF_RANK:
3637 param_sched_autopref_queue_depth = 0;
3638 break;
3640 case tune_params::SCHED_AUTOPREF_FULL:
3641 param_sched_autopref_queue_depth = max_insn_queue_index + 1;
3642 break;
3644 default:
3645 gcc_unreachable ();
3648 maybe_set_param_value (PARAM_SCHED_AUTOPREF_QUEUE_DEPTH,
3649 param_sched_autopref_queue_depth,
3650 global_options.x_param_values,
3651 global_options_set.x_param_values);
3653 /* Currently, for slow flash data, we just disable literal pools. We also
3654 disable them for pure-code. */
3655 if (target_slow_flash_data || target_pure_code)
3656 arm_disable_literal_pool = true;
3658 if (use_cmse && !arm_arch_cmse)
3659 error ("target CPU does not support ARMv8-M Security Extensions");
3661 /* Disable scheduling fusion by default if the processor is not ARMv7, or
3662 does not prefer ldrd/strd. */
3663 if (flag_schedule_fusion == 2
3664 && (!arm_arch7 || !current_tune->prefer_ldrd_strd))
3665 flag_schedule_fusion = 0;
3667 /* Need to remember initial options before they are overridden. */
3668 init_optimize = build_optimization_node (&global_options);
3670 arm_option_override_internal (&global_options, &global_options_set);
3671 arm_option_check_internal (&global_options);
3672 arm_option_params_internal ();
3674 /* Create the default target_options structure. */
3675 target_option_default_node = target_option_current_node
3676 = build_target_option_node (&global_options);
3678 /* Register global variables with the garbage collector. */
3679 arm_add_gc_roots ();
3681 /* Remember the initial mode for testing. */
3682 thumb_flipper = TARGET_THUMB;
3685 static void
3686 arm_add_gc_roots (void)
3688 gcc_obstack_init(&minipool_obstack);
3689 minipool_startobj = (char *) obstack_alloc (&minipool_obstack, 0);
3692 /* A table of known ARM exception types.
3693 For use with the interrupt function attribute. */
3695 typedef struct
3697 const char *const arg;
3698 const unsigned long return_value;
3700 isr_attribute_arg;
3702 static const isr_attribute_arg isr_attribute_args [] =
3704 { "IRQ", ARM_FT_ISR },
3705 { "irq", ARM_FT_ISR },
3706 { "FIQ", ARM_FT_FIQ },
3707 { "fiq", ARM_FT_FIQ },
3708 { "ABORT", ARM_FT_ISR },
3709 { "abort", ARM_FT_ISR },
3710 { "ABORT", ARM_FT_ISR },
3711 { "abort", ARM_FT_ISR },
3712 { "UNDEF", ARM_FT_EXCEPTION },
3713 { "undef", ARM_FT_EXCEPTION },
3714 { "SWI", ARM_FT_EXCEPTION },
3715 { "swi", ARM_FT_EXCEPTION },
3716 { NULL, ARM_FT_NORMAL }
3719 /* Returns the (interrupt) function type of the current
3720 function, or ARM_FT_UNKNOWN if the type cannot be determined. */
3722 static unsigned long
3723 arm_isr_value (tree argument)
3725 const isr_attribute_arg * ptr;
3726 const char * arg;
3728 if (!arm_arch_notm)
3729 return ARM_FT_NORMAL | ARM_FT_STACKALIGN;
3731 /* No argument - default to IRQ. */
3732 if (argument == NULL_TREE)
3733 return ARM_FT_ISR;
3735 /* Get the value of the argument. */
3736 if (TREE_VALUE (argument) == NULL_TREE
3737 || TREE_CODE (TREE_VALUE (argument)) != STRING_CST)
3738 return ARM_FT_UNKNOWN;
3740 arg = TREE_STRING_POINTER (TREE_VALUE (argument));
3742 /* Check it against the list of known arguments. */
3743 for (ptr = isr_attribute_args; ptr->arg != NULL; ptr++)
3744 if (streq (arg, ptr->arg))
3745 return ptr->return_value;
3747 /* An unrecognized interrupt type. */
3748 return ARM_FT_UNKNOWN;
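/* Usage example: a handler declared as
     void __attribute__ ((interrupt ("IRQ"))) irq_handler (void);
   maps to ARM_FT_ISR through the table above, while
   __attribute__ ((interrupt)) with no argument also defaults to
   ARM_FT_ISR (see the NULL_TREE case). */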
3751 /* Computes the type of the current function. */
3753 static unsigned long
3754 arm_compute_func_type (void)
3756 unsigned long type = ARM_FT_UNKNOWN;
3757 tree a;
3758 tree attr;
3760 gcc_assert (TREE_CODE (current_function_decl) == FUNCTION_DECL);
3762 /* Decide if the current function is volatile. Such functions
3763 never return, and many memory cycles can be saved by not storing
3764 register values that will never be needed again. This optimization
3765 was added to speed up context switching in a kernel application. */
3766 if (optimize > 0
3767 && (TREE_NOTHROW (current_function_decl)
3768 || !(flag_unwind_tables
3769 || (flag_exceptions
3770 && arm_except_unwind_info (&global_options) != UI_SJLJ)))
3771 && TREE_THIS_VOLATILE (current_function_decl))
3772 type |= ARM_FT_VOLATILE;
3774 if (cfun->static_chain_decl != NULL)
3775 type |= ARM_FT_NESTED;
3777 attr = DECL_ATTRIBUTES (current_function_decl);
3779 a = lookup_attribute ("naked", attr);
3780 if (a != NULL_TREE)
3781 type |= ARM_FT_NAKED;
3783 a = lookup_attribute ("isr", attr);
3784 if (a == NULL_TREE)
3785 a = lookup_attribute ("interrupt", attr);
3787 if (a == NULL_TREE)
3788 type |= TARGET_INTERWORK ? ARM_FT_INTERWORKED : ARM_FT_NORMAL;
3789 else
3790 type |= arm_isr_value (TREE_VALUE (a));
3792 if (lookup_attribute ("cmse_nonsecure_entry", attr))
3793 type |= ARM_FT_CMSE_ENTRY;
3795 return type;
3798 /* Returns the type of the current function. */
3800 unsigned long
3801 arm_current_func_type (void)
3803 if (ARM_FUNC_TYPE (cfun->machine->func_type) == ARM_FT_UNKNOWN)
3804 cfun->machine->func_type = arm_compute_func_type ();
3806 return cfun->machine->func_type;
3809 bool
3810 arm_allocate_stack_slots_for_args (void)
3812 /* Naked functions should not allocate stack slots for arguments. */
3813 return !IS_NAKED (arm_current_func_type ());
3816 static bool
3817 arm_warn_func_return (tree decl)
3819 /* Naked functions are implemented entirely in assembly, including the
3820 return sequence, so suppress warnings about this. */
3821 return lookup_attribute ("naked", DECL_ATTRIBUTES (decl)) == NULL_TREE;
3825 /* Output assembler code for a block containing the constant parts
3826 of a trampoline, leaving space for the variable parts.
3828 On the ARM (if r8 is the static chain regnum, and remembering that
3829 referencing pc adds an offset of 8), the trampoline looks like:
3830 ldr r8, [pc, #0]
3831 ldr pc, [pc]
3832 .word static chain value
3833 .word function's address
3834 XXX FIXME: When the trampoline returns, r8 will be clobbered. */
3836 static void
3837 arm_asm_trampoline_template (FILE *f)
3839 fprintf (f, "\t.syntax unified\n");
3841 if (TARGET_ARM)
3843 fprintf (f, "\t.arm\n");
3844 asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", STATIC_CHAIN_REGNUM, PC_REGNUM);
3845 asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", PC_REGNUM, PC_REGNUM);
3847 else if (TARGET_THUMB2)
3849 fprintf (f, "\t.thumb\n");
3850 /* The Thumb-2 trampoline is similar to the arm implementation.
3851 Unlike 16-bit Thumb, we enter the stub in thumb mode. */
3852 asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n",
3853 STATIC_CHAIN_REGNUM, PC_REGNUM);
3854 asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n", PC_REGNUM, PC_REGNUM);
3856 else
3858 ASM_OUTPUT_ALIGN (f, 2);
3859 fprintf (f, "\t.code\t16\n");
3860 fprintf (f, ".Ltrampoline_start:\n");
3861 asm_fprintf (f, "\tpush\t{r0, r1}\n");
3862 asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
3863 asm_fprintf (f, "\tmov\t%r, r0\n", STATIC_CHAIN_REGNUM);
3864 asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
3865 asm_fprintf (f, "\tstr\tr0, [%r, #4]\n", SP_REGNUM);
3866 asm_fprintf (f, "\tpop\t{r0, %r}\n", PC_REGNUM);
3868 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
3869 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
3872 /* Emit RTL insns to initialize the variable parts of a trampoline. */
3874 static void
3875 arm_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
3877 rtx fnaddr, mem, a_tramp;
3879 emit_block_move (m_tramp, assemble_trampoline_template (),
3880 GEN_INT (TRAMPOLINE_SIZE), BLOCK_OP_NORMAL);
3882 mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 8 : 12);
3883 emit_move_insn (mem, chain_value);
3885 mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 12 : 16);
3886 fnaddr = XEXP (DECL_RTL (fndecl), 0);
3887 emit_move_insn (mem, fnaddr);
3889 a_tramp = XEXP (m_tramp, 0);
3890 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__clear_cache"),
3891 LCT_NORMAL, VOIDmode, a_tramp, Pmode,
3892 plus_constant (Pmode, a_tramp, TRAMPOLINE_SIZE), Pmode);
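/* So for a 32-bit (ARM or Thumb-2) target the initialized trampoline
   ends up, roughly, as: two instruction words copied from the template
   above, the static chain value at offset 8, the target function's
   address at offset 12, and finally a __clear_cache call over the
   whole block to keep the instruction cache coherent. */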
3895 /* Thumb trampolines should be entered in thumb mode, so set
3896 the bottom bit of the address. */
3898 static rtx
3899 arm_trampoline_adjust_address (rtx addr)
3901 if (TARGET_THUMB)
3902 addr = expand_simple_binop (Pmode, IOR, addr, const1_rtx,
3903 NULL, 0, OPTAB_LIB_WIDEN);
3904 return addr;
3907 /* Return 1 if it is possible to return using a single instruction.
3908 If SIBLING is non-null, this is a test for a return before a sibling
3909 call. SIBLING is the call insn, so we can examine its register usage. */
3912 use_return_insn (int iscond, rtx sibling)
3914 int regno;
3915 unsigned int func_type;
3916 unsigned long saved_int_regs;
3917 unsigned HOST_WIDE_INT stack_adjust;
3918 arm_stack_offsets *offsets;
3920 /* Never use a return instruction before reload has run. */
3921 if (!reload_completed)
3922 return 0;
3924 func_type = arm_current_func_type ();
3926 /* Naked, volatile and stack alignment functions need special
3927 consideration. */
3928 if (func_type & (ARM_FT_VOLATILE | ARM_FT_NAKED | ARM_FT_STACKALIGN))
3929 return 0;
3931 /* So do interrupt functions that use the frame pointer and Thumb
3932 interrupt functions. */
3933 if (IS_INTERRUPT (func_type) && (frame_pointer_needed || TARGET_THUMB))
3934 return 0;
3936 if (TARGET_LDRD && current_tune->prefer_ldrd_strd
3937 && !optimize_function_for_size_p (cfun))
3938 return 0;
3940 offsets = arm_get_frame_offsets ();
3941 stack_adjust = offsets->outgoing_args - offsets->saved_regs;
3943 /* As do variadic functions. */
3944 if (crtl->args.pretend_args_size
3945 || cfun->machine->uses_anonymous_args
3946 /* Or if the function calls __builtin_eh_return () */
3947 || crtl->calls_eh_return
3948 /* Or if the function calls alloca */
3949 || cfun->calls_alloca
3950 /* Or if there is a stack adjustment. However, if the stack pointer
3951 is saved on the stack, we can use a pre-incrementing stack load. */
3952 || !(stack_adjust == 0 || (TARGET_APCS_FRAME && frame_pointer_needed
3953 && stack_adjust == 4))
3954 /* Or if the static chain register was saved above the frame, under the
3955 assumption that the stack pointer isn't saved on the stack. */
3956 || (!(TARGET_APCS_FRAME && frame_pointer_needed)
3957 && arm_compute_static_chain_stack_bytes() != 0))
3958 return 0;
3960 saved_int_regs = offsets->saved_regs_mask;
3962 /* Unfortunately, the insn
3964 ldmib sp, {..., sp, ...}
3966 triggers a bug on most SA-110 based devices, such that the stack
3967 pointer won't be correctly restored if the instruction takes a
3968 page fault. We work around this problem by popping r3 along with
3969 the other registers, since that is never slower than executing
3970 another instruction.
3972 We test for !arm_arch5 here, because code for any architecture
3973 less than this could potentially be run on one of the buggy
3974 chips. */
3975 if (stack_adjust == 4 && !arm_arch5 && TARGET_ARM)
3977 /* Validate that r3 is a call-clobbered register (always true in
3978 the default abi) ... */
3979 if (!call_used_regs[3])
3980 return 0;
3982 /* ... that it isn't being used for a return value ... */
3983 if (arm_size_return_regs () >= (4 * UNITS_PER_WORD))
3984 return 0;
3986 /* ... or for a tail-call argument ... */
3987 if (sibling)
3989 gcc_assert (CALL_P (sibling));
3991 if (find_regno_fusage (sibling, USE, 3))
3992 return 0;
3995 /* ... and that there are no call-saved registers in r0-r2
3996 (always true in the default ABI). */
3997 if (saved_int_regs & 0x7)
3998 return 0;
4001 /* Can't be done if interworking with Thumb, and any registers have been
4002 stacked. */
4003 if (TARGET_INTERWORK && saved_int_regs != 0 && !IS_INTERRUPT(func_type))
4004 return 0;
4006 /* On StrongARM, conditional returns are expensive if they aren't
4007 taken and multiple registers have been stacked. */
4008 if (iscond && arm_tune_strongarm)
4010 /* Conditional return when just the LR is stored is a simple
4011 conditional-load instruction; that's not expensive. */
4012 if (saved_int_regs != 0 && saved_int_regs != (1 << LR_REGNUM))
4013 return 0;
4015 if (flag_pic
4016 && arm_pic_register != INVALID_REGNUM
4017 && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM))
4018 return 0;
4021 /* ARMv8-M nonsecure entry functions need to use bxns to return and thus need
4022 several instructions if anything needs to be popped. */
4023 if (saved_int_regs && IS_CMSE_ENTRY (func_type))
4024 return 0;
4026 /* If there are saved registers but the LR isn't saved, then we need
4027 two instructions for the return. */
4028 if (saved_int_regs && !(saved_int_regs & (1 << LR_REGNUM)))
4029 return 0;
4031 /* Can't be done if any of the VFP regs are pushed,
4032 since this also requires an insn. */
4033 if (TARGET_HARD_FLOAT)
4034 for (regno = FIRST_VFP_REGNUM; regno <= LAST_VFP_REGNUM; regno++)
4035 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
4036 return 0;
4038 if (TARGET_REALLY_IWMMXT)
4039 for (regno = FIRST_IWMMXT_REGNUM; regno <= LAST_IWMMXT_REGNUM; regno++)
4040 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
4041 return 0;
4043 return 1;
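/* For example, a simple leaf function that saves no registers and
   needs no stack adjustment passes every check above, so it can return
   with a single instruction (such as "bx lr"); conversely a variadic
   function, or one that calls alloca, always gets 0 here. */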
4046 /* Return TRUE if we should try to use a simple_return insn, i.e. perform
4047 shrink-wrapping if possible. This is the case if we need to emit a
4048 prologue, which we can test by looking at the offsets. */
4049 bool
4050 use_simple_return_p (void)
4052 arm_stack_offsets *offsets;
4054 /* Note this function can be called before or after reload. */
4055 if (!reload_completed)
4056 arm_compute_frame_layout ();
4058 offsets = arm_get_frame_offsets ();
4059 return offsets->outgoing_args != 0;
4062 /* Return TRUE if int I is a valid immediate ARM constant. */
4065 const_ok_for_arm (HOST_WIDE_INT i)
4067 int lowbit;
4069 /* For machines with >32 bit HOST_WIDE_INT, the bits above bit 31 must
4070 be all zero, or all one. */
4071 if ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff) != 0
4072 && ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff)
4073 != ((~(unsigned HOST_WIDE_INT) 0)
4074 & ~(unsigned HOST_WIDE_INT) 0xffffffff)))
4075 return FALSE;
4077 i &= (unsigned HOST_WIDE_INT) 0xffffffff;
4079 /* Fast return for 0 and small values. We must do this for zero, since
4080 the code below can't handle that one case. */
4081 if ((i & ~(unsigned HOST_WIDE_INT) 0xff) == 0)
4082 return TRUE;
4084 /* Get the number of trailing zeros. */
4085 lowbit = ffs((int) i) - 1;
4087 /* Only even shifts are allowed in ARM mode so round down to the
4088 nearest even number. */
4089 if (TARGET_ARM)
4090 lowbit &= ~1;
4092 if ((i & ~(((unsigned HOST_WIDE_INT) 0xff) << lowbit)) == 0)
4093 return TRUE;
4095 if (TARGET_ARM)
4097 /* Allow rotated constants in ARM mode. */
4098 if (lowbit <= 4
4099 && ((i & ~0xc000003f) == 0
4100 || (i & ~0xf000000f) == 0
4101 || (i & ~0xfc000003) == 0))
4102 return TRUE;
4104 else if (TARGET_THUMB2)
4106 HOST_WIDE_INT v;
4108 /* Allow repeated patterns 0x00XY00XY or 0xXYXYXYXY. */
4109 v = i & 0xff;
4110 v |= v << 16;
4111 if (i == v || i == (v | (v << 8)))
4112 return TRUE;
4114 /* Allow repeated pattern 0xXY00XY00. */
4115 v = i & 0xff00;
4116 v |= v << 16;
4117 if (i == v)
4118 return TRUE;
4120 else if (TARGET_HAVE_MOVT)
4122 /* Thumb-1 targets with MOVT. */
4123 if (i > 0xffff)
4124 return FALSE;
4125 else
4126 return TRUE;
4129 return FALSE;
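/* A few concrete cases: 0xff, 0xff00 and 0xff000000 are valid ARM
   immediates (an 8-bit value rotated right by an even amount), while
   0x12340000 is not and has to be synthesized from several insns.
   Under Thumb-2 the replicated patterns 0x00ff00ff and 0x01010101 are
   also accepted by the checks above. */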
4132 /* Return true if I is a valid constant for the operation CODE. */
4134 const_ok_for_op (HOST_WIDE_INT i, enum rtx_code code)
4136 if (const_ok_for_arm (i))
4137 return 1;
4139 switch (code)
4141 case SET:
4142 /* See if we can use movw. */
4143 if (TARGET_HAVE_MOVT && (i & 0xffff0000) == 0)
4144 return 1;
4145 else
4146 /* Otherwise, try mvn. */
4147 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
4149 case PLUS:
4150 /* See if we can use addw or subw. */
4151 if (TARGET_THUMB2
4152 && ((i & 0xfffff000) == 0
4153 || ((-i) & 0xfffff000) == 0))
4154 return 1;
4155 /* Fall through. */
4156 case COMPARE:
4157 case EQ:
4158 case NE:
4159 case GT:
4160 case LE:
4161 case LT:
4162 case GE:
4163 case GEU:
4164 case LTU:
4165 case GTU:
4166 case LEU:
4167 case UNORDERED:
4168 case ORDERED:
4169 case UNEQ:
4170 case UNGE:
4171 case UNLT:
4172 case UNGT:
4173 case UNLE:
4174 return const_ok_for_arm (ARM_SIGN_EXTEND (-i));
4176 case MINUS: /* Should only occur with (MINUS I reg) => rsb */
4177 case XOR:
4178 return 0;
4180 case IOR:
4181 if (TARGET_THUMB2)
4182 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
4183 return 0;
4185 case AND:
4186 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
4188 default:
4189 gcc_unreachable ();
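/* For example, (AND x 0xffffff00) is fine even though 0xffffff00 is
   not itself a valid immediate, because its complement 0xff is (the
   insn becomes a BIC); likewise a SET of 0xfffffffe can be done with a
   single MVN of 1. */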
4193 /* Return true if I is a valid DImode constant for the operation CODE. */
4195 const_ok_for_dimode_op (HOST_WIDE_INT i, enum rtx_code code)
4197 HOST_WIDE_INT hi_val = (i >> 32) & 0xFFFFFFFF;
4198 HOST_WIDE_INT lo_val = i & 0xFFFFFFFF;
4199 rtx hi = GEN_INT (hi_val);
4200 rtx lo = GEN_INT (lo_val);
4202 if (TARGET_THUMB1)
4203 return 0;
4205 switch (code)
4207 case AND:
4208 case IOR:
4209 case XOR:
4210 return (const_ok_for_op (hi_val, code) || hi_val == 0xFFFFFFFF)
4211 && (const_ok_for_op (lo_val, code) || lo_val == 0xFFFFFFFF);
4212 case PLUS:
4213 return arm_not_operand (hi, SImode) && arm_add_operand (lo, SImode);
4215 default:
4216 return 0;
4220 /* Emit a sequence of insns to handle a large constant.
4221 CODE is the code of the operation required, it can be any of SET, PLUS,
4222 IOR, AND, XOR, MINUS;
4223 MODE is the mode in which the operation is being performed;
4224 VAL is the integer to operate on;
4225 SOURCE is the other operand (a register, or a null-pointer for SET);
4226 SUBTARGETS means it is safe to create scratch registers if that will
4227 either produce a simpler sequence, or we will want to cse the values.
4228 Return value is the number of insns emitted. */
4230 /* ??? Tweak this for thumb2. */
4232 arm_split_constant (enum rtx_code code, machine_mode mode, rtx insn,
4233 HOST_WIDE_INT val, rtx target, rtx source, int subtargets)
4235 rtx cond;
4237 if (insn && GET_CODE (PATTERN (insn)) == COND_EXEC)
4238 cond = COND_EXEC_TEST (PATTERN (insn));
4239 else
4240 cond = NULL_RTX;
4242 if (subtargets || code == SET
4243 || (REG_P (target) && REG_P (source)
4244 && REGNO (target) != REGNO (source)))
4246 /* After arm_reorg has been called, we can't fix up expensive
4247 constants by pushing them into memory so we must synthesize
4248 them in-line, regardless of the cost. This is only likely to
4249 be more costly on chips that have load delay slots and we are
4250 compiling without running the scheduler (so no splitting
4251 occurred before the final instruction emission).
4253 Ref: gcc -O1 -mcpu=strongarm gcc.c-torture/compile/980506-2.c
4255 if (!cfun->machine->after_arm_reorg
4256 && !cond
4257 && (arm_gen_constant (code, mode, NULL_RTX, val, target, source,
4258 1, 0)
4259 > (arm_constant_limit (optimize_function_for_size_p (cfun))
4260 + (code != SET))))
4262 if (code == SET)
4264 /* Currently SET is the only monadic value for CODE, all
4265 the rest are dyadic. */
4266 if (TARGET_USE_MOVT)
4267 arm_emit_movpair (target, GEN_INT (val));
4268 else
4269 emit_set_insn (target, GEN_INT (val));
4271 return 1;
4273 else
4275 rtx temp = subtargets ? gen_reg_rtx (mode) : target;
4277 if (TARGET_USE_MOVT)
4278 arm_emit_movpair (temp, GEN_INT (val));
4279 else
4280 emit_set_insn (temp, GEN_INT (val));
4282 /* For MINUS, the value is subtracted from, since we never
4283 have subtraction of a constant. */
4284 if (code == MINUS)
4285 emit_set_insn (target, gen_rtx_MINUS (mode, temp, source));
4286 else
4287 emit_set_insn (target,
4288 gen_rtx_fmt_ee (code, mode, source, temp));
4289 return 2;
4294 return arm_gen_constant (code, mode, cond, val, target, source, subtargets,
4298 /* Return a sequence of integers, in RETURN_SEQUENCE, that fit into
4299 ARM/Thumb-2 immediates and add up to VAL.
4300 The function return value gives the number of insns required. */
4301 static int
4302 optimal_immediate_sequence (enum rtx_code code, unsigned HOST_WIDE_INT val,
4303 struct four_ints *return_sequence)
4305 int best_consecutive_zeros = 0;
4306 int i;
4307 int best_start = 0;
4308 int insns1, insns2;
4309 struct four_ints tmp_sequence;
4311 /* If we aren't targeting ARM, the best place to start is always at
4312 the bottom; otherwise look more closely. */
4313 if (TARGET_ARM)
4315 for (i = 0; i < 32; i += 2)
4317 int consecutive_zeros = 0;
4319 if (!(val & (3 << i)))
4321 while ((i < 32) && !(val & (3 << i)))
4323 consecutive_zeros += 2;
4324 i += 2;
4326 if (consecutive_zeros > best_consecutive_zeros)
4328 best_consecutive_zeros = consecutive_zeros;
4329 best_start = i - consecutive_zeros;
4331 i -= 2;
4336 /* So long as it won't require any more insns to do so, it's
4337 desirable to emit a small constant (in bits 0...9) in the last
4338 insn. This way there is more chance that it can be combined with
4339 a later addressing insn to form a pre-indexed load or store
4340 operation. Consider:
4342 *((volatile int *)0xe0000100) = 1;
4343 *((volatile int *)0xe0000110) = 2;
4345 We want this to wind up as:
4347 mov rA, #0xe0000000
4348 mov rB, #1
4349 str rB, [rA, #0x100]
4350 mov rB, #2
4351 str rB, [rA, #0x110]
4353 rather than having to synthesize both large constants from scratch.
4355 Therefore, we calculate how many insns would be required to emit
4356 the constant starting from `best_start', and also starting from
4357 zero (i.e. with bit 31 first to be output). If `best_start' doesn't
4358 yield a shorter sequence, we may as well use zero. */
4359 insns1 = optimal_immediate_sequence_1 (code, val, return_sequence, best_start);
4360 if (best_start != 0
4361 && ((HOST_WIDE_INT_1U << best_start) < val))
4363 insns2 = optimal_immediate_sequence_1 (code, val, &tmp_sequence, 0);
4364 if (insns2 <= insns1)
4366 *return_sequence = tmp_sequence;
4367 insns1 = insns2;
4371 return insns1;
4374 /* As for optimal_immediate_sequence, but starting at bit-position I. */
4375 static int
4376 optimal_immediate_sequence_1 (enum rtx_code code, unsigned HOST_WIDE_INT val,
4377 struct four_ints *return_sequence, int i)
4379 int remainder = val & 0xffffffff;
4380 int insns = 0;
4382 /* Try and find a way of doing the job in either two or three
4383 instructions.
4385 In ARM mode we can use 8-bit constants, rotated to any 2-bit aligned
4386 location. We start at position I. This may be the MSB, or
4387 optimal_immediate_sequence may have positioned it at the largest block
4388 of zeros that are aligned on a 2-bit boundary. We then fill up the temps,
4389 wrapping around to the top of the word when we drop off the bottom.
4390 In the worst case this code should produce no more than four insns.
4392 In Thumb2 mode, we can use 32/16-bit replicated constants, and 8-bit
4393 constants, shifted to any arbitrary location. We should always start
4394 at the MSB. */
4397 int end;
4398 unsigned int b1, b2, b3, b4;
4399 unsigned HOST_WIDE_INT result;
4400 int loc;
4402 gcc_assert (insns < 4);
4404 if (i <= 0)
4405 i += 32;
4407 /* First, find the next normal 12/8-bit shifted/rotated immediate. */
4408 if (remainder & ((TARGET_ARM ? (3 << (i - 2)) : (1 << (i - 1)))))
4410 loc = i;
4411 if (i <= 12 && TARGET_THUMB2 && code == PLUS)
4412 /* We can use addw/subw for the last 12 bits. */
4413 result = remainder;
4414 else
4416 /* Use an 8-bit shifted/rotated immediate. */
4417 end = i - 8;
4418 if (end < 0)
4419 end += 32;
4420 result = remainder & ((0x0ff << end)
4421 | ((i < end) ? (0xff >> (32 - end))
4422 : 0));
4423 i -= 8;
4426 else
4428 /* Arm allows rotates by a multiple of two. Thumb-2 allows
4429 arbitrary shifts. */
4430 i -= TARGET_ARM ? 2 : 1;
4431 continue;
4434 /* Next, see if we can do a better job with a thumb2 replicated
4435 constant.
4437 We do it this way around to catch the cases like 0x01F001E0 where
4438 two 8-bit immediates would work, but a replicated constant would
4439 make it worse.
4441 TODO: 16-bit constants that don't clear all the bits, but still win.
4442 TODO: Arithmetic splitting for set/add/sub, rather than bitwise. */
4443 if (TARGET_THUMB2)
4445 b1 = (remainder & 0xff000000) >> 24;
4446 b2 = (remainder & 0x00ff0000) >> 16;
4447 b3 = (remainder & 0x0000ff00) >> 8;
4448 b4 = remainder & 0xff;
4450 if (loc > 24)
4452 /* The 8-bit immediate already found clears b1 (and maybe b2),
4453 but must leave b3 and b4 alone. */
4455 /* First try to find a 32-bit replicated constant that clears
4456 almost everything. We can assume that we can't do it in one,
4457 or else we wouldn't be here. */
4458 unsigned int tmp = b1 & b2 & b3 & b4;
4459 unsigned int tmp2 = tmp + (tmp << 8) + (tmp << 16)
4460 + (tmp << 24);
4461 unsigned int matching_bytes = (tmp == b1) + (tmp == b2)
4462 + (tmp == b3) + (tmp == b4);
4463 if (tmp
4464 && (matching_bytes >= 3
4465 || (matching_bytes == 2
4466 && const_ok_for_op (remainder & ~tmp2, code))))
4468 /* At least 3 of the bytes match, and the fourth has at
4469 least as many bits set, or two of the bytes match
4470 and it will only require one more insn to finish. */
4471 result = tmp2;
4472 i = tmp != b1 ? 32
4473 : tmp != b2 ? 24
4474 : tmp != b3 ? 16
4475 : 8;
4478 /* Second, try to find a 16-bit replicated constant that can
4479 leave three of the bytes clear. If b2 or b4 is already
4480 zero, then we can. If the 8-bit from above would not
4481 clear b2 anyway, then we still win. */
4482 else if (b1 == b3 && (!b2 || !b4
4483 || (remainder & 0x00ff0000 & ~result)))
4485 result = remainder & 0xff00ff00;
4486 i = 24;
4489 else if (loc > 16)
4491 /* The 8-bit immediate already found clears b2 (and maybe b3)
4492 and we don't get here unless b1 is already clear, but it will
4493 leave b4 unchanged. */
4495 /* If we can clear b2 and b4 at once, then we win, since the
4496 8-bits couldn't possibly reach that far. */
4497 if (b2 == b4)
4499 result = remainder & 0x00ff00ff;
4500 i = 16;
4505 return_sequence->i[insns++] = result;
4506 remainder &= ~result;
4508 if (code == SET || code == MINUS)
4509 code = PLUS;
4511 while (remainder);
4513 return insns;
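/* Worked example (ARM mode, code == SET, val == 0x12340000): no single
   rotated 8-bit immediate covers the value, so the routine returns the
   two-element sequence { 0x12000000, 0x00340000 }, which the caller
   can emit as a MOV of the first value followed by an ADD of the
   second. */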
4516 /* Emit an instruction with the indicated PATTERN. If COND is
4517 non-NULL, conditionalize the execution of the instruction on COND
4518 being true. */
4520 static void
4521 emit_constant_insn (rtx cond, rtx pattern)
4523 if (cond)
4524 pattern = gen_rtx_COND_EXEC (VOIDmode, copy_rtx (cond), pattern);
4525 emit_insn (pattern);
4528 /* As above, but extra parameter GENERATE which, if clear, suppresses
4529 RTL generation. */
4531 static int
4532 arm_gen_constant (enum rtx_code code, machine_mode mode, rtx cond,
4533 unsigned HOST_WIDE_INT val, rtx target, rtx source,
4534 int subtargets, int generate)
4536 int can_invert = 0;
4537 int can_negate = 0;
4538 int final_invert = 0;
4539 int i;
4540 int set_sign_bit_copies = 0;
4541 int clear_sign_bit_copies = 0;
4542 int clear_zero_bit_copies = 0;
4543 int set_zero_bit_copies = 0;
4544 int insns = 0, neg_insns, inv_insns;
4545 unsigned HOST_WIDE_INT temp1, temp2;
4546 unsigned HOST_WIDE_INT remainder = val & 0xffffffff;
4547 struct four_ints *immediates;
4548 struct four_ints pos_immediates, neg_immediates, inv_immediates;
4550 /* Find out which operations are safe for a given CODE. Also do a quick
4551 check for degenerate cases; these can occur when DImode operations
4552 are split. */
4553 switch (code)
4555 case SET:
4556 can_invert = 1;
4557 break;
4559 case PLUS:
4560 can_negate = 1;
4561 break;
4563 case IOR:
4564 if (remainder == 0xffffffff)
4566 if (generate)
4567 emit_constant_insn (cond,
4568 gen_rtx_SET (target,
4569 GEN_INT (ARM_SIGN_EXTEND (val))));
4570 return 1;
4573 if (remainder == 0)
4575 if (reload_completed && rtx_equal_p (target, source))
4576 return 0;
4578 if (generate)
4579 emit_constant_insn (cond, gen_rtx_SET (target, source));
4580 return 1;
4582 break;
4584 case AND:
4585 if (remainder == 0)
4587 if (generate)
4588 emit_constant_insn (cond, gen_rtx_SET (target, const0_rtx));
4589 return 1;
4591 if (remainder == 0xffffffff)
4593 if (reload_completed && rtx_equal_p (target, source))
4594 return 0;
4595 if (generate)
4596 emit_constant_insn (cond, gen_rtx_SET (target, source));
4597 return 1;
4599 can_invert = 1;
4600 break;
4602 case XOR:
4603 if (remainder == 0)
4605 if (reload_completed && rtx_equal_p (target, source))
4606 return 0;
4607 if (generate)
4608 emit_constant_insn (cond, gen_rtx_SET (target, source));
4609 return 1;
4612 if (remainder == 0xffffffff)
4614 if (generate)
4615 emit_constant_insn (cond,
4616 gen_rtx_SET (target,
4617 gen_rtx_NOT (mode, source)));
4618 return 1;
4620 final_invert = 1;
4621 break;
4623 case MINUS:
4624 /* We treat MINUS as (val - source), since (source - val) is always
4625 passed as (source + (-val)). */
4626 if (remainder == 0)
4628 if (generate)
4629 emit_constant_insn (cond,
4630 gen_rtx_SET (target,
4631 gen_rtx_NEG (mode, source)));
4632 return 1;
4634 if (const_ok_for_arm (val))
4636 if (generate)
4637 emit_constant_insn (cond,
4638 gen_rtx_SET (target,
4639 gen_rtx_MINUS (mode, GEN_INT (val),
4640 source)));
4641 return 1;
4644 break;
4646 default:
4647 gcc_unreachable ();
4650 /* If we can do it in one insn get out quickly. */
4651 if (const_ok_for_op (val, code))
4653 if (generate)
4654 emit_constant_insn (cond,
4655 gen_rtx_SET (target,
4656 (source
4657 ? gen_rtx_fmt_ee (code, mode, source,
4658 GEN_INT (val))
4659 : GEN_INT (val))));
4660 return 1;
4663 /* On targets with UXTH/UBFX, we can deal with AND (2^N)-1 in a single
4664 insn. */
4665 if (code == AND && (i = exact_log2 (remainder + 1)) > 0
4666 && (arm_arch_thumb2 || (i == 16 && arm_arch6 && mode == SImode)))
4668 if (generate)
4670 if (mode == SImode && i == 16)
4671 /* Use UXTH in preference to UBFX, since on Thumb2 it's a
4672 smaller insn. */
4673 emit_constant_insn (cond,
4674 gen_zero_extendhisi2
4675 (target, gen_lowpart (HImode, source)));
4676 else
4677 /* Extz only supports SImode, but we can coerce the operands
4678 into that mode. */
4679 emit_constant_insn (cond,
4680 gen_extzv_t2 (gen_lowpart (SImode, target),
4681 gen_lowpart (SImode, source),
4682 GEN_INT (i), const0_rtx));
4685 return 1;
4688 /* Calculate a few attributes that may be useful for specific
4689 optimizations. */
4690 /* Count number of leading zeros. */
4691 for (i = 31; i >= 0; i--)
4693 if ((remainder & (1 << i)) == 0)
4694 clear_sign_bit_copies++;
4695 else
4696 break;
4699 /* Count number of leading 1's. */
4700 for (i = 31; i >= 0; i--)
4702 if ((remainder & (1 << i)) != 0)
4703 set_sign_bit_copies++;
4704 else
4705 break;
4708 /* Count number of trailing zeros. */
4709 for (i = 0; i <= 31; i++)
4711 if ((remainder & (1 << i)) == 0)
4712 clear_zero_bit_copies++;
4713 else
4714 break;
4717 /* Count number of trailing 1's. */
4718 for (i = 0; i <= 31; i++)
4720 if ((remainder & (1 << i)) != 0)
4721 set_zero_bit_copies++;
4722 else
4723 break;
4726 switch (code)
4728 case SET:
4729 /* See if we can do this by sign_extending a constant that is known
4730 to be negative. This is a good way of doing it, since the shift
4731 may well merge into a subsequent insn. */
4732 if (set_sign_bit_copies > 1)
4734 if (const_ok_for_arm
4735 (temp1 = ARM_SIGN_EXTEND (remainder
4736 << (set_sign_bit_copies - 1))))
4738 if (generate)
4740 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4741 emit_constant_insn (cond,
4742 gen_rtx_SET (new_src, GEN_INT (temp1)));
4743 emit_constant_insn (cond,
4744 gen_ashrsi3 (target, new_src,
4745 GEN_INT (set_sign_bit_copies - 1)));
4747 return 2;
4749 /* For an inverted constant, we will need to set the low bits;
4750 these will be shifted out of harm's way. */
4751 temp1 |= (1 << (set_sign_bit_copies - 1)) - 1;
4752 if (const_ok_for_arm (~temp1))
4754 if (generate)
4756 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4757 emit_constant_insn (cond,
4758 gen_rtx_SET (new_src, GEN_INT (temp1)));
4759 emit_constant_insn (cond,
4760 gen_ashrsi3 (target, new_src,
4761 GEN_INT (set_sign_bit_copies - 1)));
4763 return 2;
4767 /* See if we can calculate the value as the difference between two
4768 valid immediates. */
4769 if (clear_sign_bit_copies + clear_zero_bit_copies <= 16)
4771 int topshift = clear_sign_bit_copies & ~1;
4773 temp1 = ARM_SIGN_EXTEND ((remainder + (0x00800000 >> topshift))
4774 & (0xff000000 >> topshift));
4776 /* If temp1 is zero, then that means the 9 most significant
4777 bits of remainder were 1 and we've caused it to overflow.
4778 When topshift is 0 we don't need to do anything since we
4779 can borrow from 'bit 32'. */
4780 if (temp1 == 0 && topshift != 0)
4781 temp1 = 0x80000000 >> (topshift - 1);
4783 temp2 = ARM_SIGN_EXTEND (temp1 - remainder);
4785 if (const_ok_for_arm (temp2))
4787 if (generate)
4789 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4790 emit_constant_insn (cond,
4791 gen_rtx_SET (new_src, GEN_INT (temp1)));
4792 emit_constant_insn (cond,
4793 gen_addsi3 (target, new_src,
4794 GEN_INT (-temp2)));
4797 return 2;
4801 /* See if we can generate this by setting the bottom (or the top)
4802 16 bits, and then shifting these into the other half of the
4803 word. We only look for the simplest cases; to do more would cost
4804 too much. Be careful, however, not to generate this when the
4805 alternative would take fewer insns. */
4806 if (val & 0xffff0000)
4808 temp1 = remainder & 0xffff0000;
4809 temp2 = remainder & 0x0000ffff;
4811 /* Overlaps outside this range are best done using other methods. */
4812 for (i = 9; i < 24; i++)
4814 if ((((temp2 | (temp2 << i)) & 0xffffffff) == remainder)
4815 && !const_ok_for_arm (temp2))
4817 rtx new_src = (subtargets
4818 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
4819 : target);
4820 insns = arm_gen_constant (code, mode, cond, temp2, new_src,
4821 source, subtargets, generate);
4822 source = new_src;
4823 if (generate)
4824 emit_constant_insn
4825 (cond,
4826 gen_rtx_SET
4827 (target,
4828 gen_rtx_IOR (mode,
4829 gen_rtx_ASHIFT (mode, source,
4830 GEN_INT (i)),
4831 source)));
4832 return insns + 1;
4836 /* Don't duplicate cases already considered. */
4837 for (i = 17; i < 24; i++)
4839 if (((temp1 | (temp1 >> i)) == remainder)
4840 && !const_ok_for_arm (temp1))
4842 rtx new_src = (subtargets
4843 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
4844 : target);
4845 insns = arm_gen_constant (code, mode, cond, temp1, new_src,
4846 source, subtargets, generate);
4847 source = new_src;
4848 if (generate)
4849 emit_constant_insn
4850 (cond,
4851 gen_rtx_SET (target,
4852 gen_rtx_IOR
4853 (mode,
4854 gen_rtx_LSHIFTRT (mode, source,
4855 GEN_INT (i)),
4856 source)));
4857 return insns + 1;
4861 break;
4863 case IOR:
4864 case XOR:
4865 /* If we have IOR or XOR, and the constant can be loaded in a
4866 single instruction, and we can find a temporary to put it in,
4867 then this can be done in two instructions instead of 3-4. */
4868 if (subtargets
4869 /* TARGET can't be NULL if SUBTARGETS is 0 */
4870 || (reload_completed && !reg_mentioned_p (target, source)))
4872 if (const_ok_for_arm (ARM_SIGN_EXTEND (~val)))
4874 if (generate)
4876 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
4878 emit_constant_insn (cond,
4879 gen_rtx_SET (sub, GEN_INT (val)));
4880 emit_constant_insn (cond,
4881 gen_rtx_SET (target,
4882 gen_rtx_fmt_ee (code, mode,
4883 source, sub)));
4885 return 2;
4889 if (code == XOR)
4890 break;
4892 /* Convert.
4893 x = y | constant (which is composed of set_sign_bit_copies of leading 1s
4894 and the remainder 0s, e.g. 0xfff00000)
4895 x = ~(~(y ashift set_sign_bit_copies) lshiftrt set_sign_bit_copies)
4897 This can be done in 2 instructions by using shifts with mov or mvn.
4898 e.g. for
4899 x = x | 0xfff00000;
4900 we generate:
4901 mvn r0, r0, asl #12
4902 mvn r0, r0, lsr #12 */
4903 if (set_sign_bit_copies > 8
4904 && (val & (HOST_WIDE_INT_M1U << (32 - set_sign_bit_copies))) == val)
4906 if (generate)
4908 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
4909 rtx shift = GEN_INT (set_sign_bit_copies);
4911 emit_constant_insn
4912 (cond,
4913 gen_rtx_SET (sub,
4914 gen_rtx_NOT (mode,
4915 gen_rtx_ASHIFT (mode,
4916 source,
4917 shift))));
4918 emit_constant_insn
4919 (cond,
4920 gen_rtx_SET (target,
4921 gen_rtx_NOT (mode,
4922 gen_rtx_LSHIFTRT (mode, sub,
4923 shift))));
4925 return 2;
4928 /* Convert
4929 x = y | constant (which has set_zero_bit_copies number of trailing ones).
4931 x = ~((~y lshiftrt set_zero_bit_copies) ashift set_zero_bit_copies).
4933 E.g. for r0 = r0 | 0xfff
4934 mvn r0, r0, lsr #12
4935 mvn r0, r0, asl #12
4938 if (set_zero_bit_copies > 8
4939 && (remainder & ((1 << set_zero_bit_copies) - 1)) == remainder)
4941 if (generate)
4943 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
4944 rtx shift = GEN_INT (set_zero_bit_copies);
4946 emit_constant_insn
4947 (cond,
4948 gen_rtx_SET (sub,
4949 gen_rtx_NOT (mode,
4950 gen_rtx_LSHIFTRT (mode,
4951 source,
4952 shift))));
4953 emit_constant_insn
4954 (cond,
4955 gen_rtx_SET (target,
4956 gen_rtx_NOT (mode,
4957 gen_rtx_ASHIFT (mode, sub,
4958 shift))));
4960 return 2;
4963 /* This will never be reached for Thumb2 because orn is a valid
4964 instruction. This is for Thumb1 and the ARM 32 bit cases.
4966 x = y | constant (such that ~constant is a valid constant)
4967 Transform this to
4968 x = ~(~y & ~constant).
4970 if (const_ok_for_arm (temp1 = ARM_SIGN_EXTEND (~val)))
4972 if (generate)
4974 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
4975 emit_constant_insn (cond,
4976 gen_rtx_SET (sub,
4977 gen_rtx_NOT (mode, source)));
4978 source = sub;
4979 if (subtargets)
4980 sub = gen_reg_rtx (mode);
4981 emit_constant_insn (cond,
4982 gen_rtx_SET (sub,
4983 gen_rtx_AND (mode, source,
4984 GEN_INT (temp1))));
4985 emit_constant_insn (cond,
4986 gen_rtx_SET (target,
4987 gen_rtx_NOT (mode, sub)));
4989 return 3;
4991 break;
4993 case AND:
4994 /* See if two shifts will do 2 or more insn's worth of work. */
4995 if (clear_sign_bit_copies >= 16 && clear_sign_bit_copies < 24)
4997 HOST_WIDE_INT shift_mask = ((0xffffffff
4998 << (32 - clear_sign_bit_copies))
4999 & 0xffffffff);
5001 if ((remainder | shift_mask) != 0xffffffff)
5003 HOST_WIDE_INT new_val
5004 = ARM_SIGN_EXTEND (remainder | shift_mask);
5006 if (generate)
5008 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
5009 insns = arm_gen_constant (AND, SImode, cond, new_val,
5010 new_src, source, subtargets, 1);
5011 source = new_src;
5013 else
5015 rtx targ = subtargets ? NULL_RTX : target;
5016 insns = arm_gen_constant (AND, mode, cond, new_val,
5017 targ, source, subtargets, 0);
5021 if (generate)
5023 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
5024 rtx shift = GEN_INT (clear_sign_bit_copies);
5026 emit_insn (gen_ashlsi3 (new_src, source, shift));
5027 emit_insn (gen_lshrsi3 (target, new_src, shift));
5030 return insns + 2;
5033 if (clear_zero_bit_copies >= 16 && clear_zero_bit_copies < 24)
5035 HOST_WIDE_INT shift_mask = (1 << clear_zero_bit_copies) - 1;
5037 if ((remainder | shift_mask) != 0xffffffff)
5039 HOST_WIDE_INT new_val
5040 = ARM_SIGN_EXTEND (remainder | shift_mask);
5041 if (generate)
5043 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
5045 insns = arm_gen_constant (AND, mode, cond, new_val,
5046 new_src, source, subtargets, 1);
5047 source = new_src;
5049 else
5051 rtx targ = subtargets ? NULL_RTX : target;
5053 insns = arm_gen_constant (AND, mode, cond, new_val,
5054 targ, source, subtargets, 0);
5058 if (generate)
5060 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
5061 rtx shift = GEN_INT (clear_zero_bit_copies);
5063 emit_insn (gen_lshrsi3 (new_src, source, shift));
5064 emit_insn (gen_ashlsi3 (target, new_src, shift));
5067 return insns + 2;
5070 break;
5072 default:
5073 break;
5076 /* Calculate what the instruction sequences would be if we generated it
5077 normally, negated, or inverted. */
5078 if (code == AND)
5079 /* AND cannot be split into multiple insns, so invert and use BIC. */
5080 insns = 99;
5081 else
5082 insns = optimal_immediate_sequence (code, remainder, &pos_immediates);
5084 if (can_negate)
5085 neg_insns = optimal_immediate_sequence (code, (-remainder) & 0xffffffff,
5086 &neg_immediates);
5087 else
5088 neg_insns = 99;
5090 if (can_invert || final_invert)
5091 inv_insns = optimal_immediate_sequence (code, remainder ^ 0xffffffff,
5092 &inv_immediates);
5093 else
5094 inv_insns = 99;
5096 immediates = &pos_immediates;
5098 /* Is the negated immediate sequence more efficient? */
5099 if (neg_insns < insns && neg_insns <= inv_insns)
5101 insns = neg_insns;
5102 immediates = &neg_immediates;
5104 else
5105 can_negate = 0;
5107 /* Is the inverted immediate sequence more efficient?
5108 We must allow for an extra NOT instruction for XOR operations, although
5109 there is some chance that the final 'mvn' will get optimized later. */
5110 if ((inv_insns + 1) < insns || (!final_invert && inv_insns < insns))
5112 insns = inv_insns;
5113 immediates = &inv_immediates;
5115 else
5117 can_invert = 0;
5118 final_invert = 0;
5121 /* Now output the chosen sequence as instructions. */
5122 if (generate)
5124 for (i = 0; i < insns; i++)
5126 rtx new_src, temp1_rtx;
5128 temp1 = immediates->i[i];
5130 if (code == SET || code == MINUS)
5131 new_src = (subtargets ? gen_reg_rtx (mode) : target);
5132 else if ((final_invert || i < (insns - 1)) && subtargets)
5133 new_src = gen_reg_rtx (mode);
5134 else
5135 new_src = target;
5137 if (can_invert)
5138 temp1 = ~temp1;
5139 else if (can_negate)
5140 temp1 = -temp1;
5142 temp1 = trunc_int_for_mode (temp1, mode);
5143 temp1_rtx = GEN_INT (temp1);
5145 if (code == SET)
5147 else if (code == MINUS)
5148 temp1_rtx = gen_rtx_MINUS (mode, temp1_rtx, source);
5149 else
5150 temp1_rtx = gen_rtx_fmt_ee (code, mode, source, temp1_rtx);
5152 emit_constant_insn (cond, gen_rtx_SET (new_src, temp1_rtx));
5153 source = new_src;
5155 if (code == SET)
5157 can_negate = can_invert;
5158 can_invert = 0;
5159 code = PLUS;
5161 else if (code == MINUS)
5162 code = PLUS;
5166 if (final_invert)
5168 if (generate)
5169 emit_constant_insn (cond, gen_rtx_SET (target,
5170 gen_rtx_NOT (mode, source)));
5171 insns++;
5174 return insns;
5177 /* Canonicalize a comparison so that we are more likely to recognize it.
5178 This can be done for a few constant compares, where we can make the
5179 immediate value easier to load. */
5181 static void
5182 arm_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
5183 bool op0_preserve_value)
5185 machine_mode mode;
5186 unsigned HOST_WIDE_INT i, maxval;
5188 mode = GET_MODE (*op0);
5189 if (mode == VOIDmode)
5190 mode = GET_MODE (*op1);
5192 maxval = (HOST_WIDE_INT_1U << (GET_MODE_BITSIZE (mode) - 1)) - 1;
5194 /* For DImode, we have GE/LT/GEU/LTU comparisons. In ARM mode
5195 we can also use cmp/cmpeq for GTU/LEU. GT/LE must be either
5196 reversed or (for constant OP1) adjusted to GE/LT. Similarly
5197 for GTU/LEU in Thumb mode. */
5198 if (mode == DImode)
5201 if (*code == GT || *code == LE
5202 || (!TARGET_ARM && (*code == GTU || *code == LEU)))
5204 /* Missing comparison. First try to use an available
5205 comparison. */
5206 if (CONST_INT_P (*op1))
5208 i = INTVAL (*op1);
5209 switch (*code)
5211 case GT:
5212 case LE:
5213 if (i != maxval
5214 && arm_const_double_by_immediates (GEN_INT (i + 1)))
5216 *op1 = GEN_INT (i + 1);
5217 *code = *code == GT ? GE : LT;
5218 return;
5220 break;
5221 case GTU:
5222 case LEU:
5223 if (i != ~((unsigned HOST_WIDE_INT) 0)
5224 && arm_const_double_by_immediates (GEN_INT (i + 1)))
5226 *op1 = GEN_INT (i + 1);
5227 *code = *code == GTU ? GEU : LTU;
5228 return;
5230 break;
5231 default:
5232 gcc_unreachable ();
5236 /* If that did not work, reverse the condition. */
5237 if (!op0_preserve_value)
5239 std::swap (*op0, *op1);
5240 *code = (int)swap_condition ((enum rtx_code)*code);
5243 return;
5246 /* If *op0 is (zero_extend:SI (subreg:QI (reg:SI) 0)) and comparing
5247 with const0_rtx, change it to (and:SI (reg:SI) (const_int 255)),
5248 to facilitate possible combining with a cmp into 'ands'. */
5249 if (mode == SImode
5250 && GET_CODE (*op0) == ZERO_EXTEND
5251 && GET_CODE (XEXP (*op0, 0)) == SUBREG
5252 && GET_MODE (XEXP (*op0, 0)) == QImode
5253 && GET_MODE (SUBREG_REG (XEXP (*op0, 0))) == SImode
5254 && subreg_lowpart_p (XEXP (*op0, 0))
5255 && *op1 == const0_rtx)
5256 *op0 = gen_rtx_AND (SImode, SUBREG_REG (XEXP (*op0, 0)),
5257 GEN_INT (255));
5259 /* Comparisons smaller than DImode. Only adjust comparisons against
5260 an out-of-range constant. */
5261 if (!CONST_INT_P (*op1)
5262 || const_ok_for_arm (INTVAL (*op1))
5263 || const_ok_for_arm (- INTVAL (*op1)))
5264 return;
5266 i = INTVAL (*op1);
5268 switch (*code)
5270 case EQ:
5271 case NE:
5272 return;
5274 case GT:
5275 case LE:
5276 if (i != maxval
5277 && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
5279 *op1 = GEN_INT (ARM_SIGN_EXTEND (i + 1));
5280 *code = *code == GT ? GE : LT;
5281 return;
5283 break;
5285 case GE:
5286 case LT:
5287 if (i != ~maxval
5288 && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
5290 *op1 = GEN_INT (i - 1);
5291 *code = *code == GE ? GT : LE;
5292 return;
5294 break;
5296 case GTU:
5297 case LEU:
5298 if (i != ~((unsigned HOST_WIDE_INT) 0)
5299 && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
5301 *op1 = GEN_INT (ARM_SIGN_EXTEND (i + 1));
5302 *code = *code == GTU ? GEU : LTU;
5303 return;
5305 break;
5307 case GEU:
5308 case LTU:
5309 if (i != 0
5310 && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
5312 *op1 = GEN_INT (i - 1);
5313 *code = *code == GEU ? GTU : LEU;
5314 return;
5316 break;
5318 default:
5319 gcc_unreachable ();
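/* For example (illustrative only): 0xffffff is not a valid ARM data
   processing immediate, but 0x1000000 is, so a comparison such as

       (gt:SI (reg:SI r0) (const_int 0xffffff))

   is rewritten by the GT/LE case above into

       (ge:SI (reg:SI r0) (const_int 0x1000000))

   which avoids having to load the awkward constant into a register
   first.  */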
5324 /* Define how to find the value returned by a function. */
5326 static rtx
5327 arm_function_value(const_tree type, const_tree func,
5328 bool outgoing ATTRIBUTE_UNUSED)
5330 machine_mode mode;
5331 int unsignedp ATTRIBUTE_UNUSED;
5332 rtx r ATTRIBUTE_UNUSED;
5334 mode = TYPE_MODE (type);
5336 if (TARGET_AAPCS_BASED)
5337 return aapcs_allocate_return_reg (mode, type, func);
5339 /* Promote integer types. */
5340 if (INTEGRAL_TYPE_P (type))
5341 mode = arm_promote_function_mode (type, mode, &unsignedp, func, 1);
5343 /* Promotes small structs returned in a register to full-word size
5344 for big-endian AAPCS. */
5345 if (arm_return_in_msb (type))
5347 HOST_WIDE_INT size = int_size_in_bytes (type);
5348 if (size % UNITS_PER_WORD != 0)
5350 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
5351 mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
5355 return arm_libcall_value_1 (mode);
5358 /* libcall hashtable helpers. */
5360 struct libcall_hasher : nofree_ptr_hash <const rtx_def>
5362 static inline hashval_t hash (const rtx_def *);
5363 static inline bool equal (const rtx_def *, const rtx_def *);
5364 static inline void remove (rtx_def *);
5367 inline bool
5368 libcall_hasher::equal (const rtx_def *p1, const rtx_def *p2)
5370 return rtx_equal_p (p1, p2);
5373 inline hashval_t
5374 libcall_hasher::hash (const rtx_def *p1)
5376 return hash_rtx (p1, VOIDmode, NULL, NULL, FALSE);
5379 typedef hash_table<libcall_hasher> libcall_table_type;
5381 static void
5382 add_libcall (libcall_table_type *htab, rtx libcall)
5384 *htab->find_slot (libcall, INSERT) = libcall;
5387 static bool
5388 arm_libcall_uses_aapcs_base (const_rtx libcall)
5390 static bool init_done = false;
5391 static libcall_table_type *libcall_htab = NULL;
5393 if (!init_done)
5395 init_done = true;
5397 libcall_htab = new libcall_table_type (31);
5398 add_libcall (libcall_htab,
5399 convert_optab_libfunc (sfloat_optab, SFmode, SImode));
5400 add_libcall (libcall_htab,
5401 convert_optab_libfunc (sfloat_optab, DFmode, SImode));
5402 add_libcall (libcall_htab,
5403 convert_optab_libfunc (sfloat_optab, SFmode, DImode));
5404 add_libcall (libcall_htab,
5405 convert_optab_libfunc (sfloat_optab, DFmode, DImode));
5407 add_libcall (libcall_htab,
5408 convert_optab_libfunc (ufloat_optab, SFmode, SImode));
5409 add_libcall (libcall_htab,
5410 convert_optab_libfunc (ufloat_optab, DFmode, SImode));
5411 add_libcall (libcall_htab,
5412 convert_optab_libfunc (ufloat_optab, SFmode, DImode));
5413 add_libcall (libcall_htab,
5414 convert_optab_libfunc (ufloat_optab, DFmode, DImode));
5416 add_libcall (libcall_htab,
5417 convert_optab_libfunc (sext_optab, SFmode, HFmode));
5418 add_libcall (libcall_htab,
5419 convert_optab_libfunc (trunc_optab, HFmode, SFmode));
5420 add_libcall (libcall_htab,
5421 convert_optab_libfunc (sfix_optab, SImode, DFmode));
5422 add_libcall (libcall_htab,
5423 convert_optab_libfunc (ufix_optab, SImode, DFmode));
5424 add_libcall (libcall_htab,
5425 convert_optab_libfunc (sfix_optab, DImode, DFmode));
5426 add_libcall (libcall_htab,
5427 convert_optab_libfunc (ufix_optab, DImode, DFmode));
5428 add_libcall (libcall_htab,
5429 convert_optab_libfunc (sfix_optab, DImode, SFmode));
5430 add_libcall (libcall_htab,
5431 convert_optab_libfunc (ufix_optab, DImode, SFmode));
5433 /* Values from double-precision helper functions are returned in core
5434 registers if the selected core only supports single-precision
5435 arithmetic, even if we are using the hard-float ABI. The same is
5436 true for single-precision helpers, but we will never be using the
5437 hard-float ABI on a CPU which doesn't support single-precision
5438 operations in hardware. */
5439 add_libcall (libcall_htab, optab_libfunc (add_optab, DFmode));
5440 add_libcall (libcall_htab, optab_libfunc (sdiv_optab, DFmode));
5441 add_libcall (libcall_htab, optab_libfunc (smul_optab, DFmode));
5442 add_libcall (libcall_htab, optab_libfunc (neg_optab, DFmode));
5443 add_libcall (libcall_htab, optab_libfunc (sub_optab, DFmode));
5444 add_libcall (libcall_htab, optab_libfunc (eq_optab, DFmode));
5445 add_libcall (libcall_htab, optab_libfunc (lt_optab, DFmode));
5446 add_libcall (libcall_htab, optab_libfunc (le_optab, DFmode));
5447 add_libcall (libcall_htab, optab_libfunc (ge_optab, DFmode));
5448 add_libcall (libcall_htab, optab_libfunc (gt_optab, DFmode));
5449 add_libcall (libcall_htab, optab_libfunc (unord_optab, DFmode));
5450 add_libcall (libcall_htab, convert_optab_libfunc (sext_optab, DFmode,
5451 SFmode));
5452 add_libcall (libcall_htab, convert_optab_libfunc (trunc_optab, SFmode,
5453 DFmode));
5454 add_libcall (libcall_htab,
5455 convert_optab_libfunc (trunc_optab, HFmode, DFmode));
5458 return libcall && libcall_htab->find (libcall) != NULL;
5461 static rtx
5462 arm_libcall_value_1 (machine_mode mode)
5464 if (TARGET_AAPCS_BASED)
5465 return aapcs_libcall_value (mode);
5466 else if (TARGET_IWMMXT_ABI
5467 && arm_vector_mode_supported_p (mode))
5468 return gen_rtx_REG (mode, FIRST_IWMMXT_REGNUM);
5469 else
5470 return gen_rtx_REG (mode, ARG_REGISTER (1));
5473 /* Define how to find the value returned by a library function
5474 assuming the value has mode MODE. */
5476 static rtx
5477 arm_libcall_value (machine_mode mode, const_rtx libcall)
5479 if (TARGET_AAPCS_BASED && arm_pcs_default != ARM_PCS_AAPCS
5480 && GET_MODE_CLASS (mode) == MODE_FLOAT)
5482 /* The following libcalls return their result in integer registers,
5483 even though they return a floating point value. */
5484 if (arm_libcall_uses_aapcs_base (libcall))
5485 return gen_rtx_REG (mode, ARG_REGISTER(1));
5489 return arm_libcall_value_1 (mode);
5492 /* Implement TARGET_FUNCTION_VALUE_REGNO_P. */
5494 static bool
5495 arm_function_value_regno_p (const unsigned int regno)
5497 if (regno == ARG_REGISTER (1)
5498 || (TARGET_32BIT
5499 && TARGET_AAPCS_BASED
5500 && TARGET_HARD_FLOAT
5501 && regno == FIRST_VFP_REGNUM)
5502 || (TARGET_IWMMXT_ABI
5503 && regno == FIRST_IWMMXT_REGNUM))
5504 return true;
5506 return false;
5509 /* Determine the amount of memory needed to store the possible return
5510 registers of an untyped call. */
5511 int
5512 arm_apply_result_size (void)
5514 int size = 16;
5516 if (TARGET_32BIT)
5518 if (TARGET_HARD_FLOAT_ABI)
5519 size += 32;
5520 if (TARGET_IWMMXT_ABI)
5521 size += 8;
5524 return size;
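/* A sketch of where those numbers come from (a reading of the code
   above, not a normative ABI statement): the 16-byte base covers the
   core registers r0-r3; the extra 32 bytes leaves room for a result in
   VFP registers under a hard-float ABI (up to four double-precision
   values, 4 * 8 bytes); the extra 8 bytes covers a single 64-bit
   iWMMXt register.  */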
5527 /* Decide whether TYPE should be returned in memory (true)
5528 or in a register (false). FNTYPE is the type of the function making
5529 the call. */
5530 static bool
5531 arm_return_in_memory (const_tree type, const_tree fntype)
5533 HOST_WIDE_INT size;
5535 size = int_size_in_bytes (type); /* Negative if not fixed size. */
5537 if (TARGET_AAPCS_BASED)
5539 /* Simple, non-aggregate types (i.e. not including vectors and
5540 complex) are always returned in a register (or registers).
5541 We don't care about which register here, so we can short-cut
5542 some of the detail. */
5543 if (!AGGREGATE_TYPE_P (type)
5544 && TREE_CODE (type) != VECTOR_TYPE
5545 && TREE_CODE (type) != COMPLEX_TYPE)
5546 return false;
5548 /* Any return value that is no larger than one word can be
5549 returned in r0. */
5550 if (((unsigned HOST_WIDE_INT) size) <= UNITS_PER_WORD)
5551 return false;
5553 /* Check any available co-processors to see if they accept the
5554 type as a register candidate (VFP, for example, can return
5555 some aggregates in consecutive registers). These aren't
5556 available if the call is variadic. */
5557 if (aapcs_select_return_coproc (type, fntype) >= 0)
5558 return false;
5560 /* Vector values should be returned using ARM registers, not
5561 memory (unless they're over 16 bytes, which will break since
5562 we only have four call-clobbered registers to play with). */
5563 if (TREE_CODE (type) == VECTOR_TYPE)
5564 return (size < 0 || size > (4 * UNITS_PER_WORD));
5566 /* The rest go in memory. */
5567 return true;
5570 if (TREE_CODE (type) == VECTOR_TYPE)
5571 return (size < 0 || size > (4 * UNITS_PER_WORD));
5573 if (!AGGREGATE_TYPE_P (type) &&
5574 (TREE_CODE (type) != VECTOR_TYPE))
5575 /* All simple types are returned in registers. */
5576 return false;
5578 if (arm_abi != ARM_ABI_APCS)
5580 /* ATPCS and later return aggregate types in memory only if they are
5581 larger than a word (or are variable size). */
5582 return (size < 0 || size > UNITS_PER_WORD);
5585 /* For the arm-wince targets we choose to be compatible with Microsoft's
5586 ARM and Thumb compilers, which always return aggregates in memory. */
5587 #ifndef ARM_WINCE
5588 /* All structures/unions bigger than one word are returned in memory.
5589 Also catch the case where int_size_in_bytes returns -1. In this case
5590 the aggregate is either huge or of variable size, and in either case
5591 we will want to return it via memory and not in a register. */
5592 if (size < 0 || size > UNITS_PER_WORD)
5593 return true;
5595 if (TREE_CODE (type) == RECORD_TYPE)
5597 tree field;
5599 /* For a struct the APCS says that we only return in a register
5600 if the type is 'integer like' and every addressable element
5601 has an offset of zero. For practical purposes this means
5602 that the structure can have at most one non bit-field element
5603 and that this element must be the first one in the structure. */
5605 /* Find the first field, ignoring non FIELD_DECL things which will
5606 have been created by C++. */
5607 for (field = TYPE_FIELDS (type);
5608 field && TREE_CODE (field) != FIELD_DECL;
5609 field = DECL_CHAIN (field))
5610 continue;
5612 if (field == NULL)
5613 return false; /* An empty structure. Allowed by an extension to ANSI C. */
5615 /* Check that the first field is valid for returning in a register. */
5617 /* ... Floats are not allowed */
5618 if (FLOAT_TYPE_P (TREE_TYPE (field)))
5619 return true;
5621 /* ... Aggregates that are not themselves valid for returning in
5622 a register are not allowed. */
5623 if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
5624 return true;
5626 /* Now check the remaining fields, if any. Only bitfields are allowed,
5627 since they are not addressable. */
5628 for (field = DECL_CHAIN (field);
5629 field;
5630 field = DECL_CHAIN (field))
5632 if (TREE_CODE (field) != FIELD_DECL)
5633 continue;
5635 if (!DECL_BIT_FIELD_TYPE (field))
5636 return true;
5639 return false;
5642 if (TREE_CODE (type) == UNION_TYPE)
5644 tree field;
5646 /* Unions can be returned in registers if every element is
5647 integral, or can be returned in an integer register. */
5648 for (field = TYPE_FIELDS (type);
5649 field;
5650 field = DECL_CHAIN (field))
5652 if (TREE_CODE (field) != FIELD_DECL)
5653 continue;
5655 if (FLOAT_TYPE_P (TREE_TYPE (field)))
5656 return true;
5658 if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
5659 return true;
5662 return false;
5664 #endif /* not ARM_WINCE */
5666 /* Return all other types in memory. */
5667 return true;
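/* Some worked examples of the AAPCS branch above (sketches only, these
   are not extra checks performed here):

     int f1 (void);                        -- scalar, returned in r0
     struct s4 { short a, b; } f4 (void);  -- 4 bytes, returned in r0
     struct s8 { int a, b; } f8 (void);    -- 8 bytes and not a
                                              co-processor candidate,
                                              returned in memory
     struct hf { float x, y; } fh (void);  -- a VFP return candidate under
                                              the hard-float variant, so
                                              returned in s0/s1 rather
                                              than in memory  */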
5670 const struct pcs_attribute_arg
5672 const char *arg;
5673 enum arm_pcs value;
5674 } pcs_attribute_args[] =
5676 {"aapcs", ARM_PCS_AAPCS},
5677 {"aapcs-vfp", ARM_PCS_AAPCS_VFP},
5678 #if 0
5679 /* We could recognize these, but changes would be needed elsewhere
5680 * to implement them. */
5681 {"aapcs-iwmmxt", ARM_PCS_AAPCS_IWMMXT},
5682 {"atpcs", ARM_PCS_ATPCS},
5683 {"apcs", ARM_PCS_APCS},
5684 #endif
5685 {NULL, ARM_PCS_UNKNOWN}
5688 static enum arm_pcs
5689 arm_pcs_from_attribute (tree attr)
5691 const struct pcs_attribute_arg *ptr;
5692 const char *arg;
5694 /* Get the value of the argument. */
5695 if (TREE_VALUE (attr) == NULL_TREE
5696 || TREE_CODE (TREE_VALUE (attr)) != STRING_CST)
5697 return ARM_PCS_UNKNOWN;
5699 arg = TREE_STRING_POINTER (TREE_VALUE (attr));
5701 /* Check it against the list of known arguments. */
5702 for (ptr = pcs_attribute_args; ptr->arg != NULL; ptr++)
5703 if (streq (arg, ptr->arg))
5704 return ptr->value;
5706 /* An unrecognized PCS name. */
5707 return ARM_PCS_UNKNOWN;
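/* For illustration, the strings above correspond to the source-level
   "pcs" type attribute; a declaration like the one below (not compiled
   here, the function name is arbitrary) reaches this function with the
   STRING_CST "aapcs-vfp" and maps to ARM_PCS_AAPCS_VFP.  */
#if 0
double scale_vfp (double x, double k) __attribute__ ((pcs ("aapcs-vfp")));
#endif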
5710 /* Get the PCS variant to use for this call. TYPE is the function's type
5711 specification, DECL is the specific declaration. DECL may be null if
5712 the call could be indirect or if this is a library call. */
5713 static enum arm_pcs
5714 arm_get_pcs_model (const_tree type, const_tree decl)
5716 bool user_convention = false;
5717 enum arm_pcs user_pcs = arm_pcs_default;
5718 tree attr;
5720 gcc_assert (type);
5722 attr = lookup_attribute ("pcs", TYPE_ATTRIBUTES (type));
5723 if (attr)
5725 user_pcs = arm_pcs_from_attribute (TREE_VALUE (attr));
5726 user_convention = true;
5729 if (TARGET_AAPCS_BASED)
5731 /* Detect varargs functions. These always use the base rules
5732 (no argument is ever a candidate for a co-processor
5733 register). */
5734 bool base_rules = stdarg_p (type);
5736 if (user_convention)
5738 if (user_pcs > ARM_PCS_AAPCS_LOCAL)
5739 sorry ("non-AAPCS derived PCS variant");
5740 else if (base_rules && user_pcs != ARM_PCS_AAPCS)
5741 error ("variadic functions must use the base AAPCS variant");
5744 if (base_rules)
5745 return ARM_PCS_AAPCS;
5746 else if (user_convention)
5747 return user_pcs;
5748 else if (decl && flag_unit_at_a_time)
5750 /* Local functions never leak outside this compilation unit,
5751 so we are free to use whatever conventions are
5752 appropriate. */
5753 /* FIXME: remove CONST_CAST_TREE when cgraph is constified. */
5754 cgraph_local_info *i = cgraph_node::local_info (CONST_CAST_TREE(decl));
5755 if (i && i->local)
5756 return ARM_PCS_AAPCS_LOCAL;
5759 else if (user_convention && user_pcs != arm_pcs_default)
5760 sorry ("PCS variant");
5762 /* For everything else we use the target's default. */
5763 return arm_pcs_default;
5767 static void
5768 aapcs_vfp_cum_init (CUMULATIVE_ARGS *pcum ATTRIBUTE_UNUSED,
5769 const_tree fntype ATTRIBUTE_UNUSED,
5770 rtx libcall ATTRIBUTE_UNUSED,
5771 const_tree fndecl ATTRIBUTE_UNUSED)
5773 /* Record the unallocated VFP registers. */
5774 pcum->aapcs_vfp_regs_free = (1 << NUM_VFP_ARG_REGS) - 1;
5775 pcum->aapcs_vfp_reg_alloc = 0;
5778 /* Walk down the type tree of TYPE counting consecutive base elements.
5779 If *MODEP is VOIDmode, then set it to the first valid floating point
5780 type. If a non-floating point type is found, or if a floating point
5781 type that doesn't match a non-VOIDmode *MODEP is found, then return -1,
5782 otherwise return the count in the sub-tree. */
5783 static int
5784 aapcs_vfp_sub_candidate (const_tree type, machine_mode *modep)
5786 machine_mode mode;
5787 HOST_WIDE_INT size;
5789 switch (TREE_CODE (type))
5791 case REAL_TYPE:
5792 mode = TYPE_MODE (type);
5793 if (mode != DFmode && mode != SFmode && mode != HFmode)
5794 return -1;
5796 if (*modep == VOIDmode)
5797 *modep = mode;
5799 if (*modep == mode)
5800 return 1;
5802 break;
5804 case COMPLEX_TYPE:
5805 mode = TYPE_MODE (TREE_TYPE (type));
5806 if (mode != DFmode && mode != SFmode)
5807 return -1;
5809 if (*modep == VOIDmode)
5810 *modep = mode;
5812 if (*modep == mode)
5813 return 2;
5815 break;
5817 case VECTOR_TYPE:
5818 /* Use V2SImode and V4SImode as representatives of all 64-bit
5819 and 128-bit vector types, whether or not those modes are
5820 supported with the present options. */
5821 size = int_size_in_bytes (type);
5822 switch (size)
5824 case 8:
5825 mode = V2SImode;
5826 break;
5827 case 16:
5828 mode = V4SImode;
5829 break;
5830 default:
5831 return -1;
5834 if (*modep == VOIDmode)
5835 *modep = mode;
5837 /* Vector modes are considered to be opaque: two vectors are
5838 equivalent for the purposes of being homogeneous aggregates
5839 if they are the same size. */
5840 if (*modep == mode)
5841 return 1;
5843 break;
5845 case ARRAY_TYPE:
5847 int count;
5848 tree index = TYPE_DOMAIN (type);
5850 /* Can't handle incomplete types nor sizes that are not
5851 fixed. */
5852 if (!COMPLETE_TYPE_P (type)
5853 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
5854 return -1;
5856 count = aapcs_vfp_sub_candidate (TREE_TYPE (type), modep);
5857 if (count == -1
5858 || !index
5859 || !TYPE_MAX_VALUE (index)
5860 || !tree_fits_uhwi_p (TYPE_MAX_VALUE (index))
5861 || !TYPE_MIN_VALUE (index)
5862 || !tree_fits_uhwi_p (TYPE_MIN_VALUE (index))
5863 || count < 0)
5864 return -1;
5866 count *= (1 + tree_to_uhwi (TYPE_MAX_VALUE (index))
5867 - tree_to_uhwi (TYPE_MIN_VALUE (index)));
5869 /* There must be no padding. */
5870 if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
5871 return -1;
5873 return count;
5876 case RECORD_TYPE:
5878 int count = 0;
5879 int sub_count;
5880 tree field;
5882 /* Can't handle incomplete types nor sizes that are not
5883 fixed. */
5884 if (!COMPLETE_TYPE_P (type)
5885 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
5886 return -1;
5888 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
5890 if (TREE_CODE (field) != FIELD_DECL)
5891 continue;
5893 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
5894 if (sub_count < 0)
5895 return -1;
5896 count += sub_count;
5899 /* There must be no padding. */
5900 if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
5901 return -1;
5903 return count;
5906 case UNION_TYPE:
5907 case QUAL_UNION_TYPE:
5909 /* These aren't very interesting except in a degenerate case. */
5910 int count = 0;
5911 int sub_count;
5912 tree field;
5914 /* Can't handle incomplete types nor sizes that are not
5915 fixed. */
5916 if (!COMPLETE_TYPE_P (type)
5917 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
5918 return -1;
5920 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
5922 if (TREE_CODE (field) != FIELD_DECL)
5923 continue;
5925 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
5926 if (sub_count < 0)
5927 return -1;
5928 count = count > sub_count ? count : sub_count;
5931 /* There must be no padding. */
5932 if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
5933 return -1;
5935 return count;
5938 default:
5939 break;
5942 return -1;
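/* Illustrative inputs (sketches, not exercised here):

     struct v3 { float x, y, z; };        -- *modep = SFmode, result 3
     struct c2 { double re, im; };        -- *modep = DFmode, result 2
     struct mix { float x; double y; };   -- mixed base types, result -1

   i.e. this recognizes the AAPCS notion of a homogeneous floating-point
   (or vector) aggregate and reports its element count.  */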
5945 /* Return true if PCS_VARIANT should use VFP registers. */
5946 static bool
5947 use_vfp_abi (enum arm_pcs pcs_variant, bool is_double)
5949 if (pcs_variant == ARM_PCS_AAPCS_VFP)
5951 static bool seen_thumb1_vfp = false;
5953 if (TARGET_THUMB1 && !seen_thumb1_vfp)
5955 sorry ("Thumb-1 hard-float VFP ABI");
5956 /* sorry() is not immediately fatal, so only display this once. */
5957 seen_thumb1_vfp = true;
5960 return true;
5963 if (pcs_variant != ARM_PCS_AAPCS_LOCAL)
5964 return false;
5966 return (TARGET_32BIT && TARGET_HARD_FLOAT &&
5967 (TARGET_VFP_DOUBLE || !is_double));
5970 /* Return true if an argument whose type is TYPE, or mode is MODE, is
5971 suitable for passing or returning in VFP registers for the PCS
5972 variant selected. If it is, then *BASE_MODE is updated to contain
5973 a machine mode describing each element of the argument's type and
5974 *COUNT to hold the number of such elements. */
5975 static bool
5976 aapcs_vfp_is_call_or_return_candidate (enum arm_pcs pcs_variant,
5977 machine_mode mode, const_tree type,
5978 machine_mode *base_mode, int *count)
5980 machine_mode new_mode = VOIDmode;
5982 /* If we have the type information, prefer that to working things
5983 out from the mode. */
5984 if (type)
5986 int ag_count = aapcs_vfp_sub_candidate (type, &new_mode);
5988 if (ag_count > 0 && ag_count <= 4)
5989 *count = ag_count;
5990 else
5991 return false;
5993 else if (GET_MODE_CLASS (mode) == MODE_FLOAT
5994 || GET_MODE_CLASS (mode) == MODE_VECTOR_INT
5995 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
5997 *count = 1;
5998 new_mode = mode;
6000 else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
6002 *count = 2;
6003 new_mode = (mode == DCmode ? DFmode : SFmode);
6005 else
6006 return false;
6009 if (!use_vfp_abi (pcs_variant, ARM_NUM_REGS (new_mode) > 1))
6010 return false;
6012 *base_mode = new_mode;
6013 return true;
6016 static bool
6017 aapcs_vfp_is_return_candidate (enum arm_pcs pcs_variant,
6018 machine_mode mode, const_tree type)
6020 int count ATTRIBUTE_UNUSED;
6021 machine_mode ag_mode ATTRIBUTE_UNUSED;
6023 if (!use_vfp_abi (pcs_variant, false))
6024 return false;
6025 return aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type,
6026 &ag_mode, &count);
6029 static bool
6030 aapcs_vfp_is_call_candidate (CUMULATIVE_ARGS *pcum, machine_mode mode,
6031 const_tree type)
6033 if (!use_vfp_abi (pcum->pcs_variant, false))
6034 return false;
6036 return aapcs_vfp_is_call_or_return_candidate (pcum->pcs_variant, mode, type,
6037 &pcum->aapcs_vfp_rmode,
6038 &pcum->aapcs_vfp_rcount);
6041 /* Implement the allocate field in aapcs_cp_arg_layout. See the comment there
6042 for the behaviour of this function. */
6044 static bool
6045 aapcs_vfp_allocate (CUMULATIVE_ARGS *pcum, machine_mode mode,
6046 const_tree type ATTRIBUTE_UNUSED)
6048 int rmode_size
6049 = MAX (GET_MODE_SIZE (pcum->aapcs_vfp_rmode), GET_MODE_SIZE (SFmode));
6050 int shift = rmode_size / GET_MODE_SIZE (SFmode);
6051 unsigned mask = (1 << (shift * pcum->aapcs_vfp_rcount)) - 1;
6052 int regno;
6054 for (regno = 0; regno < NUM_VFP_ARG_REGS; regno += shift)
6055 if (((pcum->aapcs_vfp_regs_free >> regno) & mask) == mask)
6057 pcum->aapcs_vfp_reg_alloc = mask << regno;
6058 if (mode == BLKmode
6059 || (mode == TImode && ! TARGET_NEON)
6060 || ! arm_hard_regno_mode_ok (FIRST_VFP_REGNUM + regno, mode))
6062 int i;
6063 int rcount = pcum->aapcs_vfp_rcount;
6064 int rshift = shift;
6065 machine_mode rmode = pcum->aapcs_vfp_rmode;
6066 rtx par;
6067 if (!TARGET_NEON)
6069 /* Avoid using unsupported vector modes. */
6070 if (rmode == V2SImode)
6071 rmode = DImode;
6072 else if (rmode == V4SImode)
6074 rmode = DImode;
6075 rcount *= 2;
6076 rshift /= 2;
6079 par = gen_rtx_PARALLEL (mode, rtvec_alloc (rcount));
6080 for (i = 0; i < rcount; i++)
6082 rtx tmp = gen_rtx_REG (rmode,
6083 FIRST_VFP_REGNUM + regno + i * rshift);
6084 tmp = gen_rtx_EXPR_LIST
6085 (VOIDmode, tmp,
6086 GEN_INT (i * GET_MODE_SIZE (rmode)));
6087 XVECEXP (par, 0, i) = tmp;
6090 pcum->aapcs_reg = par;
6092 else
6093 pcum->aapcs_reg = gen_rtx_REG (mode, FIRST_VFP_REGNUM + regno);
6094 return true;
6096 return false;
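/* Worked example of the arithmetic above (sketch only): for a candidate
   made of two DFmode elements, rmode_size is 8, shift is 2 (allocation
   is tracked in SFmode-sized units) and mask is 0xf, so the loop looks
   for four consecutive free S registers starting on a D-register
   boundary, e.g. s0-s3 (d0/d1), and records them in
   aapcs_vfp_reg_alloc.  */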
6099 /* Implement the allocate_return_reg field in aapcs_cp_arg_layout. See the
6100 comment there for the behaviour of this function. */
6102 static rtx
6103 aapcs_vfp_allocate_return_reg (enum arm_pcs pcs_variant ATTRIBUTE_UNUSED,
6104 machine_mode mode,
6105 const_tree type ATTRIBUTE_UNUSED)
6107 if (!use_vfp_abi (pcs_variant, false))
6108 return NULL;
6110 if (mode == BLKmode
6111 || (GET_MODE_CLASS (mode) == MODE_INT
6112 && GET_MODE_SIZE (mode) >= GET_MODE_SIZE (TImode)
6113 && !TARGET_NEON))
6115 int count;
6116 machine_mode ag_mode;
6117 int i;
6118 rtx par;
6119 int shift;
6121 aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type,
6122 &ag_mode, &count);
6124 if (!TARGET_NEON)
6126 if (ag_mode == V2SImode)
6127 ag_mode = DImode;
6128 else if (ag_mode == V4SImode)
6130 ag_mode = DImode;
6131 count *= 2;
6134 shift = GET_MODE_SIZE(ag_mode) / GET_MODE_SIZE(SFmode);
6135 par = gen_rtx_PARALLEL (mode, rtvec_alloc (count));
6136 for (i = 0; i < count; i++)
6138 rtx tmp = gen_rtx_REG (ag_mode, FIRST_VFP_REGNUM + i * shift);
6139 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
6140 GEN_INT (i * GET_MODE_SIZE (ag_mode)));
6141 XVECEXP (par, 0, i) = tmp;
6144 return par;
6147 return gen_rtx_REG (mode, FIRST_VFP_REGNUM);
6150 static void
6151 aapcs_vfp_advance (CUMULATIVE_ARGS *pcum ATTRIBUTE_UNUSED,
6152 machine_mode mode ATTRIBUTE_UNUSED,
6153 const_tree type ATTRIBUTE_UNUSED)
6155 pcum->aapcs_vfp_regs_free &= ~pcum->aapcs_vfp_reg_alloc;
6156 pcum->aapcs_vfp_reg_alloc = 0;
6157 return;
6160 #define AAPCS_CP(X) \
6162 aapcs_ ## X ## _cum_init, \
6163 aapcs_ ## X ## _is_call_candidate, \
6164 aapcs_ ## X ## _allocate, \
6165 aapcs_ ## X ## _is_return_candidate, \
6166 aapcs_ ## X ## _allocate_return_reg, \
6167 aapcs_ ## X ## _advance \
6170 /* Table of co-processors that can be used to pass arguments in
6171 registers. Ideally no argument should be a candidate for more than
6172 one co-processor table entry, but the table is processed in order
6173 and stops after the first match. If that entry then fails to put
6174 the argument into a co-processor register, the argument will go on
6175 the stack. */
6176 static struct
6178 /* Initialize co-processor related state in CUMULATIVE_ARGS structure. */
6179 void (*cum_init) (CUMULATIVE_ARGS *, const_tree, rtx, const_tree);
6181 /* Return true if an argument of mode MODE (or type TYPE if MODE is
6182 BLKmode) is a candidate for this co-processor's registers; this
6183 function should ignore any position-dependent state in
6184 CUMULATIVE_ARGS and only use call-type dependent information. */
6185 bool (*is_call_candidate) (CUMULATIVE_ARGS *, machine_mode, const_tree);
6187 /* Return true if the argument does get a co-processor register; it
6188 should set aapcs_reg to an RTX of the register allocated as is
6189 required for a return from FUNCTION_ARG. */
6190 bool (*allocate) (CUMULATIVE_ARGS *, machine_mode, const_tree);
6192 /* Return true if a result of mode MODE (or type TYPE if MODE is BLKmode) can
6193 be returned in this co-processor's registers. */
6194 bool (*is_return_candidate) (enum arm_pcs, machine_mode, const_tree);
6196 /* Allocate and return an RTX element to hold the return type of a call. This
6197 routine must not fail and will only be called if is_return_candidate
6198 returned true with the same parameters. */
6199 rtx (*allocate_return_reg) (enum arm_pcs, machine_mode, const_tree);
6201 /* Finish processing this argument and prepare to start processing
6202 the next one. */
6203 void (*advance) (CUMULATIVE_ARGS *, machine_mode, const_tree);
6204 } aapcs_cp_arg_layout[ARM_NUM_COPROC_SLOTS] =
6206 AAPCS_CP(vfp)
6209 #undef AAPCS_CP
6211 static int
6212 aapcs_select_call_coproc (CUMULATIVE_ARGS *pcum, machine_mode mode,
6213 const_tree type)
6215 int i;
6217 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
6218 if (aapcs_cp_arg_layout[i].is_call_candidate (pcum, mode, type))
6219 return i;
6221 return -1;
6224 static int
6225 aapcs_select_return_coproc (const_tree type, const_tree fntype)
6227 /* We aren't passed a decl, so we can't check that a call is local.
6228 However, it isn't clear that that would be a win anyway, since it
6229 might limit some tail-calling opportunities. */
6230 enum arm_pcs pcs_variant;
6232 if (fntype)
6234 const_tree fndecl = NULL_TREE;
6236 if (TREE_CODE (fntype) == FUNCTION_DECL)
6238 fndecl = fntype;
6239 fntype = TREE_TYPE (fntype);
6242 pcs_variant = arm_get_pcs_model (fntype, fndecl);
6244 else
6245 pcs_variant = arm_pcs_default;
6247 if (pcs_variant != ARM_PCS_AAPCS)
6249 int i;
6251 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
6252 if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant,
6253 TYPE_MODE (type),
6254 type))
6255 return i;
6257 return -1;
6260 static rtx
6261 aapcs_allocate_return_reg (machine_mode mode, const_tree type,
6262 const_tree fntype)
6264 /* We aren't passed a decl, so we can't check that a call is local.
6265 However, it isn't clear that that would be a win anyway, since it
6266 might limit some tail-calling opportunities. */
6267 enum arm_pcs pcs_variant;
6268 int unsignedp ATTRIBUTE_UNUSED;
6270 if (fntype)
6272 const_tree fndecl = NULL_TREE;
6274 if (TREE_CODE (fntype) == FUNCTION_DECL)
6276 fndecl = fntype;
6277 fntype = TREE_TYPE (fntype);
6280 pcs_variant = arm_get_pcs_model (fntype, fndecl);
6282 else
6283 pcs_variant = arm_pcs_default;
6285 /* Promote integer types. */
6286 if (type && INTEGRAL_TYPE_P (type))
6287 mode = arm_promote_function_mode (type, mode, &unsignedp, fntype, 1);
6289 if (pcs_variant != ARM_PCS_AAPCS)
6291 int i;
6293 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
6294 if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant, mode,
6295 type))
6296 return aapcs_cp_arg_layout[i].allocate_return_reg (pcs_variant,
6297 mode, type);
6300 /* Promotes small structs returned in a register to full-word size
6301 for big-endian AAPCS. */
6302 if (type && arm_return_in_msb (type))
6304 HOST_WIDE_INT size = int_size_in_bytes (type);
6305 if (size % UNITS_PER_WORD != 0)
6307 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
6308 mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
6312 return gen_rtx_REG (mode, R0_REGNUM);
6315 static rtx
6316 aapcs_libcall_value (machine_mode mode)
6318 if (BYTES_BIG_ENDIAN && ALL_FIXED_POINT_MODE_P (mode)
6319 && GET_MODE_SIZE (mode) <= 4)
6320 mode = SImode;
6322 return aapcs_allocate_return_reg (mode, NULL_TREE, NULL_TREE);
6325 /* Lay out a function argument using the AAPCS rules. The rule
6326 numbers referred to here are those in the AAPCS. */
6327 static void
6328 aapcs_layout_arg (CUMULATIVE_ARGS *pcum, machine_mode mode,
6329 const_tree type, bool named)
6331 int nregs, nregs2;
6332 int ncrn;
6334 /* We only need to do this once per argument. */
6335 if (pcum->aapcs_arg_processed)
6336 return;
6338 pcum->aapcs_arg_processed = true;
6340 /* Special case: if named is false then we are handling an incoming
6341 anonymous argument which is on the stack. */
6342 if (!named)
6343 return;
6345 /* Is this a potential co-processor register candidate? */
6346 if (pcum->pcs_variant != ARM_PCS_AAPCS)
6348 int slot = aapcs_select_call_coproc (pcum, mode, type);
6349 pcum->aapcs_cprc_slot = slot;
6351 /* We don't have to apply any of the rules from part B of the
6352 preparation phase, these are handled elsewhere in the
6353 compiler. */
6355 if (slot >= 0)
6357 /* A Co-processor register candidate goes either in its own
6358 class of registers or on the stack. */
6359 if (!pcum->aapcs_cprc_failed[slot])
6361 /* C1.cp - Try to allocate the argument to co-processor
6362 registers. */
6363 if (aapcs_cp_arg_layout[slot].allocate (pcum, mode, type))
6364 return;
6366 /* C2.cp - Put the argument on the stack and note that we
6367 can't assign any more candidates in this slot. We also
6368 need to note that we have allocated stack space, so that
6369 we won't later try to split a non-cprc candidate between
6370 core registers and the stack. */
6371 pcum->aapcs_cprc_failed[slot] = true;
6372 pcum->can_split = false;
6375 /* We didn't get a register, so this argument goes on the
6376 stack. */
6377 gcc_assert (pcum->can_split == false);
6378 return;
6382 /* C3 - For double-word aligned arguments, round the NCRN up to the
6383 next even number. */
6384 ncrn = pcum->aapcs_ncrn;
6385 if (ncrn & 1)
6387 int res = arm_needs_doubleword_align (mode, type);
6388 /* Only warn during RTL expansion of call stmts, otherwise we would
6389 warn e.g. during gimplification even on functions that will be
6390 always inlined, and we'd warn multiple times. Don't warn when
6391 called in expand_function_start either, as we warn instead in
6392 arm_function_arg_boundary in that case. */
6393 if (res < 0 && warn_psabi && currently_expanding_gimple_stmt)
6394 inform (input_location, "parameter passing for argument of type "
6395 "%qT changed in GCC 7.1", type);
6396 else if (res > 0)
6397 ncrn++;
6400 nregs = ARM_NUM_REGS2(mode, type);
6402 /* Sigh, this test should really assert that nregs > 0, but a GCC
6403 extension allows empty structs and then gives them empty size; it
6404 then allows such a structure to be passed by value. For some of
6405 the code below we have to pretend that such an argument has
6406 non-zero size so that we 'locate' it correctly either in
6407 registers or on the stack. */
6408 gcc_assert (nregs >= 0);
6410 nregs2 = nregs ? nregs : 1;
6412 /* C4 - Argument fits entirely in core registers. */
6413 if (ncrn + nregs2 <= NUM_ARG_REGS)
6415 pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
6416 pcum->aapcs_next_ncrn = ncrn + nregs;
6417 return;
6420 /* C5 - Some core registers left and there are no arguments already
6421 on the stack: split this argument between the remaining core
6422 registers and the stack. */
6423 if (ncrn < NUM_ARG_REGS && pcum->can_split)
6425 pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
6426 pcum->aapcs_next_ncrn = NUM_ARG_REGS;
6427 pcum->aapcs_partial = (NUM_ARG_REGS - ncrn) * UNITS_PER_WORD;
6428 return;
6431 /* C6 - NCRN is set to 4. */
6432 pcum->aapcs_next_ncrn = NUM_ARG_REGS;
6434 /* C7,C8 - argument goes on the stack. We have nothing to do here. */
6435 return;
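/* Illustrative application of the rules above (sketch only; struct s12
   is a made-up 12-byte type):

     struct s12 { int x[3]; };
     void f (int a, long long b, struct s12 c);

   a takes r0 (C4).  b needs doubleword alignment, so C3 rounds the NCRN
   up from 1 to 2 and b takes r2/r3 (C4).  c then finds no core
   registers left: C6 pins the NCRN at 4 and C7/C8 send it to the stack.
   Had b been a plain int, c would instead have been split under C5:
   r2/r3 plus 4 bytes of stack, with aapcs_partial = 8.  */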
6438 /* Initialize a variable CUM of type CUMULATIVE_ARGS
6439 for a call to a function whose data type is FNTYPE.
6440 For a library call, FNTYPE is NULL. */
6441 void
6442 arm_init_cumulative_args (CUMULATIVE_ARGS *pcum, tree fntype,
6443 rtx libname,
6444 tree fndecl ATTRIBUTE_UNUSED)
6446 /* Long call handling. */
6447 if (fntype)
6448 pcum->pcs_variant = arm_get_pcs_model (fntype, fndecl);
6449 else
6450 pcum->pcs_variant = arm_pcs_default;
6452 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
6454 if (arm_libcall_uses_aapcs_base (libname))
6455 pcum->pcs_variant = ARM_PCS_AAPCS;
6457 pcum->aapcs_ncrn = pcum->aapcs_next_ncrn = 0;
6458 pcum->aapcs_reg = NULL_RTX;
6459 pcum->aapcs_partial = 0;
6460 pcum->aapcs_arg_processed = false;
6461 pcum->aapcs_cprc_slot = -1;
6462 pcum->can_split = true;
6464 if (pcum->pcs_variant != ARM_PCS_AAPCS)
6466 int i;
6468 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
6470 pcum->aapcs_cprc_failed[i] = false;
6471 aapcs_cp_arg_layout[i].cum_init (pcum, fntype, libname, fndecl);
6474 return;
6477 /* Legacy ABIs */
6479 /* On the ARM, the offset starts at 0. */
6480 pcum->nregs = 0;
6481 pcum->iwmmxt_nregs = 0;
6482 pcum->can_split = true;
6484 /* Varargs vectors are treated the same as long long.
6485 named_count avoids having to change the way arm handles 'named' */
6486 pcum->named_count = 0;
6487 pcum->nargs = 0;
6489 if (TARGET_REALLY_IWMMXT && fntype)
6491 tree fn_arg;
6493 for (fn_arg = TYPE_ARG_TYPES (fntype);
6494 fn_arg;
6495 fn_arg = TREE_CHAIN (fn_arg))
6496 pcum->named_count += 1;
6498 if (! pcum->named_count)
6499 pcum->named_count = INT_MAX;
6503 /* Return 1 if double word alignment is required for argument passing.
6504 Return -1 if double word alignment used to be required for argument
6505 passing before PR77728 ABI fix, but is not required anymore.
6506 Return 0 if double word alignment is not required and wasn't required
6507 before either. */
6508 static int
6509 arm_needs_doubleword_align (machine_mode mode, const_tree type)
6511 if (!type)
6512 return GET_MODE_ALIGNMENT (mode) > PARM_BOUNDARY;
6514 /* Scalar and vector types: Use natural alignment, i.e. of base type. */
6515 if (!AGGREGATE_TYPE_P (type))
6516 return TYPE_ALIGN (TYPE_MAIN_VARIANT (type)) > PARM_BOUNDARY;
6518 /* Array types: Use member alignment of element type. */
6519 if (TREE_CODE (type) == ARRAY_TYPE)
6520 return TYPE_ALIGN (TREE_TYPE (type)) > PARM_BOUNDARY;
6522 int ret = 0;
6523 /* Record/aggregate types: Use greatest member alignment of any member. */
6524 for (tree field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6525 if (DECL_ALIGN (field) > PARM_BOUNDARY)
6527 if (TREE_CODE (field) == FIELD_DECL)
6528 return 1;
6529 else
6530 /* Before PR77728 fix, we were incorrectly considering also
6531 other aggregate fields, like VAR_DECLs, TYPE_DECLs etc.
6532 Make sure we can warn about that with -Wpsabi. */
6533 ret = -1;
6536 return ret;
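/* For instance (sketch): "long long", "double" and any struct with a
   64-bit aligned FIELD_DECL answer 1 and go in an even-numbered core
   register pair or an 8-byte aligned stack slot; "int" and
   struct { int a, b; } answer 0.  A type that is only over-aligned via
   a non-FIELD_DECL member (the pre-PR77728 behaviour) answers -1 so
   that callers can emit the -Wpsabi note without changing the ABI
   again.  */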
6540 /* Determine where to put an argument to a function.
6541 Value is zero to push the argument on the stack,
6542 or a hard register in which to store the argument.
6544 MODE is the argument's machine mode.
6545 TYPE is the data type of the argument (as a tree).
6546 This is null for libcalls where that information may
6547 not be available.
6548 CUM is a variable of type CUMULATIVE_ARGS which gives info about
6549 the preceding args and about the function being called.
6550 NAMED is nonzero if this argument is a named parameter
6551 (otherwise it is an extra parameter matching an ellipsis).
6553 On the ARM, normally the first 16 bytes are passed in registers r0-r3; all
6554 other arguments are passed on the stack. If (NAMED == 0) (which happens
6555 only in assign_parms, since TARGET_SETUP_INCOMING_VARARGS is
6556 defined), say it is passed in the stack (function_prologue will
6557 indeed make it pass in the stack if necessary). */
6559 static rtx
6560 arm_function_arg (cumulative_args_t pcum_v, machine_mode mode,
6561 const_tree type, bool named)
6563 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
6564 int nregs;
6566 /* Handle the special case quickly. Pick an arbitrary value for op2 of
6567 a call insn (op3 of a call_value insn). */
6568 if (mode == VOIDmode)
6569 return const0_rtx;
6571 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
6573 aapcs_layout_arg (pcum, mode, type, named);
6574 return pcum->aapcs_reg;
6577 /* Varargs vectors are treated the same as long long.
6578 named_count avoids having to change the way arm handles 'named' */
6579 if (TARGET_IWMMXT_ABI
6580 && arm_vector_mode_supported_p (mode)
6581 && pcum->named_count > pcum->nargs + 1)
6583 if (pcum->iwmmxt_nregs <= 9)
6584 return gen_rtx_REG (mode, pcum->iwmmxt_nregs + FIRST_IWMMXT_REGNUM);
6585 else
6587 pcum->can_split = false;
6588 return NULL_RTX;
6592 /* Put doubleword aligned quantities in even register pairs. */
6593 if ((pcum->nregs & 1) && ARM_DOUBLEWORD_ALIGN)
6595 int res = arm_needs_doubleword_align (mode, type);
6596 if (res < 0 && warn_psabi)
6597 inform (input_location, "parameter passing for argument of type "
6598 "%qT changed in GCC 7.1", type);
6599 else if (res > 0)
6600 pcum->nregs++;
6603 /* Only allow splitting an arg between regs and memory if all preceding
6604 args were allocated to regs. For args passed by reference we only count
6605 the reference pointer. */
6606 if (pcum->can_split)
6607 nregs = 1;
6608 else
6609 nregs = ARM_NUM_REGS2 (mode, type);
6611 if (!named || pcum->nregs + nregs > NUM_ARG_REGS)
6612 return NULL_RTX;
6614 return gen_rtx_REG (mode, pcum->nregs);
6617 static unsigned int
6618 arm_function_arg_boundary (machine_mode mode, const_tree type)
6620 if (!ARM_DOUBLEWORD_ALIGN)
6621 return PARM_BOUNDARY;
6623 int res = arm_needs_doubleword_align (mode, type);
6624 if (res < 0 && warn_psabi)
6625 inform (input_location, "parameter passing for argument of type %qT "
6626 "changed in GCC 7.1", type);
6628 return res > 0 ? DOUBLEWORD_ALIGNMENT : PARM_BOUNDARY;
6631 static int
6632 arm_arg_partial_bytes (cumulative_args_t pcum_v, machine_mode mode,
6633 tree type, bool named)
6635 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
6636 int nregs = pcum->nregs;
6638 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
6640 aapcs_layout_arg (pcum, mode, type, named);
6641 return pcum->aapcs_partial;
6644 if (TARGET_IWMMXT_ABI && arm_vector_mode_supported_p (mode))
6645 return 0;
6647 if (NUM_ARG_REGS > nregs
6648 && (NUM_ARG_REGS < nregs + ARM_NUM_REGS2 (mode, type))
6649 && pcum->can_split)
6650 return (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
6652 return 0;
6655 /* Update the data in PCUM to advance over an argument
6656 of mode MODE and data type TYPE.
6657 (TYPE is null for libcalls where that information may not be available.) */
6659 static void
6660 arm_function_arg_advance (cumulative_args_t pcum_v, machine_mode mode,
6661 const_tree type, bool named)
6663 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
6665 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
6667 aapcs_layout_arg (pcum, mode, type, named);
6669 if (pcum->aapcs_cprc_slot >= 0)
6671 aapcs_cp_arg_layout[pcum->aapcs_cprc_slot].advance (pcum, mode,
6672 type);
6673 pcum->aapcs_cprc_slot = -1;
6676 /* Generic stuff. */
6677 pcum->aapcs_arg_processed = false;
6678 pcum->aapcs_ncrn = pcum->aapcs_next_ncrn;
6679 pcum->aapcs_reg = NULL_RTX;
6680 pcum->aapcs_partial = 0;
6682 else
6684 pcum->nargs += 1;
6685 if (arm_vector_mode_supported_p (mode)
6686 && pcum->named_count > pcum->nargs
6687 && TARGET_IWMMXT_ABI)
6688 pcum->iwmmxt_nregs += 1;
6689 else
6690 pcum->nregs += ARM_NUM_REGS2 (mode, type);
6694 /* Variable sized types are passed by reference. This is a GCC
6695 extension to the ARM ABI. */
6697 static bool
6698 arm_pass_by_reference (cumulative_args_t cum ATTRIBUTE_UNUSED,
6699 machine_mode mode ATTRIBUTE_UNUSED,
6700 const_tree type, bool named ATTRIBUTE_UNUSED)
6702 return type && TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST;
6705 /* Encode the current state of the #pragma [no_]long_calls. */
6706 typedef enum
6708 OFF, /* No #pragma [no_]long_calls is in effect. */
6709 LONG, /* #pragma long_calls is in effect. */
6710 SHORT /* #pragma no_long_calls is in effect. */
6711 } arm_pragma_enum;
6713 static arm_pragma_enum arm_pragma_long_calls = OFF;
6715 void
6716 arm_pr_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
6718 arm_pragma_long_calls = LONG;
6721 void
6722 arm_pr_no_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
6724 arm_pragma_long_calls = SHORT;
6727 void
6728 arm_pr_long_calls_off (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
6730 arm_pragma_long_calls = OFF;
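/* Typical user-level use of these pragmas (illustration only):

     #pragma long_calls
     void far_away_handler (void);     -- implicitly gets "long_call"
     #pragma long_calls_off
     void nearby_helper (void);        -- back to the command-line default

   arm_set_default_type_attributes below attaches the matching
   long_call/short_call type attribute while a pragma is in effect.  */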
6733 /* Handle an attribute requiring a FUNCTION_DECL;
6734 arguments as in struct attribute_spec.handler. */
6735 static tree
6736 arm_handle_fndecl_attribute (tree *node, tree name, tree args ATTRIBUTE_UNUSED,
6737 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
6739 if (TREE_CODE (*node) != FUNCTION_DECL)
6741 warning (OPT_Wattributes, "%qE attribute only applies to functions",
6742 name);
6743 *no_add_attrs = true;
6746 return NULL_TREE;
6749 /* Handle an "interrupt" or "isr" attribute;
6750 arguments as in struct attribute_spec.handler. */
6751 static tree
6752 arm_handle_isr_attribute (tree *node, tree name, tree args, int flags,
6753 bool *no_add_attrs)
6755 if (DECL_P (*node))
6757 if (TREE_CODE (*node) != FUNCTION_DECL)
6759 warning (OPT_Wattributes, "%qE attribute only applies to functions",
6760 name);
6761 *no_add_attrs = true;
6763 /* FIXME: the argument if any is checked for type attributes;
6764 should it be checked for decl ones? */
6766 else
6768 if (TREE_CODE (*node) == FUNCTION_TYPE
6769 || TREE_CODE (*node) == METHOD_TYPE)
6771 if (arm_isr_value (args) == ARM_FT_UNKNOWN)
6773 warning (OPT_Wattributes, "%qE attribute ignored",
6774 name);
6775 *no_add_attrs = true;
6778 else if (TREE_CODE (*node) == POINTER_TYPE
6779 && (TREE_CODE (TREE_TYPE (*node)) == FUNCTION_TYPE
6780 || TREE_CODE (TREE_TYPE (*node)) == METHOD_TYPE)
6781 && arm_isr_value (args) != ARM_FT_UNKNOWN)
6783 *node = build_variant_type_copy (*node);
6784 TREE_TYPE (*node) = build_type_attribute_variant
6785 (TREE_TYPE (*node),
6786 tree_cons (name, args, TYPE_ATTRIBUTES (TREE_TYPE (*node))));
6787 *no_add_attrs = true;
6789 else
6791 /* Possibly pass this attribute on from the type to a decl. */
6792 if (flags & ((int) ATTR_FLAG_DECL_NEXT
6793 | (int) ATTR_FLAG_FUNCTION_NEXT
6794 | (int) ATTR_FLAG_ARRAY_NEXT))
6796 *no_add_attrs = true;
6797 return tree_cons (name, args, NULL_TREE);
6799 else
6801 warning (OPT_Wattributes, "%qE attribute ignored",
6802 name);
6807 return NULL_TREE;
6810 /* Handle a "pcs" attribute; arguments as in struct
6811 attribute_spec.handler. */
6812 static tree
6813 arm_handle_pcs_attribute (tree *node ATTRIBUTE_UNUSED, tree name, tree args,
6814 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
6816 if (arm_pcs_from_attribute (args) == ARM_PCS_UNKNOWN)
6818 warning (OPT_Wattributes, "%qE attribute ignored", name);
6819 *no_add_attrs = true;
6821 return NULL_TREE;
6824 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
6825 /* Handle the "notshared" attribute. This attribute is another way of
6826 requesting hidden visibility. ARM's compiler supports
6827 "__declspec(notshared)"; we support the same thing via an
6828 attribute. */
6830 static tree
6831 arm_handle_notshared_attribute (tree *node,
6832 tree name ATTRIBUTE_UNUSED,
6833 tree args ATTRIBUTE_UNUSED,
6834 int flags ATTRIBUTE_UNUSED,
6835 bool *no_add_attrs)
6837 tree decl = TYPE_NAME (*node);
6839 if (decl)
6841 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
6842 DECL_VISIBILITY_SPECIFIED (decl) = 1;
6843 *no_add_attrs = false;
6845 return NULL_TREE;
6847 #endif
6849 /* This function returns true if a function with declaration FNDECL and type
6850 FNTYPE uses the stack to pass arguments or return variables and false
6851 otherwise. This is used for functions with the attributes
6852 'cmse_nonsecure_call' or 'cmse_nonsecure_entry' and this function will issue
6853 diagnostic messages if the stack is used. NAME is the name of the attribute
6854 used. */
6856 static bool
6857 cmse_func_args_or_return_in_stack (tree fndecl, tree name, tree fntype)
6859 function_args_iterator args_iter;
6860 CUMULATIVE_ARGS args_so_far_v;
6861 cumulative_args_t args_so_far;
6862 bool first_param = true;
6863 tree arg_type, prev_arg_type = NULL_TREE, ret_type;
6865 /* Error out if any argument is passed on the stack. */
6866 arm_init_cumulative_args (&args_so_far_v, fntype, NULL_RTX, fndecl);
6867 args_so_far = pack_cumulative_args (&args_so_far_v);
6868 FOREACH_FUNCTION_ARGS (fntype, arg_type, args_iter)
6870 rtx arg_rtx;
6871 machine_mode arg_mode = TYPE_MODE (arg_type);
6873 prev_arg_type = arg_type;
6874 if (VOID_TYPE_P (arg_type))
6875 continue;
6877 if (!first_param)
6878 arm_function_arg_advance (args_so_far, arg_mode, arg_type, true);
6879 arg_rtx = arm_function_arg (args_so_far, arg_mode, arg_type, true);
6880 if (!arg_rtx
6881 || arm_arg_partial_bytes (args_so_far, arg_mode, arg_type, true))
6883 error ("%qE attribute not available to functions with arguments "
6884 "passed on the stack", name);
6885 return true;
6887 first_param = false;
6890 /* Error out for variadic functions since we cannot control how many
6891 arguments will be passed and thus the stack could be used. stdarg_p () is
6892 not used for this check to avoid walking the argument list twice. */
6893 if (prev_arg_type != NULL_TREE && !VOID_TYPE_P (prev_arg_type))
6895 error ("%qE attribute not available to functions with variable number "
6896 "of arguments", name);
6897 return true;
6900 /* Error out if return value is passed on the stack. */
6901 ret_type = TREE_TYPE (fntype);
6902 if (arm_return_in_memory (ret_type, fntype))
6904 error ("%qE attribute not available to functions that return value on "
6905 "the stack", name);
6906 return true;
6908 return false;
6911 /* Called upon detection of the use of the cmse_nonsecure_entry attribute, this
6912 function will check whether the attribute is allowed here and will add the
6913 attribute to the function declaration tree or otherwise issue a warning. */
6915 static tree
6916 arm_handle_cmse_nonsecure_entry (tree *node, tree name,
6917 tree /* args */,
6918 int /* flags */,
6919 bool *no_add_attrs)
6921 tree fndecl;
6923 if (!use_cmse)
6925 *no_add_attrs = true;
6926 warning (OPT_Wattributes, "%qE attribute ignored without -mcmse option.",
6927 name);
6928 return NULL_TREE;
6931 /* Ignore attribute for function types. */
6932 if (TREE_CODE (*node) != FUNCTION_DECL)
6934 warning (OPT_Wattributes, "%qE attribute only applies to functions",
6935 name);
6936 *no_add_attrs = true;
6937 return NULL_TREE;
6940 fndecl = *node;
6942 /* Warn for static linkage functions. */
6943 if (!TREE_PUBLIC (fndecl))
6945 warning (OPT_Wattributes, "%qE attribute has no effect on functions "
6946 "with static linkage", name);
6947 *no_add_attrs = true;
6948 return NULL_TREE;
6951 *no_add_attrs |= cmse_func_args_or_return_in_stack (fndecl, name,
6952 TREE_TYPE (fndecl));
6953 return NULL_TREE;
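/* Illustrative use of the attribute handled above (ARMv8-M Security
   Extensions, requires -mcmse).  The body below is not compiled here;
   do_secure_work is a placeholder name for this sketch.  */
#if 0
int __attribute__ ((cmse_nonsecure_entry))
secure_gateway (int request)
{
  /* All arguments and the return value must fit in registers, which is
     what cmse_func_args_or_return_in_stack verified above.  */
  return do_secure_work (request);
}
#endif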
6957 /* Called upon detection of the use of the cmse_nonsecure_call attribute, this
6958 function will check whether the attribute is allowed here and will add the
6959 attribute to the function type tree or otherwise issue a diagnostic. The
6960 reason we check this at declaration time is to only allow the use of the
6961 attribute with declarations of function pointers and not function
6962 declarations. This function checks NODE is of the expected type and issues
6963 diagnostics otherwise using NAME. If it is not of the expected type
6964 *NO_ADD_ATTRS will be set to true. */
6966 static tree
6967 arm_handle_cmse_nonsecure_call (tree *node, tree name,
6968 tree /* args */,
6969 int /* flags */,
6970 bool *no_add_attrs)
6972 tree decl = NULL_TREE, fntype = NULL_TREE;
6973 tree type;
6975 if (!use_cmse)
6977 *no_add_attrs = true;
6978 warning (OPT_Wattributes, "%qE attribute ignored without -mcmse option.",
6979 name);
6980 return NULL_TREE;
6983 if (TREE_CODE (*node) == VAR_DECL || TREE_CODE (*node) == TYPE_DECL)
6985 decl = *node;
6986 fntype = TREE_TYPE (decl);
6989 while (fntype != NULL_TREE && TREE_CODE (fntype) == POINTER_TYPE)
6990 fntype = TREE_TYPE (fntype);
6992 if (!decl || TREE_CODE (fntype) != FUNCTION_TYPE)
6994 warning (OPT_Wattributes, "%qE attribute only applies to base type of a "
6995 "function pointer", name);
6996 *no_add_attrs = true;
6997 return NULL_TREE;
7000 *no_add_attrs |= cmse_func_args_or_return_in_stack (NULL, name, fntype);
7002 if (*no_add_attrs)
7003 return NULL_TREE;
7005 /* Prevent trees being shared among function types with and without
7006 cmse_nonsecure_call attribute. */
7007 type = TREE_TYPE (decl);
7009 type = build_distinct_type_copy (type);
7010 TREE_TYPE (decl) = type;
7011 fntype = type;
7013 while (TREE_CODE (fntype) != FUNCTION_TYPE)
7015 type = fntype;
7016 fntype = TREE_TYPE (fntype);
7017 fntype = build_distinct_type_copy (fntype);
7018 TREE_TYPE (type) = fntype;
7021 /* Construct a type attribute and add it to the function type. */
7022 tree attrs = tree_cons (get_identifier ("cmse_nonsecure_call"), NULL_TREE,
7023 TYPE_ATTRIBUTES (fntype));
7024 TYPE_ATTRIBUTES (fntype) = attrs;
7025 return NULL_TREE;
7028 /* Return 0 if the attributes for two types are incompatible, 1 if they
7029 are compatible, and 2 if they are nearly compatible (which causes a
7030 warning to be generated). */
7031 static int
7032 arm_comp_type_attributes (const_tree type1, const_tree type2)
7034 int l1, l2, s1, s2;
7036 /* Check for mismatch of non-default calling convention. */
7037 if (TREE_CODE (type1) != FUNCTION_TYPE)
7038 return 1;
7040 /* Check for mismatched call attributes. */
7041 l1 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type1)) != NULL;
7042 l2 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type2)) != NULL;
7043 s1 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type1)) != NULL;
7044 s2 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type2)) != NULL;
7046 /* Only bother to check if an attribute is defined. */
7047 if (l1 | l2 | s1 | s2)
7049 /* If one type has an attribute, the other must have the same attribute. */
7050 if ((l1 != l2) || (s1 != s2))
7051 return 0;
7053 /* Disallow mixed attributes. */
7054 if ((l1 & s2) || (l2 & s1))
7055 return 0;
7058 /* Check for mismatched ISR attribute. */
7059 l1 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type1)) != NULL;
7060 if (! l1)
7061 l1 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type1)) != NULL;
7062 l2 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type2)) != NULL;
7063 if (! l2)
7064 l2 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type2)) != NULL;
7065 if (l1 != l2)
7066 return 0;
7068 l1 = lookup_attribute ("cmse_nonsecure_call",
7069 TYPE_ATTRIBUTES (type1)) != NULL;
7070 l2 = lookup_attribute ("cmse_nonsecure_call",
7071 TYPE_ATTRIBUTES (type2)) != NULL;
7073 if (l1 != l2)
7074 return 0;
7076 return 1;
7079 /* Assigns default attributes to newly defined type. This is used to
7080 set short_call/long_call attributes for function types of
7081 functions defined inside corresponding #pragma scopes. */
7082 static void
7083 arm_set_default_type_attributes (tree type)
7085 /* Add __attribute__ ((long_call)) to all functions, when
7086 inside #pragma long_calls or __attribute__ ((short_call)),
7087 when inside #pragma no_long_calls. */
7088 if (TREE_CODE (type) == FUNCTION_TYPE || TREE_CODE (type) == METHOD_TYPE)
7090 tree type_attr_list, attr_name;
7091 type_attr_list = TYPE_ATTRIBUTES (type);
7093 if (arm_pragma_long_calls == LONG)
7094 attr_name = get_identifier ("long_call");
7095 else if (arm_pragma_long_calls == SHORT)
7096 attr_name = get_identifier ("short_call");
7097 else
7098 return;
7100 type_attr_list = tree_cons (attr_name, NULL_TREE, type_attr_list);
7101 TYPE_ATTRIBUTES (type) = type_attr_list;
7105 /* Return true if DECL is known to be linked into section SECTION. */
7107 static bool
7108 arm_function_in_section_p (tree decl, section *section)
7110 /* We can only be certain about the prevailing symbol definition. */
7111 if (!decl_binds_to_current_def_p (decl))
7112 return false;
7114 /* If DECL_SECTION_NAME is set, assume it is trustworthy. */
7115 if (!DECL_SECTION_NAME (decl))
7117 /* Make sure that we will not create a unique section for DECL. */
7118 if (flag_function_sections || DECL_COMDAT_GROUP (decl))
7119 return false;
7122 return function_section (decl) == section;
7125 /* Return nonzero if a 32-bit "long_call" should be generated for
7126 a call from the current function to DECL. We generate a long_call
7127 if the function:
7129 a. has an __attribute__((long_call))
7130 or b. is within the scope of a #pragma long_calls
7131 or c. the -mlong-calls command line switch has been specified
7133 However we do not generate a long call if the function:
7135 d. has an __attribute__ ((short_call))
7136 or e. is inside the scope of a #pragma no_long_calls
7137 or f. is defined in the same section as the current function. */
7139 bool
7140 arm_is_long_call_p (tree decl)
7142 tree attrs;
7144 if (!decl)
7145 return TARGET_LONG_CALLS;
7147 attrs = TYPE_ATTRIBUTES (TREE_TYPE (decl));
7148 if (lookup_attribute ("short_call", attrs))
7149 return false;
7151 /* For "f", be conservative, and only cater for cases in which the
7152 whole of the current function is placed in the same section. */
7153 if (!flag_reorder_blocks_and_partition
7154 && TREE_CODE (decl) == FUNCTION_DECL
7155 && arm_function_in_section_p (decl, current_function_section ()))
7156 return false;
7158 if (lookup_attribute ("long_call", attrs))
7159 return true;
7161 return TARGET_LONG_CALLS;
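/* Source-level forms of cases (a) and (d) above (illustration only,
   not compiled here):  */
#if 0
void far_fn (void) __attribute__ ((long_call));    /* always a long call  */
void near_fn (void) __attribute__ ((short_call));  /* never a long call,
                                                      even with -mlong-calls  */
#endif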
7164 /* Return nonzero if it is ok to make a tail-call to DECL. */
7165 static bool
7166 arm_function_ok_for_sibcall (tree decl, tree exp)
7168 unsigned long func_type;
7170 if (cfun->machine->sibcall_blocked)
7171 return false;
7173 /* Never tailcall something if we are generating code for Thumb-1. */
7174 if (TARGET_THUMB1)
7175 return false;
7177 /* The PIC register is live on entry to VxWorks PLT entries, so we
7178 must make the call before restoring the PIC register. */
7179 if (TARGET_VXWORKS_RTP && flag_pic && decl && !targetm.binds_local_p (decl))
7180 return false;
7182 /* ??? Cannot tail-call to long calls with APCS frame and VFP, because IP
7183 may be used both as target of the call and base register for restoring
7184 the VFP registers */
7185 if (TARGET_APCS_FRAME && TARGET_ARM
7186 && TARGET_HARD_FLOAT
7187 && decl && arm_is_long_call_p (decl))
7188 return false;
7190 /* If we are interworking and the function is not declared static
7191 then we can't tail-call it unless we know that it exists in this
7192 compilation unit (since it might be a Thumb routine). */
7193 if (TARGET_INTERWORK && decl && TREE_PUBLIC (decl)
7194 && !TREE_ASM_WRITTEN (decl))
7195 return false;
7197 func_type = arm_current_func_type ();
7198 /* Never tailcall from an ISR routine - it needs a special exit sequence. */
7199 if (IS_INTERRUPT (func_type))
7200 return false;
7202 /* ARMv8-M non-secure entry functions need to return with bxns which is only
7203 generated for entry functions themselves. */
7204 if (IS_CMSE_ENTRY (arm_current_func_type ()))
7205 return false;
7207 /* We do not allow ARMv8-M non-secure calls to be turned into sibling calls,
7208 this would complicate matters for later code generation. */
7209 if (TREE_CODE (exp) == CALL_EXPR)
7211 tree fntype = TREE_TYPE (TREE_TYPE (CALL_EXPR_FN (exp)));
7212 if (lookup_attribute ("cmse_nonsecure_call", TYPE_ATTRIBUTES (fntype)))
7213 return false;
7216 if (!VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
7218 /* Check that the return value locations are the same. For
7219 example that we aren't returning a value from the sibling in
7220 a VFP register but then need to transfer it to a core
7221 register. */
7222 rtx a, b;
7223 tree decl_or_type = decl;
7225 /* If it is an indirect function pointer, get the function type. */
7226 if (!decl)
7227 decl_or_type = TREE_TYPE (TREE_TYPE (CALL_EXPR_FN (exp)));
7229 a = arm_function_value (TREE_TYPE (exp), decl_or_type, false);
7230 b = arm_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
7231 cfun->decl, false);
7232 if (!rtx_equal_p (a, b))
7233 return false;
7236 /* Never tailcall if function may be called with a misaligned SP. */
7237 if (IS_STACKALIGN (func_type))
7238 return false;
7240 /* The AAPCS says that, on bare-metal, calls to unresolved weak
7241 references should become a NOP. Don't convert such calls into
7242 sibling calls. */
7243 if (TARGET_AAPCS_BASED
7244 && arm_abi == ARM_ABI_AAPCS
7245 && decl
7246 && DECL_WEAK (decl))
7247 return false;
7249 /* We cannot do a tailcall for an indirect call by descriptor if all the
7250 argument registers are used because the only register left to load the
7251 address is IP and it will already contain the static chain. */
7252 if (!decl && CALL_EXPR_BY_DESCRIPTOR (exp) && !flag_trampolines)
7254 tree fntype = TREE_TYPE (TREE_TYPE (CALL_EXPR_FN (exp)));
7255 CUMULATIVE_ARGS cum;
7256 cumulative_args_t cum_v;
7258 arm_init_cumulative_args (&cum, fntype, NULL_RTX, NULL_TREE);
7259 cum_v = pack_cumulative_args (&cum);
7261 for (tree t = TYPE_ARG_TYPES (fntype); t; t = TREE_CHAIN (t))
7263 tree type = TREE_VALUE (t);
7264 if (!VOID_TYPE_P (type))
7265 arm_function_arg_advance (cum_v, TYPE_MODE (type), type, true);
7268 if (!arm_function_arg (cum_v, SImode, integer_type_node, true))
7269 return false;
7272 /* Everything else is ok. */
7273 return true;
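
/* A hedged example of the transformation the checks above guard: when
   arm_function_ok_for_sibcall returns true for a call in tail position,
   such as

       int callee (int);
       int caller (int x) { return callee (x + 1); }

   the call can be emitted as a direct branch ("b callee") instead of a
   branch-and-link followed by a return, reusing the caller's frame.  */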
7277 /* Addressing mode support functions. */
7279 /* Return nonzero if X is a legitimate immediate operand when compiling
7280 for PIC. We know that X satisfies CONSTANT_P and flag_pic is true. */
 7281 int
 7282 legitimate_pic_operand_p (rtx x)
7284 if (GET_CODE (x) == SYMBOL_REF
7285 || (GET_CODE (x) == CONST
7286 && GET_CODE (XEXP (x, 0)) == PLUS
7287 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF))
7288 return 0;
7290 return 1;
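
/* For illustration: under -fPIC the following constants are rejected by
   legitimate_pic_operand_p because they would require a GOT-based load,

       (symbol_ref "foo")
       (const (plus (symbol_ref "foo") (const_int 4)))

   while constants with no symbolic component, e.g. (const_int 42), are
   still accepted as immediates.  */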
7293 /* Record that the current function needs a PIC register. Initialize
7294 cfun->machine->pic_reg if we have not already done so. */
7296 static void
7297 require_pic_register (void)
7299 /* A lot of the logic here is made obscure by the fact that this
7300 routine gets called as part of the rtx cost estimation process.
7301 We don't want those calls to affect any assumptions about the real
7302 function; and further, we can't call entry_of_function() until we
7303 start the real expansion process. */
7304 if (!crtl->uses_pic_offset_table)
7306 gcc_assert (can_create_pseudo_p ());
7307 if (arm_pic_register != INVALID_REGNUM
7308 && !(TARGET_THUMB1 && arm_pic_register > LAST_LO_REGNUM))
7310 if (!cfun->machine->pic_reg)
7311 cfun->machine->pic_reg = gen_rtx_REG (Pmode, arm_pic_register);
7313 /* Play games to avoid marking the function as needing pic
7314 if we are being called as part of the cost-estimation
7315 process. */
7316 if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
7317 crtl->uses_pic_offset_table = 1;
7319 else
7321 rtx_insn *seq, *insn;
7323 if (!cfun->machine->pic_reg)
7324 cfun->machine->pic_reg = gen_reg_rtx (Pmode);
7326 /* Play games to avoid marking the function as needing pic
7327 if we are being called as part of the cost-estimation
7328 process. */
7329 if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
7331 crtl->uses_pic_offset_table = 1;
7332 start_sequence ();
7334 if (TARGET_THUMB1 && arm_pic_register != INVALID_REGNUM
7335 && arm_pic_register > LAST_LO_REGNUM)
7336 emit_move_insn (cfun->machine->pic_reg,
7337 gen_rtx_REG (Pmode, arm_pic_register));
7338 else
7339 arm_load_pic_register (0UL);
7341 seq = get_insns ();
7342 end_sequence ();
7344 for (insn = seq; insn; insn = NEXT_INSN (insn))
7345 if (INSN_P (insn))
7346 INSN_LOCATION (insn) = prologue_location;
7348 /* We can be called during expansion of PHI nodes, where
7349 we can't yet emit instructions directly in the final
7350 insn stream. Queue the insns on the entry edge, they will
7351 be committed after everything else is expanded. */
7352 insert_insn_on_edge (seq,
7353 single_succ_edge (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
7360 legitimize_pic_address (rtx orig, machine_mode mode, rtx reg)
7362 if (GET_CODE (orig) == SYMBOL_REF
7363 || GET_CODE (orig) == LABEL_REF)
7365 if (reg == 0)
7367 gcc_assert (can_create_pseudo_p ());
7368 reg = gen_reg_rtx (Pmode);
7371 /* VxWorks does not impose a fixed gap between segments; the run-time
7372 gap can be different from the object-file gap. We therefore can't
7373 use GOTOFF unless we are absolutely sure that the symbol is in the
7374 same segment as the GOT. Unfortunately, the flexibility of linker
7375 scripts means that we can't be sure of that in general, so assume
7376 that GOTOFF is never valid on VxWorks. */
7377 /* References to weak symbols cannot be resolved locally: they
7378 may be overridden by a non-weak definition at link time. */
7379 rtx_insn *insn;
7380 if ((GET_CODE (orig) == LABEL_REF
7381 || (GET_CODE (orig) == SYMBOL_REF
7382 && SYMBOL_REF_LOCAL_P (orig)
7383 && (SYMBOL_REF_DECL (orig)
7384 ? !DECL_WEAK (SYMBOL_REF_DECL (orig)) : 1)))
7385 && NEED_GOT_RELOC
7386 && arm_pic_data_is_text_relative)
7387 insn = arm_pic_static_addr (orig, reg);
7388 else
7390 rtx pat;
7391 rtx mem;
7393 /* If this function doesn't have a pic register, create one now. */
7394 require_pic_register ();
7396 pat = gen_calculate_pic_address (reg, cfun->machine->pic_reg, orig);
7398 /* Make the MEM as close to a constant as possible. */
7399 mem = SET_SRC (pat);
7400 gcc_assert (MEM_P (mem) && !MEM_VOLATILE_P (mem));
7401 MEM_READONLY_P (mem) = 1;
7402 MEM_NOTRAP_P (mem) = 1;
7404 insn = emit_insn (pat);
7407 /* Put a REG_EQUAL note on this insn, so that it can be optimized
7408 by loop. */
7409 set_unique_reg_note (insn, REG_EQUAL, orig);
7411 return reg;
7413 else if (GET_CODE (orig) == CONST)
7415 rtx base, offset;
7417 if (GET_CODE (XEXP (orig, 0)) == PLUS
7418 && XEXP (XEXP (orig, 0), 0) == cfun->machine->pic_reg)
7419 return orig;
7421 /* Handle the case where we have: const (UNSPEC_TLS). */
7422 if (GET_CODE (XEXP (orig, 0)) == UNSPEC
7423 && XINT (XEXP (orig, 0), 1) == UNSPEC_TLS)
7424 return orig;
7426 /* Handle the case where we have:
7427 const (plus (UNSPEC_TLS) (ADDEND)). The ADDEND must be a
7428 CONST_INT. */
7429 if (GET_CODE (XEXP (orig, 0)) == PLUS
7430 && GET_CODE (XEXP (XEXP (orig, 0), 0)) == UNSPEC
7431 && XINT (XEXP (XEXP (orig, 0), 0), 1) == UNSPEC_TLS)
7433 gcc_assert (CONST_INT_P (XEXP (XEXP (orig, 0), 1)));
7434 return orig;
7437 if (reg == 0)
7439 gcc_assert (can_create_pseudo_p ());
7440 reg = gen_reg_rtx (Pmode);
7443 gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);
7445 base = legitimize_pic_address (XEXP (XEXP (orig, 0), 0), Pmode, reg);
7446 offset = legitimize_pic_address (XEXP (XEXP (orig, 0), 1), Pmode,
7447 base == reg ? 0 : reg);
7449 if (CONST_INT_P (offset))
 7451 	  /* The base register doesn't really matter; we only want to
7452 test the index for the appropriate mode. */
7453 if (!arm_legitimate_index_p (mode, offset, SET, 0))
7455 gcc_assert (can_create_pseudo_p ());
7456 offset = force_reg (Pmode, offset);
7459 if (CONST_INT_P (offset))
7460 return plus_constant (Pmode, base, INTVAL (offset));
7463 if (GET_MODE_SIZE (mode) > 4
7464 && (GET_MODE_CLASS (mode) == MODE_INT
7465 || TARGET_SOFT_FLOAT))
7467 emit_insn (gen_addsi3 (reg, base, offset));
7468 return reg;
7471 return gen_rtx_PLUS (Pmode, base, offset);
7474 return orig;
7478 /* Find a spare register to use during the prolog of a function. */
7480 static int
7481 thumb_find_work_register (unsigned long pushed_regs_mask)
7483 int reg;
7485 /* Check the argument registers first as these are call-used. The
7486 register allocation order means that sometimes r3 might be used
7487 but earlier argument registers might not, so check them all. */
7488 for (reg = LAST_ARG_REGNUM; reg >= 0; reg --)
7489 if (!df_regs_ever_live_p (reg))
7490 return reg;
7492 /* Before going on to check the call-saved registers we can try a couple
7493 more ways of deducing that r3 is available. The first is when we are
 7494      pushing anonymous arguments onto the stack and we have fewer than 4
 7495      registers' worth of fixed arguments (*).  In this case r3 will be part of
7496 the variable argument list and so we can be sure that it will be
7497 pushed right at the start of the function. Hence it will be available
7498 for the rest of the prologue.
 7499      (*): i.e., crtl->args.pretend_args_size is greater than 0.  */
7500 if (cfun->machine->uses_anonymous_args
7501 && crtl->args.pretend_args_size > 0)
7502 return LAST_ARG_REGNUM;
 7504   /* The other case is when we have fixed arguments but fewer than 4 registers'
7505 worth. In this case r3 might be used in the body of the function, but
7506 it is not being used to convey an argument into the function. In theory
7507 we could just check crtl->args.size to see how many bytes are
7508 being passed in argument registers, but it seems that it is unreliable.
7509 Sometimes it will have the value 0 when in fact arguments are being
7510 passed. (See testcase execute/20021111-1.c for an example). So we also
7511 check the args_info.nregs field as well. The problem with this field is
7512 that it makes no allowances for arguments that are passed to the
7513 function but which are not used. Hence we could miss an opportunity
7514 when a function has an unused argument in r3. But it is better to be
7515 safe than to be sorry. */
7516 if (! cfun->machine->uses_anonymous_args
7517 && crtl->args.size >= 0
7518 && crtl->args.size <= (LAST_ARG_REGNUM * UNITS_PER_WORD)
7519 && (TARGET_AAPCS_BASED
7520 ? crtl->args.info.aapcs_ncrn < 4
7521 : crtl->args.info.nregs < 4))
7522 return LAST_ARG_REGNUM;
7524 /* Otherwise look for a call-saved register that is going to be pushed. */
7525 for (reg = LAST_LO_REGNUM; reg > LAST_ARG_REGNUM; reg --)
7526 if (pushed_regs_mask & (1 << reg))
7527 return reg;
7529 if (TARGET_THUMB2)
7531 /* Thumb-2 can use high regs. */
7532 for (reg = FIRST_HI_REGNUM; reg < 15; reg ++)
7533 if (pushed_regs_mask & (1 << reg))
7534 return reg;
7536 /* Something went wrong - thumb_compute_save_reg_mask()
7537 should have arranged for a suitable register to be pushed. */
7538 gcc_unreachable ();
7541 static GTY(()) int pic_labelno;
7543 /* Generate code to load the PIC register. In thumb mode SCRATCH is a
7544 low register. */
7546 void
7547 arm_load_pic_register (unsigned long saved_regs ATTRIBUTE_UNUSED)
7549 rtx l1, labelno, pic_tmp, pic_rtx, pic_reg;
7551 if (crtl->uses_pic_offset_table == 0 || TARGET_SINGLE_PIC_BASE)
7552 return;
7554 gcc_assert (flag_pic);
7556 pic_reg = cfun->machine->pic_reg;
7557 if (TARGET_VXWORKS_RTP)
7559 pic_rtx = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE);
7560 pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
7561 emit_insn (gen_pic_load_addr_32bit (pic_reg, pic_rtx));
7563 emit_insn (gen_rtx_SET (pic_reg, gen_rtx_MEM (Pmode, pic_reg)));
7565 pic_tmp = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
7566 emit_insn (gen_pic_offset_arm (pic_reg, pic_reg, pic_tmp));
7568 else
7570 /* We use an UNSPEC rather than a LABEL_REF because this label
7571 never appears in the code stream. */
7573 labelno = GEN_INT (pic_labelno++);
7574 l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
7575 l1 = gen_rtx_CONST (VOIDmode, l1);
7577 /* On the ARM the PC register contains 'dot + 8' at the time of the
7578 addition, on the Thumb it is 'dot + 4'. */
7579 pic_rtx = plus_constant (Pmode, l1, TARGET_ARM ? 8 : 4);
7580 pic_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, pic_rtx),
7581 UNSPEC_GOTSYM_OFF);
7582 pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
7584 if (TARGET_32BIT)
7586 emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
7588 else /* TARGET_THUMB1 */
7590 if (arm_pic_register != INVALID_REGNUM
7591 && REGNO (pic_reg) > LAST_LO_REGNUM)
7593 /* We will have pushed the pic register, so we should always be
7594 able to find a work register. */
7595 pic_tmp = gen_rtx_REG (SImode,
7596 thumb_find_work_register (saved_regs));
7597 emit_insn (gen_pic_load_addr_thumb1 (pic_tmp, pic_rtx));
7598 emit_insn (gen_movsi (pic_offset_table_rtx, pic_tmp));
7599 emit_insn (gen_pic_add_dot_plus_four (pic_reg, pic_reg, labelno));
7601 else if (arm_pic_register != INVALID_REGNUM
7602 && arm_pic_register > LAST_LO_REGNUM
7603 && REGNO (pic_reg) <= LAST_LO_REGNUM)
7605 emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
7606 emit_move_insn (gen_rtx_REG (Pmode, arm_pic_register), pic_reg);
7607 emit_use (gen_rtx_REG (Pmode, arm_pic_register));
7609 else
7610 emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
7614 /* Need to emit this whether or not we obey regdecls,
7615 since setjmp/longjmp can cause life info to screw up. */
7616 emit_use (pic_reg);
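
/* A rough sketch (one possible form, assuming ARM state, ELF and no single
   PIC base) of the sequence emitted above to initialize the PIC register,
   using the 'dot + 8' correction mentioned earlier:

       ldr     rPIC, .LPIC_OFF
   .LPIC0:
       add     rPIC, pc, rPIC
       ...
   .LPIC_OFF:
       .word   _GLOBAL_OFFSET_TABLE_ - (.LPIC0 + 8)

   The exact form depends on the target; VxWorks RTP and Thumb-1 with a
   high PIC register take the other branches above.  */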
7619 /* Generate code to load the address of a static var when flag_pic is set. */
7620 static rtx_insn *
7621 arm_pic_static_addr (rtx orig, rtx reg)
7623 rtx l1, labelno, offset_rtx;
7625 gcc_assert (flag_pic);
7627 /* We use an UNSPEC rather than a LABEL_REF because this label
7628 never appears in the code stream. */
7629 labelno = GEN_INT (pic_labelno++);
7630 l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
7631 l1 = gen_rtx_CONST (VOIDmode, l1);
7633 /* On the ARM the PC register contains 'dot + 8' at the time of the
7634 addition, on the Thumb it is 'dot + 4'. */
7635 offset_rtx = plus_constant (Pmode, l1, TARGET_ARM ? 8 : 4);
7636 offset_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, orig, offset_rtx),
7637 UNSPEC_SYMBOL_OFFSET);
7638 offset_rtx = gen_rtx_CONST (Pmode, offset_rtx);
7640 return emit_insn (gen_pic_load_addr_unified (reg, offset_rtx, labelno));
7643 /* Return nonzero if X is valid as an ARM state addressing register. */
7644 static int
7645 arm_address_register_rtx_p (rtx x, int strict_p)
7647 int regno;
7649 if (!REG_P (x))
7650 return 0;
7652 regno = REGNO (x);
7654 if (strict_p)
7655 return ARM_REGNO_OK_FOR_BASE_P (regno);
7657 return (regno <= LAST_ARM_REGNUM
7658 || regno >= FIRST_PSEUDO_REGISTER
7659 || regno == FRAME_POINTER_REGNUM
7660 || regno == ARG_POINTER_REGNUM);
7663 /* Return TRUE if this rtx is the difference of a symbol and a label,
7664 and will reduce to a PC-relative relocation in the object file.
7665 Expressions like this can be left alone when generating PIC, rather
7666 than forced through the GOT. */
7667 static int
7668 pcrel_constant_p (rtx x)
7670 if (GET_CODE (x) == MINUS)
7671 return symbol_mentioned_p (XEXP (x, 0)) && label_mentioned_p (XEXP (x, 1));
7673 return FALSE;
 7676 /* Return true if X will surely end up in an index register after the next
7677 splitting pass. */
7678 static bool
7679 will_be_in_index_register (const_rtx x)
7681 /* arm.md: calculate_pic_address will split this into a register. */
7682 return GET_CODE (x) == UNSPEC && (XINT (x, 1) == UNSPEC_PIC_SYM);
7685 /* Return nonzero if X is a valid ARM state address operand. */
 7686 int
 7687 arm_legitimate_address_outer_p (machine_mode mode, rtx x, RTX_CODE outer,
7688 int strict_p)
7690 bool use_ldrd;
7691 enum rtx_code code = GET_CODE (x);
7693 if (arm_address_register_rtx_p (x, strict_p))
7694 return 1;
7696 use_ldrd = (TARGET_LDRD
7697 && (mode == DImode || mode == DFmode));
7699 if (code == POST_INC || code == PRE_DEC
7700 || ((code == PRE_INC || code == POST_DEC)
7701 && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
7702 return arm_address_register_rtx_p (XEXP (x, 0), strict_p);
7704 else if ((code == POST_MODIFY || code == PRE_MODIFY)
7705 && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
7706 && GET_CODE (XEXP (x, 1)) == PLUS
7707 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
7709 rtx addend = XEXP (XEXP (x, 1), 1);
7711 /* Don't allow ldrd post increment by register because it's hard
7712 to fixup invalid register choices. */
7713 if (use_ldrd
7714 && GET_CODE (x) == POST_MODIFY
7715 && REG_P (addend))
7716 return 0;
7718 return ((use_ldrd || GET_MODE_SIZE (mode) <= 4)
7719 && arm_legitimate_index_p (mode, addend, outer, strict_p));
7722 /* After reload constants split into minipools will have addresses
7723 from a LABEL_REF. */
7724 else if (reload_completed
7725 && (code == LABEL_REF
7726 || (code == CONST
7727 && GET_CODE (XEXP (x, 0)) == PLUS
7728 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
7729 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
7730 return 1;
7732 else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))
7733 return 0;
7735 else if (code == PLUS)
7737 rtx xop0 = XEXP (x, 0);
7738 rtx xop1 = XEXP (x, 1);
7740 return ((arm_address_register_rtx_p (xop0, strict_p)
7741 && ((CONST_INT_P (xop1)
7742 && arm_legitimate_index_p (mode, xop1, outer, strict_p))
7743 || (!strict_p && will_be_in_index_register (xop1))))
7744 || (arm_address_register_rtx_p (xop1, strict_p)
7745 && arm_legitimate_index_p (mode, xop0, outer, strict_p)));
7748 #if 0
7749 /* Reload currently can't handle MINUS, so disable this for now */
7750 else if (GET_CODE (x) == MINUS)
7752 rtx xop0 = XEXP (x, 0);
7753 rtx xop1 = XEXP (x, 1);
7755 return (arm_address_register_rtx_p (xop0, strict_p)
7756 && arm_legitimate_index_p (mode, xop1, outer, strict_p));
7758 #endif
7760 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
7761 && code == SYMBOL_REF
7762 && CONSTANT_POOL_ADDRESS_P (x)
7763 && ! (flag_pic
7764 && symbol_mentioned_p (get_pool_constant (x))
7765 && ! pcrel_constant_p (get_pool_constant (x))))
7766 return 1;
7768 return 0;
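
/* Illustrative SImode addresses accepted by the function above in ARM
   state (a sketch, not an exhaustive list):

       (reg r0)                                       @ [r0]
       (plus (reg r0) (const_int 4092))               @ [r0, #4092]
       (plus (mult (reg r1) (const_int 4)) (reg r0))  @ [r0, r1, lsl #2]
       (post_inc (reg r0))                            @ [r0], #4

   For DImode/DFmode the constant range tightens to +/-255 with LDRD
   (otherwise -4095..4091), as checked in arm_legitimate_index_p below.  */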
7771 /* Return true if we can avoid creating a constant pool entry for x. */
7772 static bool
7773 can_avoid_literal_pool_for_label_p (rtx x)
 7775   /* Normally we can assign constant values to target registers without
 7776      the help of the constant pool.  But there are cases where we have to use
 7777      the constant pool, such as:
 7778      1) assigning a label to a register;
 7779      2) sign-extending an 8-bit value to 32 bits and then assigning it to a register.
7781 Constant pool access in format:
7782 (set (reg r0) (mem (symbol_ref (".LC0"))))
 7783      will cause the use of the literal pool (later, in function arm_reorg).
 7784      So here we mark such a form as invalid; the compiler will then
 7785      adjust it into:
7786 (set (reg r0) (symbol_ref (".LC0")))
7787 (set (reg r0) (mem (reg r0))).
7788 No extra register is required, and (mem (reg r0)) won't cause the use
7789 of literal pools. */
7790 if (arm_disable_literal_pool && GET_CODE (x) == SYMBOL_REF
7791 && CONSTANT_POOL_ADDRESS_P (x))
7792 return 1;
7793 return 0;
7797 /* Return nonzero if X is a valid Thumb-2 address operand. */
7798 static int
7799 thumb2_legitimate_address_p (machine_mode mode, rtx x, int strict_p)
7801 bool use_ldrd;
7802 enum rtx_code code = GET_CODE (x);
7804 if (arm_address_register_rtx_p (x, strict_p))
7805 return 1;
7807 use_ldrd = (TARGET_LDRD
7808 && (mode == DImode || mode == DFmode));
7810 if (code == POST_INC || code == PRE_DEC
7811 || ((code == PRE_INC || code == POST_DEC)
7812 && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
7813 return arm_address_register_rtx_p (XEXP (x, 0), strict_p);
7815 else if ((code == POST_MODIFY || code == PRE_MODIFY)
7816 && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
7817 && GET_CODE (XEXP (x, 1)) == PLUS
7818 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
7820 /* Thumb-2 only has autoincrement by constant. */
7821 rtx addend = XEXP (XEXP (x, 1), 1);
7822 HOST_WIDE_INT offset;
7824 if (!CONST_INT_P (addend))
7825 return 0;
7827 offset = INTVAL(addend);
7828 if (GET_MODE_SIZE (mode) <= 4)
7829 return (offset > -256 && offset < 256);
7831 return (use_ldrd && offset > -1024 && offset < 1024
7832 && (offset & 3) == 0);
7835 /* After reload constants split into minipools will have addresses
7836 from a LABEL_REF. */
7837 else if (reload_completed
7838 && (code == LABEL_REF
7839 || (code == CONST
7840 && GET_CODE (XEXP (x, 0)) == PLUS
7841 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
7842 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
7843 return 1;
7845 else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))
7846 return 0;
7848 else if (code == PLUS)
7850 rtx xop0 = XEXP (x, 0);
7851 rtx xop1 = XEXP (x, 1);
7853 return ((arm_address_register_rtx_p (xop0, strict_p)
7854 && (thumb2_legitimate_index_p (mode, xop1, strict_p)
7855 || (!strict_p && will_be_in_index_register (xop1))))
7856 || (arm_address_register_rtx_p (xop1, strict_p)
7857 && thumb2_legitimate_index_p (mode, xop0, strict_p)));
7860 else if (can_avoid_literal_pool_for_label_p (x))
7861 return 0;
7863 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
7864 && code == SYMBOL_REF
7865 && CONSTANT_POOL_ADDRESS_P (x)
7866 && ! (flag_pic
7867 && symbol_mentioned_p (get_pool_constant (x))
7868 && ! pcrel_constant_p (get_pool_constant (x))))
7869 return 1;
7871 return 0;
7874 /* Return nonzero if INDEX is valid for an address index operand in
7875 ARM state. */
7876 static int
7877 arm_legitimate_index_p (machine_mode mode, rtx index, RTX_CODE outer,
7878 int strict_p)
7880 HOST_WIDE_INT range;
7881 enum rtx_code code = GET_CODE (index);
7883 /* Standard coprocessor addressing modes. */
7884 if (TARGET_HARD_FLOAT
7885 && (mode == SFmode || mode == DFmode))
7886 return (code == CONST_INT && INTVAL (index) < 1024
7887 && INTVAL (index) > -1024
7888 && (INTVAL (index) & 3) == 0);
7890 /* For quad modes, we restrict the constant offset to be slightly less
7891 than what the instruction format permits. We do this because for
7892 quad mode moves, we will actually decompose them into two separate
7893 double-mode reads or writes. INDEX must therefore be a valid
7894 (double-mode) offset and so should INDEX+8. */
7895 if (TARGET_NEON && VALID_NEON_QREG_MODE (mode))
7896 return (code == CONST_INT
7897 && INTVAL (index) < 1016
7898 && INTVAL (index) > -1024
7899 && (INTVAL (index) & 3) == 0);
7901 /* We have no such constraint on double mode offsets, so we permit the
7902 full range of the instruction format. */
7903 if (TARGET_NEON && VALID_NEON_DREG_MODE (mode))
7904 return (code == CONST_INT
7905 && INTVAL (index) < 1024
7906 && INTVAL (index) > -1024
7907 && (INTVAL (index) & 3) == 0);
7909 if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
7910 return (code == CONST_INT
7911 && INTVAL (index) < 1024
7912 && INTVAL (index) > -1024
7913 && (INTVAL (index) & 3) == 0);
7915 if (arm_address_register_rtx_p (index, strict_p)
7916 && (GET_MODE_SIZE (mode) <= 4))
7917 return 1;
7919 if (mode == DImode || mode == DFmode)
7921 if (code == CONST_INT)
7923 HOST_WIDE_INT val = INTVAL (index);
7925 if (TARGET_LDRD)
7926 return val > -256 && val < 256;
7927 else
7928 return val > -4096 && val < 4092;
7931 return TARGET_LDRD && arm_address_register_rtx_p (index, strict_p);
7934 if (GET_MODE_SIZE (mode) <= 4
7935 && ! (arm_arch4
7936 && (mode == HImode
7937 || mode == HFmode
7938 || (mode == QImode && outer == SIGN_EXTEND))))
7940 if (code == MULT)
7942 rtx xiop0 = XEXP (index, 0);
7943 rtx xiop1 = XEXP (index, 1);
7945 return ((arm_address_register_rtx_p (xiop0, strict_p)
7946 && power_of_two_operand (xiop1, SImode))
7947 || (arm_address_register_rtx_p (xiop1, strict_p)
7948 && power_of_two_operand (xiop0, SImode)));
7950 else if (code == LSHIFTRT || code == ASHIFTRT
7951 || code == ASHIFT || code == ROTATERT)
7953 rtx op = XEXP (index, 1);
7955 return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
7956 && CONST_INT_P (op)
7957 && INTVAL (op) > 0
7958 && INTVAL (op) <= 31);
7962 /* For ARM v4 we may be doing a sign-extend operation during the
7963 load. */
7964 if (arm_arch4)
7966 if (mode == HImode
7967 || mode == HFmode
7968 || (outer == SIGN_EXTEND && mode == QImode))
7969 range = 256;
7970 else
7971 range = 4096;
7973 else
7974 range = (mode == HImode || mode == HFmode) ? 4095 : 4096;
7976 return (code == CONST_INT
7977 && INTVAL (index) < range
7978 && INTVAL (index) > -range);
7981 /* Return true if OP is a valid index scaling factor for Thumb-2 address
 7982    index operand, i.e. 1, 2, 4 or 8.  */
7983 static bool
7984 thumb2_index_mul_operand (rtx op)
7986 HOST_WIDE_INT val;
7988 if (!CONST_INT_P (op))
7989 return false;
7991 val = INTVAL(op);
7992 return (val == 1 || val == 2 || val == 4 || val == 8);
7995 /* Return nonzero if INDEX is a valid Thumb-2 address index operand. */
7996 static int
7997 thumb2_legitimate_index_p (machine_mode mode, rtx index, int strict_p)
7999 enum rtx_code code = GET_CODE (index);
8001 /* ??? Combine arm and thumb2 coprocessor addressing modes. */
8002 /* Standard coprocessor addressing modes. */
8003 if (TARGET_HARD_FLOAT
8004 && (mode == SFmode || mode == DFmode))
8005 return (code == CONST_INT && INTVAL (index) < 1024
 8006 	    /* Thumb-2 allows only > -256 index range for its core register
8007 load/stores. Since we allow SF/DF in core registers, we have
8008 to use the intersection between -256~4096 (core) and -1024~1024
8009 (coprocessor). */
8010 && INTVAL (index) > -256
8011 && (INTVAL (index) & 3) == 0);
8013 if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
8015 /* For DImode assume values will usually live in core regs
8016 and only allow LDRD addressing modes. */
8017 if (!TARGET_LDRD || mode != DImode)
8018 return (code == CONST_INT
8019 && INTVAL (index) < 1024
8020 && INTVAL (index) > -1024
8021 && (INTVAL (index) & 3) == 0);
8024 /* For quad modes, we restrict the constant offset to be slightly less
8025 than what the instruction format permits. We do this because for
8026 quad mode moves, we will actually decompose them into two separate
8027 double-mode reads or writes. INDEX must therefore be a valid
8028 (double-mode) offset and so should INDEX+8. */
8029 if (TARGET_NEON && VALID_NEON_QREG_MODE (mode))
8030 return (code == CONST_INT
8031 && INTVAL (index) < 1016
8032 && INTVAL (index) > -1024
8033 && (INTVAL (index) & 3) == 0);
8035 /* We have no such constraint on double mode offsets, so we permit the
8036 full range of the instruction format. */
8037 if (TARGET_NEON && VALID_NEON_DREG_MODE (mode))
8038 return (code == CONST_INT
8039 && INTVAL (index) < 1024
8040 && INTVAL (index) > -1024
8041 && (INTVAL (index) & 3) == 0);
8043 if (arm_address_register_rtx_p (index, strict_p)
8044 && (GET_MODE_SIZE (mode) <= 4))
8045 return 1;
8047 if (mode == DImode || mode == DFmode)
8049 if (code == CONST_INT)
8051 HOST_WIDE_INT val = INTVAL (index);
8052 /* ??? Can we assume ldrd for thumb2? */
8053 /* Thumb-2 ldrd only has reg+const addressing modes. */
8054 /* ldrd supports offsets of +-1020.
8055 However the ldr fallback does not. */
8056 return val > -256 && val < 256 && (val & 3) == 0;
8058 else
8059 return 0;
8062 if (code == MULT)
8064 rtx xiop0 = XEXP (index, 0);
8065 rtx xiop1 = XEXP (index, 1);
8067 return ((arm_address_register_rtx_p (xiop0, strict_p)
8068 && thumb2_index_mul_operand (xiop1))
8069 || (arm_address_register_rtx_p (xiop1, strict_p)
8070 && thumb2_index_mul_operand (xiop0)));
8072 else if (code == ASHIFT)
8074 rtx op = XEXP (index, 1);
8076 return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
8077 && CONST_INT_P (op)
8078 && INTVAL (op) > 0
8079 && INTVAL (op) <= 3);
8082 return (code == CONST_INT
8083 && INTVAL (index) < 4096
8084 && INTVAL (index) > -256);
8087 /* Return nonzero if X is valid as a 16-bit Thumb state base register. */
8088 static int
8089 thumb1_base_register_rtx_p (rtx x, machine_mode mode, int strict_p)
8091 int regno;
8093 if (!REG_P (x))
8094 return 0;
8096 regno = REGNO (x);
8098 if (strict_p)
8099 return THUMB1_REGNO_MODE_OK_FOR_BASE_P (regno, mode);
8101 return (regno <= LAST_LO_REGNUM
8102 || regno > LAST_VIRTUAL_REGISTER
8103 || regno == FRAME_POINTER_REGNUM
8104 || (GET_MODE_SIZE (mode) >= 4
8105 && (regno == STACK_POINTER_REGNUM
8106 || regno >= FIRST_PSEUDO_REGISTER
8107 || x == hard_frame_pointer_rtx
8108 || x == arg_pointer_rtx)));
8111 /* Return nonzero if x is a legitimate index register. This is the case
8112 for any base register that can access a QImode object. */
8113 inline static int
8114 thumb1_index_register_rtx_p (rtx x, int strict_p)
8116 return thumb1_base_register_rtx_p (x, QImode, strict_p);
8119 /* Return nonzero if x is a legitimate 16-bit Thumb-state address.
8121 The AP may be eliminated to either the SP or the FP, so we use the
8122 least common denominator, e.g. SImode, and offsets from 0 to 64.
8124 ??? Verify whether the above is the right approach.
8126 ??? Also, the FP may be eliminated to the SP, so perhaps that
8127 needs special handling also.
8129 ??? Look at how the mips16 port solves this problem. It probably uses
8130 better ways to solve some of these problems.
8132 Although it is not incorrect, we don't accept QImode and HImode
8133 addresses based on the frame pointer or arg pointer until the
8134 reload pass starts. This is so that eliminating such addresses
8135 into stack based ones won't produce impossible code. */
 8136 int
 8137 thumb1_legitimate_address_p (machine_mode mode, rtx x, int strict_p)
8139 if (TARGET_HAVE_MOVT && can_avoid_literal_pool_for_label_p (x))
8140 return 0;
8142 /* ??? Not clear if this is right. Experiment. */
8143 if (GET_MODE_SIZE (mode) < 4
8144 && !(reload_in_progress || reload_completed)
8145 && (reg_mentioned_p (frame_pointer_rtx, x)
8146 || reg_mentioned_p (arg_pointer_rtx, x)
8147 || reg_mentioned_p (virtual_incoming_args_rtx, x)
8148 || reg_mentioned_p (virtual_outgoing_args_rtx, x)
8149 || reg_mentioned_p (virtual_stack_dynamic_rtx, x)
8150 || reg_mentioned_p (virtual_stack_vars_rtx, x)))
8151 return 0;
8153 /* Accept any base register. SP only in SImode or larger. */
8154 else if (thumb1_base_register_rtx_p (x, mode, strict_p))
8155 return 1;
8157 /* This is PC relative data before arm_reorg runs. */
8158 else if (GET_MODE_SIZE (mode) >= 4 && CONSTANT_P (x)
8159 && GET_CODE (x) == SYMBOL_REF
8160 && CONSTANT_POOL_ADDRESS_P (x) && !flag_pic)
8161 return 1;
8163 /* This is PC relative data after arm_reorg runs. */
8164 else if ((GET_MODE_SIZE (mode) >= 4 || mode == HFmode)
8165 && reload_completed
8166 && (GET_CODE (x) == LABEL_REF
8167 || (GET_CODE (x) == CONST
8168 && GET_CODE (XEXP (x, 0)) == PLUS
8169 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
8170 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
8171 return 1;
8173 /* Post-inc indexing only supported for SImode and larger. */
8174 else if (GET_CODE (x) == POST_INC && GET_MODE_SIZE (mode) >= 4
8175 && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p))
8176 return 1;
8178 else if (GET_CODE (x) == PLUS)
8180 /* REG+REG address can be any two index registers. */
8181 /* We disallow FRAME+REG addressing since we know that FRAME
8182 will be replaced with STACK, and SP relative addressing only
8183 permits SP+OFFSET. */
8184 if (GET_MODE_SIZE (mode) <= 4
8185 && XEXP (x, 0) != frame_pointer_rtx
8186 && XEXP (x, 1) != frame_pointer_rtx
8187 && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
8188 && (thumb1_index_register_rtx_p (XEXP (x, 1), strict_p)
8189 || (!strict_p && will_be_in_index_register (XEXP (x, 1)))))
8190 return 1;
8192 /* REG+const has 5-7 bit offset for non-SP registers. */
8193 else if ((thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
8194 || XEXP (x, 0) == arg_pointer_rtx)
8195 && CONST_INT_P (XEXP (x, 1))
8196 && thumb_legitimate_offset_p (mode, INTVAL (XEXP (x, 1))))
8197 return 1;
8199 /* REG+const has 10-bit offset for SP, but only SImode and
8200 larger is supported. */
8201 /* ??? Should probably check for DI/DFmode overflow here
8202 just like GO_IF_LEGITIMATE_OFFSET does. */
8203 else if (REG_P (XEXP (x, 0))
8204 && REGNO (XEXP (x, 0)) == STACK_POINTER_REGNUM
8205 && GET_MODE_SIZE (mode) >= 4
8206 && CONST_INT_P (XEXP (x, 1))
8207 && INTVAL (XEXP (x, 1)) >= 0
8208 && INTVAL (XEXP (x, 1)) + GET_MODE_SIZE (mode) <= 1024
8209 && (INTVAL (XEXP (x, 1)) & 3) == 0)
8210 return 1;
8212 else if (REG_P (XEXP (x, 0))
8213 && (REGNO (XEXP (x, 0)) == FRAME_POINTER_REGNUM
8214 || REGNO (XEXP (x, 0)) == ARG_POINTER_REGNUM
8215 || (REGNO (XEXP (x, 0)) >= FIRST_VIRTUAL_REGISTER
8216 && REGNO (XEXP (x, 0))
8217 <= LAST_VIRTUAL_POINTER_REGISTER))
8218 && GET_MODE_SIZE (mode) >= 4
8219 && CONST_INT_P (XEXP (x, 1))
8220 && (INTVAL (XEXP (x, 1)) & 3) == 0)
8221 return 1;
8224 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
8225 && GET_MODE_SIZE (mode) == 4
8226 && GET_CODE (x) == SYMBOL_REF
8227 && CONSTANT_POOL_ADDRESS_P (x)
8228 && ! (flag_pic
8229 && symbol_mentioned_p (get_pool_constant (x))
8230 && ! pcrel_constant_p (get_pool_constant (x))))
8231 return 1;
8233 return 0;
8236 /* Return nonzero if VAL can be used as an offset in a Thumb-state address
8237 instruction of mode MODE. */
 8238 int
 8239 thumb_legitimate_offset_p (machine_mode mode, HOST_WIDE_INT val)
8241 switch (GET_MODE_SIZE (mode))
8243 case 1:
8244 return val >= 0 && val < 32;
8246 case 2:
8247 return val >= 0 && val < 64 && (val & 1) == 0;
8249 default:
8250 return (val >= 0
8251 && (val + GET_MODE_SIZE (mode)) <= 128
8252 && (val & 3) == 0);
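
/* Worked examples of the ranges above for Thumb-1 reg+offset addressing:

       QImode:  0..31                 e.g. ldrb r0, [r1, #31]
       HImode:  0..62, even           e.g. ldrh r0, [r1, #62]
       SImode:  0..124, word-aligned  e.g. ldr  r0, [r1, #124]

   (For SImode, val + 4 <= 128 gives the upper bound of 124.)  */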
8256 bool
8257 arm_legitimate_address_p (machine_mode mode, rtx x, bool strict_p)
8259 if (TARGET_ARM)
8260 return arm_legitimate_address_outer_p (mode, x, SET, strict_p);
8261 else if (TARGET_THUMB2)
8262 return thumb2_legitimate_address_p (mode, x, strict_p);
8263 else /* if (TARGET_THUMB1) */
8264 return thumb1_legitimate_address_p (mode, x, strict_p);
8267 /* Worker function for TARGET_PREFERRED_RELOAD_CLASS.
8269 Given an rtx X being reloaded into a reg required to be
8270 in class CLASS, return the class of reg to actually use.
8271 In general this is just CLASS, but for the Thumb core registers and
8272 immediate constants we prefer a LO_REGS class or a subset. */
8274 static reg_class_t
8275 arm_preferred_reload_class (rtx x ATTRIBUTE_UNUSED, reg_class_t rclass)
8277 if (TARGET_32BIT)
8278 return rclass;
8279 else
8281 if (rclass == GENERAL_REGS)
8282 return LO_REGS;
8283 else
8284 return rclass;
8288 /* Build the SYMBOL_REF for __tls_get_addr. */
8290 static GTY(()) rtx tls_get_addr_libfunc;
8292 static rtx
8293 get_tls_get_addr (void)
8295 if (!tls_get_addr_libfunc)
8296 tls_get_addr_libfunc = init_one_libfunc ("__tls_get_addr");
8297 return tls_get_addr_libfunc;
8301 arm_load_tp (rtx target)
8303 if (!target)
8304 target = gen_reg_rtx (SImode);
8306 if (TARGET_HARD_TP)
8308 /* Can return in any reg. */
8309 emit_insn (gen_load_tp_hard (target));
8311 else
8313 /* Always returned in r0. Immediately copy the result into a pseudo,
8314 otherwise other uses of r0 (e.g. setting up function arguments) may
8315 clobber the value. */
8317 rtx tmp;
8319 emit_insn (gen_load_tp_soft ());
8321 tmp = gen_rtx_REG (SImode, R0_REGNUM);
8322 emit_move_insn (target, tmp);
8324 return target;
8327 static rtx
8328 load_tls_operand (rtx x, rtx reg)
8330 rtx tmp;
8332 if (reg == NULL_RTX)
8333 reg = gen_reg_rtx (SImode);
8335 tmp = gen_rtx_CONST (SImode, x);
8337 emit_move_insn (reg, tmp);
8339 return reg;
8342 static rtx_insn *
8343 arm_call_tls_get_addr (rtx x, rtx reg, rtx *valuep, int reloc)
8345 rtx label, labelno, sum;
8347 gcc_assert (reloc != TLS_DESCSEQ);
8348 start_sequence ();
8350 labelno = GEN_INT (pic_labelno++);
8351 label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
8352 label = gen_rtx_CONST (VOIDmode, label);
8354 sum = gen_rtx_UNSPEC (Pmode,
8355 gen_rtvec (4, x, GEN_INT (reloc), label,
8356 GEN_INT (TARGET_ARM ? 8 : 4)),
8357 UNSPEC_TLS);
8358 reg = load_tls_operand (sum, reg);
8360 if (TARGET_ARM)
8361 emit_insn (gen_pic_add_dot_plus_eight (reg, reg, labelno));
8362 else
8363 emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
8365 *valuep = emit_library_call_value (get_tls_get_addr (), NULL_RTX,
8366 LCT_PURE, /* LCT_CONST? */
8367 Pmode, reg, Pmode);
8369 rtx_insn *insns = get_insns ();
8370 end_sequence ();
8372 return insns;
8375 static rtx
8376 arm_tls_descseq_addr (rtx x, rtx reg)
8378 rtx labelno = GEN_INT (pic_labelno++);
8379 rtx label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
8380 rtx sum = gen_rtx_UNSPEC (Pmode,
8381 gen_rtvec (4, x, GEN_INT (TLS_DESCSEQ),
8382 gen_rtx_CONST (VOIDmode, label),
8383 GEN_INT (!TARGET_ARM)),
8384 UNSPEC_TLS);
8385 rtx reg0 = load_tls_operand (sum, gen_rtx_REG (SImode, R0_REGNUM));
8387 emit_insn (gen_tlscall (x, labelno));
8388 if (!reg)
8389 reg = gen_reg_rtx (SImode);
8390 else
8391 gcc_assert (REGNO (reg) != R0_REGNUM);
8393 emit_move_insn (reg, reg0);
8395 return reg;
8399 legitimize_tls_address (rtx x, rtx reg)
8401 rtx dest, tp, label, labelno, sum, ret, eqv, addend;
8402 rtx_insn *insns;
8403 unsigned int model = SYMBOL_REF_TLS_MODEL (x);
8405 switch (model)
8407 case TLS_MODEL_GLOBAL_DYNAMIC:
8408 if (TARGET_GNU2_TLS)
8410 reg = arm_tls_descseq_addr (x, reg);
8412 tp = arm_load_tp (NULL_RTX);
8414 dest = gen_rtx_PLUS (Pmode, tp, reg);
8416 else
8418 /* Original scheme */
8419 insns = arm_call_tls_get_addr (x, reg, &ret, TLS_GD32);
8420 dest = gen_reg_rtx (Pmode);
8421 emit_libcall_block (insns, dest, ret, x);
8423 return dest;
8425 case TLS_MODEL_LOCAL_DYNAMIC:
8426 if (TARGET_GNU2_TLS)
8428 reg = arm_tls_descseq_addr (x, reg);
8430 tp = arm_load_tp (NULL_RTX);
8432 dest = gen_rtx_PLUS (Pmode, tp, reg);
8434 else
8436 insns = arm_call_tls_get_addr (x, reg, &ret, TLS_LDM32);
8438 /* Attach a unique REG_EQUIV, to allow the RTL optimizers to
8439 share the LDM result with other LD model accesses. */
8440 eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const1_rtx),
8441 UNSPEC_TLS);
8442 dest = gen_reg_rtx (Pmode);
8443 emit_libcall_block (insns, dest, ret, eqv);
8445 /* Load the addend. */
8446 addend = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, x,
8447 GEN_INT (TLS_LDO32)),
8448 UNSPEC_TLS);
8449 addend = force_reg (SImode, gen_rtx_CONST (SImode, addend));
8450 dest = gen_rtx_PLUS (Pmode, dest, addend);
8452 return dest;
8454 case TLS_MODEL_INITIAL_EXEC:
8455 labelno = GEN_INT (pic_labelno++);
8456 label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
8457 label = gen_rtx_CONST (VOIDmode, label);
8458 sum = gen_rtx_UNSPEC (Pmode,
8459 gen_rtvec (4, x, GEN_INT (TLS_IE32), label,
8460 GEN_INT (TARGET_ARM ? 8 : 4)),
8461 UNSPEC_TLS);
8462 reg = load_tls_operand (sum, reg);
8464 if (TARGET_ARM)
8465 emit_insn (gen_tls_load_dot_plus_eight (reg, reg, labelno));
8466 else if (TARGET_THUMB2)
8467 emit_insn (gen_tls_load_dot_plus_four (reg, NULL, reg, labelno));
8468 else
8470 emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
8471 emit_move_insn (reg, gen_const_mem (SImode, reg));
8474 tp = arm_load_tp (NULL_RTX);
8476 return gen_rtx_PLUS (Pmode, tp, reg);
8478 case TLS_MODEL_LOCAL_EXEC:
8479 tp = arm_load_tp (NULL_RTX);
8481 reg = gen_rtx_UNSPEC (Pmode,
8482 gen_rtvec (2, x, GEN_INT (TLS_LE32)),
8483 UNSPEC_TLS);
8484 reg = force_reg (SImode, gen_rtx_CONST (SImode, reg));
8486 return gen_rtx_PLUS (Pmode, tp, reg);
8488 default:
8489 abort ();
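
/* A hedged sketch of the TLS_MODEL_LOCAL_EXEC case above: the thread
   pointer is read either from CP15 (gen_load_tp_hard) or via the
   __aeabi_read_tp helper (gen_load_tp_soft), and the address returned is
   conceptually

       (plus (reg tp) (const (unspec [(symbol_ref "x") TLS_LE32] UNSPEC_TLS)))

   with the TPOFF constant forced into a register first.  The dynamic
   models go through __tls_get_addr and the initial-exec model through a
   GOT entry instead.  */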
8493 /* Try machine-dependent ways of modifying an illegitimate address
8494 to be legitimate. If we find one, return the new, valid address. */
8496 arm_legitimize_address (rtx x, rtx orig_x, machine_mode mode)
8498 if (arm_tls_referenced_p (x))
8500 rtx addend = NULL;
8502 if (GET_CODE (x) == CONST && GET_CODE (XEXP (x, 0)) == PLUS)
8504 addend = XEXP (XEXP (x, 0), 1);
8505 x = XEXP (XEXP (x, 0), 0);
8508 if (GET_CODE (x) != SYMBOL_REF)
8509 return x;
8511 gcc_assert (SYMBOL_REF_TLS_MODEL (x) != 0);
8513 x = legitimize_tls_address (x, NULL_RTX);
8515 if (addend)
8517 x = gen_rtx_PLUS (SImode, x, addend);
8518 orig_x = x;
8520 else
8521 return x;
8524 if (!TARGET_ARM)
8526 /* TODO: legitimize_address for Thumb2. */
8527 if (TARGET_THUMB2)
8528 return x;
8529 return thumb_legitimize_address (x, orig_x, mode);
8532 if (GET_CODE (x) == PLUS)
8534 rtx xop0 = XEXP (x, 0);
8535 rtx xop1 = XEXP (x, 1);
8537 if (CONSTANT_P (xop0) && !symbol_mentioned_p (xop0))
8538 xop0 = force_reg (SImode, xop0);
8540 if (CONSTANT_P (xop1) && !CONST_INT_P (xop1)
8541 && !symbol_mentioned_p (xop1))
8542 xop1 = force_reg (SImode, xop1);
8544 if (ARM_BASE_REGISTER_RTX_P (xop0)
8545 && CONST_INT_P (xop1))
8547 HOST_WIDE_INT n, low_n;
8548 rtx base_reg, val;
8549 n = INTVAL (xop1);
8551 /* VFP addressing modes actually allow greater offsets, but for
8552 now we just stick with the lowest common denominator. */
8553 if (mode == DImode || mode == DFmode)
8555 low_n = n & 0x0f;
8556 n &= ~0x0f;
8557 if (low_n > 4)
8559 n += 16;
8560 low_n -= 16;
8563 else
8565 low_n = ((mode) == TImode ? 0
8566 : n >= 0 ? (n & 0xfff) : -((-n) & 0xfff));
8567 n -= low_n;
8570 base_reg = gen_reg_rtx (SImode);
8571 val = force_operand (plus_constant (Pmode, xop0, n), NULL_RTX);
8572 emit_move_insn (base_reg, val);
8573 x = plus_constant (Pmode, base_reg, low_n);
8575 else if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
8576 x = gen_rtx_PLUS (SImode, xop0, xop1);
8579 /* XXX We don't allow MINUS any more -- see comment in
8580 arm_legitimate_address_outer_p (). */
8581 else if (GET_CODE (x) == MINUS)
8583 rtx xop0 = XEXP (x, 0);
8584 rtx xop1 = XEXP (x, 1);
8586 if (CONSTANT_P (xop0))
8587 xop0 = force_reg (SImode, xop0);
8589 if (CONSTANT_P (xop1) && ! symbol_mentioned_p (xop1))
8590 xop1 = force_reg (SImode, xop1);
8592 if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
8593 x = gen_rtx_MINUS (SImode, xop0, xop1);
8596 /* Make sure to take full advantage of the pre-indexed addressing mode
8597 with absolute addresses which often allows for the base register to
8598 be factorized for multiple adjacent memory references, and it might
 8599      even allow for the mini pool to be avoided entirely.  */
8600 else if (CONST_INT_P (x) && optimize > 0)
8602 unsigned int bits;
8603 HOST_WIDE_INT mask, base, index;
8604 rtx base_reg;
8606 /* ldr and ldrb can use a 12-bit index, ldrsb and the rest can only
 8607 	 use an 8-bit index.  So let's use a 12-bit index for SImode only and
8608 hope that arm_gen_constant will enable ldrb to use more bits. */
8609 bits = (mode == SImode) ? 12 : 8;
8610 mask = (1 << bits) - 1;
8611 base = INTVAL (x) & ~mask;
8612 index = INTVAL (x) & mask;
8613 if (bit_count (base & 0xffffffff) > (32 - bits)/2)
8615 /* It'll most probably be more efficient to generate the base
8616 with more bits set and use a negative index instead. */
8617 base |= mask;
8618 index -= mask;
8620 base_reg = force_reg (SImode, GEN_INT (base));
8621 x = plus_constant (Pmode, base_reg, index);
8624 if (flag_pic)
8626 /* We need to find and carefully transform any SYMBOL and LABEL
8627 references; so go back to the original address expression. */
8628 rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX);
8630 if (new_x != orig_x)
8631 x = new_x;
8634 return x;
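
/* A worked example of the constant-address case above: a word load from
   absolute address 0x12345 uses bits = 12 for SImode, so base = 0x12000
   and index = 0x345, and we emit (roughly)

       mov     r3, #0x12000
       ldr     r0, [r3, #0x345]

   instead of keeping the full 32-bit address in the literal pool.  */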
8638 /* Try machine-dependent ways of modifying an illegitimate Thumb address
8639 to be legitimate. If we find one, return the new, valid address. */
8641 thumb_legitimize_address (rtx x, rtx orig_x, machine_mode mode)
8643 if (GET_CODE (x) == PLUS
8644 && CONST_INT_P (XEXP (x, 1))
8645 && (INTVAL (XEXP (x, 1)) >= 32 * GET_MODE_SIZE (mode)
8646 || INTVAL (XEXP (x, 1)) < 0))
8648 rtx xop0 = XEXP (x, 0);
8649 rtx xop1 = XEXP (x, 1);
8650 HOST_WIDE_INT offset = INTVAL (xop1);
8652 /* Try and fold the offset into a biasing of the base register and
8653 then offsetting that. Don't do this when optimizing for space
8654 since it can cause too many CSEs. */
8655 if (optimize_size && offset >= 0
8656 && offset < 256 + 31 * GET_MODE_SIZE (mode))
8658 HOST_WIDE_INT delta;
8660 if (offset >= 256)
8661 delta = offset - (256 - GET_MODE_SIZE (mode));
8662 else if (offset < 32 * GET_MODE_SIZE (mode) + 8)
8663 delta = 31 * GET_MODE_SIZE (mode);
8664 else
8665 delta = offset & (~31 * GET_MODE_SIZE (mode));
8667 xop0 = force_operand (plus_constant (Pmode, xop0, offset - delta),
8668 NULL_RTX);
8669 x = plus_constant (Pmode, xop0, delta);
8671 else if (offset < 0 && offset > -256)
8672 /* Small negative offsets are best done with a subtract before the
 8673 	   dereference, since forcing these into a register normally takes two
8674 instructions. */
8675 x = force_operand (x, NULL_RTX);
8676 else
8678 /* For the remaining cases, force the constant into a register. */
8679 xop1 = force_reg (SImode, xop1);
8680 x = gen_rtx_PLUS (SImode, xop0, xop1);
8683 else if (GET_CODE (x) == PLUS
8684 && s_register_operand (XEXP (x, 1), SImode)
8685 && !s_register_operand (XEXP (x, 0), SImode))
8687 rtx xop0 = force_operand (XEXP (x, 0), NULL_RTX);
8689 x = gen_rtx_PLUS (SImode, xop0, XEXP (x, 1));
8692 if (flag_pic)
8694 /* We need to find and carefully transform any SYMBOL and LABEL
8695 references; so go back to the original address expression. */
8696 rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX);
8698 if (new_x != orig_x)
8699 x = new_x;
8702 return x;
8705 /* Return TRUE if X contains any TLS symbol references. */
8707 bool
8708 arm_tls_referenced_p (rtx x)
8710 if (! TARGET_HAVE_TLS)
8711 return false;
8713 subrtx_iterator::array_type array;
8714 FOR_EACH_SUBRTX (iter, array, x, ALL)
8716 const_rtx x = *iter;
8717 if (GET_CODE (x) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (x) != 0)
8719 /* ARM currently does not provide relocations to encode TLS variables
 8720 	     into AArch32 instructions, only into data, so there is currently no
 8721 	     way to implement these if the literal pool is disabled.  */
8722 if (arm_disable_literal_pool)
8723 sorry ("accessing thread-local storage is not currently supported "
8724 "with -mpure-code or -mslow-flash-data");
8726 return true;
8729 /* Don't recurse into UNSPEC_TLS looking for TLS symbols; these are
8730 TLS offsets, not real symbol references. */
8731 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
8732 iter.skip_subrtxes ();
8734 return false;
8737 /* Implement TARGET_LEGITIMATE_CONSTANT_P.
8739 On the ARM, allow any integer (invalid ones are removed later by insn
8740 patterns), nice doubles and symbol_refs which refer to the function's
8741 constant pool XXX.
8743 When generating pic allow anything. */
8745 static bool
8746 arm_legitimate_constant_p_1 (machine_mode, rtx x)
8748 return flag_pic || !label_mentioned_p (x);
8751 static bool
8752 thumb_legitimate_constant_p (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
 8754   /* Splitters for TARGET_USE_MOVT call arm_emit_movpair, which creates a
 8755      HIGH RTX.  Such RTXs must therefore be allowed for Thumb-1 so that, when
 8756      run for ARMv8-M Baseline or later, the result is valid.  */
8757 if (TARGET_HAVE_MOVT && GET_CODE (x) == HIGH)
8758 x = XEXP (x, 0);
8760 return (CONST_INT_P (x)
8761 || CONST_DOUBLE_P (x)
8762 || CONSTANT_ADDRESS_P (x)
8763 || (TARGET_HAVE_MOVT && GET_CODE (x) == SYMBOL_REF)
8764 || flag_pic);
8767 static bool
8768 arm_legitimate_constant_p (machine_mode mode, rtx x)
8770 return (!arm_cannot_force_const_mem (mode, x)
8771 && (TARGET_32BIT
8772 ? arm_legitimate_constant_p_1 (mode, x)
8773 : thumb_legitimate_constant_p (mode, x)));
8776 /* Implement TARGET_CANNOT_FORCE_CONST_MEM. */
8778 static bool
8779 arm_cannot_force_const_mem (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
8781 rtx base, offset;
8783 if (ARM_OFFSETS_MUST_BE_WITHIN_SECTIONS_P)
8785 split_const (x, &base, &offset);
8786 if (GET_CODE (base) == SYMBOL_REF
8787 && !offset_within_block_p (base, INTVAL (offset)))
8788 return true;
8790 return arm_tls_referenced_p (x);
8793 #define REG_OR_SUBREG_REG(X) \
8794 (REG_P (X) \
8795 || (GET_CODE (X) == SUBREG && REG_P (SUBREG_REG (X))))
8797 #define REG_OR_SUBREG_RTX(X) \
8798 (REG_P (X) ? (X) : SUBREG_REG (X))
8800 static inline int
8801 thumb1_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
8803 machine_mode mode = GET_MODE (x);
8804 int total, words;
8806 switch (code)
8808 case ASHIFT:
8809 case ASHIFTRT:
8810 case LSHIFTRT:
8811 case ROTATERT:
8812 return (mode == SImode) ? COSTS_N_INSNS (1) : COSTS_N_INSNS (2);
8814 case PLUS:
8815 case MINUS:
8816 case COMPARE:
8817 case NEG:
8818 case NOT:
8819 return COSTS_N_INSNS (1);
8821 case MULT:
8822 if (arm_arch6m && arm_m_profile_small_mul)
8823 return COSTS_N_INSNS (32);
8825 if (CONST_INT_P (XEXP (x, 1)))
8827 int cycles = 0;
8828 unsigned HOST_WIDE_INT i = INTVAL (XEXP (x, 1));
8830 while (i)
8832 i >>= 2;
8833 cycles++;
8835 return COSTS_N_INSNS (2) + cycles;
8837 return COSTS_N_INSNS (1) + 16;
8839 case SET:
8840 /* A SET doesn't have a mode, so let's look at the SET_DEST to get
8841 the mode. */
8842 words = ARM_NUM_INTS (GET_MODE_SIZE (GET_MODE (SET_DEST (x))));
8843 return (COSTS_N_INSNS (words)
8844 + 4 * ((MEM_P (SET_SRC (x)))
8845 + MEM_P (SET_DEST (x))));
8847 case CONST_INT:
8848 if (outer == SET)
8850 if (UINTVAL (x) < 256
8851 /* 16-bit constant. */
8852 || (TARGET_HAVE_MOVT && !(INTVAL (x) & 0xffff0000)))
8853 return 0;
8854 if (thumb_shiftable_const (INTVAL (x)))
8855 return COSTS_N_INSNS (2);
8856 return COSTS_N_INSNS (3);
8858 else if ((outer == PLUS || outer == COMPARE)
8859 && INTVAL (x) < 256 && INTVAL (x) > -256)
8860 return 0;
8861 else if ((outer == IOR || outer == XOR || outer == AND)
8862 && INTVAL (x) < 256 && INTVAL (x) >= -256)
8863 return COSTS_N_INSNS (1);
8864 else if (outer == AND)
8866 int i;
8867 /* This duplicates the tests in the andsi3 expander. */
8868 for (i = 9; i <= 31; i++)
8869 if ((HOST_WIDE_INT_1 << i) - 1 == INTVAL (x)
8870 || (HOST_WIDE_INT_1 << i) - 1 == ~INTVAL (x))
8871 return COSTS_N_INSNS (2);
8873 else if (outer == ASHIFT || outer == ASHIFTRT
8874 || outer == LSHIFTRT)
8875 return 0;
8876 return COSTS_N_INSNS (2);
8878 case CONST:
8879 case CONST_DOUBLE:
8880 case LABEL_REF:
8881 case SYMBOL_REF:
8882 return COSTS_N_INSNS (3);
8884 case UDIV:
8885 case UMOD:
8886 case DIV:
8887 case MOD:
8888 return 100;
8890 case TRUNCATE:
8891 return 99;
8893 case AND:
8894 case XOR:
8895 case IOR:
8896 /* XXX guess. */
8897 return 8;
8899 case MEM:
8900 /* XXX another guess. */
8901 /* Memory costs quite a lot for the first word, but subsequent words
8902 load at the equivalent of a single insn each. */
8903 return (10 + 4 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
8904 + ((GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
8905 ? 4 : 0));
8907 case IF_THEN_ELSE:
8908 /* XXX a guess. */
8909 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
8910 return 14;
8911 return 2;
8913 case SIGN_EXTEND:
8914 case ZERO_EXTEND:
8915 total = mode == DImode ? COSTS_N_INSNS (1) : 0;
8916 total += thumb1_rtx_costs (XEXP (x, 0), GET_CODE (XEXP (x, 0)), code);
8918 if (mode == SImode)
8919 return total;
8921 if (arm_arch6)
8922 return total + COSTS_N_INSNS (1);
8924 /* Assume a two-shift sequence. Increase the cost slightly so
8925 we prefer actual shifts over an extend operation. */
8926 return total + 1 + COSTS_N_INSNS (2);
8928 default:
8929 return 99;
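
/* A worked example of the MULT case above (no small/slow multiplier):
   for (mult (reg) (const_int 100)), i = 100 is shifted right by two bits
   four times before reaching zero (100 -> 25 -> 6 -> 1 -> 0), so the
   returned cost is COSTS_N_INSNS (2) + 4.  */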
8933 /* Estimates the size cost of thumb1 instructions.
8934 For now most of the code is copied from thumb1_rtx_costs. We need more
 8935    fine-grained tuning when we have more related test cases.  */
8936 static inline int
8937 thumb1_size_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
8939 machine_mode mode = GET_MODE (x);
8940 int words, cost;
8942 switch (code)
8944 case ASHIFT:
8945 case ASHIFTRT:
8946 case LSHIFTRT:
8947 case ROTATERT:
8948 return (mode == SImode) ? COSTS_N_INSNS (1) : COSTS_N_INSNS (2);
8950 case PLUS:
8951 case MINUS:
8952 /* Thumb-1 needs two instructions to fulfill shiftadd/shiftsub0/shiftsub1
8953 defined by RTL expansion, especially for the expansion of
8954 multiplication. */
8955 if ((GET_CODE (XEXP (x, 0)) == MULT
8956 && power_of_two_operand (XEXP (XEXP (x,0),1), SImode))
8957 || (GET_CODE (XEXP (x, 1)) == MULT
8958 && power_of_two_operand (XEXP (XEXP (x, 1), 1), SImode)))
8959 return COSTS_N_INSNS (2);
8960 /* Fall through. */
8961 case COMPARE:
8962 case NEG:
8963 case NOT:
8964 return COSTS_N_INSNS (1);
8966 case MULT:
8967 if (CONST_INT_P (XEXP (x, 1)))
 8969 	  /* The Thumb-1 mul instruction can't operate on a constant.  We must load
 8970 	     it into a register first.  */
8971 int const_size = thumb1_size_rtx_costs (XEXP (x, 1), CONST_INT, SET);
 8972 	  /* For targets that have a very small, high-latency multiply
8973 unit, we prefer to synthesize the mult with up to 5 instructions,
8974 giving a good balance between size and performance. */
8975 if (arm_arch6m && arm_m_profile_small_mul)
8976 return COSTS_N_INSNS (5);
8977 else
8978 return COSTS_N_INSNS (1) + const_size;
8980 return COSTS_N_INSNS (1);
8982 case SET:
8983 /* A SET doesn't have a mode, so let's look at the SET_DEST to get
8984 the mode. */
8985 words = ARM_NUM_INTS (GET_MODE_SIZE (GET_MODE (SET_DEST (x))));
8986 cost = COSTS_N_INSNS (words);
8987 if (satisfies_constraint_J (SET_SRC (x))
8988 || satisfies_constraint_K (SET_SRC (x))
8989 /* Too big an immediate for a 2-byte mov, using MOVT. */
8990 || (CONST_INT_P (SET_SRC (x))
8991 && UINTVAL (SET_SRC (x)) >= 256
8992 && TARGET_HAVE_MOVT
8993 && satisfies_constraint_j (SET_SRC (x)))
8994 /* thumb1_movdi_insn. */
8995 || ((words > 1) && MEM_P (SET_SRC (x))))
8996 cost += COSTS_N_INSNS (1);
8997 return cost;
8999 case CONST_INT:
9000 if (outer == SET)
9002 if (UINTVAL (x) < 256)
9003 return COSTS_N_INSNS (1);
 9004 	  /* movw is 4 bytes long.  */
9005 if (TARGET_HAVE_MOVT && !(INTVAL (x) & 0xffff0000))
9006 return COSTS_N_INSNS (2);
9007 /* See split "TARGET_THUMB1 && satisfies_constraint_J". */
9008 if (INTVAL (x) >= -255 && INTVAL (x) <= -1)
9009 return COSTS_N_INSNS (2);
9010 /* See split "TARGET_THUMB1 && satisfies_constraint_K". */
9011 if (thumb_shiftable_const (INTVAL (x)))
9012 return COSTS_N_INSNS (2);
9013 return COSTS_N_INSNS (3);
9015 else if ((outer == PLUS || outer == COMPARE)
9016 && INTVAL (x) < 256 && INTVAL (x) > -256)
9017 return 0;
9018 else if ((outer == IOR || outer == XOR || outer == AND)
9019 && INTVAL (x) < 256 && INTVAL (x) >= -256)
9020 return COSTS_N_INSNS (1);
9021 else if (outer == AND)
9023 int i;
9024 /* This duplicates the tests in the andsi3 expander. */
9025 for (i = 9; i <= 31; i++)
9026 if ((HOST_WIDE_INT_1 << i) - 1 == INTVAL (x)
9027 || (HOST_WIDE_INT_1 << i) - 1 == ~INTVAL (x))
9028 return COSTS_N_INSNS (2);
9030 else if (outer == ASHIFT || outer == ASHIFTRT
9031 || outer == LSHIFTRT)
9032 return 0;
9033 return COSTS_N_INSNS (2);
9035 case CONST:
9036 case CONST_DOUBLE:
9037 case LABEL_REF:
9038 case SYMBOL_REF:
9039 return COSTS_N_INSNS (3);
9041 case UDIV:
9042 case UMOD:
9043 case DIV:
9044 case MOD:
9045 return 100;
9047 case TRUNCATE:
9048 return 99;
9050 case AND:
9051 case XOR:
9052 case IOR:
9053 return COSTS_N_INSNS (1);
9055 case MEM:
9056 return (COSTS_N_INSNS (1)
9057 + COSTS_N_INSNS (1)
9058 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
9059 + ((GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
9060 ? COSTS_N_INSNS (1) : 0));
9062 case IF_THEN_ELSE:
9063 /* XXX a guess. */
9064 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
9065 return 14;
9066 return 2;
9068 case ZERO_EXTEND:
9069 /* XXX still guessing. */
9070 switch (GET_MODE (XEXP (x, 0)))
9072 case E_QImode:
9073 return (1 + (mode == DImode ? 4 : 0)
9074 + (MEM_P (XEXP (x, 0)) ? 10 : 0));
9076 case E_HImode:
9077 return (4 + (mode == DImode ? 4 : 0)
9078 + (MEM_P (XEXP (x, 0)) ? 10 : 0));
9080 case E_SImode:
9081 return (1 + (MEM_P (XEXP (x, 0)) ? 10 : 0));
9083 default:
9084 return 99;
9087 default:
9088 return 99;
9092 /* Helper function for arm_rtx_costs. If the operand is a valid shift
9093 operand, then return the operand that is being shifted. If the shift
 9094    is not by a constant, then set *SHIFT_REG to the operand holding the
    shift amount.
9095 Return NULL if OP is not a shifter operand. */
9096 static rtx
9097 shifter_op_p (rtx op, rtx *shift_reg)
9099 enum rtx_code code = GET_CODE (op);
9101 if (code == MULT && CONST_INT_P (XEXP (op, 1))
9102 && exact_log2 (INTVAL (XEXP (op, 1))) > 0)
9103 return XEXP (op, 0);
9104 else if (code == ROTATE && CONST_INT_P (XEXP (op, 1)))
9105 return XEXP (op, 0);
9106 else if (code == ROTATERT || code == ASHIFT || code == LSHIFTRT
9107 || code == ASHIFTRT)
9109 if (!CONST_INT_P (XEXP (op, 1)))
9110 *shift_reg = XEXP (op, 1);
9111 return XEXP (op, 0);
9114 return NULL;
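
/* For illustration: given the canonical shift-add RTL

       (plus (mult (reg r1) (const_int 4)) (reg r0))

   shifter_op_p applied to the MULT returns (reg r1) and leaves *SHIFT_REG
   untouched (the shift amount is constant), matching the single
   instruction "add rd, r0, r1, lsl #2".  For (ashift (reg r1) (reg r2))
   it returns (reg r1) and sets *SHIFT_REG to (reg r2).  */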
9117 static bool
9118 arm_unspec_cost (rtx x, enum rtx_code /* outer_code */, bool speed_p, int *cost)
9120 const struct cpu_cost_table *extra_cost = current_tune->insn_extra_cost;
9121 rtx_code code = GET_CODE (x);
9122 gcc_assert (code == UNSPEC || code == UNSPEC_VOLATILE);
9124 switch (XINT (x, 1))
9126 case UNSPEC_UNALIGNED_LOAD:
9127 /* We can only do unaligned loads into the integer unit, and we can't
9128 use LDM or LDRD. */
9129 *cost = COSTS_N_INSNS (ARM_NUM_REGS (GET_MODE (x)));
9130 if (speed_p)
9131 *cost += (ARM_NUM_REGS (GET_MODE (x)) * extra_cost->ldst.load
9132 + extra_cost->ldst.load_unaligned);
9134 #ifdef NOT_YET
9135 *cost += arm_address_cost (XEXP (XVECEXP (x, 0, 0), 0), GET_MODE (x),
9136 ADDR_SPACE_GENERIC, speed_p);
9137 #endif
9138 return true;
9140 case UNSPEC_UNALIGNED_STORE:
9141 *cost = COSTS_N_INSNS (ARM_NUM_REGS (GET_MODE (x)));
9142 if (speed_p)
9143 *cost += (ARM_NUM_REGS (GET_MODE (x)) * extra_cost->ldst.store
9144 + extra_cost->ldst.store_unaligned);
9146 *cost += rtx_cost (XVECEXP (x, 0, 0), VOIDmode, UNSPEC, 0, speed_p);
9147 #ifdef NOT_YET
9148 *cost += arm_address_cost (XEXP (XVECEXP (x, 0, 0), 0), GET_MODE (x),
9149 ADDR_SPACE_GENERIC, speed_p);
9150 #endif
9151 return true;
9153 case UNSPEC_VRINTZ:
9154 case UNSPEC_VRINTP:
9155 case UNSPEC_VRINTM:
9156 case UNSPEC_VRINTR:
9157 case UNSPEC_VRINTX:
9158 case UNSPEC_VRINTA:
9159 if (speed_p)
9160 *cost += extra_cost->fp[GET_MODE (x) == DFmode].roundint;
9162 return true;
9163 default:
9164 *cost = COSTS_N_INSNS (2);
9165 break;
9167 return true;
9170 /* Cost of a libcall. We assume one insn per argument, an amount for the
9171 call (one insn for -Os) and then one for processing the result. */
9172 #define LIBCALL_COST(N) COSTS_N_INSNS (N + (speed_p ? 18 : 2))
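/* For illustration, expanding the macro above: LIBCALL_COST (2) is
   COSTS_N_INSNS (2 + 18) == COSTS_N_INSNS (20) when optimizing for speed
   and COSTS_N_INSNS (2 + 2) == COSTS_N_INSNS (4) when optimizing for size,
   since SPEED_P selects an overhead of 18 or 2 insns on top of the
   per-argument cost.  */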
9174 #define HANDLE_NARROW_SHIFT_ARITH(OP, IDX) \
9175 do \
9177 shift_op = shifter_op_p (XEXP (x, IDX), &shift_reg); \
9178 if (shift_op != NULL \
9179 && arm_rtx_shift_left_p (XEXP (x, IDX))) \
9181 if (shift_reg) \
9183 if (speed_p) \
9184 *cost += extra_cost->alu.arith_shift_reg; \
9185 *cost += rtx_cost (shift_reg, GET_MODE (shift_reg), \
9186 ASHIFT, 1, speed_p); \
9188 else if (speed_p) \
9189 *cost += extra_cost->alu.arith_shift; \
9191 *cost += (rtx_cost (shift_op, GET_MODE (shift_op), \
9192 ASHIFT, 0, speed_p) \
9193 + rtx_cost (XEXP (x, 1 - IDX), \
9194 GET_MODE (shift_op), \
9195 OP, 1, speed_p)); \
9196 return true; \
9199 while (0);
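/* Note (added for clarity): the macro above is used by the narrow-mode
   PLUS and MINUS cases below, e.g. HANDLE_NARROW_SHIFT_ARITH (MINUS, 0).
   If operand IDX is a shifter operand (see shifter_op_p) and the shift is
   a left shift, it charges an arith+shift (or arith+shift-by-register)
   extra cost when optimizing for speed, adds the costs of the operands
   and returns from arm_rtx_costs_internal.  */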
9201 /* RTX costs. Make an estimate of the cost of executing the operation
9202 X, which is contained within an operation with code OUTER_CODE.
9203 SPEED_P indicates whether the cost desired is the performance cost,
9204 or the size cost. The estimate is stored in COST and the return
9205 value is TRUE if the cost calculation is final, or FALSE if the
9206 caller should recurse through the operands of X to add additional
9207 costs.
9209 We currently make no attempt to model the size savings of Thumb-2
9210 16-bit instructions. At the normal points in compilation where
9211 this code is called we have no measure of whether the condition
9212 flags are live or not, and thus no realistic way to determine what
9213 the size will eventually be. */
9214 static bool
9215 arm_rtx_costs_internal (rtx x, enum rtx_code code, enum rtx_code outer_code,
9216 const struct cpu_cost_table *extra_cost,
9217 int *cost, bool speed_p)
9219 machine_mode mode = GET_MODE (x);
9221 *cost = COSTS_N_INSNS (1);
9223 if (TARGET_THUMB1)
9225 if (speed_p)
9226 *cost = thumb1_rtx_costs (x, code, outer_code);
9227 else
9228 *cost = thumb1_size_rtx_costs (x, code, outer_code);
9229 return true;
9232 switch (code)
9234 case SET:
9235 *cost = 0;
9236 /* SET RTXs don't have a mode so we get it from the destination. */
9237 mode = GET_MODE (SET_DEST (x));
9239 if (REG_P (SET_SRC (x))
9240 && REG_P (SET_DEST (x)))
9242 /* Assume that most copies can be done with a single insn,
9243 unless we don't have HW FP, in which case everything
9244 larger than word mode will require two insns. */
9245 *cost = COSTS_N_INSNS (((!TARGET_HARD_FLOAT
9246 && GET_MODE_SIZE (mode) > 4)
9247 || mode == DImode)
9248 ? 2 : 1);
9249 /* Conditional register moves can be encoded
9250 in 16 bits in Thumb mode. */
9251 if (!speed_p && TARGET_THUMB && outer_code == COND_EXEC)
9252 *cost >>= 1;
9254 return true;
9257 if (CONST_INT_P (SET_SRC (x)))
9259 /* Handle CONST_INT here, since the value doesn't have a mode
9260 and we would otherwise be unable to work out the true cost. */
9261 *cost = rtx_cost (SET_DEST (x), GET_MODE (SET_DEST (x)), SET,
9262 0, speed_p);
9263 outer_code = SET;
9264 /* Slightly lower the cost of setting a core reg to a constant.
9265 This helps break up chains and allows for better scheduling. */
9266 if (REG_P (SET_DEST (x))
9267 && REGNO (SET_DEST (x)) <= LR_REGNUM)
9268 *cost -= 1;
9269 x = SET_SRC (x);
9270 /* Immediate moves with an immediate in the range [0, 255] can be
9271 encoded in 16 bits in Thumb mode. */
9272 if (!speed_p && TARGET_THUMB && GET_MODE (x) == SImode
9273 && INTVAL (x) >= 0 && INTVAL (x) <= 255)
9274 *cost >>= 1;
9275 goto const_int_cost;
9278 return false;
9280 case MEM:
9281 /* A memory access costs 1 insn if the mode is small or the address is
9282 a single register; otherwise it costs one insn per word. */
9283 if (REG_P (XEXP (x, 0)))
9284 *cost = COSTS_N_INSNS (1);
9285 else if (flag_pic
9286 && GET_CODE (XEXP (x, 0)) == PLUS
9287 && will_be_in_index_register (XEXP (XEXP (x, 0), 1)))
9288 /* This will be split into two instructions.
9289 See arm.md:calculate_pic_address. */
9290 *cost = COSTS_N_INSNS (2);
9291 else
9292 *cost = COSTS_N_INSNS (ARM_NUM_REGS (mode));
9294 /* For speed optimizations, add the costs of the address and
9295 accessing memory. */
9296 if (speed_p)
9297 #ifdef NOT_YET
9298 *cost += (extra_cost->ldst.load
9299 + arm_address_cost (XEXP (x, 0), mode,
9300 ADDR_SPACE_GENERIC, speed_p));
9301 #else
9302 *cost += extra_cost->ldst.load;
9303 #endif
9304 return true;
9306 case PARALLEL:
9308 /* Calculations of LDM costs are complex. We assume an initial cost
9309 (ldm_1st) which covers loading up to ldm_regs_per_insn_1st
9310 registers; then each additional group of
9311 ldm_regs_per_insn_subsequent registers costs one more insn. The
9312 formula for N regs is thus:
9314 ldm_1st + COSTS_N_INSNS ((max (N - ldm_regs_per_insn_1st, 0)
9315 + ldm_regs_per_insn_subsequent - 1)
9316 / ldm_regs_per_insn_subsequent).
9318 Additional costs may also be added for addressing. A similar
9319 formula is used for STM. */
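/* Worked example with hypothetical tuning values: if
   ldm_regs_per_insn_1st == 2 and ldm_regs_per_insn_subsequent == 2, an
   8-register LDM is costed as
   ldm_1st + COSTS_N_INSNS ((MAX (8 - 2, 0) + 2 - 1) / 2)
   == ldm_1st + COSTS_N_INSNS (3).  */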
9321 bool is_ldm = load_multiple_operation (x, SImode);
9322 bool is_stm = store_multiple_operation (x, SImode);
9324 if (is_ldm || is_stm)
9326 if (speed_p)
9328 HOST_WIDE_INT nregs = XVECLEN (x, 0);
9329 HOST_WIDE_INT regs_per_insn_1st = is_ldm
9330 ? extra_cost->ldst.ldm_regs_per_insn_1st
9331 : extra_cost->ldst.stm_regs_per_insn_1st;
9332 HOST_WIDE_INT regs_per_insn_sub = is_ldm
9333 ? extra_cost->ldst.ldm_regs_per_insn_subsequent
9334 : extra_cost->ldst.stm_regs_per_insn_subsequent;
9336 *cost += regs_per_insn_1st
9337 + COSTS_N_INSNS (((MAX (nregs - regs_per_insn_1st, 0))
9338 + regs_per_insn_sub - 1)
9339 / regs_per_insn_sub);
9340 return true;
9344 return false;
9346 case DIV:
9347 case UDIV:
9348 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9349 && (mode == SFmode || !TARGET_VFP_SINGLE))
9350 *cost += COSTS_N_INSNS (speed_p
9351 ? extra_cost->fp[mode != SFmode].div : 0);
9352 else if (mode == SImode && TARGET_IDIV)
9353 *cost += COSTS_N_INSNS (speed_p ? extra_cost->mult[0].idiv : 0);
9354 else
9355 *cost = LIBCALL_COST (2);
9357 /* Make the cost of sdiv more expensive so that when both sdiv and udiv
9358 are possible, udiv is preferred. */
9359 *cost += (code == DIV ? COSTS_N_INSNS (1) : 0);
9360 return false; /* All arguments must be in registers. */
9362 case MOD:
9363 /* MOD by a power of 2 can be expanded as:
9364 rsbs r1, r0, #0
9365 and r0, r0, #(n - 1)
9366 and r1, r1, #(n - 1)
9367 rsbpl r0, r1, #0. */
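/* For example, r0 % 8 uses the sequence above with the mask #(8 - 1) == #7:
   four data-processing insns in total, which is why the code below adds
   COSTS_N_INSNS (3) on top of the base COSTS_N_INSNS (1) (plus, when
   optimizing for speed, two logical and one arithmetic extra_cost).  */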
9368 if (CONST_INT_P (XEXP (x, 1))
9369 && exact_log2 (INTVAL (XEXP (x, 1))) > 0
9370 && mode == SImode)
9372 *cost += COSTS_N_INSNS (3);
9374 if (speed_p)
9375 *cost += 2 * extra_cost->alu.logical
9376 + extra_cost->alu.arith;
9377 return true;
9380 /* Fall-through. */
9381 case UMOD:
9382 /* Make the cost of sdiv more expensive so that when both sdiv and udiv
9383 are possible, udiv is preferred. */
9384 *cost = LIBCALL_COST (2) + (code == MOD ? COSTS_N_INSNS (1) : 0);
9385 return false; /* All arguments must be in registers. */
9387 case ROTATE:
9388 if (mode == SImode && REG_P (XEXP (x, 1)))
9390 *cost += (COSTS_N_INSNS (1)
9391 + rtx_cost (XEXP (x, 0), mode, code, 0, speed_p));
9392 if (speed_p)
9393 *cost += extra_cost->alu.shift_reg;
9394 return true;
9396 /* Fall through */
9397 case ROTATERT:
9398 case ASHIFT:
9399 case LSHIFTRT:
9400 case ASHIFTRT:
9401 if (mode == DImode && CONST_INT_P (XEXP (x, 1)))
9403 *cost += (COSTS_N_INSNS (2)
9404 + rtx_cost (XEXP (x, 0), mode, code, 0, speed_p));
9405 if (speed_p)
9406 *cost += 2 * extra_cost->alu.shift;
9407 return true;
9409 else if (mode == SImode)
9411 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
9412 /* Slightly disparage register shifts at -Os, but not by much. */
9413 if (!CONST_INT_P (XEXP (x, 1)))
9414 *cost += ((speed_p ? extra_cost->alu.shift_reg : 1)
9415 + rtx_cost (XEXP (x, 1), mode, code, 1, speed_p));
9416 return true;
9418 else if (GET_MODE_CLASS (mode) == MODE_INT
9419 && GET_MODE_SIZE (mode) < 4)
9421 if (code == ASHIFT)
9423 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
9424 /* Slightly disparage register shifts at -Os, but not by
9425 much. */
9426 if (!CONST_INT_P (XEXP (x, 1)))
9427 *cost += ((speed_p ? extra_cost->alu.shift_reg : 1)
9428 + rtx_cost (XEXP (x, 1), mode, code, 1, speed_p));
9430 else if (code == LSHIFTRT || code == ASHIFTRT)
9432 if (arm_arch_thumb2 && CONST_INT_P (XEXP (x, 1)))
9434 /* Can use SBFX/UBFX. */
9435 if (speed_p)
9436 *cost += extra_cost->alu.bfx;
9437 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
9439 else
9441 *cost += COSTS_N_INSNS (1);
9442 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
9443 if (speed_p)
9445 if (CONST_INT_P (XEXP (x, 1)))
9446 *cost += 2 * extra_cost->alu.shift;
9447 else
9448 *cost += (extra_cost->alu.shift
9449 + extra_cost->alu.shift_reg);
9451 else
9452 /* Slightly disparage register shifts. */
9453 *cost += !CONST_INT_P (XEXP (x, 1));
9456 else /* Rotates. */
9458 *cost = COSTS_N_INSNS (2 + !CONST_INT_P (XEXP (x, 1)));
9459 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
9460 if (speed_p)
9462 if (CONST_INT_P (XEXP (x, 1)))
9463 *cost += (2 * extra_cost->alu.shift
9464 + extra_cost->alu.log_shift);
9465 else
9466 *cost += (extra_cost->alu.shift
9467 + extra_cost->alu.shift_reg
9468 + extra_cost->alu.log_shift_reg);
9471 return true;
9474 *cost = LIBCALL_COST (2);
9475 return false;
9477 case BSWAP:
9478 if (arm_arch6)
9480 if (mode == SImode)
9482 if (speed_p)
9483 *cost += extra_cost->alu.rev;
9485 return false;
9488 else
9490 /* No rev instruction available. Look at arm_legacy_rev
9491 and thumb_legacy_rev for the form of RTL used then. */
9492 if (TARGET_THUMB)
9494 *cost += COSTS_N_INSNS (9);
9496 if (speed_p)
9498 *cost += 6 * extra_cost->alu.shift;
9499 *cost += 3 * extra_cost->alu.logical;
9502 else
9504 *cost += COSTS_N_INSNS (4);
9506 if (speed_p)
9508 *cost += 2 * extra_cost->alu.shift;
9509 *cost += extra_cost->alu.arith_shift;
9510 *cost += 2 * extra_cost->alu.logical;
9513 return true;
9515 return false;
9517 case MINUS:
9518 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9519 && (mode == SFmode || !TARGET_VFP_SINGLE))
9521 if (GET_CODE (XEXP (x, 0)) == MULT
9522 || GET_CODE (XEXP (x, 1)) == MULT)
9524 rtx mul_op0, mul_op1, sub_op;
9526 if (speed_p)
9527 *cost += extra_cost->fp[mode != SFmode].mult_addsub;
9529 if (GET_CODE (XEXP (x, 0)) == MULT)
9531 mul_op0 = XEXP (XEXP (x, 0), 0);
9532 mul_op1 = XEXP (XEXP (x, 0), 1);
9533 sub_op = XEXP (x, 1);
9535 else
9537 mul_op0 = XEXP (XEXP (x, 1), 0);
9538 mul_op1 = XEXP (XEXP (x, 1), 1);
9539 sub_op = XEXP (x, 0);
9542 /* The first operand of the multiply may be optionally
9543 negated. */
9544 if (GET_CODE (mul_op0) == NEG)
9545 mul_op0 = XEXP (mul_op0, 0);
9547 *cost += (rtx_cost (mul_op0, mode, code, 0, speed_p)
9548 + rtx_cost (mul_op1, mode, code, 0, speed_p)
9549 + rtx_cost (sub_op, mode, code, 0, speed_p));
9551 return true;
9554 if (speed_p)
9555 *cost += extra_cost->fp[mode != SFmode].addsub;
9556 return false;
9559 if (mode == SImode)
9561 rtx shift_by_reg = NULL;
9562 rtx shift_op;
9563 rtx non_shift_op;
9565 shift_op = shifter_op_p (XEXP (x, 0), &shift_by_reg);
9566 if (shift_op == NULL)
9568 shift_op = shifter_op_p (XEXP (x, 1), &shift_by_reg);
9569 non_shift_op = XEXP (x, 0);
9571 else
9572 non_shift_op = XEXP (x, 1);
9574 if (shift_op != NULL)
9576 if (shift_by_reg != NULL)
9578 if (speed_p)
9579 *cost += extra_cost->alu.arith_shift_reg;
9580 *cost += rtx_cost (shift_by_reg, mode, code, 0, speed_p);
9582 else if (speed_p)
9583 *cost += extra_cost->alu.arith_shift;
9585 *cost += rtx_cost (shift_op, mode, code, 0, speed_p);
9586 *cost += rtx_cost (non_shift_op, mode, code, 0, speed_p);
9587 return true;
9590 if (arm_arch_thumb2
9591 && GET_CODE (XEXP (x, 1)) == MULT)
9593 /* MLS. */
9594 if (speed_p)
9595 *cost += extra_cost->mult[0].add;
9596 *cost += rtx_cost (XEXP (x, 0), mode, MINUS, 0, speed_p);
9597 *cost += rtx_cost (XEXP (XEXP (x, 1), 0), mode, MULT, 0, speed_p);
9598 *cost += rtx_cost (XEXP (XEXP (x, 1), 1), mode, MULT, 1, speed_p);
9599 return true;
9602 if (CONST_INT_P (XEXP (x, 0)))
9604 int insns = arm_gen_constant (MINUS, SImode, NULL_RTX,
9605 INTVAL (XEXP (x, 0)), NULL_RTX,
9606 NULL_RTX, 1, 0);
9607 *cost = COSTS_N_INSNS (insns);
9608 if (speed_p)
9609 *cost += insns * extra_cost->alu.arith;
9610 *cost += rtx_cost (XEXP (x, 1), mode, code, 1, speed_p);
9611 return true;
9613 else if (speed_p)
9614 *cost += extra_cost->alu.arith;
9616 return false;
9619 if (GET_MODE_CLASS (mode) == MODE_INT
9620 && GET_MODE_SIZE (mode) < 4)
9622 rtx shift_op, shift_reg;
9623 shift_reg = NULL;
9625 /* We check both sides of the MINUS for shifter operands since,
9626 unlike PLUS, it's not commutative. */
9628 HANDLE_NARROW_SHIFT_ARITH (MINUS, 0)
9629 HANDLE_NARROW_SHIFT_ARITH (MINUS, 1)
9631 /* Slightly disparage, as we might need to widen the result. */
9632 *cost += 1;
9633 if (speed_p)
9634 *cost += extra_cost->alu.arith;
9636 if (CONST_INT_P (XEXP (x, 0)))
9638 *cost += rtx_cost (XEXP (x, 1), mode, code, 1, speed_p);
9639 return true;
9642 return false;
9645 if (mode == DImode)
9647 *cost += COSTS_N_INSNS (1);
9649 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND)
9651 rtx op1 = XEXP (x, 1);
9653 if (speed_p)
9654 *cost += 2 * extra_cost->alu.arith;
9656 if (GET_CODE (op1) == ZERO_EXTEND)
9657 *cost += rtx_cost (XEXP (op1, 0), VOIDmode, ZERO_EXTEND,
9658 0, speed_p);
9659 else
9660 *cost += rtx_cost (op1, mode, MINUS, 1, speed_p);
9661 *cost += rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode, ZERO_EXTEND,
9662 0, speed_p);
9663 return true;
9665 else if (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
9667 if (speed_p)
9668 *cost += extra_cost->alu.arith + extra_cost->alu.arith_shift;
9669 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode, SIGN_EXTEND,
9670 0, speed_p)
9671 + rtx_cost (XEXP (x, 1), mode, MINUS, 1, speed_p));
9672 return true;
9674 else if (GET_CODE (XEXP (x, 1)) == ZERO_EXTEND
9675 || GET_CODE (XEXP (x, 1)) == SIGN_EXTEND)
9677 if (speed_p)
9678 *cost += (extra_cost->alu.arith
9679 + (GET_CODE (XEXP (x, 1)) == ZERO_EXTEND
9680 ? extra_cost->alu.arith
9681 : extra_cost->alu.arith_shift));
9682 *cost += (rtx_cost (XEXP (x, 0), mode, MINUS, 0, speed_p)
9683 + rtx_cost (XEXP (XEXP (x, 1), 0), VOIDmode,
9684 GET_CODE (XEXP (x, 1)), 0, speed_p));
9685 return true;
9688 if (speed_p)
9689 *cost += 2 * extra_cost->alu.arith;
9690 return false;
9693 /* Vector mode? */
9695 *cost = LIBCALL_COST (2);
9696 return false;
9698 case PLUS:
9699 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9700 && (mode == SFmode || !TARGET_VFP_SINGLE))
9702 if (GET_CODE (XEXP (x, 0)) == MULT)
9704 rtx mul_op0, mul_op1, add_op;
9706 if (speed_p)
9707 *cost += extra_cost->fp[mode != SFmode].mult_addsub;
9709 mul_op0 = XEXP (XEXP (x, 0), 0);
9710 mul_op1 = XEXP (XEXP (x, 0), 1);
9711 add_op = XEXP (x, 1);
9713 *cost += (rtx_cost (mul_op0, mode, code, 0, speed_p)
9714 + rtx_cost (mul_op1, mode, code, 0, speed_p)
9715 + rtx_cost (add_op, mode, code, 0, speed_p));
9717 return true;
9720 if (speed_p)
9721 *cost += extra_cost->fp[mode != SFmode].addsub;
9722 return false;
9724 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
9726 *cost = LIBCALL_COST (2);
9727 return false;
9730 /* Narrow modes can be synthesized in SImode, but the range
9731 of useful sub-operations is limited. Check for shift operations
9732 on one of the operands. Only left shifts can be used in the
9733 narrow modes. */
9734 if (GET_MODE_CLASS (mode) == MODE_INT
9735 && GET_MODE_SIZE (mode) < 4)
9737 rtx shift_op, shift_reg;
9738 shift_reg = NULL;
9740 HANDLE_NARROW_SHIFT_ARITH (PLUS, 0)
9742 if (CONST_INT_P (XEXP (x, 1)))
9744 int insns = arm_gen_constant (PLUS, SImode, NULL_RTX,
9745 INTVAL (XEXP (x, 1)), NULL_RTX,
9746 NULL_RTX, 1, 0);
9747 *cost = COSTS_N_INSNS (insns);
9748 if (speed_p)
9749 *cost += insns * extra_cost->alu.arith;
9750 /* Slightly penalize a narrow operation as the result may
9751 need widening. */
9752 *cost += 1 + rtx_cost (XEXP (x, 0), mode, PLUS, 0, speed_p);
9753 return true;
9756 /* Slightly penalize a narrow operation as the result may
9757 need widening. */
9758 *cost += 1;
9759 if (speed_p)
9760 *cost += extra_cost->alu.arith;
9762 return false;
9765 if (mode == SImode)
9767 rtx shift_op, shift_reg;
9769 if (TARGET_INT_SIMD
9770 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
9771 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
9773 /* UXTA[BH] or SXTA[BH]. */
9774 if (speed_p)
9775 *cost += extra_cost->alu.extend_arith;
9776 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode, ZERO_EXTEND,
9777 0, speed_p)
9778 + rtx_cost (XEXP (x, 1), mode, PLUS, 0, speed_p));
9779 return true;
9782 shift_reg = NULL;
9783 shift_op = shifter_op_p (XEXP (x, 0), &shift_reg);
9784 if (shift_op != NULL)
9786 if (shift_reg)
9788 if (speed_p)
9789 *cost += extra_cost->alu.arith_shift_reg;
9790 *cost += rtx_cost (shift_reg, mode, ASHIFT, 1, speed_p);
9792 else if (speed_p)
9793 *cost += extra_cost->alu.arith_shift;
9795 *cost += (rtx_cost (shift_op, mode, ASHIFT, 0, speed_p)
9796 + rtx_cost (XEXP (x, 1), mode, PLUS, 1, speed_p));
9797 return true;
9799 if (GET_CODE (XEXP (x, 0)) == MULT)
9801 rtx mul_op = XEXP (x, 0);
9803 if (TARGET_DSP_MULTIPLY
9804 && ((GET_CODE (XEXP (mul_op, 0)) == SIGN_EXTEND
9805 && (GET_CODE (XEXP (mul_op, 1)) == SIGN_EXTEND
9806 || (GET_CODE (XEXP (mul_op, 1)) == ASHIFTRT
9807 && CONST_INT_P (XEXP (XEXP (mul_op, 1), 1))
9808 && INTVAL (XEXP (XEXP (mul_op, 1), 1)) == 16)))
9809 || (GET_CODE (XEXP (mul_op, 0)) == ASHIFTRT
9810 && CONST_INT_P (XEXP (XEXP (mul_op, 0), 1))
9811 && INTVAL (XEXP (XEXP (mul_op, 0), 1)) == 16
9812 && (GET_CODE (XEXP (mul_op, 1)) == SIGN_EXTEND
9813 || (GET_CODE (XEXP (mul_op, 1)) == ASHIFTRT
9814 && CONST_INT_P (XEXP (XEXP (mul_op, 1), 1))
9815 && (INTVAL (XEXP (XEXP (mul_op, 1), 1))
9816 == 16))))))
9818 /* SMLA[BT][BT]. */
9819 if (speed_p)
9820 *cost += extra_cost->mult[0].extend_add;
9821 *cost += (rtx_cost (XEXP (XEXP (mul_op, 0), 0), mode,
9822 SIGN_EXTEND, 0, speed_p)
9823 + rtx_cost (XEXP (XEXP (mul_op, 1), 0), mode,
9824 SIGN_EXTEND, 0, speed_p)
9825 + rtx_cost (XEXP (x, 1), mode, PLUS, 1, speed_p));
9826 return true;
9829 if (speed_p)
9830 *cost += extra_cost->mult[0].add;
9831 *cost += (rtx_cost (XEXP (mul_op, 0), mode, MULT, 0, speed_p)
9832 + rtx_cost (XEXP (mul_op, 1), mode, MULT, 1, speed_p)
9833 + rtx_cost (XEXP (x, 1), mode, PLUS, 1, speed_p));
9834 return true;
9836 if (CONST_INT_P (XEXP (x, 1)))
9838 int insns = arm_gen_constant (PLUS, SImode, NULL_RTX,
9839 INTVAL (XEXP (x, 1)), NULL_RTX,
9840 NULL_RTX, 1, 0);
9841 *cost = COSTS_N_INSNS (insns);
9842 if (speed_p)
9843 *cost += insns * extra_cost->alu.arith;
9844 *cost += rtx_cost (XEXP (x, 0), mode, PLUS, 0, speed_p);
9845 return true;
9847 else if (speed_p)
9848 *cost += extra_cost->alu.arith;
9850 return false;
9853 if (mode == DImode)
9855 if (arm_arch3m
9856 && GET_CODE (XEXP (x, 0)) == MULT
9857 && ((GET_CODE (XEXP (XEXP (x, 0), 0)) == ZERO_EXTEND
9858 && GET_CODE (XEXP (XEXP (x, 0), 1)) == ZERO_EXTEND)
9859 || (GET_CODE (XEXP (XEXP (x, 0), 0)) == SIGN_EXTEND
9860 && GET_CODE (XEXP (XEXP (x, 0), 1)) == SIGN_EXTEND)))
9862 if (speed_p)
9863 *cost += extra_cost->mult[1].extend_add;
9864 *cost += (rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0), mode,
9865 ZERO_EXTEND, 0, speed_p)
9866 + rtx_cost (XEXP (XEXP (XEXP (x, 0), 1), 0), mode,
9867 ZERO_EXTEND, 0, speed_p)
9868 + rtx_cost (XEXP (x, 1), mode, PLUS, 1, speed_p));
9869 return true;
9872 *cost += COSTS_N_INSNS (1);
9874 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
9875 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
9877 if (speed_p)
9878 *cost += (extra_cost->alu.arith
9879 + (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
9880 ? extra_cost->alu.arith
9881 : extra_cost->alu.arith_shift));
9883 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode, ZERO_EXTEND,
9884 0, speed_p)
9885 + rtx_cost (XEXP (x, 1), mode, PLUS, 1, speed_p));
9886 return true;
9889 if (speed_p)
9890 *cost += 2 * extra_cost->alu.arith;
9891 return false;
9894 /* Vector mode? */
9895 *cost = LIBCALL_COST (2);
9896 return false;
9897 case IOR:
9898 if (mode == SImode && arm_arch6 && aarch_rev16_p (x))
9900 if (speed_p)
9901 *cost += extra_cost->alu.rev;
9903 return true;
9905 /* Fall through. */
9906 case AND: case XOR:
9907 if (mode == SImode)
9909 enum rtx_code subcode = GET_CODE (XEXP (x, 0));
9910 rtx op0 = XEXP (x, 0);
9911 rtx shift_op, shift_reg;
9913 if (subcode == NOT
9914 && (code == AND
9915 || (code == IOR && TARGET_THUMB2)))
9916 op0 = XEXP (op0, 0);
9918 shift_reg = NULL;
9919 shift_op = shifter_op_p (op0, &shift_reg);
9920 if (shift_op != NULL)
9922 if (shift_reg)
9924 if (speed_p)
9925 *cost += extra_cost->alu.log_shift_reg;
9926 *cost += rtx_cost (shift_reg, mode, ASHIFT, 1, speed_p);
9928 else if (speed_p)
9929 *cost += extra_cost->alu.log_shift;
9931 *cost += (rtx_cost (shift_op, mode, ASHIFT, 0, speed_p)
9932 + rtx_cost (XEXP (x, 1), mode, code, 1, speed_p));
9933 return true;
9936 if (CONST_INT_P (XEXP (x, 1)))
9938 int insns = arm_gen_constant (code, SImode, NULL_RTX,
9939 INTVAL (XEXP (x, 1)), NULL_RTX,
9940 NULL_RTX, 1, 0);
9942 *cost = COSTS_N_INSNS (insns);
9943 if (speed_p)
9944 *cost += insns * extra_cost->alu.logical;
9945 *cost += rtx_cost (op0, mode, code, 0, speed_p);
9946 return true;
9949 if (speed_p)
9950 *cost += extra_cost->alu.logical;
9951 *cost += (rtx_cost (op0, mode, code, 0, speed_p)
9952 + rtx_cost (XEXP (x, 1), mode, code, 1, speed_p));
9953 return true;
9956 if (mode == DImode)
9958 rtx op0 = XEXP (x, 0);
9959 enum rtx_code subcode = GET_CODE (op0);
9961 *cost += COSTS_N_INSNS (1);
9963 if (subcode == NOT
9964 && (code == AND
9965 || (code == IOR && TARGET_THUMB2)))
9966 op0 = XEXP (op0, 0);
9968 if (GET_CODE (op0) == ZERO_EXTEND)
9970 if (speed_p)
9971 *cost += 2 * extra_cost->alu.logical;
9973 *cost += (rtx_cost (XEXP (op0, 0), VOIDmode, ZERO_EXTEND,
9974 0, speed_p)
9975 + rtx_cost (XEXP (x, 1), mode, code, 0, speed_p));
9976 return true;
9978 else if (GET_CODE (op0) == SIGN_EXTEND)
9980 if (speed_p)
9981 *cost += extra_cost->alu.logical + extra_cost->alu.log_shift;
9983 *cost += (rtx_cost (XEXP (op0, 0), VOIDmode, SIGN_EXTEND,
9984 0, speed_p)
9985 + rtx_cost (XEXP (x, 1), mode, code, 0, speed_p));
9986 return true;
9989 if (speed_p)
9990 *cost += 2 * extra_cost->alu.logical;
9992 return true;
9994 /* Vector mode? */
9996 *cost = LIBCALL_COST (2);
9997 return false;
9999 case MULT:
10000 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10001 && (mode == SFmode || !TARGET_VFP_SINGLE))
10003 rtx op0 = XEXP (x, 0);
10005 if (GET_CODE (op0) == NEG && !flag_rounding_math)
10006 op0 = XEXP (op0, 0);
10008 if (speed_p)
10009 *cost += extra_cost->fp[mode != SFmode].mult;
10011 *cost += (rtx_cost (op0, mode, MULT, 0, speed_p)
10012 + rtx_cost (XEXP (x, 1), mode, MULT, 1, speed_p));
10013 return true;
10015 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
10017 *cost = LIBCALL_COST (2);
10018 return false;
10021 if (mode == SImode)
10023 if (TARGET_DSP_MULTIPLY
10024 && ((GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
10025 && (GET_CODE (XEXP (x, 1)) == SIGN_EXTEND
10026 || (GET_CODE (XEXP (x, 1)) == ASHIFTRT
10027 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
10028 && INTVAL (XEXP (XEXP (x, 1), 1)) == 16)))
10029 || (GET_CODE (XEXP (x, 0)) == ASHIFTRT
10030 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
10031 && INTVAL (XEXP (XEXP (x, 0), 1)) == 16
10032 && (GET_CODE (XEXP (x, 1)) == SIGN_EXTEND
10033 || (GET_CODE (XEXP (x, 1)) == ASHIFTRT
10034 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
10035 && (INTVAL (XEXP (XEXP (x, 1), 1))
10036 == 16))))))
10038 /* SMUL[TB][TB]. */
10039 if (speed_p)
10040 *cost += extra_cost->mult[0].extend;
10041 *cost += rtx_cost (XEXP (XEXP (x, 0), 0), mode,
10042 SIGN_EXTEND, 0, speed_p);
10043 *cost += rtx_cost (XEXP (XEXP (x, 1), 0), mode,
10044 SIGN_EXTEND, 1, speed_p);
10045 return true;
10047 if (speed_p)
10048 *cost += extra_cost->mult[0].simple;
10049 return false;
10052 if (mode == DImode)
10054 if (arm_arch3m
10055 && ((GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
10056 && GET_CODE (XEXP (x, 1)) == ZERO_EXTEND)
10057 || (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
10058 && GET_CODE (XEXP (x, 1)) == SIGN_EXTEND)))
10060 if (speed_p)
10061 *cost += extra_cost->mult[1].extend;
10062 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode,
10063 ZERO_EXTEND, 0, speed_p)
10064 + rtx_cost (XEXP (XEXP (x, 1), 0), VOIDmode,
10065 ZERO_EXTEND, 0, speed_p));
10066 return true;
10069 *cost = LIBCALL_COST (2);
10070 return false;
10073 /* Vector mode? */
10074 *cost = LIBCALL_COST (2);
10075 return false;
10077 case NEG:
10078 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10079 && (mode == SFmode || !TARGET_VFP_SINGLE))
10081 if (GET_CODE (XEXP (x, 0)) == MULT)
10083 /* VNMUL. */
10084 *cost = rtx_cost (XEXP (x, 0), mode, NEG, 0, speed_p);
10085 return true;
10088 if (speed_p)
10089 *cost += extra_cost->fp[mode != SFmode].neg;
10091 return false;
10093 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
10095 *cost = LIBCALL_COST (1);
10096 return false;
10099 if (mode == SImode)
10101 if (GET_CODE (XEXP (x, 0)) == ABS)
10103 *cost += COSTS_N_INSNS (1);
10104 /* Assume the non-flag-changing variant. */
10105 if (speed_p)
10106 *cost += (extra_cost->alu.log_shift
10107 + extra_cost->alu.arith_shift);
10108 *cost += rtx_cost (XEXP (XEXP (x, 0), 0), mode, ABS, 0, speed_p);
10109 return true;
10112 if (GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMPARE
10113 || GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMM_COMPARE)
10115 *cost += COSTS_N_INSNS (1);
10116 /* No extra cost for MOV imm and MVN imm. */
10117 /* If the comparison op is using the flags, there's no further
10118 cost; otherwise we need to add the cost of the comparison. */
10119 if (!(REG_P (XEXP (XEXP (x, 0), 0))
10120 && REGNO (XEXP (XEXP (x, 0), 0)) == CC_REGNUM
10121 && XEXP (XEXP (x, 0), 1) == const0_rtx))
10123 mode = GET_MODE (XEXP (XEXP (x, 0), 0));
10124 *cost += (COSTS_N_INSNS (1)
10125 + rtx_cost (XEXP (XEXP (x, 0), 0), mode, COMPARE,
10126 0, speed_p)
10127 + rtx_cost (XEXP (XEXP (x, 0), 1), mode, COMPARE,
10128 1, speed_p));
10129 if (speed_p)
10130 *cost += extra_cost->alu.arith;
10132 return true;
10135 if (speed_p)
10136 *cost += extra_cost->alu.arith;
10137 return false;
10140 if (GET_MODE_CLASS (mode) == MODE_INT
10141 && GET_MODE_SIZE (mode) < 4)
10143 /* Slightly disparage, as we might need an extend operation. */
10144 *cost += 1;
10145 if (speed_p)
10146 *cost += extra_cost->alu.arith;
10147 return false;
10150 if (mode == DImode)
10152 *cost += COSTS_N_INSNS (1);
10153 if (speed_p)
10154 *cost += 2 * extra_cost->alu.arith;
10155 return false;
10158 /* Vector mode? */
10159 *cost = LIBCALL_COST (1);
10160 return false;
10162 case NOT:
10163 if (mode == SImode)
10165 rtx shift_op;
10166 rtx shift_reg = NULL;
10168 shift_op = shifter_op_p (XEXP (x, 0), &shift_reg);
10170 if (shift_op)
10172 if (shift_reg != NULL)
10174 if (speed_p)
10175 *cost += extra_cost->alu.log_shift_reg;
10176 *cost += rtx_cost (shift_reg, mode, ASHIFT, 1, speed_p);
10178 else if (speed_p)
10179 *cost += extra_cost->alu.log_shift;
10180 *cost += rtx_cost (shift_op, mode, ASHIFT, 0, speed_p);
10181 return true;
10184 if (speed_p)
10185 *cost += extra_cost->alu.logical;
10186 return false;
10188 if (mode == DImode)
10190 *cost += COSTS_N_INSNS (1);
10191 return false;
10194 /* Vector mode? */
10196 *cost += LIBCALL_COST (1);
10197 return false;
10199 case IF_THEN_ELSE:
10201 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
10203 *cost += COSTS_N_INSNS (3);
10204 return true;
10206 int op1cost = rtx_cost (XEXP (x, 1), mode, SET, 1, speed_p);
10207 int op2cost = rtx_cost (XEXP (x, 2), mode, SET, 1, speed_p);
10209 *cost = rtx_cost (XEXP (x, 0), mode, IF_THEN_ELSE, 0, speed_p);
10210 /* Assume that if one arm of the if_then_else is a register,
10211 it will be tied to the result and the conditional
10212 insn eliminated. */
10213 if (REG_P (XEXP (x, 1)))
10214 *cost += op2cost;
10215 else if (REG_P (XEXP (x, 2)))
10216 *cost += op1cost;
10217 else
10219 if (speed_p)
10221 if (extra_cost->alu.non_exec_costs_exec)
10222 *cost += op1cost + op2cost + extra_cost->alu.non_exec;
10223 else
10224 *cost += MAX (op1cost, op2cost) + extra_cost->alu.non_exec;
10226 else
10227 *cost += op1cost + op2cost;
10230 return true;
10232 case COMPARE:
10233 if (cc_register (XEXP (x, 0), VOIDmode) && XEXP (x, 1) == const0_rtx)
10234 *cost = 0;
10235 else
10237 machine_mode op0mode;
10238 /* We'll mostly assume that the cost of a compare is the cost of the
10239 LHS. However, there are some notable exceptions. */
10241 /* Floating point compares are never done as side-effects. */
10242 op0mode = GET_MODE (XEXP (x, 0));
10243 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (op0mode) == MODE_FLOAT
10244 && (op0mode == SFmode || !TARGET_VFP_SINGLE))
10246 if (speed_p)
10247 *cost += extra_cost->fp[op0mode != SFmode].compare;
10249 if (XEXP (x, 1) == CONST0_RTX (op0mode))
10251 *cost += rtx_cost (XEXP (x, 0), op0mode, code, 0, speed_p);
10252 return true;
10255 return false;
10257 else if (GET_MODE_CLASS (op0mode) == MODE_FLOAT)
10259 *cost = LIBCALL_COST (2);
10260 return false;
10263 /* DImode compares normally take two insns. */
10264 if (op0mode == DImode)
10266 *cost += COSTS_N_INSNS (1);
10267 if (speed_p)
10268 *cost += 2 * extra_cost->alu.arith;
10269 return false;
10272 if (op0mode == SImode)
10274 rtx shift_op;
10275 rtx shift_reg;
10277 if (XEXP (x, 1) == const0_rtx
10278 && !(REG_P (XEXP (x, 0))
10279 || (GET_CODE (XEXP (x, 0)) == SUBREG
10280 && REG_P (SUBREG_REG (XEXP (x, 0))))))
10282 *cost = rtx_cost (XEXP (x, 0), op0mode, COMPARE, 0, speed_p);
10284 /* Multiply operations that set the flags are often
10285 significantly more expensive. */
10286 if (speed_p
10287 && GET_CODE (XEXP (x, 0)) == MULT
10288 && !power_of_two_operand (XEXP (XEXP (x, 0), 1), mode))
10289 *cost += extra_cost->mult[0].flag_setting;
10291 if (speed_p
10292 && GET_CODE (XEXP (x, 0)) == PLUS
10293 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
10294 && !power_of_two_operand (XEXP (XEXP (XEXP (x, 0),
10295 0), 1), mode))
10296 *cost += extra_cost->mult[0].flag_setting;
10297 return true;
10300 shift_reg = NULL;
10301 shift_op = shifter_op_p (XEXP (x, 0), &shift_reg);
10302 if (shift_op != NULL)
10304 if (shift_reg != NULL)
10306 *cost += rtx_cost (shift_reg, op0mode, ASHIFT,
10307 1, speed_p);
10308 if (speed_p)
10309 *cost += extra_cost->alu.arith_shift_reg;
10311 else if (speed_p)
10312 *cost += extra_cost->alu.arith_shift;
10313 *cost += rtx_cost (shift_op, op0mode, ASHIFT, 0, speed_p);
10314 *cost += rtx_cost (XEXP (x, 1), op0mode, COMPARE, 1, speed_p);
10315 return true;
10318 if (speed_p)
10319 *cost += extra_cost->alu.arith;
10320 if (CONST_INT_P (XEXP (x, 1))
10321 && const_ok_for_op (INTVAL (XEXP (x, 1)), COMPARE))
10323 *cost += rtx_cost (XEXP (x, 0), op0mode, COMPARE, 0, speed_p);
10324 return true;
10326 return false;
10329 /* Vector mode? */
10331 *cost = LIBCALL_COST (2);
10332 return false;
10334 return true;
10336 case EQ:
10337 case NE:
10338 case LT:
10339 case LE:
10340 case GT:
10341 case GE:
10342 case LTU:
10343 case LEU:
10344 case GEU:
10345 case GTU:
10346 case ORDERED:
10347 case UNORDERED:
10348 case UNEQ:
10349 case UNLE:
10350 case UNLT:
10351 case UNGE:
10352 case UNGT:
10353 case LTGT:
10354 if (outer_code == SET)
10356 /* Is it a store-flag operation? */
10357 if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM
10358 && XEXP (x, 1) == const0_rtx)
10360 /* Thumb also needs an IT insn. */
10361 *cost += COSTS_N_INSNS (TARGET_THUMB ? 2 : 1);
10362 return true;
10364 if (XEXP (x, 1) == const0_rtx)
10366 switch (code)
10368 case LT:
10369 /* LSR Rd, Rn, #31. */
10370 if (speed_p)
10371 *cost += extra_cost->alu.shift;
10372 break;
10374 case EQ:
10375 /* RSBS T1, Rn, #0
10376 ADC Rd, Rn, T1. */
10378 case NE:
10379 /* SUBS T1, Rn, #1
10380 SBC Rd, Rn, T1. */
10381 *cost += COSTS_N_INSNS (1);
10382 break;
10384 case LE:
10385 /* RSBS T1, Rn, Rn, LSR #31
10386 ADC Rd, Rn, T1. */
10387 *cost += COSTS_N_INSNS (1);
10388 if (speed_p)
10389 *cost += extra_cost->alu.arith_shift;
10390 break;
10392 case GT:
10393 /* RSB Rd, Rn, Rn, ASR #1
10394 LSR Rd, Rd, #31. */
10395 *cost += COSTS_N_INSNS (1);
10396 if (speed_p)
10397 *cost += (extra_cost->alu.arith_shift
10398 + extra_cost->alu.shift);
10399 break;
10401 case GE:
10402 /* ASR Rd, Rn, #31
10403 ADD Rd, Rn, #1. */
10404 *cost += COSTS_N_INSNS (1);
10405 if (speed_p)
10406 *cost += extra_cost->alu.shift;
10407 break;
10409 default:
10410 /* Remaining cases are either meaningless or would take
10411 three insns anyway. */
10412 *cost = COSTS_N_INSNS (3);
10413 break;
10415 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
10416 return true;
10418 else
10420 *cost += COSTS_N_INSNS (TARGET_THUMB ? 3 : 2);
10421 if (CONST_INT_P (XEXP (x, 1))
10422 && const_ok_for_op (INTVAL (XEXP (x, 1)), COMPARE))
10424 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
10425 return true;
10428 return false;
10431 /* Not directly inside a set. If it involves the condition code
10432 register it must be the condition for a branch, cond_exec or
10433 I_T_E operation. Since the comparison is performed elsewhere
10434 this is just the control part which has no additional
10435 cost. */
10436 else if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM
10437 && XEXP (x, 1) == const0_rtx)
10439 *cost = 0;
10440 return true;
10442 return false;
10444 case ABS:
10445 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10446 && (mode == SFmode || !TARGET_VFP_SINGLE))
10448 if (speed_p)
10449 *cost += extra_cost->fp[mode != SFmode].neg;
10451 return false;
10453 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
10455 *cost = LIBCALL_COST (1);
10456 return false;
10459 if (mode == SImode)
10461 if (speed_p)
10462 *cost += extra_cost->alu.log_shift + extra_cost->alu.arith_shift;
10463 return false;
10465 /* Vector mode? */
10466 *cost = LIBCALL_COST (1);
10467 return false;
10469 case SIGN_EXTEND:
10470 if ((arm_arch4 || GET_MODE (XEXP (x, 0)) == SImode)
10471 && MEM_P (XEXP (x, 0)))
10473 if (mode == DImode)
10474 *cost += COSTS_N_INSNS (1);
10476 if (!speed_p)
10477 return true;
10479 if (GET_MODE (XEXP (x, 0)) == SImode)
10480 *cost += extra_cost->ldst.load;
10481 else
10482 *cost += extra_cost->ldst.load_sign_extend;
10484 if (mode == DImode)
10485 *cost += extra_cost->alu.shift;
10487 return true;
10490 /* Widening from less than 32 bits requires an extend operation. */
10491 if (GET_MODE (XEXP (x, 0)) != SImode && arm_arch6)
10493 /* We have SXTB/SXTH. */
10494 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
10495 if (speed_p)
10496 *cost += extra_cost->alu.extend;
10498 else if (GET_MODE (XEXP (x, 0)) != SImode)
10500 /* Needs two shifts. */
10501 *cost += COSTS_N_INSNS (1);
10502 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
10503 if (speed_p)
10504 *cost += 2 * extra_cost->alu.shift;
10507 /* Widening beyond 32 bits requires one more insn. */
10508 if (mode == DImode)
10510 *cost += COSTS_N_INSNS (1);
10511 if (speed_p)
10512 *cost += extra_cost->alu.shift;
10515 return true;
10517 case ZERO_EXTEND:
10518 if ((arm_arch4
10519 || GET_MODE (XEXP (x, 0)) == SImode
10520 || GET_MODE (XEXP (x, 0)) == QImode)
10521 && MEM_P (XEXP (x, 0)))
10523 *cost = rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
10525 if (mode == DImode)
10526 *cost += COSTS_N_INSNS (1); /* No speed penalty. */
10528 return true;
10531 /* Widening from less than 32 bits requires an extend operation. */
10532 if (GET_MODE (XEXP (x, 0)) == QImode)
10534 /* UXTB can be a shorter instruction in Thumb2, but it might
10535 be slower than the AND Rd, Rn, #255 alternative. When
10536 optimizing for speed it should never be slower to use
10537 AND, and we don't really model 16-bit vs 32-bit insns
10538 here. */
10539 if (speed_p)
10540 *cost += extra_cost->alu.logical;
10542 else if (GET_MODE (XEXP (x, 0)) != SImode && arm_arch6)
10544 /* We have UXTB/UXTH. */
10545 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
10546 if (speed_p)
10547 *cost += extra_cost->alu.extend;
10549 else if (GET_MODE (XEXP (x, 0)) != SImode)
10551 /* Needs two shifts. It's marginally preferable to use
10552 shifts rather than two BIC instructions as the second
10553 shift may merge with a subsequent insn as a shifter
10554 op. */
10555 *cost = COSTS_N_INSNS (2);
10556 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
10557 if (speed_p)
10558 *cost += 2 * extra_cost->alu.shift;
10561 /* Widening beyond 32 bits requires one more insn. */
10562 if (mode == DImode)
10564 *cost += COSTS_N_INSNS (1); /* No speed penalty. */
10567 return true;
10569 case CONST_INT:
10570 *cost = 0;
10571 /* CONST_INT has no mode, so we cannot tell for sure how many
10572 insns are really going to be needed. The best we can do is
10573 look at the value passed. If it fits in SImode, then assume
10574 that's the mode it will be used for. Otherwise assume it
10575 will be used in DImode. */
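/* For example (hypothetical value): on a host where HOST_WIDE_INT is
   64 bits wide, 0x1ffffffff does not survive trunc_int_for_mode (...,
   SImode), so it is assumed to be a DImode constant and is costed below
   as two SImode constants (the low 32 bits and the value shifted right
   by 32).  */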
10576 if (INTVAL (x) == trunc_int_for_mode (INTVAL (x), SImode))
10577 mode = SImode;
10578 else
10579 mode = DImode;
10581 /* Avoid blowing up in arm_gen_constant (). */
10582 if (!(outer_code == PLUS
10583 || outer_code == AND
10584 || outer_code == IOR
10585 || outer_code == XOR
10586 || outer_code == MINUS))
10587 outer_code = SET;
10589 const_int_cost:
10590 if (mode == SImode)
10592 *cost += COSTS_N_INSNS (arm_gen_constant (outer_code, SImode, NULL,
10593 INTVAL (x), NULL, NULL,
10594 0, 0));
10595 /* Extra costs? */
10597 else
10599 *cost += COSTS_N_INSNS (arm_gen_constant
10600 (outer_code, SImode, NULL,
10601 trunc_int_for_mode (INTVAL (x), SImode),
10602 NULL, NULL, 0, 0)
10603 + arm_gen_constant (outer_code, SImode, NULL,
10604 INTVAL (x) >> 32, NULL,
10605 NULL, 0, 0));
10606 /* Extra costs? */
10609 return true;
10611 case CONST:
10612 case LABEL_REF:
10613 case SYMBOL_REF:
10614 if (speed_p)
10616 if (arm_arch_thumb2 && !flag_pic)
10617 *cost += COSTS_N_INSNS (1);
10618 else
10619 *cost += extra_cost->ldst.load;
10621 else
10622 *cost += COSTS_N_INSNS (1);
10624 if (flag_pic)
10626 *cost += COSTS_N_INSNS (1);
10627 if (speed_p)
10628 *cost += extra_cost->alu.arith;
10631 return true;
10633 case CONST_FIXED:
10634 *cost = COSTS_N_INSNS (4);
10635 /* Fixme. */
10636 return true;
10638 case CONST_DOUBLE:
10639 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10640 && (mode == SFmode || !TARGET_VFP_SINGLE))
10642 if (vfp3_const_double_rtx (x))
10644 if (speed_p)
10645 *cost += extra_cost->fp[mode == DFmode].fpconst;
10646 return true;
10649 if (speed_p)
10651 if (mode == DFmode)
10652 *cost += extra_cost->ldst.loadd;
10653 else
10654 *cost += extra_cost->ldst.loadf;
10656 else
10657 *cost += COSTS_N_INSNS (1 + (mode == DFmode));
10659 return true;
10661 *cost = COSTS_N_INSNS (4);
10662 return true;
10664 case CONST_VECTOR:
10665 /* Fixme. */
10666 if (TARGET_NEON
10667 && TARGET_HARD_FLOAT
10668 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
10669 && neon_immediate_valid_for_move (x, mode, NULL, NULL))
10670 *cost = COSTS_N_INSNS (1);
10671 else
10672 *cost = COSTS_N_INSNS (4);
10673 return true;
10675 case HIGH:
10676 case LO_SUM:
10677 /* When optimizing for size, we prefer constant pool entries to
10678 MOVW/MOVT pairs, so bump the cost of these slightly. */
10679 if (!speed_p)
10680 *cost += 1;
10681 return true;
10683 case CLZ:
10684 if (speed_p)
10685 *cost += extra_cost->alu.clz;
10686 return false;
10688 case SMIN:
10689 if (XEXP (x, 1) == const0_rtx)
10691 if (speed_p)
10692 *cost += extra_cost->alu.log_shift;
10693 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
10694 return true;
10696 /* Fall through. */
10697 case SMAX:
10698 case UMIN:
10699 case UMAX:
10700 *cost += COSTS_N_INSNS (1);
10701 return false;
10703 case TRUNCATE:
10704 if (GET_CODE (XEXP (x, 0)) == ASHIFTRT
10705 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
10706 && INTVAL (XEXP (XEXP (x, 0), 1)) == 32
10707 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
10708 && ((GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == SIGN_EXTEND
10709 && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == SIGN_EXTEND)
10710 || (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == ZERO_EXTEND
10711 && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1))
10712 == ZERO_EXTEND))))
10714 if (speed_p)
10715 *cost += extra_cost->mult[1].extend;
10716 *cost += (rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0), VOIDmode,
10717 ZERO_EXTEND, 0, speed_p)
10718 + rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 1), VOIDmode,
10719 ZERO_EXTEND, 0, speed_p));
10720 return true;
10722 *cost = LIBCALL_COST (1);
10723 return false;
10725 case UNSPEC_VOLATILE:
10726 case UNSPEC:
10727 return arm_unspec_cost (x, outer_code, speed_p, cost);
10729 case PC:
10730 /* Reading the PC is like reading any other register. Writing it
10731 is more expensive, but we take that into account elsewhere. */
10732 *cost = 0;
10733 return true;
10735 case ZERO_EXTRACT:
10736 /* TODO: Simple zero_extract of bottom bits using AND. */
10737 /* Fall through. */
10738 case SIGN_EXTRACT:
10739 if (arm_arch6
10740 && mode == SImode
10741 && CONST_INT_P (XEXP (x, 1))
10742 && CONST_INT_P (XEXP (x, 2)))
10744 if (speed_p)
10745 *cost += extra_cost->alu.bfx;
10746 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
10747 return true;
10749 /* Without UBFX/SBFX, need to resort to shift operations. */
10750 *cost += COSTS_N_INSNS (1);
10751 if (speed_p)
10752 *cost += 2 * extra_cost->alu.shift;
10753 *cost += rtx_cost (XEXP (x, 0), mode, ASHIFT, 0, speed_p);
10754 return true;
10756 case FLOAT_EXTEND:
10757 if (TARGET_HARD_FLOAT)
10759 if (speed_p)
10760 *cost += extra_cost->fp[mode == DFmode].widen;
10761 if (!TARGET_VFP5
10762 && GET_MODE (XEXP (x, 0)) == HFmode)
10764 /* Pre v8, widening HF->DF is a two-step process, first
10765 widening to SFmode. */
10766 *cost += COSTS_N_INSNS (1);
10767 if (speed_p)
10768 *cost += extra_cost->fp[0].widen;
10770 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
10771 return true;
10774 *cost = LIBCALL_COST (1);
10775 return false;
10777 case FLOAT_TRUNCATE:
10778 if (TARGET_HARD_FLOAT)
10780 if (speed_p)
10781 *cost += extra_cost->fp[mode == DFmode].narrow;
10782 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
10783 return true;
10784 /* Vector modes? */
10786 *cost = LIBCALL_COST (1);
10787 return false;
10789 case FMA:
10790 if (TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_FMA)
10792 rtx op0 = XEXP (x, 0);
10793 rtx op1 = XEXP (x, 1);
10794 rtx op2 = XEXP (x, 2);
10797 /* vfms or vfnma. */
10798 if (GET_CODE (op0) == NEG)
10799 op0 = XEXP (op0, 0);
10801 /* vfnms or vfnma. */
10802 if (GET_CODE (op2) == NEG)
10803 op2 = XEXP (op2, 0);
10805 *cost += rtx_cost (op0, mode, FMA, 0, speed_p);
10806 *cost += rtx_cost (op1, mode, FMA, 1, speed_p);
10807 *cost += rtx_cost (op2, mode, FMA, 2, speed_p);
10809 if (speed_p)
10810 *cost += extra_cost->fp[mode == DFmode].fma;
10812 return true;
10815 *cost = LIBCALL_COST (3);
10816 return false;
10818 case FIX:
10819 case UNSIGNED_FIX:
10820 if (TARGET_HARD_FLOAT)
10822 /* The *combine_vcvtf2i reduces a vmul+vcvt into
10823 a vcvt fixed-point conversion. */
10824 if (code == FIX && mode == SImode
10825 && GET_CODE (XEXP (x, 0)) == FIX
10826 && GET_MODE (XEXP (x, 0)) == SFmode
10827 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
10828 && vfp3_const_double_for_bits (XEXP (XEXP (XEXP (x, 0), 0), 1))
10829 > 0)
10831 if (speed_p)
10832 *cost += extra_cost->fp[0].toint;
10834 *cost += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0), mode,
10835 code, 0, speed_p);
10836 return true;
10839 if (GET_MODE_CLASS (mode) == MODE_INT)
10841 mode = GET_MODE (XEXP (x, 0));
10842 if (speed_p)
10843 *cost += extra_cost->fp[mode == DFmode].toint;
10844 /* Strip off the 'cost' of rounding towards zero. */
10845 if (GET_CODE (XEXP (x, 0)) == FIX)
10846 *cost += rtx_cost (XEXP (XEXP (x, 0), 0), mode, code,
10847 0, speed_p);
10848 else
10849 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
10850 /* ??? Increase the cost to deal with transferring from
10851 FP -> CORE registers? */
10852 return true;
10854 else if (GET_MODE_CLASS (mode) == MODE_FLOAT
10855 && TARGET_VFP5)
10857 if (speed_p)
10858 *cost += extra_cost->fp[mode == DFmode].roundint;
10859 return false;
10861 /* Vector costs? */
10863 *cost = LIBCALL_COST (1);
10864 return false;
10866 case FLOAT:
10867 case UNSIGNED_FLOAT:
10868 if (TARGET_HARD_FLOAT)
10870 /* ??? Increase the cost to deal with transferring from CORE
10871 -> FP registers? */
10872 if (speed_p)
10873 *cost += extra_cost->fp[mode == DFmode].fromint;
10874 return false;
10876 *cost = LIBCALL_COST (1);
10877 return false;
10879 case CALL:
10880 return true;
10882 case ASM_OPERANDS:
10884 /* Just a guess: the number of instructions in the asm
10885 plus one insn per input. Always a minimum of COSTS_N_INSNS (1)
10886 though (see PR60663). */
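/* Worked example (illustrative): an asm whose template expands to two
   instructions and which has three inputs is costed as
   COSTS_N_INSNS (2 + 3) == COSTS_N_INSNS (5).  */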
10887 int asm_length = MAX (1, asm_str_count (ASM_OPERANDS_TEMPLATE (x)));
10888 int num_operands = ASM_OPERANDS_INPUT_LENGTH (x);
10890 *cost = COSTS_N_INSNS (asm_length + num_operands);
10891 return true;
10893 default:
10894 if (mode != VOIDmode)
10895 *cost = COSTS_N_INSNS (ARM_NUM_REGS (mode));
10896 else
10897 *cost = COSTS_N_INSNS (4); /* Who knows? */
10898 return false;
10902 #undef HANDLE_NARROW_SHIFT_ARITH
10904 /* RTX costs entry point. */
10906 static bool
10907 arm_rtx_costs (rtx x, machine_mode mode ATTRIBUTE_UNUSED, int outer_code,
10908 int opno ATTRIBUTE_UNUSED, int *total, bool speed)
10910 bool result;
10911 int code = GET_CODE (x);
10912 gcc_assert (current_tune->insn_extra_cost);
10914 result = arm_rtx_costs_internal (x, (enum rtx_code) code,
10915 (enum rtx_code) outer_code,
10916 current_tune->insn_extra_cost,
10917 total, speed);
10919 if (dump_file && (dump_flags & TDF_DETAILS))
10921 print_rtl_single (dump_file, x);
10922 fprintf (dump_file, "\n%s cost: %d (%s)\n", speed ? "Hot" : "Cold",
10923 *total, result ? "final" : "partial");
10925 return result;
10928 /* All address computations that can be done are free, but rtx cost returns
10929 the same for practically all of them. So we weight the different types
10930 of address here in the order (most pref first):
10931 PRE/POST_INC/DEC, SHIFT or NON-INT sum, INT sum, REG, MEM or LABEL. */
10932 static inline int
10933 arm_arm_address_cost (rtx x)
10935 enum rtx_code c = GET_CODE (x);
10937 if (c == PRE_INC || c == PRE_DEC || c == POST_INC || c == POST_DEC)
10938 return 0;
10939 if (c == MEM || c == LABEL_REF || c == SYMBOL_REF)
10940 return 10;
10942 if (c == PLUS)
10944 if (CONST_INT_P (XEXP (x, 1)))
10945 return 2;
10947 if (ARITHMETIC_P (XEXP (x, 0)) || ARITHMETIC_P (XEXP (x, 1)))
10948 return 3;
10950 return 4;
10953 return 6;
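/* Illustrative examples (hypothetical addresses) of the weights above:
   (post_inc (reg)) costs 0, (plus (reg) (const_int 8)) costs 2,
   (plus (reg) (mult (reg) (const_int 4))) costs 3, a bare (reg) costs 6
   and a (symbol_ref) costs 10.  */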
10956 static inline int
10957 arm_thumb_address_cost (rtx x)
10959 enum rtx_code c = GET_CODE (x);
10961 if (c == REG)
10962 return 1;
10963 if (c == PLUS
10964 && REG_P (XEXP (x, 0))
10965 && CONST_INT_P (XEXP (x, 1)))
10966 return 1;
10968 return 2;
10971 static int
10972 arm_address_cost (rtx x, machine_mode mode ATTRIBUTE_UNUSED,
10973 addr_space_t as ATTRIBUTE_UNUSED, bool speed ATTRIBUTE_UNUSED)
10975 return TARGET_32BIT ? arm_arm_address_cost (x) : arm_thumb_address_cost (x);
10978 /* Adjust cost hook for XScale. */
10979 static bool
10980 xscale_sched_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep,
10981 int * cost)
10983 /* Some true dependencies can have a higher cost depending
10984 on precisely how certain input operands are used. */
10985 if (dep_type == 0
10986 && recog_memoized (insn) >= 0
10987 && recog_memoized (dep) >= 0)
10989 int shift_opnum = get_attr_shift (insn);
10990 enum attr_type attr_type = get_attr_type (dep);
10992 /* If nonzero, SHIFT_OPNUM contains the operand number of a shifted
10993 operand for INSN. If we have a shifted input operand and the
10994 instruction we depend on is another ALU instruction, then we may
10995 have to account for an additional stall. */
10996 if (shift_opnum != 0
10997 && (attr_type == TYPE_ALU_SHIFT_IMM
10998 || attr_type == TYPE_ALUS_SHIFT_IMM
10999 || attr_type == TYPE_LOGIC_SHIFT_IMM
11000 || attr_type == TYPE_LOGICS_SHIFT_IMM
11001 || attr_type == TYPE_ALU_SHIFT_REG
11002 || attr_type == TYPE_ALUS_SHIFT_REG
11003 || attr_type == TYPE_LOGIC_SHIFT_REG
11004 || attr_type == TYPE_LOGICS_SHIFT_REG
11005 || attr_type == TYPE_MOV_SHIFT
11006 || attr_type == TYPE_MVN_SHIFT
11007 || attr_type == TYPE_MOV_SHIFT_REG
11008 || attr_type == TYPE_MVN_SHIFT_REG))
11010 rtx shifted_operand;
11011 int opno;
11013 /* Get the shifted operand. */
11014 extract_insn (insn);
11015 shifted_operand = recog_data.operand[shift_opnum];
11017 /* Iterate over all the operands in DEP. If we write an operand
11018 that overlaps with SHIFTED_OPERAND, then we have to increase the
11019 cost of this dependency. */
11020 extract_insn (dep);
11021 preprocess_constraints (dep);
11022 for (opno = 0; opno < recog_data.n_operands; opno++)
11024 /* We can ignore strict inputs. */
11025 if (recog_data.operand_type[opno] == OP_IN)
11026 continue;
11028 if (reg_overlap_mentioned_p (recog_data.operand[opno],
11029 shifted_operand))
11031 *cost = 2;
11032 return false;
11037 return true;
11040 /* Adjust cost hook for Cortex A9. */
11041 static bool
11042 cortex_a9_sched_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep,
11043 int * cost)
11045 switch (dep_type)
11047 case REG_DEP_ANTI:
11048 *cost = 0;
11049 return false;
11051 case REG_DEP_TRUE:
11052 case REG_DEP_OUTPUT:
11053 if (recog_memoized (insn) >= 0
11054 && recog_memoized (dep) >= 0)
11056 if (GET_CODE (PATTERN (insn)) == SET)
11058 if (GET_MODE_CLASS
11059 (GET_MODE (SET_DEST (PATTERN (insn)))) == MODE_FLOAT
11060 || GET_MODE_CLASS
11061 (GET_MODE (SET_SRC (PATTERN (insn)))) == MODE_FLOAT)
11063 enum attr_type attr_type_insn = get_attr_type (insn);
11064 enum attr_type attr_type_dep = get_attr_type (dep);
11066 /* By default all dependencies of the form
11067 s0 = s0 <op> s1
11068 s0 = s0 <op> s2
11069 have an extra latency of 1 cycle because
11070 of the input and output dependency in this
11071 case. However this gets modeled as a true
11072 dependency and hence all these checks. */
11073 if (REG_P (SET_DEST (PATTERN (insn)))
11074 && reg_set_p (SET_DEST (PATTERN (insn)), dep))
11076 /* FMACS is a special case where the dependent
11077 instruction can be issued 3 cycles before
11078 the normal latency in case of an output
11079 dependency. */
11080 if ((attr_type_insn == TYPE_FMACS
11081 || attr_type_insn == TYPE_FMACD)
11082 && (attr_type_dep == TYPE_FMACS
11083 || attr_type_dep == TYPE_FMACD))
11085 if (dep_type == REG_DEP_OUTPUT)
11086 *cost = insn_default_latency (dep) - 3;
11087 else
11088 *cost = insn_default_latency (dep);
11089 return false;
11091 else
11093 if (dep_type == REG_DEP_OUTPUT)
11094 *cost = insn_default_latency (dep) + 1;
11095 else
11096 *cost = insn_default_latency (dep);
11098 return false;
11103 break;
11105 default:
11106 gcc_unreachable ();
11109 return true;
11112 /* Adjust cost hook for FA726TE. */
11113 static bool
11114 fa726te_sched_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep,
11115 int * cost)
11117 /* For FA726TE, a true dependency on CPSR (i.e. a condition-setting insn
11118 followed by a predicated one) has a penalty of 3. */
11119 if (dep_type == REG_DEP_TRUE
11120 && recog_memoized (insn) >= 0
11121 && recog_memoized (dep) >= 0
11122 && get_attr_conds (dep) == CONDS_SET)
11124 /* Use of carry (e.g. 64-bit arithmetic) in ALU: 3-cycle latency. */
11125 if (get_attr_conds (insn) == CONDS_USE
11126 && get_attr_type (insn) != TYPE_BRANCH)
11128 *cost = 3;
11129 return false;
11132 if (GET_CODE (PATTERN (insn)) == COND_EXEC
11133 || get_attr_conds (insn) == CONDS_USE)
11135 *cost = 0;
11136 return false;
11140 return true;
11143 /* Implement TARGET_REGISTER_MOVE_COST.
11145 Moves between VFP_REGS and GENERAL_REGS are a single insn, but such
11146 a move is typically more expensive than a single memory access. We set
11147 the cost to less than two memory accesses so that floating
11148 point to integer conversion does not go through memory. */
11151 arm_register_move_cost (machine_mode mode ATTRIBUTE_UNUSED,
11152 reg_class_t from, reg_class_t to)
11154 if (TARGET_32BIT)
11156 if ((IS_VFP_CLASS (from) && !IS_VFP_CLASS (to))
11157 || (!IS_VFP_CLASS (from) && IS_VFP_CLASS (to)))
11158 return 15;
11159 else if ((from == IWMMXT_REGS && to != IWMMXT_REGS)
11160 || (from != IWMMXT_REGS && to == IWMMXT_REGS))
11161 return 4;
11162 else if (from == IWMMXT_GR_REGS || to == IWMMXT_GR_REGS)
11163 return 20;
11164 else
11165 return 2;
11167 else
11169 if (from == HI_REGS || to == HI_REGS)
11170 return 4;
11171 else
11172 return 2;
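/* Note (added for clarity): with the TARGET_32BIT memory-move cost of 10
   defined below, the VFP<->core value of 15 used above sits between one
   memory access (10) and two (20), which keeps float-to-int conversions
   out of memory while ordinary register-to-register moves stay cheap
   (cost 2).  */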
11176 /* Implement TARGET_MEMORY_MOVE_COST. */
11179 arm_memory_move_cost (machine_mode mode, reg_class_t rclass,
11180 bool in ATTRIBUTE_UNUSED)
11182 if (TARGET_32BIT)
11183 return 10;
11184 else
11186 if (GET_MODE_SIZE (mode) < 4)
11187 return 8;
11188 else
11189 return ((2 * GET_MODE_SIZE (mode)) * (rclass == LO_REGS ? 1 : 2));
11193 /* Vectorizer cost model implementation. */
11195 /* Implement targetm.vectorize.builtin_vectorization_cost. */
11196 static int
11197 arm_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
11198 tree vectype,
11199 int misalign ATTRIBUTE_UNUSED)
11201 unsigned elements;
11203 switch (type_of_cost)
11205 case scalar_stmt:
11206 return current_tune->vec_costs->scalar_stmt_cost;
11208 case scalar_load:
11209 return current_tune->vec_costs->scalar_load_cost;
11211 case scalar_store:
11212 return current_tune->vec_costs->scalar_store_cost;
11214 case vector_stmt:
11215 return current_tune->vec_costs->vec_stmt_cost;
11217 case vector_load:
11218 return current_tune->vec_costs->vec_align_load_cost;
11220 case vector_store:
11221 return current_tune->vec_costs->vec_store_cost;
11223 case vec_to_scalar:
11224 return current_tune->vec_costs->vec_to_scalar_cost;
11226 case scalar_to_vec:
11227 return current_tune->vec_costs->scalar_to_vec_cost;
11229 case unaligned_load:
11230 return current_tune->vec_costs->vec_unalign_load_cost;
11232 case unaligned_store:
11233 return current_tune->vec_costs->vec_unalign_store_cost;
11235 case cond_branch_taken:
11236 return current_tune->vec_costs->cond_taken_branch_cost;
11238 case cond_branch_not_taken:
11239 return current_tune->vec_costs->cond_not_taken_branch_cost;
11241 case vec_perm:
11242 case vec_promote_demote:
11243 return current_tune->vec_costs->vec_stmt_cost;
11245 case vec_construct:
11246 elements = TYPE_VECTOR_SUBPARTS (vectype);
11247 return elements / 2 + 1;
11249 default:
11250 gcc_unreachable ();
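/* Illustrative arithmetic for the vec_construct case above: building a
   four-element vector is costed as 4 / 2 + 1 == 3.  */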
11254 /* Implement targetm.vectorize.add_stmt_cost. */
11256 static unsigned
11257 arm_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
11258 struct _stmt_vec_info *stmt_info, int misalign,
11259 enum vect_cost_model_location where)
11261 unsigned *cost = (unsigned *) data;
11262 unsigned retval = 0;
11264 if (flag_vect_cost_model)
11266 tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
11267 int stmt_cost = arm_builtin_vectorization_cost (kind, vectype, misalign);
11269 /* Statements in an inner loop relative to the loop being
11270 vectorized are weighted more heavily. The value here is
11271 arbitrary and could potentially be improved with analysis. */
11272 if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info))
11273 count *= 50; /* FIXME. */
11275 retval = (unsigned) (count * stmt_cost);
11276 cost[where] += retval;
11279 return retval;
11282 /* Return true if and only if this insn can dual-issue only as older. */
11283 static bool
11284 cortexa7_older_only (rtx_insn *insn)
11286 if (recog_memoized (insn) < 0)
11287 return false;
11289 switch (get_attr_type (insn))
11291 case TYPE_ALU_DSP_REG:
11292 case TYPE_ALU_SREG:
11293 case TYPE_ALUS_SREG:
11294 case TYPE_LOGIC_REG:
11295 case TYPE_LOGICS_REG:
11296 case TYPE_ADC_REG:
11297 case TYPE_ADCS_REG:
11298 case TYPE_ADR:
11299 case TYPE_BFM:
11300 case TYPE_REV:
11301 case TYPE_MVN_REG:
11302 case TYPE_SHIFT_IMM:
11303 case TYPE_SHIFT_REG:
11304 case TYPE_LOAD_BYTE:
11305 case TYPE_LOAD1:
11306 case TYPE_STORE1:
11307 case TYPE_FFARITHS:
11308 case TYPE_FADDS:
11309 case TYPE_FFARITHD:
11310 case TYPE_FADDD:
11311 case TYPE_FMOV:
11312 case TYPE_F_CVT:
11313 case TYPE_FCMPS:
11314 case TYPE_FCMPD:
11315 case TYPE_FCONSTS:
11316 case TYPE_FCONSTD:
11317 case TYPE_FMULS:
11318 case TYPE_FMACS:
11319 case TYPE_FMULD:
11320 case TYPE_FMACD:
11321 case TYPE_FDIVS:
11322 case TYPE_FDIVD:
11323 case TYPE_F_MRC:
11324 case TYPE_F_MRRC:
11325 case TYPE_F_FLAG:
11326 case TYPE_F_LOADS:
11327 case TYPE_F_STORES:
11328 return true;
11329 default:
11330 return false;
11334 /* Return true if and only if this insn can dual-issue as younger. */
11335 static bool
11336 cortexa7_younger (FILE *file, int verbose, rtx_insn *insn)
11338 if (recog_memoized (insn) < 0)
11340 if (verbose > 5)
11341 fprintf (file, ";; not cortexa7_younger %d\n", INSN_UID (insn));
11342 return false;
11345 switch (get_attr_type (insn))
11347 case TYPE_ALU_IMM:
11348 case TYPE_ALUS_IMM:
11349 case TYPE_LOGIC_IMM:
11350 case TYPE_LOGICS_IMM:
11351 case TYPE_EXTEND:
11352 case TYPE_MVN_IMM:
11353 case TYPE_MOV_IMM:
11354 case TYPE_MOV_REG:
11355 case TYPE_MOV_SHIFT:
11356 case TYPE_MOV_SHIFT_REG:
11357 case TYPE_BRANCH:
11358 case TYPE_CALL:
11359 return true;
11360 default:
11361 return false;
11366 /* Look for an instruction that can dual issue only as an older
11367 instruction, and move it in front of any instructions that can
11368 dual-issue as younger, while preserving the relative order of all
11369 other instructions in the ready list. This is a heuristic to help
11370 dual-issue in later cycles, by postponing issue of more flexible
11371 instructions. This heuristic may affect dual issue opportunities
11372 in the current cycle. */
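/* For example, if the ready list (most ready first) holds
   { ADD r0, r1, #1 ; VADD.F32 s0, s1, s2 ; SUB r2, r2, #4 }, then the ADD
   and SUB can issue as younger while the VADD can only issue as older, so
   the VADD is moved in front of the ADD; the more flexible ADD remains
   available to pair with another older-only insn in a later cycle.  */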
11373 static void
11374 cortexa7_sched_reorder (FILE *file, int verbose, rtx_insn **ready,
11375 int *n_readyp, int clock)
11377 int i;
11378 int first_older_only = -1, first_younger = -1;
11380 if (verbose > 5)
11381 fprintf (file,
11382 ";; sched_reorder for cycle %d with %d insns in ready list\n",
11383 clock,
11384 *n_readyp);
11386 /* Traverse the ready list from the head (the instruction to issue
11387 first), looking for the first instruction that can issue as
11388 younger and the first instruction that can dual-issue only as
11389 older. */
11390 for (i = *n_readyp - 1; i >= 0; i--)
11392 rtx_insn *insn = ready[i];
11393 if (cortexa7_older_only (insn))
11395 first_older_only = i;
11396 if (verbose > 5)
11397 fprintf (file, ";; reorder older found %d\n", INSN_UID (insn));
11398 break;
11400 else if (cortexa7_younger (file, verbose, insn) && first_younger == -1)
11401 first_younger = i;
11404 /* Nothing to reorder because either no younger insn was found, or an insn
11405 that can dual-issue only as older appears before any insn that
11406 can dual-issue as younger. */
11407 if (first_younger == -1)
11409 if (verbose > 5)
11410 fprintf (file, ";; sched_reorder nothing to reorder as no younger\n");
11411 return;
11414 /* Nothing to reorder because no older-only insn in the ready list. */
11415 if (first_older_only == -1)
11417 if (verbose > 5)
11418 fprintf (file, ";; sched_reorder nothing to reorder as no older_only\n");
11419 return;
11422 /* Move first_older_only insn before first_younger. */
11423 if (verbose > 5)
11424 fprintf (file, ";; cortexa7_sched_reorder insn %d before %d\n",
11425 INSN_UID(ready [first_older_only]),
11426 INSN_UID(ready [first_younger]));
11427 rtx_insn *first_older_only_insn = ready [first_older_only];
11428 for (i = first_older_only; i < first_younger; i++)
11430 ready[i] = ready[i+1];
11433 ready[i] = first_older_only_insn;
11434 return;
11437 /* Implement TARGET_SCHED_REORDER. */
11438 static int
11439 arm_sched_reorder (FILE *file, int verbose, rtx_insn **ready, int *n_readyp,
11440 int clock)
11442 switch (arm_tune)
11444 case TARGET_CPU_cortexa7:
11445 cortexa7_sched_reorder (file, verbose, ready, n_readyp, clock);
11446 break;
11447 default:
11448 /* Do nothing for other cores. */
11449 break;
11452 return arm_issue_rate ();
11455 /* This function implements the target macro TARGET_SCHED_ADJUST_COST.
11456 It corrects the value of COST based on the relationship between
11457 INSN and DEP through the dependence LINK. It returns the new
11458 value. There is a per-core adjust_cost hook to adjust scheduler costs
11459 and the per-core hook can choose to completely override the generic
11460 adjust_cost function. Only put bits of code into arm_adjust_cost that
11461 are common across all cores. */
11462 static int
11463 arm_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep, int cost,
11464 unsigned int)
11466 rtx i_pat, d_pat;
11468 /* When generating Thumb-1 code, we want to place flag-setting operations
11469 close to a conditional branch which depends on them, so that we can
11470 omit the comparison. */
11471 if (TARGET_THUMB1
11472 && dep_type == 0
11473 && recog_memoized (insn) == CODE_FOR_cbranchsi4_insn
11474 && recog_memoized (dep) >= 0
11475 && get_attr_conds (dep) == CONDS_SET)
11476 return 0;
11478 if (current_tune->sched_adjust_cost != NULL)
11480 if (!current_tune->sched_adjust_cost (insn, dep_type, dep, &cost))
11481 return cost;
11484 /* XXX Is this strictly true? */
11485 if (dep_type == REG_DEP_ANTI
11486 || dep_type == REG_DEP_OUTPUT)
11487 return 0;
11489 /* Call insns don't incur a stall, even if they follow a load. */
11490 if (dep_type == 0
11491 && CALL_P (insn))
11492 return 1;
11494 if ((i_pat = single_set (insn)) != NULL
11495 && MEM_P (SET_SRC (i_pat))
11496 && (d_pat = single_set (dep)) != NULL
11497 && MEM_P (SET_DEST (d_pat)))
11499 rtx src_mem = XEXP (SET_SRC (i_pat), 0);
11500 /* This is a load after a store, there is no conflict if the load reads
11501 from a cached area. Assume that loads from the stack, and from the
11502 constant pool are cached, and that others will miss. This is a
11503 hack. */
11505 if ((GET_CODE (src_mem) == SYMBOL_REF
11506 && CONSTANT_POOL_ADDRESS_P (src_mem))
11507 || reg_mentioned_p (stack_pointer_rtx, src_mem)
11508 || reg_mentioned_p (frame_pointer_rtx, src_mem)
11509 || reg_mentioned_p (hard_frame_pointer_rtx, src_mem))
11510 return 1;
11513 return cost;
11517 arm_max_conditional_execute (void)
11519 return max_insns_skipped;
11522 static int
11523 arm_default_branch_cost (bool speed_p, bool predictable_p ATTRIBUTE_UNUSED)
11525 if (TARGET_32BIT)
11526 return (TARGET_THUMB2 && !speed_p) ? 1 : 4;
11527 else
11528 return (optimize > 0) ? 2 : 0;
11531 static int
11532 arm_cortex_a5_branch_cost (bool speed_p, bool predictable_p)
11534 return speed_p ? 0 : arm_default_branch_cost (speed_p, predictable_p);
11537 /* Thumb-2 branches are relatively cheap on Cortex-M processors ("1 + P cycles"
11538 on Cortex-M4, where P varies from 1 to 3 according to some criteria), since
11539 sequences of non-executed instructions in IT blocks probably take the same
11540 amount of time as executed instructions (and the IT instruction itself takes
11541 space in icache). This function was experimentally determined to give good
11542 results on a popular embedded benchmark. */
11544 static int
11545 arm_cortex_m_branch_cost (bool speed_p, bool predictable_p)
11547 return (TARGET_32BIT && speed_p) ? 1
11548 : arm_default_branch_cost (speed_p, predictable_p);
11551 static int
11552 arm_cortex_m7_branch_cost (bool speed_p, bool predictable_p)
11554 return speed_p ? 0 : arm_default_branch_cost (speed_p, predictable_p);
11557 static bool fp_consts_inited = false;
11559 static REAL_VALUE_TYPE value_fp0;
11561 static void
11562 init_fp_table (void)
11564 REAL_VALUE_TYPE r;
11566 r = REAL_VALUE_ATOF ("0", DFmode);
11567 value_fp0 = r;
11568 fp_consts_inited = true;
11571 /* Return TRUE if rtx X is a valid immediate FP constant. */
11573 arm_const_double_rtx (rtx x)
11575 const REAL_VALUE_TYPE *r;
11577 if (!fp_consts_inited)
11578 init_fp_table ();
11580 r = CONST_DOUBLE_REAL_VALUE (x);
11581 if (REAL_VALUE_MINUS_ZERO (*r))
11582 return 0;
11584 if (real_equal (r, &value_fp0))
11585 return 1;
11587 return 0;
11590 /* VFPv3 has a fairly wide range of representable immediates, formed from
11591 "quarter-precision" floating-point values. These can be evaluated using this
11592 formula (with ^ for exponentiation):
11594 -1^s * n * 2^-r
11596 Where 's' is a sign bit (0/1), 'n' and 'r' are integers such that
11597 16 <= n <= 31 and 0 <= r <= 7.
11599 These values are mapped onto an 8-bit integer ABCDEFGH s.t.
11601 - A (most-significant) is the sign bit.
11602 - BCD are the exponent (encoded as r XOR 3).
11603 - EFGH are the mantissa (encoded as n - 16).
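   For illustration: 1.5 = 24 * 2^-4, so s = 0, n = 24 and r = 4.  The
   encoded byte is A = 0, BCD = (4 XOR 3) = 111 and EFGH = (24 - 16) = 1000,
   i.e. 0b01111000 == 0x78, which is exactly the index that
   vfp3_const_double_index below returns for a CONST_DOUBLE of 1.5.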
11606 /* Return an integer index for a VFPv3 immediate operand X suitable for the
11607 fconst[sd] instruction, or -1 if X isn't suitable. */
11608 static int
11609 vfp3_const_double_index (rtx x)
11611 REAL_VALUE_TYPE r, m;
11612 int sign, exponent;
11613 unsigned HOST_WIDE_INT mantissa, mant_hi;
11614 unsigned HOST_WIDE_INT mask;
11615 int point_pos = 2 * HOST_BITS_PER_WIDE_INT - 1;
11616 bool fail;
11618 if (!TARGET_VFP3 || !CONST_DOUBLE_P (x))
11619 return -1;
11621 r = *CONST_DOUBLE_REAL_VALUE (x);
11623 /* We can't represent these things, so detect them first. */
11624 if (REAL_VALUE_ISINF (r) || REAL_VALUE_ISNAN (r) || REAL_VALUE_MINUS_ZERO (r))
11625 return -1;
11627 /* Extract sign, exponent and mantissa. */
11628 sign = REAL_VALUE_NEGATIVE (r) ? 1 : 0;
11629 r = real_value_abs (&r);
11630 exponent = REAL_EXP (&r);
11631 /* For the mantissa, we expand into two HOST_WIDE_INTS, apart from the
11632 highest (sign) bit, with a fixed binary point at bit point_pos.
11633 WARNING: If there's ever a VFP version which uses more than 2 * H_W_I - 1
11634 bits for the mantissa, this may fail (low bits would be lost). */
11635 real_ldexp (&m, &r, point_pos - exponent);
11636 wide_int w = real_to_integer (&m, &fail, HOST_BITS_PER_WIDE_INT * 2);
11637 mantissa = w.elt (0);
11638 mant_hi = w.elt (1);
11640 /* If there are bits set in the low part of the mantissa, we can't
11641 represent this value. */
11642 if (mantissa != 0)
11643 return -1;
11645 /* Now make it so that mantissa contains the most-significant bits, and move
11646 the point_pos to indicate that the least-significant bits have been
11647 discarded. */
11648 point_pos -= HOST_BITS_PER_WIDE_INT;
11649 mantissa = mant_hi;
11651 /* We can permit four significant bits of mantissa only, plus a high bit
11652 which is always 1. */
11653 mask = (HOST_WIDE_INT_1U << (point_pos - 5)) - 1;
11654 if ((mantissa & mask) != 0)
11655 return -1;
11657 /* Now we know the mantissa is in range, chop off the unneeded bits. */
11658 mantissa >>= point_pos - 5;
11660 /* The mantissa may be zero. Disallow that case. (It's possible to load the
11661 floating-point immediate zero with Neon using an integer-zero load, but
11662 that case is handled elsewhere.) */
11663 if (mantissa == 0)
11664 return -1;
11666 gcc_assert (mantissa >= 16 && mantissa <= 31);
11668 /* The value of 5 here would be 4 if GCC used IEEE754-like encoding (where
11669 normalized significands are in the range [1, 2). (Our mantissa is shifted
11670 left 4 places at this point relative to normalized IEEE754 values). GCC
11671 internally uses [0.5, 1) (see real.c), so the exponent returned from
11672 REAL_EXP must be altered. */
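/* For instance, 1.0 is held internally as 0.5 * 2^1, so REAL_EXP yields 1,
   while the quarter-precision form above writes 1.0 as 16 * 2^-4 (n = 16,
   r = 4); 5 - 1 recovers exactly that r.  */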
11673 exponent = 5 - exponent;
11675 if (exponent < 0 || exponent > 7)
11676 return -1;
11678 /* Sign, mantissa and exponent are now in the correct form to plug into the
11679 formula described in the comment above. */
11680 return (sign << 7) | ((exponent ^ 3) << 4) | (mantissa - 16);
11683 /* Return TRUE if rtx X is a valid immediate VFPv3 constant. */
11685 vfp3_const_double_rtx (rtx x)
11687 if (!TARGET_VFP3)
11688 return 0;
11690 return vfp3_const_double_index (x) != -1;
11693 /* Recognize immediates which can be used in various Neon instructions. Legal
11694 immediates are described by the following table (for VMVN variants, the
11695 bitwise inverse of the constant shown is recognized. In either case, VMOV
11696 is output and the correct instruction to use for a given constant is chosen
11697 by the assembler). The constant shown is replicated across all elements of
11698 the destination vector.
11700 insn elems variant constant (binary)
11701 ---- ----- ------- -----------------
11702 vmov i32 0 00000000 00000000 00000000 abcdefgh
11703 vmov i32 1 00000000 00000000 abcdefgh 00000000
11704 vmov i32 2 00000000 abcdefgh 00000000 00000000
11705 vmov i32 3 abcdefgh 00000000 00000000 00000000
11706 vmov i16 4 00000000 abcdefgh
11707 vmov i16 5 abcdefgh 00000000
11708 vmvn i32 6 00000000 00000000 00000000 abcdefgh
11709 vmvn i32 7 00000000 00000000 abcdefgh 00000000
11710 vmvn i32 8 00000000 abcdefgh 00000000 00000000
11711 vmvn i32 9 abcdefgh 00000000 00000000 00000000
11712 vmvn i16 10 00000000 abcdefgh
11713 vmvn i16 11 abcdefgh 00000000
11714 vmov i32 12 00000000 00000000 abcdefgh 11111111
11715 vmvn i32 13 00000000 00000000 abcdefgh 11111111
11716 vmov i32 14 00000000 abcdefgh 11111111 11111111
11717 vmvn i32 15 00000000 abcdefgh 11111111 11111111
11718 vmov i8 16 abcdefgh
11719 vmov i64 17 aaaaaaaa bbbbbbbb cccccccc dddddddd
11720 eeeeeeee ffffffff gggggggg hhhhhhhh
11721 vmov f32 18 aBbbbbbc defgh000 00000000 00000000
11722 vmov f32 19 00000000 00000000 00000000 00000000
11724 For case 18, B = !b. Representable values are exactly those accepted by
11725 vfp3_const_double_index, but are output as floating-point numbers rather
11726 than indices.
11728 For case 19, we will change it to vmov.i32 when assembling.
11730 Variants 0-5 (inclusive) may also be used as immediates for the second
11731 operand of VORR/VBIC instructions.
11733 The INVERSE argument causes the bitwise inverse of the given operand to be
11734 recognized instead (used for recognizing legal immediates for the VAND/VORN
11735 pseudo-instructions). If INVERSE is true, the value placed in *MODCONST is
11736 *not* inverted (i.e. the pseudo-instruction forms vand/vorn should still be
11737 output, rather than the real insns vbic/vorr).
11739 INVERSE makes no difference to the recognition of float vectors.
11741 The return value is the variant of immediate as shown in the above table, or
11742 -1 if the given value doesn't match any of the listed patterns.
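   As a concrete illustration: a V4SI vector whose elements are all
   0x0000ab00 matches variant 1 with abcdefgh = 0xab, so this function
   returns 1 with *ELEMENTWIDTH = 32 and *MODCONST = 0xab00, corresponding
   to an instruction of the form "vmov.i32 q0, #0x0000ab00".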
11744 static int
11745 neon_valid_immediate (rtx op, machine_mode mode, int inverse,
11746 rtx *modconst, int *elementwidth)
11748 #define CHECK(STRIDE, ELSIZE, CLASS, TEST) \
11749 matches = 1; \
11750 for (i = 0; i < idx; i += (STRIDE)) \
11751 if (!(TEST)) \
11752 matches = 0; \
11753 if (matches) \
11755 immtype = (CLASS); \
11756 elsize = (ELSIZE); \
11757 break; \
11760 unsigned int i, elsize = 0, idx = 0, n_elts;
11761 unsigned int innersize;
11762 unsigned char bytes[16];
11763 int immtype = -1, matches;
11764 unsigned int invmask = inverse ? 0xff : 0;
11765 bool vector = GET_CODE (op) == CONST_VECTOR;
11767 if (vector)
11768 n_elts = CONST_VECTOR_NUNITS (op);
11769 else
11771 n_elts = 1;
11772 if (mode == VOIDmode)
11773 mode = DImode;
11776 innersize = GET_MODE_UNIT_SIZE (mode);
11778 /* Vectors of float constants. */
11779 if (GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
11781 rtx el0 = CONST_VECTOR_ELT (op, 0);
11783 if (!vfp3_const_double_rtx (el0) && el0 != CONST0_RTX (GET_MODE (el0)))
11784 return -1;
11786 /* FP16 vectors cannot be represented. */
11787 if (GET_MODE_INNER (mode) == HFmode)
11788 return -1;
11790 /* All elements in the vector must be the same. Note that 0.0 and -0.0
11791 are distinct in this context. */
11792 if (!const_vec_duplicate_p (op))
11793 return -1;
11795 if (modconst)
11796 *modconst = CONST_VECTOR_ELT (op, 0);
11798 if (elementwidth)
11799 *elementwidth = 0;
11801 if (el0 == CONST0_RTX (GET_MODE (el0)))
11802 return 19;
11803 else
11804 return 18;
11807 /* The tricks done in the code below apply for little-endian vector layout.
11808 For big-endian vectors only allow vectors of the form { a, a, a..., a }.
11809 FIXME: Implement logic for big-endian vectors. */
11810 if (BYTES_BIG_ENDIAN && vector && !const_vec_duplicate_p (op))
11811 return -1;
11813 /* Splat vector constant out into a byte vector. */
11814 for (i = 0; i < n_elts; i++)
11816 rtx el = vector ? CONST_VECTOR_ELT (op, i) : op;
11817 unsigned HOST_WIDE_INT elpart;
11819 gcc_assert (CONST_INT_P (el));
11820 elpart = INTVAL (el);
11822 for (unsigned int byte = 0; byte < innersize; byte++)
11824 bytes[idx++] = (elpart & 0xff) ^ invmask;
11825 elpart >>= BITS_PER_UNIT;
11829 /* Sanity check. */
11830 gcc_assert (idx == GET_MODE_SIZE (mode));
11834 CHECK (4, 32, 0, bytes[i] == bytes[0] && bytes[i + 1] == 0
11835 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
11837 CHECK (4, 32, 1, bytes[i] == 0 && bytes[i + 1] == bytes[1]
11838 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
11840 CHECK (4, 32, 2, bytes[i] == 0 && bytes[i + 1] == 0
11841 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);
11843 CHECK (4, 32, 3, bytes[i] == 0 && bytes[i + 1] == 0
11844 && bytes[i + 2] == 0 && bytes[i + 3] == bytes[3]);
11846 CHECK (2, 16, 4, bytes[i] == bytes[0] && bytes[i + 1] == 0);
11848 CHECK (2, 16, 5, bytes[i] == 0 && bytes[i + 1] == bytes[1]);
11850 CHECK (4, 32, 6, bytes[i] == bytes[0] && bytes[i + 1] == 0xff
11851 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
11853 CHECK (4, 32, 7, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
11854 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
11856 CHECK (4, 32, 8, bytes[i] == 0xff && bytes[i + 1] == 0xff
11857 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);
11859 CHECK (4, 32, 9, bytes[i] == 0xff && bytes[i + 1] == 0xff
11860 && bytes[i + 2] == 0xff && bytes[i + 3] == bytes[3]);
11862 CHECK (2, 16, 10, bytes[i] == bytes[0] && bytes[i + 1] == 0xff);
11864 CHECK (2, 16, 11, bytes[i] == 0xff && bytes[i + 1] == bytes[1]);
11866 CHECK (4, 32, 12, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
11867 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
11869 CHECK (4, 32, 13, bytes[i] == 0 && bytes[i + 1] == bytes[1]
11870 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
11872 CHECK (4, 32, 14, bytes[i] == 0xff && bytes[i + 1] == 0xff
11873 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);
11875 CHECK (4, 32, 15, bytes[i] == 0 && bytes[i + 1] == 0
11876 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);
11878 CHECK (1, 8, 16, bytes[i] == bytes[0]);
11880 CHECK (1, 64, 17, (bytes[i] == 0 || bytes[i] == 0xff)
11881 && bytes[i] == bytes[(i + 8) % idx]);
11883 while (0);
11885 if (immtype == -1)
11886 return -1;
11888 if (elementwidth)
11889 *elementwidth = elsize;
11891 if (modconst)
11893 unsigned HOST_WIDE_INT imm = 0;
11895 /* Un-invert bytes of recognized vector, if necessary. */
11896 if (invmask != 0)
11897 for (i = 0; i < idx; i++)
11898 bytes[i] ^= invmask;
11900 if (immtype == 17)
11902 /* FIXME: Broken on 32-bit H_W_I hosts. */
11903 gcc_assert (sizeof (HOST_WIDE_INT) == 8);
11905 for (i = 0; i < 8; i++)
11906 imm |= (unsigned HOST_WIDE_INT) (bytes[i] ? 0xff : 0)
11907 << (i * BITS_PER_UNIT);
11909 *modconst = GEN_INT (imm);
11911 else
11913 unsigned HOST_WIDE_INT imm = 0;
11915 for (i = 0; i < elsize / BITS_PER_UNIT; i++)
11916 imm |= (unsigned HOST_WIDE_INT) bytes[i] << (i * BITS_PER_UNIT);
11918 *modconst = GEN_INT (imm);
11922 return immtype;
11923 #undef CHECK
11926 /* Return TRUE if rtx X is legal for use as either a Neon VMOV (or, implicitly,
11927 VMVN) immediate. Write back width per element to *ELEMENTWIDTH (or zero for
11928 float elements), and a modified constant (whatever should be output for a
11929 VMOV) in *MODCONST. */
11932 neon_immediate_valid_for_move (rtx op, machine_mode mode,
11933 rtx *modconst, int *elementwidth)
11935 rtx tmpconst;
11936 int tmpwidth;
11937 int retval = neon_valid_immediate (op, mode, 0, &tmpconst, &tmpwidth);
11939 if (retval == -1)
11940 return 0;
11942 if (modconst)
11943 *modconst = tmpconst;
11945 if (elementwidth)
11946 *elementwidth = tmpwidth;
11948 return 1;
11951 /* Return TRUE if rtx X is legal for use in a VORR or VBIC instruction. If
11952 the immediate is valid, write a constant suitable for using as an operand
11953 to VORR/VBIC/VAND/VORN to *MODCONST and the corresponding element width to
11954 *ELEMENTWIDTH. See neon_valid_immediate for description of INVERSE. */
11957 neon_immediate_valid_for_logic (rtx op, machine_mode mode, int inverse,
11958 rtx *modconst, int *elementwidth)
11960 rtx tmpconst;
11961 int tmpwidth;
11962 int retval = neon_valid_immediate (op, mode, inverse, &tmpconst, &tmpwidth);
11964 if (retval < 0 || retval > 5)
11965 return 0;
11967 if (modconst)
11968 *modconst = tmpconst;
11970 if (elementwidth)
11971 *elementwidth = tmpwidth;
11973 return 1;
11976 /* Return TRUE if rtx OP is legal for use in a VSHR or VSHL instruction. If
11977 the immediate is valid, write a constant suitable for using as an operand
11978 to VSHR/VSHL to *MODCONST and the corresponding element width to
11979 *ELEMENTWIDTH. ISLEFTSHIFT selects between left and right shifts,
11980 because they have different limitations. */
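/* For example, with V8HI (16-bit elements) the limit is the element size of
   16, so a vector of identical shift counts is accepted for VSHL when the
   count is in [0, 15] and for VSHR when it is in [1, 16].  */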
11983 neon_immediate_valid_for_shift (rtx op, machine_mode mode,
11984 rtx *modconst, int *elementwidth,
11985 bool isleftshift)
11987 unsigned int innersize = GET_MODE_UNIT_SIZE (mode);
11988 unsigned int n_elts = CONST_VECTOR_NUNITS (op), i;
11989 unsigned HOST_WIDE_INT last_elt = 0;
11990 unsigned HOST_WIDE_INT maxshift;
11992 /* Split vector constant out into a byte vector. */
11993 for (i = 0; i < n_elts; i++)
11995 rtx el = CONST_VECTOR_ELT (op, i);
11996 unsigned HOST_WIDE_INT elpart;
11998 if (CONST_INT_P (el))
11999 elpart = INTVAL (el);
12000 else if (CONST_DOUBLE_P (el))
12001 return 0;
12002 else
12003 gcc_unreachable ();
12005 if (i != 0 && elpart != last_elt)
12006 return 0;
12008 last_elt = elpart;
12011 /* Shift less than element size. */
12012 maxshift = innersize * 8;
12014 if (isleftshift)
12016 /* Left shift immediate value can be from 0 to <size>-1. */
12017 if (last_elt >= maxshift)
12018 return 0;
12020 else
12022 /* Right shift immediate value can be from 1 to <size>. */
12023 if (last_elt == 0 || last_elt > maxshift)
12024 return 0;
12027 if (elementwidth)
12028 *elementwidth = innersize * 8;
12030 if (modconst)
12031 *modconst = CONST_VECTOR_ELT (op, 0);
12033 return 1;
12036 /* Return a string suitable for output of Neon immediate logic operation
12037 MNEM. */
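/* For instance, called for a quad-register VORR whose immediate was
   validated with a 32-bit element width, this returns the template
   "vorr.i32\t%q0, %2"; the double-register form uses %P0 instead.  */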
12039 char *
12040 neon_output_logic_immediate (const char *mnem, rtx *op2, machine_mode mode,
12041 int inverse, int quad)
12043 int width, is_valid;
12044 static char templ[40];
12046 is_valid = neon_immediate_valid_for_logic (*op2, mode, inverse, op2, &width);
12048 gcc_assert (is_valid != 0);
12050 if (quad)
12051 sprintf (templ, "%s.i%d\t%%q0, %%2", mnem, width);
12052 else
12053 sprintf (templ, "%s.i%d\t%%P0, %%2", mnem, width);
12055 return templ;
12058 /* Return a string suitable for output of Neon immediate shift operation
12059 (VSHR or VSHL) MNEM. */
12061 char *
12062 neon_output_shift_immediate (const char *mnem, char sign, rtx *op2,
12063 machine_mode mode, int quad,
12064 bool isleftshift)
12066 int width, is_valid;
12067 static char templ[40];
12069 is_valid = neon_immediate_valid_for_shift (*op2, mode, op2, &width, isleftshift);
12070 gcc_assert (is_valid != 0);
12072 if (quad)
12073 sprintf (templ, "%s.%c%d\t%%q0, %%q1, %%2", mnem, sign, width);
12074 else
12075 sprintf (templ, "%s.%c%d\t%%P0, %%P1, %%2", mnem, sign, width);
12077 return templ;
12080 /* Output a sequence of pairwise operations to implement a reduction.
12081 NOTE: We do "too much work" here, because pairwise operations work on two
12082 registers-worth of operands in one go. Unfortunately, we don't think we can
12083 exploit those extra calculations to do the full operation in fewer steps.
12084 Although all vector elements of the result but the first are ignored, we
12085 actually calculate the same result in each of the elements. An alternative
12086 such as initially loading a vector with zero to use as each of the second
12087 operands would use up an additional register and take an extra instruction,
12088 for no particular gain. */
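/* As an illustration, reducing a four-element vector { a, b, c, d } with an
   addition REDUC takes two steps: the first pairwise add of the vector with
   itself gives { a+b, c+d, a+b, c+d }, and the second gives a+b+c+d in
   every element, so lane 0 of OP0 holds the full reduction.  */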
12090 void
12091 neon_pairwise_reduce (rtx op0, rtx op1, machine_mode mode,
12092 rtx (*reduc) (rtx, rtx, rtx))
12094 unsigned int i, parts = GET_MODE_SIZE (mode) / GET_MODE_UNIT_SIZE (mode);
12095 rtx tmpsum = op1;
12097 for (i = parts / 2; i >= 1; i /= 2)
12099 rtx dest = (i == 1) ? op0 : gen_reg_rtx (mode);
12100 emit_insn (reduc (dest, tmpsum, tmpsum));
12101 tmpsum = dest;
12105 /* If VALS is a vector constant that can be loaded into a register
12106 using VDUP, generate instructions to do so and return an RTX to
12107 assign to the register. Otherwise return NULL_RTX. */
12109 static rtx
12110 neon_vdup_constant (rtx vals)
12112 machine_mode mode = GET_MODE (vals);
12113 machine_mode inner_mode = GET_MODE_INNER (mode);
12114 rtx x;
12116 if (GET_CODE (vals) != CONST_VECTOR || GET_MODE_SIZE (inner_mode) > 4)
12117 return NULL_RTX;
12119 if (!const_vec_duplicate_p (vals, &x))
12120 /* The elements are not all the same. We could handle repeating
12121 patterns of a mode larger than INNER_MODE here (e.g. int8x8_t
12122 {0, C, 0, C, 0, C, 0, C} which can be loaded using
12123 vdup.i16). */
12124 return NULL_RTX;
12126 /* We can load this constant by using VDUP and a constant in a
12127 single ARM register. This will be cheaper than a vector
12128 load. */
12130 x = copy_to_mode_reg (inner_mode, x);
12131 return gen_rtx_VEC_DUPLICATE (mode, x);
12134 /* Generate code to load VALS, which is a PARALLEL containing only
12135 constants (for vec_init) or CONST_VECTOR, efficiently into a
12136 register. Returns an RTX to copy into the register, or NULL_RTX
12137 for a PARALLEL that can not be converted into a CONST_VECTOR. */
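/* For example: a V4SI vector of four 1s is returned as a CONST_VECTOR,
   since it is directly encodable as a VMOV.I32 immediate; a CONST_VECTOR
   that merely replicates a 32-bit-or-narrower constant with no such
   encoding becomes a VEC_DUPLICATE of that value in a core register; and a
   vector of unrelated constants is returned as a CONST_VECTOR to be loaded
   from the literal pool.  */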
12140 neon_make_constant (rtx vals)
12142 machine_mode mode = GET_MODE (vals);
12143 rtx target;
12144 rtx const_vec = NULL_RTX;
12145 int n_elts = GET_MODE_NUNITS (mode);
12146 int n_const = 0;
12147 int i;
12149 if (GET_CODE (vals) == CONST_VECTOR)
12150 const_vec = vals;
12151 else if (GET_CODE (vals) == PARALLEL)
12153 /* A CONST_VECTOR must contain only CONST_INTs and
12154 CONST_DOUBLEs, but CONSTANT_P allows more (e.g. SYMBOL_REF).
12155 Only store valid constants in a CONST_VECTOR. */
12156 for (i = 0; i < n_elts; ++i)
12158 rtx x = XVECEXP (vals, 0, i);
12159 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
12160 n_const++;
12162 if (n_const == n_elts)
12163 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
12165 else
12166 gcc_unreachable ();
12168 if (const_vec != NULL
12169 && neon_immediate_valid_for_move (const_vec, mode, NULL, NULL))
12170 /* Load using VMOV. On Cortex-A8 this takes one cycle. */
12171 return const_vec;
12172 else if ((target = neon_vdup_constant (vals)) != NULL_RTX)
12173 /* Loaded using VDUP. On Cortex-A8 the VDUP takes one NEON
12174 pipeline cycle; creating the constant takes one or two ARM
12175 pipeline cycles. */
12176 return target;
12177 else if (const_vec != NULL_RTX)
12178 /* Load from constant pool. On Cortex-A8 this takes two cycles
12179 (for either double or quad vectors). We can not take advantage
12180 of single-cycle VLD1 because we need a PC-relative addressing
12181 mode. */
12182 return const_vec;
12183 else
12184 /* A PARALLEL containing something not valid inside CONST_VECTOR.
12185 We can not construct an initializer. */
12186 return NULL_RTX;
12189 /* Initialize vector TARGET to VALS. */
12191 void
12192 neon_expand_vector_init (rtx target, rtx vals)
12194 machine_mode mode = GET_MODE (target);
12195 machine_mode inner_mode = GET_MODE_INNER (mode);
12196 int n_elts = GET_MODE_NUNITS (mode);
12197 int n_var = 0, one_var = -1;
12198 bool all_same = true;
12199 rtx x, mem;
12200 int i;
12202 for (i = 0; i < n_elts; ++i)
12204 x = XVECEXP (vals, 0, i);
12205 if (!CONSTANT_P (x))
12206 ++n_var, one_var = i;
12208 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
12209 all_same = false;
12212 if (n_var == 0)
12214 rtx constant = neon_make_constant (vals);
12215 if (constant != NULL_RTX)
12217 emit_move_insn (target, constant);
12218 return;
12222 /* Splat a single non-constant element if we can. */
12223 if (all_same && GET_MODE_SIZE (inner_mode) <= 4)
12225 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
12226 emit_insn (gen_rtx_SET (target, gen_rtx_VEC_DUPLICATE (mode, x)));
12227 return;
12230 /* One field is non-constant. Load constant then overwrite varying
12231 field. This is more efficient than using the stack. */
12232 if (n_var == 1)
12234 rtx copy = copy_rtx (vals);
12235 rtx index = GEN_INT (one_var);
12237 /* Load constant part of vector, substitute neighboring value for
12238 varying element. */
12239 XVECEXP (copy, 0, one_var) = XVECEXP (vals, 0, (one_var + 1) % n_elts);
12240 neon_expand_vector_init (target, copy);
12242 /* Insert variable. */
12243 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, one_var));
12244 switch (mode)
12246 case E_V8QImode:
12247 emit_insn (gen_neon_vset_lanev8qi (target, x, target, index));
12248 break;
12249 case E_V16QImode:
12250 emit_insn (gen_neon_vset_lanev16qi (target, x, target, index));
12251 break;
12252 case E_V4HImode:
12253 emit_insn (gen_neon_vset_lanev4hi (target, x, target, index));
12254 break;
12255 case E_V8HImode:
12256 emit_insn (gen_neon_vset_lanev8hi (target, x, target, index));
12257 break;
12258 case E_V2SImode:
12259 emit_insn (gen_neon_vset_lanev2si (target, x, target, index));
12260 break;
12261 case E_V4SImode:
12262 emit_insn (gen_neon_vset_lanev4si (target, x, target, index));
12263 break;
12264 case E_V2SFmode:
12265 emit_insn (gen_neon_vset_lanev2sf (target, x, target, index));
12266 break;
12267 case E_V4SFmode:
12268 emit_insn (gen_neon_vset_lanev4sf (target, x, target, index));
12269 break;
12270 case E_V2DImode:
12271 emit_insn (gen_neon_vset_lanev2di (target, x, target, index));
12272 break;
12273 default:
12274 gcc_unreachable ();
12276 return;
12279 /* Construct the vector in memory one field at a time
12280 and load the whole vector. */
12281 mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
12282 for (i = 0; i < n_elts; i++)
12283 emit_move_insn (adjust_address_nv (mem, inner_mode,
12284 i * GET_MODE_SIZE (inner_mode)),
12285 XVECEXP (vals, 0, i));
12286 emit_move_insn (target, mem);
12289 /* Ensure OPERAND lies between LOW (inclusive) and HIGH (exclusive). Raise
12290 an error describing the operand as DESC if it doesn't. EXP indicates the source location, which includes the
12291 inlining history for intrinsics. */
12293 static void
12294 bounds_check (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high,
12295 const_tree exp, const char *desc)
12297 HOST_WIDE_INT lane;
12299 gcc_assert (CONST_INT_P (operand));
12301 lane = INTVAL (operand);
12303 if (lane < low || lane >= high)
12305 if (exp)
12306 error ("%K%s %wd out of range %wd - %wd",
12307 exp, desc, lane, low, high - 1);
12308 else
12309 error ("%s %wd out of range %wd - %wd", desc, lane, low, high - 1);
12313 /* Bounds-check lanes. */
12315 void
12316 neon_lane_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high,
12317 const_tree exp)
12319 bounds_check (operand, low, high, exp, "lane");
12322 /* Bounds-check constants. */
12324 void
12325 arm_const_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
12327 bounds_check (operand, low, high, NULL_TREE, "constant");
12330 HOST_WIDE_INT
12331 neon_element_bits (machine_mode mode)
12333 return GET_MODE_UNIT_BITSIZE (mode);
12337 /* Predicates for `match_operand' and `match_operator'. */
12339 /* Return TRUE if OP is a valid coprocessor memory address pattern.
12340 WB is true if full writeback address modes are allowed and is false
12341 if limited writeback address modes (POST_INC and PRE_DEC) are
12342 allowed. */
12345 arm_coproc_mem_operand (rtx op, bool wb)
12347 rtx ind;
12349 /* Reject eliminable registers. */
12350 if (! (reload_in_progress || reload_completed || lra_in_progress)
12351 && ( reg_mentioned_p (frame_pointer_rtx, op)
12352 || reg_mentioned_p (arg_pointer_rtx, op)
12353 || reg_mentioned_p (virtual_incoming_args_rtx, op)
12354 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
12355 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
12356 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
12357 return FALSE;
12359 /* Constants are converted into offsets from labels. */
12360 if (!MEM_P (op))
12361 return FALSE;
12363 ind = XEXP (op, 0);
12365 if (reload_completed
12366 && (GET_CODE (ind) == LABEL_REF
12367 || (GET_CODE (ind) == CONST
12368 && GET_CODE (XEXP (ind, 0)) == PLUS
12369 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
12370 && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
12371 return TRUE;
12373 /* Match: (mem (reg)). */
12374 if (REG_P (ind))
12375 return arm_address_register_rtx_p (ind, 0);
12377 /* Autoincrement addressing modes. POST_INC and PRE_DEC are
12378 acceptable in any case (subject to verification by
12379 arm_address_register_rtx_p). We need WB to be true to accept
12380 PRE_INC and POST_DEC. */
12381 if (GET_CODE (ind) == POST_INC
12382 || GET_CODE (ind) == PRE_DEC
12383 || (wb
12384 && (GET_CODE (ind) == PRE_INC
12385 || GET_CODE (ind) == POST_DEC)))
12386 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
12388 if (wb
12389 && (GET_CODE (ind) == POST_MODIFY || GET_CODE (ind) == PRE_MODIFY)
12390 && arm_address_register_rtx_p (XEXP (ind, 0), 0)
12391 && GET_CODE (XEXP (ind, 1)) == PLUS
12392 && rtx_equal_p (XEXP (XEXP (ind, 1), 0), XEXP (ind, 0)))
12393 ind = XEXP (ind, 1);
12395 /* Match:
12396 (plus (reg)
12397 (const)). */
12398 if (GET_CODE (ind) == PLUS
12399 && REG_P (XEXP (ind, 0))
12400 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
12401 && CONST_INT_P (XEXP (ind, 1))
12402 && INTVAL (XEXP (ind, 1)) > -1024
12403 && INTVAL (XEXP (ind, 1)) < 1024
12404 && (INTVAL (XEXP (ind, 1)) & 3) == 0)
12405 return TRUE;
12407 return FALSE;
12410 /* Return TRUE if OP is a memory operand which we can load or store a vector
12411 to/from. TYPE is one of the following values:
12412 0 - Vector load/store (vldr)
12413 1 - Core registers (ldm)
12414 2 - Element/structure loads (vld1)
12417 neon_vector_mem_operand (rtx op, int type, bool strict)
12419 rtx ind;
12421 /* Reject eliminable registers. */
12422 if (strict && ! (reload_in_progress || reload_completed)
12423 && (reg_mentioned_p (frame_pointer_rtx, op)
12424 || reg_mentioned_p (arg_pointer_rtx, op)
12425 || reg_mentioned_p (virtual_incoming_args_rtx, op)
12426 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
12427 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
12428 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
12429 return FALSE;
12431 /* Constants are converted into offsets from labels. */
12432 if (!MEM_P (op))
12433 return FALSE;
12435 ind = XEXP (op, 0);
12437 if (reload_completed
12438 && (GET_CODE (ind) == LABEL_REF
12439 || (GET_CODE (ind) == CONST
12440 && GET_CODE (XEXP (ind, 0)) == PLUS
12441 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
12442 && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
12443 return TRUE;
12445 /* Match: (mem (reg)). */
12446 if (REG_P (ind))
12447 return arm_address_register_rtx_p (ind, 0);
12449 /* Allow post-increment with Neon registers. */
12450 if ((type != 1 && GET_CODE (ind) == POST_INC)
12451 || (type == 0 && GET_CODE (ind) == PRE_DEC))
12452 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
12454 /* Allow post-increment by register for VLDn */
12455 if (type == 2 && GET_CODE (ind) == POST_MODIFY
12456 && GET_CODE (XEXP (ind, 1)) == PLUS
12457 && REG_P (XEXP (XEXP (ind, 1), 1)))
12458 return true;
12460 /* Match:
12461 (plus (reg)
12462 (const)). */
12463 if (type == 0
12464 && GET_CODE (ind) == PLUS
12465 && REG_P (XEXP (ind, 0))
12466 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
12467 && CONST_INT_P (XEXP (ind, 1))
12468 && INTVAL (XEXP (ind, 1)) > -1024
12469 /* For quad modes, we restrict the constant offset to be slightly less
12470 than what the instruction format permits. We have no such constraint
12471 on double mode offsets. (This must match arm_legitimate_index_p.) */
12472 && (INTVAL (XEXP (ind, 1))
12473 < (VALID_NEON_QREG_MODE (GET_MODE (op))? 1016 : 1024))
12474 && (INTVAL (XEXP (ind, 1)) & 3) == 0)
12475 return TRUE;
12477 return FALSE;
12480 /* Return TRUE if OP is a mem suitable for loading/storing a Neon struct
12481 type. */
12483 neon_struct_mem_operand (rtx op)
12485 rtx ind;
12487 /* Reject eliminable registers. */
12488 if (! (reload_in_progress || reload_completed)
12489 && ( reg_mentioned_p (frame_pointer_rtx, op)
12490 || reg_mentioned_p (arg_pointer_rtx, op)
12491 || reg_mentioned_p (virtual_incoming_args_rtx, op)
12492 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
12493 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
12494 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
12495 return FALSE;
12497 /* Constants are converted into offsets from labels. */
12498 if (!MEM_P (op))
12499 return FALSE;
12501 ind = XEXP (op, 0);
12503 if (reload_completed
12504 && (GET_CODE (ind) == LABEL_REF
12505 || (GET_CODE (ind) == CONST
12506 && GET_CODE (XEXP (ind, 0)) == PLUS
12507 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
12508 && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
12509 return TRUE;
12511 /* Match: (mem (reg)). */
12512 if (REG_P (ind))
12513 return arm_address_register_rtx_p (ind, 0);
12515 /* vldm/vstm allows POST_INC (ia) and PRE_DEC (db). */
12516 if (GET_CODE (ind) == POST_INC
12517 || GET_CODE (ind) == PRE_DEC)
12518 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
12520 return FALSE;
12523 /* Return true if X is a register that will be eliminated later on. */
12525 arm_eliminable_register (rtx x)
12527 return REG_P (x) && (REGNO (x) == FRAME_POINTER_REGNUM
12528 || REGNO (x) == ARG_POINTER_REGNUM
12529 || (REGNO (x) >= FIRST_VIRTUAL_REGISTER
12530 && REGNO (x) <= LAST_VIRTUAL_REGISTER));
12533 /* Return GENERAL_REGS if a scratch register required to reload x to/from
12534 coprocessor registers. Otherwise return NO_REGS. */
12536 enum reg_class
12537 coproc_secondary_reload_class (machine_mode mode, rtx x, bool wb)
12539 if (mode == HFmode)
12541 if (!TARGET_NEON_FP16 && !TARGET_VFP_FP16INST)
12542 return GENERAL_REGS;
12543 if (s_register_operand (x, mode) || neon_vector_mem_operand (x, 2, true))
12544 return NO_REGS;
12545 return GENERAL_REGS;
12548 /* The neon move patterns handle all legitimate vector and struct
12549 addresses. */
12550 if (TARGET_NEON
12551 && (MEM_P (x) || GET_CODE (x) == CONST_VECTOR)
12552 && (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
12553 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT
12554 || VALID_NEON_STRUCT_MODE (mode)))
12555 return NO_REGS;
12557 if (arm_coproc_mem_operand (x, wb) || s_register_operand (x, mode))
12558 return NO_REGS;
12560 return GENERAL_REGS;
12563 /* Values which must be returned in the most-significant end of the return
12564 register. */
12566 static bool
12567 arm_return_in_msb (const_tree valtype)
12569 return (TARGET_AAPCS_BASED
12570 && BYTES_BIG_ENDIAN
12571 && (AGGREGATE_TYPE_P (valtype)
12572 || TREE_CODE (valtype) == COMPLEX_TYPE
12573 || FIXED_POINT_TYPE_P (valtype)));
12576 /* Return TRUE if X references a SYMBOL_REF. */
12578 symbol_mentioned_p (rtx x)
12580 const char * fmt;
12581 int i;
12583 if (GET_CODE (x) == SYMBOL_REF)
12584 return 1;
12586 /* UNSPEC_TLS entries for a symbol include the SYMBOL_REF, but they
12587 are constant offsets, not symbols. */
12588 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
12589 return 0;
12591 fmt = GET_RTX_FORMAT (GET_CODE (x));
12593 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
12595 if (fmt[i] == 'E')
12597 int j;
12599 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
12600 if (symbol_mentioned_p (XVECEXP (x, i, j)))
12601 return 1;
12603 else if (fmt[i] == 'e' && symbol_mentioned_p (XEXP (x, i)))
12604 return 1;
12607 return 0;
12610 /* Return TRUE if X references a LABEL_REF. */
12612 label_mentioned_p (rtx x)
12614 const char * fmt;
12615 int i;
12617 if (GET_CODE (x) == LABEL_REF)
12618 return 1;
12620 /* UNSPEC_TLS entries for a symbol include a LABEL_REF for the referencing
12621 instruction, but they are constant offsets, not symbols. */
12622 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
12623 return 0;
12625 fmt = GET_RTX_FORMAT (GET_CODE (x));
12626 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
12628 if (fmt[i] == 'E')
12630 int j;
12632 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
12633 if (label_mentioned_p (XVECEXP (x, i, j)))
12634 return 1;
12636 else if (fmt[i] == 'e' && label_mentioned_p (XEXP (x, i)))
12637 return 1;
12640 return 0;
12644 tls_mentioned_p (rtx x)
12646 switch (GET_CODE (x))
12648 case CONST:
12649 return tls_mentioned_p (XEXP (x, 0));
12651 case UNSPEC:
12652 if (XINT (x, 1) == UNSPEC_TLS)
12653 return 1;
12655 /* Fall through. */
12656 default:
12657 return 0;
12661 /* Must not copy any rtx that uses a pc-relative address.
12662 Also, disallow copying of load-exclusive instructions that
12663 may appear after splitting of compare-and-swap-style operations
12664 so as to prevent those loops from being transformed away from their
12665 canonical forms (see PR 69904). */
12667 static bool
12668 arm_cannot_copy_insn_p (rtx_insn *insn)
12670 /* The tls call insn cannot be copied, as it is paired with a data
12671 word. */
12672 if (recog_memoized (insn) == CODE_FOR_tlscall)
12673 return true;
12675 subrtx_iterator::array_type array;
12676 FOR_EACH_SUBRTX (iter, array, PATTERN (insn), ALL)
12678 const_rtx x = *iter;
12679 if (GET_CODE (x) == UNSPEC
12680 && (XINT (x, 1) == UNSPEC_PIC_BASE
12681 || XINT (x, 1) == UNSPEC_PIC_UNIFIED))
12682 return true;
12685 rtx set = single_set (insn);
12686 if (set)
12688 rtx src = SET_SRC (set);
12689 if (GET_CODE (src) == ZERO_EXTEND)
12690 src = XEXP (src, 0);
12692 /* Catch the load-exclusive and load-acquire operations. */
12693 if (GET_CODE (src) == UNSPEC_VOLATILE
12694 && (XINT (src, 1) == VUNSPEC_LL
12695 || XINT (src, 1) == VUNSPEC_LAX))
12696 return true;
12698 return false;
12701 enum rtx_code
12702 minmax_code (rtx x)
12704 enum rtx_code code = GET_CODE (x);
12706 switch (code)
12708 case SMAX:
12709 return GE;
12710 case SMIN:
12711 return LE;
12712 case UMIN:
12713 return LEU;
12714 case UMAX:
12715 return GEU;
12716 default:
12717 gcc_unreachable ();
12721 /* Match pair of min/max operators that can be implemented via usat/ssat. */
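/* For example, the clamp SMIN (SMAX (x, 0), 255) has LO_BOUND = 0 and
   HI_BOUND = 255, giving *MASK = 8 for an unsigned USAT #8, while
   SMIN (SMAX (x, -256), 255) gives *MASK = 9 for a signed SSAT #9.  */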
12723 bool
12724 arm_sat_operator_match (rtx lo_bound, rtx hi_bound,
12725 int *mask, bool *signed_sat)
12727 /* The high bound must be a power of two minus one. */
12728 int log = exact_log2 (INTVAL (hi_bound) + 1);
12729 if (log == -1)
12730 return false;
12732 /* The low bound is either zero (for usat) or one less than the
12733 negation of the high bound (for ssat). */
12734 if (INTVAL (lo_bound) == 0)
12736 if (mask)
12737 *mask = log;
12738 if (signed_sat)
12739 *signed_sat = false;
12741 return true;
12744 if (INTVAL (lo_bound) == -INTVAL (hi_bound) - 1)
12746 if (mask)
12747 *mask = log + 1;
12748 if (signed_sat)
12749 *signed_sat = true;
12751 return true;
12754 return false;
12757 /* Return 1 if memory locations are adjacent. */
12759 adjacent_mem_locations (rtx a, rtx b)
12761 /* We don't guarantee to preserve the order of these memory refs. */
12762 if (volatile_refs_p (a) || volatile_refs_p (b))
12763 return 0;
12765 if ((REG_P (XEXP (a, 0))
12766 || (GET_CODE (XEXP (a, 0)) == PLUS
12767 && CONST_INT_P (XEXP (XEXP (a, 0), 1))))
12768 && (REG_P (XEXP (b, 0))
12769 || (GET_CODE (XEXP (b, 0)) == PLUS
12770 && CONST_INT_P (XEXP (XEXP (b, 0), 1)))))
12772 HOST_WIDE_INT val0 = 0, val1 = 0;
12773 rtx reg0, reg1;
12774 int val_diff;
12776 if (GET_CODE (XEXP (a, 0)) == PLUS)
12778 reg0 = XEXP (XEXP (a, 0), 0);
12779 val0 = INTVAL (XEXP (XEXP (a, 0), 1));
12781 else
12782 reg0 = XEXP (a, 0);
12784 if (GET_CODE (XEXP (b, 0)) == PLUS)
12786 reg1 = XEXP (XEXP (b, 0), 0);
12787 val1 = INTVAL (XEXP (XEXP (b, 0), 1));
12789 else
12790 reg1 = XEXP (b, 0);
12792 /* Don't accept any offset that will require multiple
12793 instructions to handle, since this would cause the
12794 arith_adjacentmem pattern to output an overlong sequence. */
12795 if (!const_ok_for_op (val0, PLUS) || !const_ok_for_op (val1, PLUS))
12796 return 0;
12798 /* Don't allow an eliminable register: register elimination can make
12799 the offset too large. */
12800 if (arm_eliminable_register (reg0))
12801 return 0;
12803 val_diff = val1 - val0;
12805 if (arm_ld_sched)
12807 /* If the target has load delay slots, then there's no benefit
12808 to using an ldm instruction unless the offset is zero and
12809 we are optimizing for size. */
12810 return (optimize_size && (REGNO (reg0) == REGNO (reg1))
12811 && (val0 == 0 || val1 == 0 || val0 == 4 || val1 == 4)
12812 && (val_diff == 4 || val_diff == -4));
12815 return ((REGNO (reg0) == REGNO (reg1))
12816 && (val_diff == 4 || val_diff == -4));
12819 return 0;
12822 /* Return true if OP is a valid load or store multiple operation. LOAD is true
12823 for load operations, false for store operations. CONSECUTIVE is true
12824 if the register numbers in the operation must be consecutive in the register
12825 bank. RETURN_PC is true if the value is to be loaded into PC.
12826 The pattern we are trying to match for load is:
12827 [(SET (R_d0) (MEM (PLUS (addr) (offset))))
12828 (SET (R_d1) (MEM (PLUS (addr) (offset + <reg_increment>))))
12831 (SET (R_dn) (MEM (PLUS (addr) (offset + n * <reg_increment>))))
12833 where
12834 1. If offset is 0, first insn should be (SET (R_d0) (MEM (src_addr))).
12835 2. REGNO (R_d0) < REGNO (R_d1) < ... < REGNO (R_dn).
12836 3. If consecutive is TRUE, then for kth register being loaded,
12837 REGNO (R_dk) = REGNO (R_d0) + k.
12838 The pattern for store is similar. */
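/* For instance, the SImode load-multiple "ldmia r1, {r4, r5, r6}" is

     (parallel [(set (reg:SI 4) (mem:SI (reg:SI 1)))
                (set (reg:SI 5) (mem:SI (plus:SI (reg:SI 1) (const_int 4))))
                (set (reg:SI 6) (mem:SI (plus:SI (reg:SI 1) (const_int 8))))])

   and the write-back form adds a leading
   (set (reg:SI 1) (plus:SI (reg:SI 1) (const_int 12))).  */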
12839 bool
12840 ldm_stm_operation_p (rtx op, bool load, machine_mode mode,
12841 bool consecutive, bool return_pc)
12843 HOST_WIDE_INT count = XVECLEN (op, 0);
12844 rtx reg, mem, addr;
12845 unsigned regno;
12846 unsigned first_regno;
12847 HOST_WIDE_INT i = 1, base = 0, offset = 0;
12848 rtx elt;
12849 bool addr_reg_in_reglist = false;
12850 bool update = false;
12851 int reg_increment;
12852 int offset_adj;
12853 int regs_per_val;
12855 /* If not in SImode, then registers must be consecutive
12856 (e.g., VLDM instructions for DFmode). */
12857 gcc_assert ((mode == SImode) || consecutive);
12858 /* Setting return_pc for stores is illegal. */
12859 gcc_assert (!return_pc || load);
12861 /* Set up the increments and the regs per val based on the mode. */
12862 reg_increment = GET_MODE_SIZE (mode);
12863 regs_per_val = reg_increment / 4;
12864 offset_adj = return_pc ? 1 : 0;
12866 if (count <= 1
12867 || GET_CODE (XVECEXP (op, 0, offset_adj)) != SET
12868 || (load && !REG_P (SET_DEST (XVECEXP (op, 0, offset_adj)))))
12869 return false;
12871 /* Check if this is a write-back. */
12872 elt = XVECEXP (op, 0, offset_adj);
12873 if (GET_CODE (SET_SRC (elt)) == PLUS)
12875 i++;
12876 base = 1;
12877 update = true;
12879 /* The offset adjustment must be the number of registers being
12880 popped times the size of a single register. */
12881 if (!REG_P (SET_DEST (elt))
12882 || !REG_P (XEXP (SET_SRC (elt), 0))
12883 || (REGNO (SET_DEST (elt)) != REGNO (XEXP (SET_SRC (elt), 0)))
12884 || !CONST_INT_P (XEXP (SET_SRC (elt), 1))
12885 || INTVAL (XEXP (SET_SRC (elt), 1)) !=
12886 ((count - 1 - offset_adj) * reg_increment))
12887 return false;
12890 i = i + offset_adj;
12891 base = base + offset_adj;
12892 /* Perform a quick check so we don't blow up below. If only one reg is loaded,
12893 success depends on the type: VLDM can do just one reg,
12894 LDM must do at least two. */
12895 if ((count <= i) && (mode == SImode))
12896 return false;
12898 elt = XVECEXP (op, 0, i - 1);
12899 if (GET_CODE (elt) != SET)
12900 return false;
12902 if (load)
12904 reg = SET_DEST (elt);
12905 mem = SET_SRC (elt);
12907 else
12909 reg = SET_SRC (elt);
12910 mem = SET_DEST (elt);
12913 if (!REG_P (reg) || !MEM_P (mem))
12914 return false;
12916 regno = REGNO (reg);
12917 first_regno = regno;
12918 addr = XEXP (mem, 0);
12919 if (GET_CODE (addr) == PLUS)
12921 if (!CONST_INT_P (XEXP (addr, 1)))
12922 return false;
12924 offset = INTVAL (XEXP (addr, 1));
12925 addr = XEXP (addr, 0);
12928 if (!REG_P (addr))
12929 return false;
12931 /* Don't allow SP to be loaded unless it is also the base register. It
12932 guarantees that SP is reset correctly when an LDM instruction
12933 is interrupted. Otherwise, we might end up with a corrupt stack. */
12934 if (load && (REGNO (reg) == SP_REGNUM) && (REGNO (addr) != SP_REGNUM))
12935 return false;
12937 for (; i < count; i++)
12939 elt = XVECEXP (op, 0, i);
12940 if (GET_CODE (elt) != SET)
12941 return false;
12943 if (load)
12945 reg = SET_DEST (elt);
12946 mem = SET_SRC (elt);
12948 else
12950 reg = SET_SRC (elt);
12951 mem = SET_DEST (elt);
12954 if (!REG_P (reg)
12955 || GET_MODE (reg) != mode
12956 || REGNO (reg) <= regno
12957 || (consecutive
12958 && (REGNO (reg) !=
12959 (unsigned int) (first_regno + regs_per_val * (i - base))))
12960 /* Don't allow SP to be loaded unless it is also the base register. It
12961 guarantees that SP is reset correctly when an LDM instruction
12962 is interrupted. Otherwise, we might end up with a corrupt stack. */
12963 || (load && (REGNO (reg) == SP_REGNUM) && (REGNO (addr) != SP_REGNUM))
12964 || !MEM_P (mem)
12965 || GET_MODE (mem) != mode
12966 || ((GET_CODE (XEXP (mem, 0)) != PLUS
12967 || !rtx_equal_p (XEXP (XEXP (mem, 0), 0), addr)
12968 || !CONST_INT_P (XEXP (XEXP (mem, 0), 1))
12969 || (INTVAL (XEXP (XEXP (mem, 0), 1)) !=
12970 offset + (i - base) * reg_increment))
12971 && (!REG_P (XEXP (mem, 0))
12972 || offset + (i - base) * reg_increment != 0)))
12973 return false;
12975 regno = REGNO (reg);
12976 if (regno == REGNO (addr))
12977 addr_reg_in_reglist = true;
12980 if (load)
12982 if (update && addr_reg_in_reglist)
12983 return false;
12985 /* For Thumb-1, the address register is always modified - either by write-back
12986 or by explicit load. If the pattern does not describe an update,
12987 then the address register must be in the list of loaded registers. */
12988 if (TARGET_THUMB1)
12989 return update || addr_reg_in_reglist;
12992 return true;
12995 /* Return true iff it would be profitable to turn a sequence of NOPS loads
12996 or stores (depending on IS_STORE) into a load-multiple or store-multiple
12997 instruction. ADD_OFFSET is nonzero if the base address register needs
12998 to be modified with an add instruction before we can use it. */
13000 static bool
13001 multiple_operation_profitable_p (bool is_store ATTRIBUTE_UNUSED,
13002 int nops, HOST_WIDE_INT add_offset)
13004 /* For ARM8,9 & StrongARM, 2 ldr instructions are faster than an ldm
13005 if the offset isn't small enough. The reason 2 ldrs are faster
13006 is because these ARMs are able to do more than one cache access
13007 in a single cycle. The ARM9 and StrongARM have Harvard caches,
13008 whilst the ARM8 has a double bandwidth cache. This means that
13009 these cores can do both an instruction fetch and a data fetch in
13010 a single cycle, so the trick of calculating the address into a
13011 scratch register (one of the result regs) and then doing a load
13012 multiple actually becomes slower (and no smaller in code size).
13013 That is the transformation
13015 ldr rd1, [rbase + offset]
13016 ldr rd2, [rbase + offset + 4]
13020 add rd1, rbase, offset
13021 ldmia rd1, {rd1, rd2}
13023 produces worse code -- '3 cycles + any stalls on rd2' instead of
13024 '2 cycles + any stalls on rd2'. On ARMs with only one cache
13025 access per cycle, the first sequence could never complete in less
13026 than 6 cycles, whereas the ldm sequence would only take 5 and
13027 would make better use of sequential accesses if not hitting the
13028 cache.
13030 We cheat here and test 'arm_ld_sched' which we currently know to
13031 only be true for the ARM8, ARM9 and StrongARM. If this ever
13032 changes, then the test below needs to be reworked. */
13033 if (nops == 2 && arm_ld_sched && add_offset != 0)
13034 return false;
13036 /* XScale has load-store double instructions, but they have stricter
13037 alignment requirements than load-store multiple, so we cannot
13038 use them.
13040 For XScale ldm requires 2 + NREGS cycles to complete and blocks
13041 the pipeline until completion.
13043 NREGS CYCLES
1 3
2 4
3 5
4 6
13049 An ldr instruction takes 1-3 cycles, but does not block the
13050 pipeline.
13052 NREGS CYCLES
13053 1 1-3
13054 2 2-6
13055 3 3-9
13056 4 4-12
13058 Best case ldr will always win. However, the more ldr instructions
13059 we issue, the less likely we are to be able to schedule them well.
13060 Using ldr instructions also increases code size.
13062 As a compromise, we use ldr for counts of 1 or 2 regs, and ldm
13063 for counts of 3 or 4 regs. */
13064 if (nops <= 2 && arm_tune_xscale && !optimize_size)
13065 return false;
13066 return true;
13069 /* Subroutine of load_multiple_sequence and store_multiple_sequence.
13070 Given an array of UNSORTED_OFFSETS, of which there are NOPS, compute
13071 an array ORDER which describes the sequence to use when accessing the
13072 offsets that produces an ascending order. In this sequence, each
13073 offset must be larger by exactly 4 than the previous one. ORDER[0]
13074 must have been filled in with the lowest offset by the caller.
13075 If UNSORTED_REGS is nonnull, it is an array of register numbers that
13076 we use to verify that ORDER produces an ascending order of registers.
13077 Return true if it was possible to construct such an order, false if
13078 not. */
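/* For example, with NOPS == 4 and UNSORTED_OFFSETS == { 8, 0, 12, 4 }, the
   caller seeds ORDER[0] = 1 (the entry with offset 0) and this function
   fills in ORDER = { 1, 3, 0, 2 }, i.e. offsets 0, 4, 8, 12 in ascending
   order.  */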
13080 static bool
13081 compute_offset_order (int nops, HOST_WIDE_INT *unsorted_offsets, int *order,
13082 int *unsorted_regs)
13084 int i;
13085 for (i = 1; i < nops; i++)
13087 int j;
13089 order[i] = order[i - 1];
13090 for (j = 0; j < nops; j++)
13091 if (unsorted_offsets[j] == unsorted_offsets[order[i - 1]] + 4)
13093 /* We must find exactly one offset that is higher than the
13094 previous one by 4. */
13095 if (order[i] != order[i - 1])
13096 return false;
13097 order[i] = j;
13099 if (order[i] == order[i - 1])
13100 return false;
13101 /* The register numbers must be ascending. */
13102 if (unsorted_regs != NULL
13103 && unsorted_regs[order[i]] <= unsorted_regs[order[i - 1]])
13104 return false;
13106 return true;
13109 /* Used to determine in a peephole whether a sequence of load
13110 instructions can be changed into a load-multiple instruction.
13111 NOPS is the number of separate load instructions we are examining. The
13112 first NOPS entries in OPERANDS are the destination registers, the
13113 next NOPS entries are memory operands. If this function is
13114 successful, *BASE is set to the common base register of the memory
13115 accesses; *LOAD_OFFSET is set to the first memory location's offset
13116 from that base register.
13117 REGS is an array filled in with the destination register numbers.
13118 SAVED_ORDER (if nonnull), is an array filled in with an order that maps
13119 insn numbers to an ascending order of stores. If CHECK_REGS is true,
13120 the sequence of registers in REGS matches the loads from ascending memory
13121 locations, and the function verifies that the register numbers are
13122 themselves ascending. If CHECK_REGS is false, the register numbers
13123 are stored in the order they are found in the operands. */
13124 static int
13125 load_multiple_sequence (rtx *operands, int nops, int *regs, int *saved_order,
13126 int *base, HOST_WIDE_INT *load_offset, bool check_regs)
13128 int unsorted_regs[MAX_LDM_STM_OPS];
13129 HOST_WIDE_INT unsorted_offsets[MAX_LDM_STM_OPS];
13130 int order[MAX_LDM_STM_OPS];
13131 rtx base_reg_rtx = NULL;
13132 int base_reg = -1;
13133 int i, ldm_case;
13135 /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
13136 easily extended if required. */
13137 gcc_assert (nops >= 2 && nops <= MAX_LDM_STM_OPS);
13139 memset (order, 0, MAX_LDM_STM_OPS * sizeof (int));
13141 /* Loop over the operands and check that the memory references are
13142 suitable (i.e. immediate offsets from the same base register). At
13143 the same time, extract the target register, and the memory
13144 offsets. */
13145 for (i = 0; i < nops; i++)
13147 rtx reg;
13148 rtx offset;
13150 /* Convert a subreg of a mem into the mem itself. */
13151 if (GET_CODE (operands[nops + i]) == SUBREG)
13152 operands[nops + i] = alter_subreg (operands + (nops + i), true);
13154 gcc_assert (MEM_P (operands[nops + i]));
13156 /* Don't reorder volatile memory references; it doesn't seem worth
13157 looking for the case where the order is ok anyway. */
13158 if (MEM_VOLATILE_P (operands[nops + i]))
13159 return 0;
13161 offset = const0_rtx;
13163 if ((REG_P (reg = XEXP (operands[nops + i], 0))
13164 || (GET_CODE (reg) == SUBREG
13165 && REG_P (reg = SUBREG_REG (reg))))
13166 || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
13167 && ((REG_P (reg = XEXP (XEXP (operands[nops + i], 0), 0)))
13168 || (GET_CODE (reg) == SUBREG
13169 && REG_P (reg = SUBREG_REG (reg))))
13170 && (CONST_INT_P (offset
13171 = XEXP (XEXP (operands[nops + i], 0), 1)))))
13173 if (i == 0)
13175 base_reg = REGNO (reg);
13176 base_reg_rtx = reg;
13177 if (TARGET_THUMB1 && base_reg > LAST_LO_REGNUM)
13178 return 0;
13180 else if (base_reg != (int) REGNO (reg))
13181 /* Not addressed from the same base register. */
13182 return 0;
13184 unsorted_regs[i] = (REG_P (operands[i])
13185 ? REGNO (operands[i])
13186 : REGNO (SUBREG_REG (operands[i])));
13188 /* If it isn't an integer register, or if it overwrites the
13189 base register but isn't the last insn in the list, then
13190 we can't do this. */
13191 if (unsorted_regs[i] < 0
13192 || (TARGET_THUMB1 && unsorted_regs[i] > LAST_LO_REGNUM)
13193 || unsorted_regs[i] > 14
13194 || (i != nops - 1 && unsorted_regs[i] == base_reg))
13195 return 0;
13197 /* Don't allow SP to be loaded unless it is also the base
13198 register. It guarantees that SP is reset correctly when
13199 an LDM instruction is interrupted. Otherwise, we might
13200 end up with a corrupt stack. */
13201 if (unsorted_regs[i] == SP_REGNUM && base_reg != SP_REGNUM)
13202 return 0;
13204 unsorted_offsets[i] = INTVAL (offset);
13205 if (i == 0 || unsorted_offsets[i] < unsorted_offsets[order[0]])
13206 order[0] = i;
13208 else
13209 /* Not a suitable memory address. */
13210 return 0;
13213 /* All the useful information has now been extracted from the
13214 operands into unsorted_regs and unsorted_offsets; additionally,
13215 order[0] has been set to the lowest offset in the list. Sort
13216 the offsets into order, verifying that they are adjacent, and
13217 check that the register numbers are ascending. */
13218 if (!compute_offset_order (nops, unsorted_offsets, order,
13219 check_regs ? unsorted_regs : NULL))
13220 return 0;
13222 if (saved_order)
13223 memcpy (saved_order, order, sizeof order);
13225 if (base)
13227 *base = base_reg;
13229 for (i = 0; i < nops; i++)
13230 regs[i] = unsorted_regs[check_regs ? order[i] : i];
13232 *load_offset = unsorted_offsets[order[0]];
13235 if (TARGET_THUMB1
13236 && !peep2_reg_dead_p (nops, base_reg_rtx))
13237 return 0;
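/* A sketch (assuming the usual ARM load-multiple addressing modes) of how
   the classification below maps runs of consecutive word offsets:
     lowest offset 0    -> case 1, ldmia         e.g. {0, 4, 8, 12}
     lowest offset 4    -> case 2, ldmib (ARM)   e.g. {4, 8, 12, 16}
     highest offset 0   -> case 3, ldmda (ARM)   e.g. {-12, -8, -4, 0}
     highest offset -4  -> case 4, ldmdb         e.g. {-16, -12, -8, -4}
   Any other run whose lowest offset fits an add/sub immediate is case 5:
   the caller adjusts a base register first and then uses ldmia.  */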
13239 if (unsorted_offsets[order[0]] == 0)
13240 ldm_case = 1; /* ldmia */
13241 else if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
13242 ldm_case = 2; /* ldmib */
13243 else if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
13244 ldm_case = 3; /* ldmda */
13245 else if (TARGET_32BIT && unsorted_offsets[order[nops - 1]] == -4)
13246 ldm_case = 4; /* ldmdb */
13247 else if (const_ok_for_arm (unsorted_offsets[order[0]])
13248 || const_ok_for_arm (-unsorted_offsets[order[0]]))
13249 ldm_case = 5;
13250 else
13251 return 0;
13253 if (!multiple_operation_profitable_p (false, nops,
13254 ldm_case == 5
13255 ? unsorted_offsets[order[0]] : 0))
13256 return 0;
13258 return ldm_case;
13261 /* Used to determine in a peephole whether a sequence of store instructions can
13262 be changed into a store-multiple instruction.
13263 NOPS is the number of separate store instructions we are examining.
13264 NOPS_TOTAL is the total number of instructions recognized by the peephole
13265 pattern.
13266 The first NOPS entries in OPERANDS are the source registers, the next
13267 NOPS entries are memory operands. If this function is successful, *BASE is
13268 set to the common base register of the memory accesses; *LOAD_OFFSET is set
13269 to the first memory location's offset from that base register. REGS is an
13270 array filled in with the source register numbers, REG_RTXS (if nonnull) is
13271 likewise filled with the corresponding rtx's.
13272 SAVED_ORDER (if nonnull) is an array filled in with an order that maps insn
13273 numbers to an ascending order of stores.
13274 If CHECK_REGS is true, the sequence of registers in *REGS matches the stores
13275 from ascending memory locations, and the function verifies that the register
13276 numbers are themselves ascending. If CHECK_REGS is false, the register
13277 numbers are stored in the order they are found in the operands. */
13278 static int
13279 store_multiple_sequence (rtx *operands, int nops, int nops_total,
13280 int *regs, rtx *reg_rtxs, int *saved_order, int *base,
13281 HOST_WIDE_INT *load_offset, bool check_regs)
13283 int unsorted_regs[MAX_LDM_STM_OPS];
13284 rtx unsorted_reg_rtxs[MAX_LDM_STM_OPS];
13285 HOST_WIDE_INT unsorted_offsets[MAX_LDM_STM_OPS];
13286 int order[MAX_LDM_STM_OPS];
13287 int base_reg = -1;
13288 rtx base_reg_rtx = NULL;
13289 int i, stm_case;
13291 /* Write back of base register is currently only supported for Thumb 1. */
13292 int base_writeback = TARGET_THUMB1;
13294 /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
13295 easily extended if required. */
13296 gcc_assert (nops >= 2 && nops <= MAX_LDM_STM_OPS);
13298 memset (order, 0, MAX_LDM_STM_OPS * sizeof (int));
13300 /* Loop over the operands and check that the memory references are
13301 suitable (i.e. immediate offsets from the same base register). At
13302 the same time, extract the target register, and the memory
13303 offsets. */
13304 for (i = 0; i < nops; i++)
13306 rtx reg;
13307 rtx offset;
13309 /* Convert a subreg of a mem into the mem itself. */
13310 if (GET_CODE (operands[nops + i]) == SUBREG)
13311 operands[nops + i] = alter_subreg (operands + (nops + i), true);
13313 gcc_assert (MEM_P (operands[nops + i]));
13315 /* Don't reorder volatile memory references; it doesn't seem worth
13316 looking for the case where the order is ok anyway. */
13317 if (MEM_VOLATILE_P (operands[nops + i]))
13318 return 0;
13320 offset = const0_rtx;
13322 if ((REG_P (reg = XEXP (operands[nops + i], 0))
13323 || (GET_CODE (reg) == SUBREG
13324 && REG_P (reg = SUBREG_REG (reg))))
13325 || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
13326 && ((REG_P (reg = XEXP (XEXP (operands[nops + i], 0), 0)))
13327 || (GET_CODE (reg) == SUBREG
13328 && REG_P (reg = SUBREG_REG (reg))))
13329 && (CONST_INT_P (offset
13330 = XEXP (XEXP (operands[nops + i], 0), 1)))))
13332 unsorted_reg_rtxs[i] = (REG_P (operands[i])
13333 ? operands[i] : SUBREG_REG (operands[i]));
13334 unsorted_regs[i] = REGNO (unsorted_reg_rtxs[i]);
13336 if (i == 0)
13338 base_reg = REGNO (reg);
13339 base_reg_rtx = reg;
13340 if (TARGET_THUMB1 && base_reg > LAST_LO_REGNUM)
13341 return 0;
13343 else if (base_reg != (int) REGNO (reg))
13344 /* Not addressed from the same base register. */
13345 return 0;
13347 /* If it isn't an integer register, then we can't do this. */
13348 if (unsorted_regs[i] < 0
13349 || (TARGET_THUMB1 && unsorted_regs[i] > LAST_LO_REGNUM)
13350 /* The effects are unpredictable if the base register is
13351 both updated and stored. */
13352 || (base_writeback && unsorted_regs[i] == base_reg)
13353 || (TARGET_THUMB2 && unsorted_regs[i] == SP_REGNUM)
13354 || unsorted_regs[i] > 14)
13355 return 0;
13357 unsorted_offsets[i] = INTVAL (offset);
13358 if (i == 0 || unsorted_offsets[i] < unsorted_offsets[order[0]])
13359 order[0] = i;
13361 else
13362 /* Not a suitable memory address. */
13363 return 0;
13366 /* All the useful information has now been extracted from the
13367 operands into unsorted_regs and unsorted_offsets; additionally,
13368 order[0] has been set to the lowest offset in the list. Sort
13369 the offsets into order, verifying that they are adjacent, and
13370 check that the register numbers are ascending. */
13371 if (!compute_offset_order (nops, unsorted_offsets, order,
13372 check_regs ? unsorted_regs : NULL))
13373 return 0;
13375 if (saved_order)
13376 memcpy (saved_order, order, sizeof order);
13378 if (base)
13380 *base = base_reg;
13382 for (i = 0; i < nops; i++)
13384 regs[i] = unsorted_regs[check_regs ? order[i] : i];
13385 if (reg_rtxs)
13386 reg_rtxs[i] = unsorted_reg_rtxs[check_regs ? order[i] : i];
13389 *load_offset = unsorted_offsets[order[0]];
13392 if (TARGET_THUMB1
13393 && !peep2_reg_dead_p (nops_total, base_reg_rtx))
13394 return 0;
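/* A sketch of the classification below, mirroring the load case: lowest
   offset 0 -> stmia, lowest offset 4 -> stmib (ARM only), highest offset
   0 -> stmda (ARM only), highest offset -4 -> stmdb.  */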
13396 if (unsorted_offsets[order[0]] == 0)
13397 stm_case = 1; /* stmia */
13398 else if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
13399 stm_case = 2; /* stmib */
13400 else if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
13401 stm_case = 3; /* stmda */
13402 else if (TARGET_32BIT && unsorted_offsets[order[nops - 1]] == -4)
13403 stm_case = 4; /* stmdb */
13404 else
13405 return 0;
13407 if (!multiple_operation_profitable_p (false, nops, 0))
13408 return 0;
13410 return stm_case;
13413 /* Routines for use in generating RTL. */
13415 /* Generate a load-multiple instruction. COUNT is the number of loads in
13416 the instruction; REGS and MEMS are arrays containing the operands.
13417 BASEREG is the base register to be used in addressing the memory operands.
13418 WBACK_OFFSET is nonzero if the instruction should update the base
13419 register. */
13421 static rtx
13422 arm_gen_load_multiple_1 (int count, int *regs, rtx *mems, rtx basereg,
13423 HOST_WIDE_INT wback_offset)
13425 int i = 0, j;
13426 rtx result;
13428 if (!multiple_operation_profitable_p (false, count, 0))
13430 rtx seq;
13432 start_sequence ();
13434 for (i = 0; i < count; i++)
13435 emit_move_insn (gen_rtx_REG (SImode, regs[i]), mems[i]);
13437 if (wback_offset != 0)
13438 emit_move_insn (basereg, plus_constant (Pmode, basereg, wback_offset));
13440 seq = get_insns ();
13441 end_sequence ();
13443 return seq;
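/* Otherwise a single PARALLEL is built. As an illustrative sketch (with
   MEMS as built by arm_gen_multiple_op later in this file), a four-register
   load with write-back looks roughly like:
     (parallel [(set (reg base) (plus (reg base) (const_int 16)))
                (set (reg r0) (mem (reg base)))
                (set (reg r1) (mem (plus (reg base) (const_int 4))))
                ...])  */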
13446 result = gen_rtx_PARALLEL (VOIDmode,
13447 rtvec_alloc (count + (wback_offset != 0 ? 1 : 0)));
13448 if (wback_offset != 0)
13450 XVECEXP (result, 0, 0)
13451 = gen_rtx_SET (basereg, plus_constant (Pmode, basereg, wback_offset));
13452 i = 1;
13453 count++;
13456 for (j = 0; i < count; i++, j++)
13457 XVECEXP (result, 0, i)
13458 = gen_rtx_SET (gen_rtx_REG (SImode, regs[j]), mems[j]);
13460 return result;
13463 /* Generate a store-multiple instruction. COUNT is the number of stores in
13464 the instruction; REGS and MEMS are arrays containing the operands.
13465 BASEREG is the base register to be used in addressing the memory operands.
13466 WBACK_OFFSET is nonzero if the instruction should update the base
13467 register. */
13469 static rtx
13470 arm_gen_store_multiple_1 (int count, int *regs, rtx *mems, rtx basereg,
13471 HOST_WIDE_INT wback_offset)
13473 int i = 0, j;
13474 rtx result;
13476 if (GET_CODE (basereg) == PLUS)
13477 basereg = XEXP (basereg, 0);
13479 if (!multiple_operation_profitable_p (false, count, 0))
13481 rtx seq;
13483 start_sequence ();
13485 for (i = 0; i < count; i++)
13486 emit_move_insn (mems[i], gen_rtx_REG (SImode, regs[i]));
13488 if (wback_offset != 0)
13489 emit_move_insn (basereg, plus_constant (Pmode, basereg, wback_offset));
13491 seq = get_insns ();
13492 end_sequence ();
13494 return seq;
13497 result = gen_rtx_PARALLEL (VOIDmode,
13498 rtvec_alloc (count + (wback_offset != 0 ? 1 : 0)));
13499 if (wback_offset != 0)
13501 XVECEXP (result, 0, 0)
13502 = gen_rtx_SET (basereg, plus_constant (Pmode, basereg, wback_offset));
13503 i = 1;
13504 count++;
13507 for (j = 0; i < count; i++, j++)
13508 XVECEXP (result, 0, i)
13509 = gen_rtx_SET (mems[j], gen_rtx_REG (SImode, regs[j]));
13511 return result;
13514 /* Generate either a load-multiple or a store-multiple instruction. This
13515 function can be used in situations where we can start with a single MEM
13516 rtx and adjust its address upwards.
13517 COUNT is the number of operations in the instruction, not counting a
13518 possible update of the base register. REGS is an array containing the
13519 register operands.
13520 BASEREG is the base register to be used in addressing the memory operands,
13521 which are constructed from BASEMEM.
13522 WRITE_BACK specifies whether the generated instruction should include an
13523 update of the base register.
13524 OFFSETP is used to pass an offset to and from this function; this offset
13525 is not used when constructing the address (instead BASEMEM should have an
13526 appropriate offset in its address); it is used only for setting
13527 MEM_OFFSET. It is updated only if WRITE_BACK is true. */
13529 static rtx
13530 arm_gen_multiple_op (bool is_load, int *regs, int count, rtx basereg,
13531 bool write_back, rtx basemem, HOST_WIDE_INT *offsetp)
13533 rtx mems[MAX_LDM_STM_OPS];
13534 HOST_WIDE_INT offset = *offsetp;
13535 int i;
13537 gcc_assert (count <= MAX_LDM_STM_OPS);
13539 if (GET_CODE (basereg) == PLUS)
13540 basereg = XEXP (basereg, 0);
13542 for (i = 0; i < count; i++)
13544 rtx addr = plus_constant (Pmode, basereg, i * 4);
13545 mems[i] = adjust_automodify_address_nv (basemem, SImode, addr, offset);
13546 offset += 4;
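/* For instance, with COUNT == 4 the loop above builds word-sized MEMS at
   BASEREG + 0, 4, 8 and 12; the write-back forms generated below also
   advance BASEREG by 4 * COUNT (16 here).  */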
13549 if (write_back)
13550 *offsetp = offset;
13552 if (is_load)
13553 return arm_gen_load_multiple_1 (count, regs, mems, basereg,
13554 write_back ? 4 * count : 0);
13555 else
13556 return arm_gen_store_multiple_1 (count, regs, mems, basereg,
13557 write_back ? 4 * count : 0);
13560 rtx
13561 arm_gen_load_multiple (int *regs, int count, rtx basereg, int write_back,
13562 rtx basemem, HOST_WIDE_INT *offsetp)
13564 return arm_gen_multiple_op (TRUE, regs, count, basereg, write_back, basemem,
13565 offsetp);
13568 rtx
13569 arm_gen_store_multiple (int *regs, int count, rtx basereg, int write_back,
13570 rtx basemem, HOST_WIDE_INT *offsetp)
13572 return arm_gen_multiple_op (FALSE, regs, count, basereg, write_back, basemem,
13573 offsetp);
13576 /* Called from a peephole2 expander to turn a sequence of loads into an
13577 LDM instruction. OPERANDS are the operands found by the peephole matcher;
13578 NOPS indicates how many separate loads we are trying to combine. SORT_REGS
13579 is true if we can reorder the registers because they are used commutatively
13580 subsequently.
13581 Returns true iff we could generate a new instruction. */
13583 bool
13584 gen_ldm_seq (rtx *operands, int nops, bool sort_regs)
13586 int regs[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
13587 rtx mems[MAX_LDM_STM_OPS];
13588 int i, j, base_reg;
13589 rtx base_reg_rtx;
13590 HOST_WIDE_INT offset;
13591 int write_back = FALSE;
13592 int ldm_case;
13593 rtx addr;
13595 ldm_case = load_multiple_sequence (operands, nops, regs, mem_order,
13596 &base_reg, &offset, !sort_regs);
13598 if (ldm_case == 0)
13599 return false;
13601 if (sort_regs)
13602 for (i = 0; i < nops - 1; i++)
13603 for (j = i + 1; j < nops; j++)
13604 if (regs[i] > regs[j])
13606 int t = regs[i];
13607 regs[i] = regs[j];
13608 regs[j] = t;
13610 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
13612 if (TARGET_THUMB1)
13614 gcc_assert (peep2_reg_dead_p (nops, base_reg_rtx));
13615 gcc_assert (ldm_case == 1 || ldm_case == 5);
13616 write_back = TRUE;
13619 if (ldm_case == 5)
13621 rtx newbase = TARGET_THUMB1 ? base_reg_rtx : gen_rtx_REG (SImode, regs[0]);
13622 emit_insn (gen_addsi3 (newbase, base_reg_rtx, GEN_INT (offset)));
13623 offset = 0;
13624 if (!TARGET_THUMB1)
13625 base_reg_rtx = newbase;
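/* An illustrative note on case 5: if the run of loads starts at, say,
   BASE + 16, the addsi3 above emits "add newbase, base, #16" and the
   load-multiple below then starts at offset 0.  For Thumb-1 the addition
   updates the (dying) base register itself; otherwise the first destination
   register is reused as the new base.  */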
13628 for (i = 0; i < nops; i++)
13630 addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
13631 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
13632 SImode, addr, 0);
13634 emit_insn (arm_gen_load_multiple_1 (nops, regs, mems, base_reg_rtx,
13635 write_back ? offset + i * 4 : 0));
13636 return true;
13639 /* Called from a peephole2 expander to turn a sequence of stores into an
13640 STM instruction. OPERANDS are the operands found by the peephole matcher;
13641 NOPS indicates how many separate stores we are trying to combine.
13642 Returns true iff we could generate a new instruction. */
13644 bool
13645 gen_stm_seq (rtx *operands, int nops)
13647 int i;
13648 int regs[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
13649 rtx mems[MAX_LDM_STM_OPS];
13650 int base_reg;
13651 rtx base_reg_rtx;
13652 HOST_WIDE_INT offset;
13653 int write_back = FALSE;
13654 int stm_case;
13655 rtx addr;
13656 bool base_reg_dies;
13658 stm_case = store_multiple_sequence (operands, nops, nops, regs, NULL,
13659 mem_order, &base_reg, &offset, true);
13661 if (stm_case == 0)
13662 return false;
13664 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
13666 base_reg_dies = peep2_reg_dead_p (nops, base_reg_rtx);
13667 if (TARGET_THUMB1)
13669 gcc_assert (base_reg_dies);
13670 write_back = TRUE;
13673 if (stm_case == 5)
13675 gcc_assert (base_reg_dies);
13676 emit_insn (gen_addsi3 (base_reg_rtx, base_reg_rtx, GEN_INT (offset)));
13677 offset = 0;
13680 addr = plus_constant (Pmode, base_reg_rtx, offset);
13682 for (i = 0; i < nops; i++)
13684 addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
13685 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
13686 SImode, addr, 0);
13688 emit_insn (arm_gen_store_multiple_1 (nops, regs, mems, base_reg_rtx,
13689 write_back ? offset + i * 4 : 0));
13690 return true;
13693 /* Called from a peephole2 expander to turn a sequence of stores that are
13694 preceded by constant loads into an STM instruction. OPERANDS are the
13695 operands found by the peephole matcher; NOPS indicates how many
13696 separate stores we are trying to combine; there are 2 * NOPS
13697 instructions in the peephole.
13698 Returns true iff we could generate a new instruction. */
13700 bool
13701 gen_const_stm_seq (rtx *operands, int nops)
13703 int regs[MAX_LDM_STM_OPS], sorted_regs[MAX_LDM_STM_OPS];
13704 int reg_order[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
13705 rtx reg_rtxs[MAX_LDM_STM_OPS], orig_reg_rtxs[MAX_LDM_STM_OPS];
13706 rtx mems[MAX_LDM_STM_OPS];
13707 int base_reg;
13708 rtx base_reg_rtx;
13709 HOST_WIDE_INT offset;
13710 int write_back = FALSE;
13711 int stm_case;
13712 rtx addr;
13713 bool base_reg_dies;
13714 int i, j;
13715 HARD_REG_SET allocated;
13717 stm_case = store_multiple_sequence (operands, nops, 2 * nops, regs, reg_rtxs,
13718 mem_order, &base_reg, &offset, false);
13720 if (stm_case == 0)
13721 return false;
13723 memcpy (orig_reg_rtxs, reg_rtxs, sizeof orig_reg_rtxs);
13725 /* If the same register is used more than once, try to find a free
13726 register. */
13727 CLEAR_HARD_REG_SET (allocated);
13728 for (i = 0; i < nops; i++)
13730 for (j = i + 1; j < nops; j++)
13731 if (regs[i] == regs[j])
13733 rtx t = peep2_find_free_register (0, nops * 2,
13734 TARGET_THUMB1 ? "l" : "r",
13735 SImode, &allocated);
13736 if (t == NULL_RTX)
13737 return false;
13738 reg_rtxs[i] = t;
13739 regs[i] = REGNO (t);
13743 /* Compute an ordering that maps the register numbers to an ascending
13744 sequence. */
13745 reg_order[0] = 0;
13746 for (i = 0; i < nops; i++)
13747 if (regs[i] < regs[reg_order[0]])
13748 reg_order[0] = i;
13750 for (i = 1; i < nops; i++)
13752 int this_order = reg_order[i - 1];
13753 for (j = 0; j < nops; j++)
13754 if (regs[j] > regs[reg_order[i - 1]]
13755 && (this_order == reg_order[i - 1]
13756 || regs[j] < regs[this_order]))
13757 this_order = j;
13758 reg_order[i] = this_order;
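/* Worked example (illustrative): if regs[] is {3, 1, 2}, the loop above
   yields reg_order[] = {1, 2, 0}, i.e. the operand indices visited in
   increasing register-number order.  */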
13761 /* Ensure that registers that must be live after the instruction end
13762 up with the correct value. */
13763 for (i = 0; i < nops; i++)
13765 int this_order = reg_order[i];
13766 if ((this_order != mem_order[i]
13767 || orig_reg_rtxs[this_order] != reg_rtxs[this_order])
13768 && !peep2_reg_dead_p (nops * 2, orig_reg_rtxs[this_order]))
13769 return false;
13772 /* Load the constants. */
13773 for (i = 0; i < nops; i++)
13775 rtx op = operands[2 * nops + mem_order[i]];
13776 sorted_regs[i] = regs[reg_order[i]];
13777 emit_move_insn (reg_rtxs[reg_order[i]], op);
13780 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
13782 base_reg_dies = peep2_reg_dead_p (nops * 2, base_reg_rtx);
13783 if (TARGET_THUMB1)
13785 gcc_assert (base_reg_dies);
13786 write_back = TRUE;
13789 if (stm_case == 5)
13791 gcc_assert (base_reg_dies);
13792 emit_insn (gen_addsi3 (base_reg_rtx, base_reg_rtx, GEN_INT (offset)));
13793 offset = 0;
13796 addr = plus_constant (Pmode, base_reg_rtx, offset);
13798 for (i = 0; i < nops; i++)
13800 addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
13801 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
13802 SImode, addr, 0);
13804 emit_insn (arm_gen_store_multiple_1 (nops, sorted_regs, mems, base_reg_rtx,
13805 write_back ? offset + i * 4 : 0));
13806 return true;
13809 /* Copy a block of memory using plain ldr/str/ldrh/strh instructions, to permit
13810 unaligned copies on processors which support unaligned semantics for those
13811 instructions. INTERLEAVE_FACTOR can be used to attempt to hide load latency
13812 (using more registers) by doing e.g. load/load/store/store for a factor of 2.
13813 An interleave factor of 1 (the minimum) will perform no interleaving.
13814 Load/store multiple are used for aligned addresses where possible. */
13816 static void
13817 arm_block_move_unaligned_straight (rtx dstbase, rtx srcbase,
13818 HOST_WIDE_INT length,
13819 unsigned int interleave_factor)
13821 rtx *regs = XALLOCAVEC (rtx, interleave_factor);
13822 int *regnos = XALLOCAVEC (int, interleave_factor);
13823 HOST_WIDE_INT block_size_bytes = interleave_factor * UNITS_PER_WORD;
13824 HOST_WIDE_INT i, j;
13825 HOST_WIDE_INT remaining = length, words;
13826 rtx halfword_tmp = NULL, byte_tmp = NULL;
13827 rtx dst, src;
13828 bool src_aligned = MEM_ALIGN (srcbase) >= BITS_PER_WORD;
13829 bool dst_aligned = MEM_ALIGN (dstbase) >= BITS_PER_WORD;
13830 HOST_WIDE_INT srcoffset, dstoffset;
13831 HOST_WIDE_INT src_autoinc, dst_autoinc;
13832 rtx mem, addr;
13834 gcc_assert (1 <= interleave_factor && interleave_factor <= 4);
13836 /* Use hard registers if we have aligned source or destination so we can use
13837 load/store multiple with contiguous registers. */
13838 if (dst_aligned || src_aligned)
13839 for (i = 0; i < interleave_factor; i++)
13840 regs[i] = gen_rtx_REG (SImode, i);
13841 else
13842 for (i = 0; i < interleave_factor; i++)
13843 regs[i] = gen_reg_rtx (SImode);
13845 dst = copy_addr_to_reg (XEXP (dstbase, 0));
13846 src = copy_addr_to_reg (XEXP (srcbase, 0));
13848 srcoffset = dstoffset = 0;
13850 /* Calls to arm_gen_load_multiple and arm_gen_store_multiple update SRC/DST.
13851 For copying the last bytes we want to subtract this offset again. */
13852 src_autoinc = dst_autoinc = 0;
13854 for (i = 0; i < interleave_factor; i++)
13855 regnos[i] = i;
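/* Illustrative example: a 23-byte copy with INTERLEAVE_FACTOR == 2 transfers
   two 8-byte blocks below, then one whole word, then a halfword, then a
   single byte (16 + 4 + 2 + 1).  */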
13857 /* Copy BLOCK_SIZE_BYTES chunks. */
13859 for (i = 0; i + block_size_bytes <= length; i += block_size_bytes)
13861 /* Load words. */
13862 if (src_aligned && interleave_factor > 1)
13864 emit_insn (arm_gen_load_multiple (regnos, interleave_factor, src,
13865 TRUE, srcbase, &srcoffset));
13866 src_autoinc += UNITS_PER_WORD * interleave_factor;
13868 else
13870 for (j = 0; j < interleave_factor; j++)
13872 addr = plus_constant (Pmode, src, (srcoffset + j * UNITS_PER_WORD
13873 - src_autoinc));
13874 mem = adjust_automodify_address (srcbase, SImode, addr,
13875 srcoffset + j * UNITS_PER_WORD);
13876 emit_insn (gen_unaligned_loadsi (regs[j], mem));
13878 srcoffset += block_size_bytes;
13881 /* Store words. */
13882 if (dst_aligned && interleave_factor > 1)
13884 emit_insn (arm_gen_store_multiple (regnos, interleave_factor, dst,
13885 TRUE, dstbase, &dstoffset));
13886 dst_autoinc += UNITS_PER_WORD * interleave_factor;
13888 else
13890 for (j = 0; j < interleave_factor; j++)
13892 addr = plus_constant (Pmode, dst, (dstoffset + j * UNITS_PER_WORD
13893 - dst_autoinc));
13894 mem = adjust_automodify_address (dstbase, SImode, addr,
13895 dstoffset + j * UNITS_PER_WORD);
13896 emit_insn (gen_unaligned_storesi (mem, regs[j]));
13898 dstoffset += block_size_bytes;
13901 remaining -= block_size_bytes;
13904 /* Copy any whole words left (note these aren't interleaved with any
13905 subsequent halfword/byte load/stores in the interests of simplicity). */
13907 words = remaining / UNITS_PER_WORD;
13909 gcc_assert (words < interleave_factor);
13911 if (src_aligned && words > 1)
13913 emit_insn (arm_gen_load_multiple (regnos, words, src, TRUE, srcbase,
13914 &srcoffset));
13915 src_autoinc += UNITS_PER_WORD * words;
13917 else
13919 for (j = 0; j < words; j++)
13921 addr = plus_constant (Pmode, src,
13922 srcoffset + j * UNITS_PER_WORD - src_autoinc);
13923 mem = adjust_automodify_address (srcbase, SImode, addr,
13924 srcoffset + j * UNITS_PER_WORD);
13925 if (src_aligned)
13926 emit_move_insn (regs[j], mem);
13927 else
13928 emit_insn (gen_unaligned_loadsi (regs[j], mem));
13930 srcoffset += words * UNITS_PER_WORD;
13933 if (dst_aligned && words > 1)
13935 emit_insn (arm_gen_store_multiple (regnos, words, dst, TRUE, dstbase,
13936 &dstoffset));
13937 dst_autoinc += words * UNITS_PER_WORD;
13939 else
13941 for (j = 0; j < words; j++)
13943 addr = plus_constant (Pmode, dst,
13944 dstoffset + j * UNITS_PER_WORD - dst_autoinc);
13945 mem = adjust_automodify_address (dstbase, SImode, addr,
13946 dstoffset + j * UNITS_PER_WORD);
13947 if (dst_aligned)
13948 emit_move_insn (mem, regs[j]);
13949 else
13950 emit_insn (gen_unaligned_storesi (mem, regs[j]));
13952 dstoffset += words * UNITS_PER_WORD;
13955 remaining -= words * UNITS_PER_WORD;
13957 gcc_assert (remaining < 4);
13959 /* Copy a halfword if necessary. */
13961 if (remaining >= 2)
13963 halfword_tmp = gen_reg_rtx (SImode);
13965 addr = plus_constant (Pmode, src, srcoffset - src_autoinc);
13966 mem = adjust_automodify_address (srcbase, HImode, addr, srcoffset);
13967 emit_insn (gen_unaligned_loadhiu (halfword_tmp, mem));
13969 /* Either write out immediately, or delay until we've loaded the last
13970 byte, depending on interleave factor. */
13971 if (interleave_factor == 1)
13973 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
13974 mem = adjust_automodify_address (dstbase, HImode, addr, dstoffset);
13975 emit_insn (gen_unaligned_storehi (mem,
13976 gen_lowpart (HImode, halfword_tmp)));
13977 halfword_tmp = NULL;
13978 dstoffset += 2;
13981 remaining -= 2;
13982 srcoffset += 2;
13985 gcc_assert (remaining < 2);
13987 /* Copy last byte. */
13989 if ((remaining & 1) != 0)
13991 byte_tmp = gen_reg_rtx (SImode);
13993 addr = plus_constant (Pmode, src, srcoffset - src_autoinc);
13994 mem = adjust_automodify_address (srcbase, QImode, addr, srcoffset);
13995 emit_move_insn (gen_lowpart (QImode, byte_tmp), mem);
13997 if (interleave_factor == 1)
13999 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
14000 mem = adjust_automodify_address (dstbase, QImode, addr, dstoffset);
14001 emit_move_insn (mem, gen_lowpart (QImode, byte_tmp));
14002 byte_tmp = NULL;
14003 dstoffset++;
14006 remaining--;
14007 srcoffset++;
14010 /* Store last halfword if we haven't done so already. */
14012 if (halfword_tmp)
14014 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
14015 mem = adjust_automodify_address (dstbase, HImode, addr, dstoffset);
14016 emit_insn (gen_unaligned_storehi (mem,
14017 gen_lowpart (HImode, halfword_tmp)));
14018 dstoffset += 2;
14021 /* Likewise for last byte. */
14023 if (byte_tmp)
14025 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
14026 mem = adjust_automodify_address (dstbase, QImode, addr, dstoffset);
14027 emit_move_insn (mem, gen_lowpart (QImode, byte_tmp));
14028 dstoffset++;
14031 gcc_assert (remaining == 0 && srcoffset == dstoffset);
14034 /* From mips_adjust_block_mem:
14036 Helper function for doing a loop-based block operation on memory
14037 reference MEM. Each iteration of the loop will operate on LENGTH
14038 bytes of MEM.
14040 Create a new base register for use within the loop and point it to
14041 the start of MEM. Create a new memory reference that uses this
14042 register. Store them in *LOOP_REG and *LOOP_MEM respectively. */
14044 static void
14045 arm_adjust_block_mem (rtx mem, HOST_WIDE_INT length, rtx *loop_reg,
14046 rtx *loop_mem)
14048 *loop_reg = copy_addr_to_reg (XEXP (mem, 0));
14050 /* Although the new mem does not refer to a known location,
14051 it does keep up to LENGTH bytes of alignment. */
14052 *loop_mem = change_address (mem, BLKmode, *loop_reg);
14053 set_mem_align (*loop_mem, MIN (MEM_ALIGN (mem), length * BITS_PER_UNIT));
14056 /* From mips_block_move_loop:
14058 Move LENGTH bytes from SRC to DEST using a loop that moves BYTES_PER_ITER
14059 bytes at a time. LENGTH must be at least BYTES_PER_ITER. Assume that
14060 the memory regions do not overlap. */
14062 static void
14063 arm_block_move_unaligned_loop (rtx dest, rtx src, HOST_WIDE_INT length,
14064 unsigned int interleave_factor,
14065 HOST_WIDE_INT bytes_per_iter)
14067 rtx src_reg, dest_reg, final_src, test;
14068 HOST_WIDE_INT leftover;
14070 leftover = length % bytes_per_iter;
14071 length -= leftover;
14073 /* Create registers and memory references for use within the loop. */
14074 arm_adjust_block_mem (src, bytes_per_iter, &src_reg, &src);
14075 arm_adjust_block_mem (dest, bytes_per_iter, &dest_reg, &dest);
14077 /* Calculate the value that SRC_REG should have after the last iteration of
14078 the loop. */
14079 final_src = expand_simple_binop (Pmode, PLUS, src_reg, GEN_INT (length),
14080 0, 0, OPTAB_WIDEN);
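/* Illustrative example: LENGTH == 100 with BYTES_PER_ITER == 16 gives
   LEFTOVER == 4, so the loop below runs six times (96 bytes) and the final
   4 bytes are handled by the straight-line copy at the end.  */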
14082 /* Emit the start of the loop. */
14083 rtx_code_label *label = gen_label_rtx ();
14084 emit_label (label);
14086 /* Emit the loop body. */
14087 arm_block_move_unaligned_straight (dest, src, bytes_per_iter,
14088 interleave_factor);
14090 /* Move on to the next block. */
14091 emit_move_insn (src_reg, plus_constant (Pmode, src_reg, bytes_per_iter));
14092 emit_move_insn (dest_reg, plus_constant (Pmode, dest_reg, bytes_per_iter));
14094 /* Emit the loop condition. */
14095 test = gen_rtx_NE (VOIDmode, src_reg, final_src);
14096 emit_jump_insn (gen_cbranchsi4 (test, src_reg, final_src, label));
14098 /* Mop up any left-over bytes. */
14099 if (leftover)
14100 arm_block_move_unaligned_straight (dest, src, leftover, interleave_factor);
14103 /* Emit a block move when either the source or destination is unaligned (not
14104 aligned to a four-byte boundary). This may need further tuning depending on
14105 core type, optimize_size setting, etc. */
14107 static int
14108 arm_movmemqi_unaligned (rtx *operands)
14110 HOST_WIDE_INT length = INTVAL (operands[2]);
14112 if (optimize_size)
14114 bool src_aligned = MEM_ALIGN (operands[1]) >= BITS_PER_WORD;
14115 bool dst_aligned = MEM_ALIGN (operands[0]) >= BITS_PER_WORD;
14116 /* Inlined memcpy using ldr/str/ldrh/strh can be quite big: try to limit
14117 size of code if optimizing for size. We'll use ldm/stm if src_aligned
14118 or dst_aligned though: allow more interleaving in those cases since the
14119 resulting code can be smaller. */
14120 unsigned int interleave_factor = (src_aligned || dst_aligned) ? 2 : 1;
14121 HOST_WIDE_INT bytes_per_iter = (src_aligned || dst_aligned) ? 8 : 4;
14123 if (length > 12)
14124 arm_block_move_unaligned_loop (operands[0], operands[1], length,
14125 interleave_factor, bytes_per_iter);
14126 else
14127 arm_block_move_unaligned_straight (operands[0], operands[1], length,
14128 interleave_factor);
14130 else
14132 /* Note that the loop created by arm_block_move_unaligned_loop may be
14133 subject to loop unrolling, which makes tuning this condition a little
14134 redundant. */
14135 if (length > 32)
14136 arm_block_move_unaligned_loop (operands[0], operands[1], length, 4, 16);
14137 else
14138 arm_block_move_unaligned_straight (operands[0], operands[1], length, 4);
14141 return 1;
14144 int
14145 arm_gen_movmemqi (rtx *operands)
14147 HOST_WIDE_INT in_words_to_go, out_words_to_go, last_bytes;
14148 HOST_WIDE_INT srcoffset, dstoffset;
14149 rtx src, dst, srcbase, dstbase;
14150 rtx part_bytes_reg = NULL;
14151 rtx mem;
14153 if (!CONST_INT_P (operands[2])
14154 || !CONST_INT_P (operands[3])
14155 || INTVAL (operands[2]) > 64)
14156 return 0;
14158 if (unaligned_access && (INTVAL (operands[3]) & 3) != 0)
14159 return arm_movmemqi_unaligned (operands);
14161 if (INTVAL (operands[3]) & 3)
14162 return 0;
14164 dstbase = operands[0];
14165 srcbase = operands[1];
14167 dst = copy_to_mode_reg (SImode, XEXP (dstbase, 0));
14168 src = copy_to_mode_reg (SImode, XEXP (srcbase, 0));
14170 in_words_to_go = ARM_NUM_INTS (INTVAL (operands[2]));
14171 out_words_to_go = INTVAL (operands[2]) / 4;
14172 last_bytes = INTVAL (operands[2]) & 3;
14173 dstoffset = srcoffset = 0;
14175 if (out_words_to_go != in_words_to_go && ((in_words_to_go - 1) & 3) != 0)
14176 part_bytes_reg = gen_rtx_REG (SImode, (in_words_to_go - 1) & 3);
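/* A rough sketch of the loop below (little-endian, illustrative): a 14-byte
   copy loads four words, stores three of them with write-back, and finally
   stores the low halfword of the register holding the last loaded word to
   cover the remaining two bytes.  */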
14178 while (in_words_to_go >= 2)
14180 if (in_words_to_go > 4)
14181 emit_insn (arm_gen_load_multiple (arm_regs_in_sequence, 4, src,
14182 TRUE, srcbase, &srcoffset));
14183 else
14184 emit_insn (arm_gen_load_multiple (arm_regs_in_sequence, in_words_to_go,
14185 src, FALSE, srcbase,
14186 &srcoffset));
14188 if (out_words_to_go)
14190 if (out_words_to_go > 4)
14191 emit_insn (arm_gen_store_multiple (arm_regs_in_sequence, 4, dst,
14192 TRUE, dstbase, &dstoffset));
14193 else if (out_words_to_go != 1)
14194 emit_insn (arm_gen_store_multiple (arm_regs_in_sequence,
14195 out_words_to_go, dst,
14196 (last_bytes == 0
14197 ? FALSE : TRUE),
14198 dstbase, &dstoffset));
14199 else
14201 mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
14202 emit_move_insn (mem, gen_rtx_REG (SImode, R0_REGNUM));
14203 if (last_bytes != 0)
14205 emit_insn (gen_addsi3 (dst, dst, GEN_INT (4)));
14206 dstoffset += 4;
14211 in_words_to_go -= in_words_to_go < 4 ? in_words_to_go : 4;
14212 out_words_to_go -= out_words_to_go < 4 ? out_words_to_go : 4;
14215 /* OUT_WORDS_TO_GO will be zero here if there are byte stores to do. */
14216 if (out_words_to_go)
14218 rtx sreg;
14220 mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
14221 sreg = copy_to_reg (mem);
14223 mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
14224 emit_move_insn (mem, sreg);
14225 in_words_to_go--;
14227 gcc_assert (!in_words_to_go); /* Sanity check */
14230 if (in_words_to_go)
14232 gcc_assert (in_words_to_go > 0);
14234 mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
14235 part_bytes_reg = copy_to_mode_reg (SImode, mem);
14238 gcc_assert (!last_bytes || part_bytes_reg);
14240 if (BYTES_BIG_ENDIAN && last_bytes)
14242 rtx tmp = gen_reg_rtx (SImode);
14244 /* The bytes we want are in the top end of the word. */
14245 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg,
14246 GEN_INT (8 * (4 - last_bytes))));
14247 part_bytes_reg = tmp;
14249 while (last_bytes)
14251 mem = adjust_automodify_address (dstbase, QImode,
14252 plus_constant (Pmode, dst,
14253 last_bytes - 1),
14254 dstoffset + last_bytes - 1);
14255 emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
14257 if (--last_bytes)
14259 tmp = gen_reg_rtx (SImode);
14260 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (8)));
14261 part_bytes_reg = tmp;
14266 else
14268 if (last_bytes > 1)
14270 mem = adjust_automodify_address (dstbase, HImode, dst, dstoffset);
14271 emit_move_insn (mem, gen_lowpart (HImode, part_bytes_reg));
14272 last_bytes -= 2;
14273 if (last_bytes)
14275 rtx tmp = gen_reg_rtx (SImode);
14276 emit_insn (gen_addsi3 (dst, dst, const2_rtx));
14277 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (16)));
14278 part_bytes_reg = tmp;
14279 dstoffset += 2;
14283 if (last_bytes)
14285 mem = adjust_automodify_address (dstbase, QImode, dst, dstoffset);
14286 emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
14290 return 1;
14293 /* Helper for gen_movmem_ldrd_strd. Increase the address of memory rtx
14294 by mode size. */
14295 inline static rtx
14296 next_consecutive_mem (rtx mem)
14298 machine_mode mode = GET_MODE (mem);
14299 HOST_WIDE_INT offset = GET_MODE_SIZE (mode);
14300 rtx addr = plus_constant (Pmode, XEXP (mem, 0), offset);
14302 return adjust_automodify_address (mem, mode, addr, offset);
14305 /* Copy using LDRD/STRD instructions whenever possible.
14306 Returns true upon success. */
14307 bool
14308 gen_movmem_ldrd_strd (rtx *operands)
14310 unsigned HOST_WIDE_INT len;
14311 HOST_WIDE_INT align;
14312 rtx src, dst, base;
14313 rtx reg0;
14314 bool src_aligned, dst_aligned;
14315 bool src_volatile, dst_volatile;
14317 gcc_assert (CONST_INT_P (operands[2]));
14318 gcc_assert (CONST_INT_P (operands[3]));
14320 len = UINTVAL (operands[2]);
14321 if (len > 64)
14322 return false;
14324 /* Maximum alignment we can assume for both src and dst buffers. */
14325 align = INTVAL (operands[3]);
14327 if ((!unaligned_access) && (len >= 4) && ((align & 3) != 0))
14328 return false;
14330 /* Place src and dst addresses in registers
14331 and update the corresponding mem rtx. */
14332 dst = operands[0];
14333 dst_volatile = MEM_VOLATILE_P (dst);
14334 dst_aligned = MEM_ALIGN (dst) >= BITS_PER_WORD;
14335 base = copy_to_mode_reg (SImode, XEXP (dst, 0));
14336 dst = adjust_automodify_address (dst, VOIDmode, base, 0);
14338 src = operands[1];
14339 src_volatile = MEM_VOLATILE_P (src);
14340 src_aligned = MEM_ALIGN (src) >= BITS_PER_WORD;
14341 base = copy_to_mode_reg (SImode, XEXP (src, 0));
14342 src = adjust_automodify_address (src, VOIDmode, base, 0);
14344 if (!unaligned_access && !(src_aligned && dst_aligned))
14345 return false;
14347 if (src_volatile || dst_volatile)
14348 return false;
14350 /* If we cannot generate any LDRD/STRD, try to generate LDM/STM. */
14351 if (!(dst_aligned || src_aligned))
14352 return arm_gen_movmemqi (operands);
14354 /* If either src or dst is unaligned, we'll be accessing it as pairs
14355 of unaligned SImode accesses. Otherwise we can generate DImode
14356 ldrd/strd instructions. */
14357 src = adjust_address (src, src_aligned ? DImode : SImode, 0);
14358 dst = adjust_address (dst, dst_aligned ? DImode : SImode, 0);
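/* Illustrative sketch: with both buffers word-aligned, a 14-byte copy
   becomes one DImode (typically ldrd/strd) transfer, one SImode transfer
   and one halfword transfer; if either side is unaligned, each DImode
   chunk is instead handled as a pair of unaligned SImode accesses.  */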
14360 while (len >= 8)
14362 len -= 8;
14363 reg0 = gen_reg_rtx (DImode);
14364 rtx low_reg = NULL_RTX;
14365 rtx hi_reg = NULL_RTX;
14367 if (!src_aligned || !dst_aligned)
14369 low_reg = gen_lowpart (SImode, reg0);
14370 hi_reg = gen_highpart_mode (SImode, DImode, reg0);
14372 if (src_aligned)
14373 emit_move_insn (reg0, src);
14374 else
14376 emit_insn (gen_unaligned_loadsi (low_reg, src));
14377 src = next_consecutive_mem (src);
14378 emit_insn (gen_unaligned_loadsi (hi_reg, src));
14381 if (dst_aligned)
14382 emit_move_insn (dst, reg0);
14383 else
14385 emit_insn (gen_unaligned_storesi (dst, low_reg));
14386 dst = next_consecutive_mem (dst);
14387 emit_insn (gen_unaligned_storesi (dst, hi_reg));
14390 src = next_consecutive_mem (src);
14391 dst = next_consecutive_mem (dst);
14394 gcc_assert (len < 8);
14395 if (len >= 4)
14397 /* At least a word but less than a double-word to copy. Copy a word. */
14398 reg0 = gen_reg_rtx (SImode);
14399 src = adjust_address (src, SImode, 0);
14400 dst = adjust_address (dst, SImode, 0);
14401 if (src_aligned)
14402 emit_move_insn (reg0, src);
14403 else
14404 emit_insn (gen_unaligned_loadsi (reg0, src));
14406 if (dst_aligned)
14407 emit_move_insn (dst, reg0);
14408 else
14409 emit_insn (gen_unaligned_storesi (dst, reg0));
14411 src = next_consecutive_mem (src);
14412 dst = next_consecutive_mem (dst);
14413 len -= 4;
14416 if (len == 0)
14417 return true;
14419 /* Copy the remaining bytes. */
14420 if (len >= 2)
14422 dst = adjust_address (dst, HImode, 0);
14423 src = adjust_address (src, HImode, 0);
14424 reg0 = gen_reg_rtx (SImode);
14425 if (src_aligned)
14426 emit_insn (gen_zero_extendhisi2 (reg0, src));
14427 else
14428 emit_insn (gen_unaligned_loadhiu (reg0, src));
14430 if (dst_aligned)
14431 emit_insn (gen_movhi (dst, gen_lowpart(HImode, reg0)));
14432 else
14433 emit_insn (gen_unaligned_storehi (dst, gen_lowpart (HImode, reg0)));
14435 src = next_consecutive_mem (src);
14436 dst = next_consecutive_mem (dst);
14437 if (len == 2)
14438 return true;
14441 dst = adjust_address (dst, QImode, 0);
14442 src = adjust_address (src, QImode, 0);
14443 reg0 = gen_reg_rtx (QImode);
14444 emit_move_insn (reg0, src);
14445 emit_move_insn (dst, reg0);
14446 return true;
14449 /* Select a dominance comparison mode if possible for a test of the general
14450 form (OP (COND_OR (X) (Y)) (const_int 0)). We support three forms.
14451 COND_OR == DOM_CC_X_AND_Y => (X && Y)
14452 COND_OR == DOM_CC_NX_OR_Y => ((! X) || Y)
14453 COND_OR == DOM_CC_X_OR_Y => (X || Y)
14454 In all cases OP will be either EQ or NE, but we don't need to know which
14455 here. If we are unable to support a dominance comparison we return
14456 CCmode. This will then fail to match for the RTL expressions that
14457 generate this call. */
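/* For example (an illustrative trace of the code below): with a, b, c and d
   simple SImode registers, X = (lt a b) and Y = (le c d) under
   COND_OR == DOM_CC_X_OR_Y yield CC_DLEmode.  */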
14458 machine_mode
14459 arm_select_dominance_cc_mode (rtx x, rtx y, HOST_WIDE_INT cond_or)
14461 enum rtx_code cond1, cond2;
14462 int swapped = 0;
14464 /* Currently we will probably get the wrong result if the individual
14465 comparisons are not simple. This also ensures that it is safe to
14466 reverse a comparison if necessary. */
14467 if ((arm_select_cc_mode (cond1 = GET_CODE (x), XEXP (x, 0), XEXP (x, 1))
14468 != CCmode)
14469 || (arm_select_cc_mode (cond2 = GET_CODE (y), XEXP (y, 0), XEXP (y, 1))
14470 != CCmode))
14471 return CCmode;
14473 /* The if_then_else variant of this tests the second condition if the
14474 first passes, but is true if the first fails. Reverse the first
14475 condition to get a true "inclusive-or" expression. */
14476 if (cond_or == DOM_CC_NX_OR_Y)
14477 cond1 = reverse_condition (cond1);
14479 /* If the comparisons are not equal, and one doesn't dominate the other,
14480 then we can't do this. */
14481 if (cond1 != cond2
14482 && !comparison_dominates_p (cond1, cond2)
14483 && (swapped = 1, !comparison_dominates_p (cond2, cond1)))
14484 return CCmode;
14486 if (swapped)
14487 std::swap (cond1, cond2);
14489 switch (cond1)
14491 case EQ:
14492 if (cond_or == DOM_CC_X_AND_Y)
14493 return CC_DEQmode;
14495 switch (cond2)
14497 case EQ: return CC_DEQmode;
14498 case LE: return CC_DLEmode;
14499 case LEU: return CC_DLEUmode;
14500 case GE: return CC_DGEmode;
14501 case GEU: return CC_DGEUmode;
14502 default: gcc_unreachable ();
14505 case LT:
14506 if (cond_or == DOM_CC_X_AND_Y)
14507 return CC_DLTmode;
14509 switch (cond2)
14511 case LT:
14512 return CC_DLTmode;
14513 case LE:
14514 return CC_DLEmode;
14515 case NE:
14516 return CC_DNEmode;
14517 default:
14518 gcc_unreachable ();
14521 case GT:
14522 if (cond_or == DOM_CC_X_AND_Y)
14523 return CC_DGTmode;
14525 switch (cond2)
14527 case GT:
14528 return CC_DGTmode;
14529 case GE:
14530 return CC_DGEmode;
14531 case NE:
14532 return CC_DNEmode;
14533 default:
14534 gcc_unreachable ();
14537 case LTU:
14538 if (cond_or == DOM_CC_X_AND_Y)
14539 return CC_DLTUmode;
14541 switch (cond2)
14543 case LTU:
14544 return CC_DLTUmode;
14545 case LEU:
14546 return CC_DLEUmode;
14547 case NE:
14548 return CC_DNEmode;
14549 default:
14550 gcc_unreachable ();
14553 case GTU:
14554 if (cond_or == DOM_CC_X_AND_Y)
14555 return CC_DGTUmode;
14557 switch (cond2)
14559 case GTU:
14560 return CC_DGTUmode;
14561 case GEU:
14562 return CC_DGEUmode;
14563 case NE:
14564 return CC_DNEmode;
14565 default:
14566 gcc_unreachable ();
14569 /* The remaining cases only occur when both comparisons are the
14570 same. */
14571 case NE:
14572 gcc_assert (cond1 == cond2);
14573 return CC_DNEmode;
14575 case LE:
14576 gcc_assert (cond1 == cond2);
14577 return CC_DLEmode;
14579 case GE:
14580 gcc_assert (cond1 == cond2);
14581 return CC_DGEmode;
14583 case LEU:
14584 gcc_assert (cond1 == cond2);
14585 return CC_DLEUmode;
14587 case GEU:
14588 gcc_assert (cond1 == cond2);
14589 return CC_DGEUmode;
14591 default:
14592 gcc_unreachable ();
14596 machine_mode
14597 arm_select_cc_mode (enum rtx_code op, rtx x, rtx y)
14599 /* All floating point compares return CCFP if it is an equality
14600 comparison, and CCFPE otherwise. */
14601 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
14603 switch (op)
14605 case EQ:
14606 case NE:
14607 case UNORDERED:
14608 case ORDERED:
14609 case UNLT:
14610 case UNLE:
14611 case UNGT:
14612 case UNGE:
14613 case UNEQ:
14614 case LTGT:
14615 return CCFPmode;
14617 case LT:
14618 case LE:
14619 case GT:
14620 case GE:
14621 return CCFPEmode;
14623 default:
14624 gcc_unreachable ();
14628 /* A compare with a shifted operand. Because of canonicalization, the
14629 comparison will have to be swapped when we emit the assembler. */
14630 if (GET_MODE (y) == SImode
14631 && (REG_P (y) || (GET_CODE (y) == SUBREG))
14632 && (GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
14633 || GET_CODE (x) == LSHIFTRT || GET_CODE (x) == ROTATE
14634 || GET_CODE (x) == ROTATERT))
14635 return CC_SWPmode;
14637 /* This operation is performed swapped, but since we only rely on the Z
14638 flag we don't need an additional mode. */
14639 if (GET_MODE (y) == SImode
14640 && (REG_P (y) || (GET_CODE (y) == SUBREG))
14641 && GET_CODE (x) == NEG
14642 && (op == EQ || op == NE))
14643 return CC_Zmode;
14645 /* This is a special case that is used by combine to allow a
14646 comparison of a shifted byte load to be split into a zero-extend
14647 followed by a comparison of the shifted integer (only valid for
14648 equalities and unsigned inequalities). */
14649 if (GET_MODE (x) == SImode
14650 && GET_CODE (x) == ASHIFT
14651 && CONST_INT_P (XEXP (x, 1)) && INTVAL (XEXP (x, 1)) == 24
14652 && GET_CODE (XEXP (x, 0)) == SUBREG
14653 && MEM_P (SUBREG_REG (XEXP (x, 0)))
14654 && GET_MODE (SUBREG_REG (XEXP (x, 0))) == QImode
14655 && (op == EQ || op == NE
14656 || op == GEU || op == GTU || op == LTU || op == LEU)
14657 && CONST_INT_P (y))
14658 return CC_Zmode;
14660 /* A construct for a conditional compare: if the false arm contains
14661 0, then both conditions must be true; otherwise either condition
14662 must be true. Not all conditions are possible, so CCmode is
14663 returned if it can't be done. */
14664 if (GET_CODE (x) == IF_THEN_ELSE
14665 && (XEXP (x, 2) == const0_rtx
14666 || XEXP (x, 2) == const1_rtx)
14667 && COMPARISON_P (XEXP (x, 0))
14668 && COMPARISON_P (XEXP (x, 1)))
14669 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
14670 INTVAL (XEXP (x, 2)));
14672 /* Alternate canonicalizations of the above. These are somewhat cleaner. */
14673 if (GET_CODE (x) == AND
14674 && (op == EQ || op == NE)
14675 && COMPARISON_P (XEXP (x, 0))
14676 && COMPARISON_P (XEXP (x, 1)))
14677 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
14678 DOM_CC_X_AND_Y);
14680 if (GET_CODE (x) == IOR
14681 && (op == EQ || op == NE)
14682 && COMPARISON_P (XEXP (x, 0))
14683 && COMPARISON_P (XEXP (x, 1)))
14684 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
14685 DOM_CC_X_OR_Y);
14687 /* An operation (on Thumb) where we want to test for a single bit.
14688 This is done by shifting that bit up into the top bit of a
14689 scratch register; we can then branch on the sign bit. */
14690 if (TARGET_THUMB1
14691 && GET_MODE (x) == SImode
14692 && (op == EQ || op == NE)
14693 && GET_CODE (x) == ZERO_EXTRACT
14694 && XEXP (x, 1) == const1_rtx)
14695 return CC_Nmode;
14697 /* For an operation that sets the condition codes as a side-effect, the
14698 V flag is not set correctly, so we can only use comparisons where
14699 this doesn't matter. (For LT and GE we can use "mi" and "pl"
14700 instead.) */
14701 /* ??? Does the ZERO_EXTRACT case really apply to thumb2? */
14702 if (GET_MODE (x) == SImode
14703 && y == const0_rtx
14704 && (op == EQ || op == NE || op == LT || op == GE)
14705 && (GET_CODE (x) == PLUS || GET_CODE (x) == MINUS
14706 || GET_CODE (x) == AND || GET_CODE (x) == IOR
14707 || GET_CODE (x) == XOR || GET_CODE (x) == MULT
14708 || GET_CODE (x) == NOT || GET_CODE (x) == NEG
14709 || GET_CODE (x) == LSHIFTRT
14710 || GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
14711 || GET_CODE (x) == ROTATERT
14712 || (TARGET_32BIT && GET_CODE (x) == ZERO_EXTRACT)))
14713 return CC_NOOVmode;
14715 if (GET_MODE (x) == QImode && (op == EQ || op == NE))
14716 return CC_Zmode;
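/* The PLUS test below recognizes an unsigned comparison of an addition
   against one of its operands, e.g. (geu (plus:SI a b) b); the result only
   depends on the carry out of the addition, hence CC_Cmode.  */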
14718 if (GET_MODE (x) == SImode && (op == LTU || op == GEU)
14719 && GET_CODE (x) == PLUS
14720 && (rtx_equal_p (XEXP (x, 0), y) || rtx_equal_p (XEXP (x, 1), y)))
14721 return CC_Cmode;
14723 if (GET_MODE (x) == DImode || GET_MODE (y) == DImode)
14725 switch (op)
14727 case EQ:
14728 case NE:
14729 /* A DImode comparison against zero can be implemented by
14730 or'ing the two halves together. */
14731 if (y == const0_rtx)
14732 return CC_Zmode;
14734 /* We can do an equality test in three Thumb instructions. */
14735 if (!TARGET_32BIT)
14736 return CC_Zmode;
14738 /* FALLTHROUGH */
14740 case LTU:
14741 case LEU:
14742 case GTU:
14743 case GEU:
14744 /* DImode unsigned comparisons can be implemented by cmp +
14745 cmpeq without a scratch register. Not worth doing in
14746 Thumb-2. */
14747 if (TARGET_32BIT)
14748 return CC_CZmode;
14750 /* FALLTHROUGH */
14752 case LT:
14753 case LE:
14754 case GT:
14755 case GE:
14756 /* DImode signed and unsigned comparisons can be implemented
14757 by cmp + sbcs with a scratch register, but that does not
14758 set the Z flag - we must reverse GT/LE/GTU/LEU. */
14759 gcc_assert (op != EQ && op != NE);
14760 return CC_NCVmode;
14762 default:
14763 gcc_unreachable ();
14767 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_CC)
14768 return GET_MODE (x);
14770 return CCmode;
14773 /* X and Y are two things to compare using CODE. Emit the compare insn and
14774 return the rtx for the CC register in the proper mode. SCRATCH, if nonnull,
14775 supplies a scratch register for DImode comparisons that need one after reload. */
14776 rtx
14777 arm_gen_compare_reg (enum rtx_code code, rtx x, rtx y, rtx scratch)
14779 machine_mode mode;
14780 rtx cc_reg;
14781 int dimode_comparison = GET_MODE (x) == DImode || GET_MODE (y) == DImode;
14783 /* We might have X as a constant, Y as a register because of the predicates
14784 used for cmpdi. If so, force X to a register here. */
14785 if (dimode_comparison && !REG_P (x))
14786 x = force_reg (DImode, x);
14788 mode = SELECT_CC_MODE (code, x, y);
14789 cc_reg = gen_rtx_REG (mode, CC_REGNUM);
14791 if (dimode_comparison
14792 && mode != CC_CZmode)
14794 rtx clobber, set;
14796 /* To compare two non-zero values for equality, XOR them and
14797 then compare against zero. Not used for ARM mode; there
14798 CC_CZmode is cheaper. */
14799 if (mode == CC_Zmode && y != const0_rtx)
14801 gcc_assert (!reload_completed);
14802 x = expand_binop (DImode, xor_optab, x, y, NULL_RTX, 0, OPTAB_WIDEN);
14803 y = const0_rtx;
14806 /* A scratch register is required. */
14807 if (reload_completed)
14808 gcc_assert (scratch != NULL && GET_MODE (scratch) == SImode);
14809 else
14810 scratch = gen_rtx_SCRATCH (SImode);
14812 clobber = gen_rtx_CLOBBER (VOIDmode, scratch);
14813 set = gen_rtx_SET (cc_reg, gen_rtx_COMPARE (mode, x, y));
14814 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, set, clobber)));
14816 else
14817 emit_set_insn (cc_reg, gen_rtx_COMPARE (mode, x, y));
14819 return cc_reg;
14822 /* Generate a sequence of insns that will generate the correct return
14823 address mask depending on the physical architecture that the program
14824 is running on. */
14825 rtx
14826 arm_gen_return_addr_mask (void)
14828 rtx reg = gen_reg_rtx (Pmode);
14830 emit_insn (gen_return_addr_mask (reg));
14831 return reg;
14834 void
14835 arm_reload_in_hi (rtx *operands)
14837 rtx ref = operands[1];
14838 rtx base, scratch;
14839 HOST_WIDE_INT offset = 0;
14841 if (GET_CODE (ref) == SUBREG)
14843 offset = SUBREG_BYTE (ref);
14844 ref = SUBREG_REG (ref);
14847 if (REG_P (ref))
14849 /* We have a pseudo which has been spilt onto the stack; there
14850 are two cases here: the first where there is a simple
14851 stack-slot replacement and a second where the stack-slot is
14852 out of range, or is used as a subreg. */
14853 if (reg_equiv_mem (REGNO (ref)))
14855 ref = reg_equiv_mem (REGNO (ref));
14856 base = find_replacement (&XEXP (ref, 0));
14858 else
14859 /* The slot is out of range, or was dressed up in a SUBREG. */
14860 base = reg_equiv_address (REGNO (ref));
14862 /* PR 62554: If there is no equivalent memory location then just move
14863 the value as an SImode register move. This happens when the target
14864 architecture variant does not have an HImode register move. */
14865 if (base == NULL)
14867 gcc_assert (REG_P (operands[0]));
14868 emit_insn (gen_movsi (gen_rtx_SUBREG (SImode, operands[0], 0),
14869 gen_rtx_SUBREG (SImode, ref, 0)));
14870 return;
14873 else
14874 base = find_replacement (&XEXP (ref, 0));
14876 /* Handle the case where the address is too complex to be offset by 1. */
14877 if (GET_CODE (base) == MINUS
14878 || (GET_CODE (base) == PLUS && !CONST_INT_P (XEXP (base, 1))))
14880 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
14882 emit_set_insn (base_plus, base);
14883 base = base_plus;
14885 else if (GET_CODE (base) == PLUS)
14887 /* The addend must be CONST_INT, or we would have dealt with it above. */
14888 HOST_WIDE_INT hi, lo;
14890 offset += INTVAL (XEXP (base, 1));
14891 base = XEXP (base, 0);
14893 /* Rework the address into a legal sequence of insns. */
14894 /* Valid range for lo is -4095 -> 4095 */
14895 lo = (offset >= 0
14896 ? (offset & 0xfff)
14897 : -((-offset) & 0xfff));
14899 /* Corner case, if lo is the max offset then we would be out of range
14900 once we have added the additional 1 below, so bump the msb into the
14901 pre-loading insn(s). */
14902 if (lo == 4095)
14903 lo &= 0x7ff;
14905 hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
14906 ^ (HOST_WIDE_INT) 0x80000000)
14907 - (HOST_WIDE_INT) 0x80000000);
14909 gcc_assert (hi + lo == offset);
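/* For instance, offset 0x1005 splits into lo = 5 and hi = 0x1000; the 4095
   corner case above gives lo = 2047 and hi = 2048, keeping both lo and
   lo + 1 within the +/-4095 range.  */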
14911 if (hi != 0)
14913 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
14915 /* Get the base address; addsi3 knows how to handle constants
14916 that require more than one insn. */
14917 emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
14918 base = base_plus;
14919 offset = lo;
14923 /* Operands[2] may overlap operands[0] (though it won't overlap
14924 operands[1]); that's why we asked for a DImode reg -- so we can
14925 use the half that does not overlap. */
14926 if (REGNO (operands[2]) == REGNO (operands[0]))
14927 scratch = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
14928 else
14929 scratch = gen_rtx_REG (SImode, REGNO (operands[2]));
14931 emit_insn (gen_zero_extendqisi2 (scratch,
14932 gen_rtx_MEM (QImode,
14933 plus_constant (Pmode, base,
14934 offset))));
14935 emit_insn (gen_zero_extendqisi2 (gen_rtx_SUBREG (SImode, operands[0], 0),
14936 gen_rtx_MEM (QImode,
14937 plus_constant (Pmode, base,
14938 offset + 1))));
14939 if (!BYTES_BIG_ENDIAN)
14940 emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
14941 gen_rtx_IOR (SImode,
14942 gen_rtx_ASHIFT
14943 (SImode,
14944 gen_rtx_SUBREG (SImode, operands[0], 0),
14945 GEN_INT (8)),
14946 scratch));
14947 else
14948 emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
14949 gen_rtx_IOR (SImode,
14950 gen_rtx_ASHIFT (SImode, scratch,
14951 GEN_INT (8)),
14952 gen_rtx_SUBREG (SImode, operands[0], 0)));
14955 /* Handle storing a half-word to memory during reload by synthesizing as two
14956 byte stores. Take care not to clobber the input values until after we
14957 have moved them somewhere safe. This code assumes that if the DImode
14958 scratch in operands[2] overlaps either the input value or output address
14959 in some way, then that value must die in this insn (we absolutely need
14960 two scratch registers for some corner cases). */
14961 void
14962 arm_reload_out_hi (rtx *operands)
14964 rtx ref = operands[0];
14965 rtx outval = operands[1];
14966 rtx base, scratch;
14967 HOST_WIDE_INT offset = 0;
14969 if (GET_CODE (ref) == SUBREG)
14971 offset = SUBREG_BYTE (ref);
14972 ref = SUBREG_REG (ref);
14975 if (REG_P (ref))
14977 /* We have a pseudo which has been spilt onto the stack; there
14978 are two cases here: the first where there is a simple
14979 stack-slot replacement and a second where the stack-slot is
14980 out of range, or is used as a subreg. */
14981 if (reg_equiv_mem (REGNO (ref)))
14983 ref = reg_equiv_mem (REGNO (ref));
14984 base = find_replacement (&XEXP (ref, 0));
14986 else
14987 /* The slot is out of range, or was dressed up in a SUBREG. */
14988 base = reg_equiv_address (REGNO (ref));
14990 /* PR 62254: If there is no equivalent memory location then just move
14991 the value as an SImode register move. This happens when the target
14992 architecture variant does not have an HImode register move. */
14993 if (base == NULL)
14995 gcc_assert (REG_P (outval) || SUBREG_P (outval));
14997 if (REG_P (outval))
14999 emit_insn (gen_movsi (gen_rtx_SUBREG (SImode, ref, 0),
15000 gen_rtx_SUBREG (SImode, outval, 0)));
15002 else /* SUBREG_P (outval) */
15004 if (GET_MODE (SUBREG_REG (outval)) == SImode)
15005 emit_insn (gen_movsi (gen_rtx_SUBREG (SImode, ref, 0),
15006 SUBREG_REG (outval)));
15007 else
15008 /* FIXME: Handle other cases ? */
15009 gcc_unreachable ();
15011 return;
15014 else
15015 base = find_replacement (&XEXP (ref, 0));
15017 scratch = gen_rtx_REG (SImode, REGNO (operands[2]));
15019 /* Handle the case where the address is too complex to be offset by 1. */
15020 if (GET_CODE (base) == MINUS
15021 || (GET_CODE (base) == PLUS && !CONST_INT_P (XEXP (base, 1))))
15023 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
15025 /* Be careful not to destroy OUTVAL. */
15026 if (reg_overlap_mentioned_p (base_plus, outval))
15028 /* Updating base_plus might destroy outval, see if we can
15029 swap the scratch and base_plus. */
15030 if (!reg_overlap_mentioned_p (scratch, outval))
15031 std::swap (scratch, base_plus);
15032 else
15034 rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));
15036 /* Be conservative and copy OUTVAL into the scratch now,
15037 this should only be necessary if outval is a subreg
15038 of something larger than a word. */
15039 /* XXX Might this clobber base? I can't see how it can,
15040 since scratch is known to overlap with OUTVAL, and
15041 must be wider than a word. */
15042 emit_insn (gen_movhi (scratch_hi, outval));
15043 outval = scratch_hi;
15047 emit_set_insn (base_plus, base);
15048 base = base_plus;
15050 else if (GET_CODE (base) == PLUS)
15052 /* The addend must be CONST_INT, or we would have dealt with it above. */
15053 HOST_WIDE_INT hi, lo;
15055 offset += INTVAL (XEXP (base, 1));
15056 base = XEXP (base, 0);
15058 /* Rework the address into a legal sequence of insns. */
15059 /* Valid range for lo is -4095 -> 4095 */
15060 lo = (offset >= 0
15061 ? (offset & 0xfff)
15062 : -((-offset) & 0xfff));
15064 /* Corner case, if lo is the max offset then we would be out of range
15065 once we have added the additional 1 below, so bump the msb into the
15066 pre-loading insn(s). */
15067 if (lo == 4095)
15068 lo &= 0x7ff;
15070 hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
15071 ^ (HOST_WIDE_INT) 0x80000000)
15072 - (HOST_WIDE_INT) 0x80000000);
15074 gcc_assert (hi + lo == offset);
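/* A worked numeric example of the split above, assuming offset == 4095 (the
   corner case noted earlier): lo is first masked down to 0x7ff (2047), so hi
   becomes 4095 - 2047 = 2048.  The addsi3 below folds 2048 into base_plus and
   the two byte stores then use offsets 2047 and 2048, both within the
   +/-4095 range.  */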
15076 if (hi != 0)
15078 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
15080 /* Be careful not to destroy OUTVAL. */
15081 if (reg_overlap_mentioned_p (base_plus, outval))
15083 /* Updating base_plus might destroy outval, see if we
15084 can swap the scratch and base_plus. */
15085 if (!reg_overlap_mentioned_p (scratch, outval))
15086 std::swap (scratch, base_plus);
15087 else
15089 rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));
15091 /* Be conservative and copy outval into scratch now,
15092 this should only be necessary if outval is a
15093 subreg of something larger than a word. */
15094 /* XXX Might this clobber base? I can't see how it
15095 can, since scratch is known to overlap with
15096 outval. */
15097 emit_insn (gen_movhi (scratch_hi, outval));
15098 outval = scratch_hi;
15102 /* Get the base address; addsi3 knows how to handle constants
15103 that require more than one insn. */
15104 emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
15105 base = base_plus;
15106 offset = lo;
15110 if (BYTES_BIG_ENDIAN)
15112 emit_insn (gen_movqi (gen_rtx_MEM (QImode,
15113 plus_constant (Pmode, base,
15114 offset + 1)),
15115 gen_lowpart (QImode, outval)));
15116 emit_insn (gen_lshrsi3 (scratch,
15117 gen_rtx_SUBREG (SImode, outval, 0),
15118 GEN_INT (8)));
15119 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, base,
15120 offset)),
15121 gen_lowpart (QImode, scratch)));
15123 else
15125 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, base,
15126 offset)),
15127 gen_lowpart (QImode, outval)));
15128 emit_insn (gen_lshrsi3 (scratch,
15129 gen_rtx_SUBREG (SImode, outval, 0),
15130 GEN_INT (8)));
15131 emit_insn (gen_movqi (gen_rtx_MEM (QImode,
15132 plus_constant (Pmode, base,
15133 offset + 1)),
15134 gen_lowpart (QImode, scratch)));
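/* A rough sketch of the sequence emitted above for a little-endian target
   with a plain register base (register names are purely illustrative):

	strb	r1, [r0, #off]		@ low byte of OUTVAL
	lsr	r2, r1, #8		@ SCRATCH = OUTVAL >> 8
	strb	r2, [r0, #off+1]	@ high byte of OUTVAL

   On big-endian targets the same two byte stores are emitted with the
   offsets swapped, so the most significant byte lands at the lower
   address.  */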
15138 /* Return true if a type must be passed in memory. For AAPCS, small aggregates
15139 (padded to the size of a word) should be passed in a register. */
15141 static bool
15142 arm_must_pass_in_stack (machine_mode mode, const_tree type)
15144 if (TARGET_AAPCS_BASED)
15145 return must_pass_in_stack_var_size (mode, type);
15146 else
15147 return must_pass_in_stack_var_size_or_pad (mode, type);
15151 /* For use by FUNCTION_ARG_PADDING (MODE, TYPE).
15152 Return true if an argument passed on the stack should be padded upwards,
15153 i.e. if the least-significant byte has useful data.
15154 For legacy APCS ABIs we use the default. For AAPCS based ABIs small
15155 aggregate types are placed in the lowest memory address. */
15157 bool
15158 arm_pad_arg_upward (machine_mode mode ATTRIBUTE_UNUSED, const_tree type)
15160 if (!TARGET_AAPCS_BASED)
15161 return DEFAULT_FUNCTION_ARG_PADDING(mode, type) == upward;
15163 if (type && BYTES_BIG_ENDIAN && INTEGRAL_TYPE_P (type))
15164 return false;
15166 return true;
15170 /* Similarly, for use by BLOCK_REG_PADDING (MODE, TYPE, FIRST).
15171 Return !BYTES_BIG_ENDIAN if the least significant byte of the
15172 register has useful data, and return the opposite if the most
15173 significant byte does. */
15175 bool
15176 arm_pad_reg_upward (machine_mode mode,
15177 tree type, int first ATTRIBUTE_UNUSED)
15179 if (TARGET_AAPCS_BASED && BYTES_BIG_ENDIAN)
15181 /* For AAPCS, small aggregates, small fixed-point types,
15182 and small complex types are always padded upwards. */
15183 if (type)
15185 if ((AGGREGATE_TYPE_P (type)
15186 || TREE_CODE (type) == COMPLEX_TYPE
15187 || FIXED_POINT_TYPE_P (type))
15188 && int_size_in_bytes (type) <= 4)
15189 return true;
15191 else
15193 if ((COMPLEX_MODE_P (mode) || ALL_FIXED_POINT_MODE_P (mode))
15194 && GET_MODE_SIZE (mode) <= 4)
15195 return true;
15199 /* Otherwise, use default padding. */
15200 return !BYTES_BIG_ENDIAN;
15203 /* Returns true iff OFFSET is valid for use in an LDRD/STRD instruction,
15204 assuming that the address in the base register is word aligned. */
15205 bool
15206 offset_ok_for_ldrd_strd (HOST_WIDE_INT offset)
15208 HOST_WIDE_INT max_offset;
15210 /* Offset must be a multiple of 4 in Thumb-2 mode. */
15211 if (TARGET_THUMB2 && ((offset & 3) != 0))
15212 return false;
15214 if (TARGET_THUMB2)
15215 max_offset = 1020;
15216 else if (TARGET_ARM)
15217 max_offset = 255;
15218 else
15219 return false;
15221 return ((offset <= max_offset) && (offset >= -max_offset));
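/* For example, under the rules above an offset of 1020 is accepted in
   Thumb-2 (a multiple of 4 within +/-1020) but rejected in ARM state (limit
   255), while an offset of 250 is accepted in ARM state but rejected in
   Thumb-2 because it is not a multiple of 4.  Thumb-1 targets always get
   false.  */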
15224 /* Checks whether the operands are valid for use in an LDRD/STRD instruction.
15225 Assumes that RT, RT2, and RN are REG; this is guaranteed by the patterns.
15226 Assumes that the address in the base register RN is word aligned. The pattern
15227 guarantees that both memory accesses use the same base register, that the
15228 offsets are constants within range, and that the gap between the offsets is 4.
15229 If reload has completed, also check that the registers are legal. WBACK
15230 indicates whether the address is updated. LOAD indicates whether the memory access is a load or a store. */
15231 bool
15232 operands_ok_ldrd_strd (rtx rt, rtx rt2, rtx rn, HOST_WIDE_INT offset,
15233 bool wback, bool load)
15235 unsigned int t, t2, n;
15237 if (!reload_completed)
15238 return true;
15240 if (!offset_ok_for_ldrd_strd (offset))
15241 return false;
15243 t = REGNO (rt);
15244 t2 = REGNO (rt2);
15245 n = REGNO (rn);
15247 if ((TARGET_THUMB2)
15248 && ((wback && (n == t || n == t2))
15249 || (t == SP_REGNUM)
15250 || (t == PC_REGNUM)
15251 || (t2 == SP_REGNUM)
15252 || (t2 == PC_REGNUM)
15253 || (!load && (n == PC_REGNUM))
15254 || (load && (t == t2))
15255 /* Triggers Cortex-M3 LDRD errata. */
15256 || (!wback && load && fix_cm3_ldrd && (n == t))))
15257 return false;
15259 if ((TARGET_ARM)
15260 && ((wback && (n == t || n == t2))
15261 || (t2 == PC_REGNUM)
15262 || (t % 2 != 0) /* First destination register is not even. */
15263 || (t2 != t + 1)
15264 /* PC can be used as base register (for offset addressing only),
15265 but it is deprecated. */
15266 || (n == PC_REGNUM)))
15267 return false;
15269 return true;
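/* For instance, after reload an ARM-state pairing such as
	ldrd	r2, r3, [r0, #8]
   satisfies the checks above (even first register, consecutive pair, base is
   not PC), whereas a pair starting in an odd register such as r3/r4 fails
   the evenness test, and a write-back form whose base register is also one
   of the destinations fails the WBACK check.  */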
15272 /* Helper for gen_operands_ldrd_strd. Returns true iff the memory
15273 operand MEM's address contains an immediate offset from the base
15274 register and has no side effects, in which case it sets BASE and
15275 OFFSET accordingly. */
15276 static bool
15277 mem_ok_for_ldrd_strd (rtx mem, rtx *base, rtx *offset)
15279 rtx addr;
15281 gcc_assert (base != NULL && offset != NULL);
15283 /* TODO: Handle more general memory operand patterns, such as
15284 PRE_DEC and PRE_INC. */
15286 if (side_effects_p (mem))
15287 return false;
15289 /* Can't deal with subregs. */
15290 if (GET_CODE (mem) == SUBREG)
15291 return false;
15293 gcc_assert (MEM_P (mem));
15295 *offset = const0_rtx;
15297 addr = XEXP (mem, 0);
15299 /* If addr isn't valid for DImode, then we can't handle it. */
15300 if (!arm_legitimate_address_p (DImode, addr,
15301 reload_in_progress || reload_completed))
15302 return false;
15304 if (REG_P (addr))
15306 *base = addr;
15307 return true;
15309 else if (GET_CODE (addr) == PLUS || GET_CODE (addr) == MINUS)
15311 *base = XEXP (addr, 0);
15312 *offset = XEXP (addr, 1);
15313 return (REG_P (*base) && CONST_INT_P (*offset));
15316 return false;
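/* For example, a MEM whose address is simply (reg:SI r4) yields *BASE = r4
   and *OFFSET = 0, and one whose address is (plus:SI (reg:SI r4)
   (const_int 8)) yields *BASE = r4 and *OFFSET = 8, provided the address is
   also valid for DImode.  Auto-modify addresses such as POST_INC are
   rejected by the side_effects_p check above.  */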
15319 /* Called from a peephole2 to replace two word-size accesses with a
15320 single LDRD/STRD instruction. Returns true iff we can generate a
15321 new instruction sequence. That is, both accesses use the same base
15322 register and the gap between constant offsets is 4. This function
15323 may reorder its operands to match ldrd/strd RTL templates.
15324 OPERANDS are the operands found by the peephole matcher;
15325 OPERANDS[0,1] are register operands, and OPERANDS[2,3] are the
15326 corresponding memory operands. LOAD indicates whether the access
15327 is load or store. CONST_STORE indicates a store of constant
15328 integer values held in OPERANDS[4,5] and assumes that the pattern
15329 is 4 insns long, for the purpose of checking dead registers.
15330 COMMUTE indicates that register operands may be reordered. */
15331 bool
15332 gen_operands_ldrd_strd (rtx *operands, bool load,
15333 bool const_store, bool commute)
15335 int nops = 2;
15336 HOST_WIDE_INT offsets[2], offset;
15337 rtx base = NULL_RTX;
15338 rtx cur_base, cur_offset, tmp;
15339 int i, gap;
15340 HARD_REG_SET regset;
15342 gcc_assert (!const_store || !load);
15343 /* Check that the memory references are immediate offsets from the
15344 same base register. Extract the base register, the destination
15345 registers, and the corresponding memory offsets. */
15346 for (i = 0; i < nops; i++)
15348 if (!mem_ok_for_ldrd_strd (operands[nops+i], &cur_base, &cur_offset))
15349 return false;
15351 if (i == 0)
15352 base = cur_base;
15353 else if (REGNO (base) != REGNO (cur_base))
15354 return false;
15356 offsets[i] = INTVAL (cur_offset);
15357 if (GET_CODE (operands[i]) == SUBREG)
15359 tmp = SUBREG_REG (operands[i]);
15360 gcc_assert (GET_MODE (operands[i]) == GET_MODE (tmp));
15361 operands[i] = tmp;
15365 /* Make sure there is no dependency between the individual loads. */
15366 if (load && REGNO (operands[0]) == REGNO (base))
15367 return false; /* RAW */
15369 if (load && REGNO (operands[0]) == REGNO (operands[1]))
15370 return false; /* WAW */
15372 /* If the same input register is used in both stores
15373 when storing different constants, try to find a free register.
15374 For example, the code
15375 mov r0, 0
15376 str r0, [r2]
15377 mov r0, 1
15378 str r0, [r2, #4]
15379 can be transformed into
15380 mov r1, 0
15381 mov r0, 1
15382 strd r1, r0, [r2]
15383 in Thumb mode assuming that r1 is free.
15384 For ARM mode do the same but only if the starting register
15385 can be made to be even. */
15386 if (const_store
15387 && REGNO (operands[0]) == REGNO (operands[1])
15388 && INTVAL (operands[4]) != INTVAL (operands[5]))
15390 if (TARGET_THUMB2)
15392 CLEAR_HARD_REG_SET (regset);
15393 tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
15394 if (tmp == NULL_RTX)
15395 return false;
15397 /* Use the new register in the first load to ensure that
15398 if the original input register is not dead after peephole,
15399 then it will have the correct constant value. */
15400 operands[0] = tmp;
15402 else if (TARGET_ARM)
15404 int regno = REGNO (operands[0]);
15405 if (!peep2_reg_dead_p (4, operands[0]))
15407 /* When the input register is even and is not dead after the
15408 pattern, it has to hold the second constant but we cannot
15409 form a legal STRD in ARM mode with this register as the second
15410 register. */
15411 if (regno % 2 == 0)
15412 return false;
15414 /* Is regno-1 free? */
15415 SET_HARD_REG_SET (regset);
15416 CLEAR_HARD_REG_BIT(regset, regno - 1);
15417 tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
15418 if (tmp == NULL_RTX)
15419 return false;
15421 operands[0] = tmp;
15423 else
15425 /* Find a DImode register. */
15426 CLEAR_HARD_REG_SET (regset);
15427 tmp = peep2_find_free_register (0, 4, "r", DImode, &regset);
15428 if (tmp != NULL_RTX)
15430 operands[0] = simplify_gen_subreg (SImode, tmp, DImode, 0);
15431 operands[1] = simplify_gen_subreg (SImode, tmp, DImode, 4);
15433 else
15435 /* Can we use the input register to form a DI register? */
15436 SET_HARD_REG_SET (regset);
15437 CLEAR_HARD_REG_BIT(regset,
15438 regno % 2 == 0 ? regno + 1 : regno - 1);
15439 tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
15440 if (tmp == NULL_RTX)
15441 return false;
15442 operands[regno % 2 == 1 ? 0 : 1] = tmp;
15446 gcc_assert (operands[0] != NULL_RTX);
15447 gcc_assert (operands[1] != NULL_RTX);
15448 gcc_assert (REGNO (operands[0]) % 2 == 0);
15449 gcc_assert (REGNO (operands[1]) == REGNO (operands[0]) + 1);
15453 /* Make sure the instructions are ordered with lower memory access first. */
15454 if (offsets[0] > offsets[1])
15456 gap = offsets[0] - offsets[1];
15457 offset = offsets[1];
15459 /* Swap the instructions such that lower memory is accessed first. */
15460 std::swap (operands[0], operands[1]);
15461 std::swap (operands[2], operands[3]);
15462 if (const_store)
15463 std::swap (operands[4], operands[5]);
15465 else
15467 gap = offsets[1] - offsets[0];
15468 offset = offsets[0];
15471 /* Make sure accesses are to consecutive memory locations. */
15472 if (gap != 4)
15473 return false;
15475 /* Make sure we generate legal instructions. */
15476 if (operands_ok_ldrd_strd (operands[0], operands[1], base, offset,
15477 false, load))
15478 return true;
15480 /* In Thumb state, where registers are almost unconstrained, there
15481 is little hope to fix it. */
15482 if (TARGET_THUMB2)
15483 return false;
15485 if (load && commute)
15487 /* Try reordering registers. */
15488 std::swap (operands[0], operands[1]);
15489 if (operands_ok_ldrd_strd (operands[0], operands[1], base, offset,
15490 false, load))
15491 return true;
15494 if (const_store)
15496 /* If input registers are dead after this pattern, they can be
15497 reordered or replaced by other registers that are free in the
15498 current pattern. */
15499 if (!peep2_reg_dead_p (4, operands[0])
15500 || !peep2_reg_dead_p (4, operands[1]))
15501 return false;
15503 /* Try to reorder the input registers. */
15504 /* For example, the code
15505 mov r0, 0
15506 mov r1, 1
15507 str r1, [r2]
15508 str r0, [r2, #4]
15509 can be transformed into
15510 mov r1, 0
15511 mov r0, 1
15512 strd r0, r1, [r2]
15514 if (operands_ok_ldrd_strd (operands[1], operands[0], base, offset,
15515 false, false))
15517 std::swap (operands[0], operands[1]);
15518 return true;
15521 /* Try to find a free DI register. */
15522 CLEAR_HARD_REG_SET (regset);
15523 add_to_hard_reg_set (&regset, SImode, REGNO (operands[0]));
15524 add_to_hard_reg_set (&regset, SImode, REGNO (operands[1]));
15525 while (true)
15527 tmp = peep2_find_free_register (0, 4, "r", DImode, &regset);
15528 if (tmp == NULL_RTX)
15529 return false;
15531 /* DREG must be an even-numbered register in DImode.
15532 Split it into SI registers. */
15533 operands[0] = simplify_gen_subreg (SImode, tmp, DImode, 0);
15534 operands[1] = simplify_gen_subreg (SImode, tmp, DImode, 4);
15535 gcc_assert (operands[0] != NULL_RTX);
15536 gcc_assert (operands[1] != NULL_RTX);
15537 gcc_assert (REGNO (operands[0]) % 2 == 0);
15538 gcc_assert (REGNO (operands[0]) + 1 == REGNO (operands[1]));
15540 return (operands_ok_ldrd_strd (operands[0], operands[1],
15541 base, offset,
15542 false, load));
15546 return false;
15552 /* Print a symbolic form of X to the debug file, F. */
15553 static void
15554 arm_print_value (FILE *f, rtx x)
15556 switch (GET_CODE (x))
15558 case CONST_INT:
15559 fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (x));
15560 return;
15562 case CONST_DOUBLE:
15563 fprintf (f, "<0x%lx,0x%lx>", (long)XWINT (x, 2), (long)XWINT (x, 3));
15564 return;
15566 case CONST_VECTOR:
15568 int i;
15570 fprintf (f, "<");
15571 for (i = 0; i < CONST_VECTOR_NUNITS (x); i++)
15573 fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (CONST_VECTOR_ELT (x, i)));
15574 if (i < (CONST_VECTOR_NUNITS (x) - 1))
15575 fputc (',', f);
15577 fprintf (f, ">");
15579 return;
15581 case CONST_STRING:
15582 fprintf (f, "\"%s\"", XSTR (x, 0));
15583 return;
15585 case SYMBOL_REF:
15586 fprintf (f, "`%s'", XSTR (x, 0));
15587 return;
15589 case LABEL_REF:
15590 fprintf (f, "L%d", INSN_UID (XEXP (x, 0)));
15591 return;
15593 case CONST:
15594 arm_print_value (f, XEXP (x, 0));
15595 return;
15597 case PLUS:
15598 arm_print_value (f, XEXP (x, 0));
15599 fprintf (f, "+");
15600 arm_print_value (f, XEXP (x, 1));
15601 return;
15603 case PC:
15604 fprintf (f, "pc");
15605 return;
15607 default:
15608 fprintf (f, "????");
15609 return;
15613 /* Routines for manipulation of the constant pool. */
15615 /* Arm instructions cannot load a large constant directly into a
15616 register; they have to come from a pc relative load. The constant
15617 must therefore be placed in the addressable range of the pc
15618 relative load. Depending on the precise pc relative load
15619 instruction the range is somewhere between 256 bytes and 4k. This
15620 means that we often have to dump a constant inside a function, and
15621 generate code to branch around it.
15623 It is important to minimize this, since the branches will slow
15624 things down and make the code larger.
15626 Normally we can hide the table after an existing unconditional
15627 branch so that there is no interruption of the flow, but in the
15628 worst case the code looks like this:
15630 ldr rn, L1
15632 b L2
15633 align
15634 L1: .long value
15638 ldr rn, L3
15640 b L4
15641 align
15642 L3: .long value
15646 We fix this by performing a scan after scheduling, which notices
15647 which instructions need to have their operands fetched from the
15648 constant table and builds the table.
15650 The algorithm starts by building a table of all the constants that
15651 need fixing up and all the natural barriers in the function (places
15652 where a constant table can be dropped without breaking the flow).
15653 For each fixup we note how far the pc-relative replacement will be
15654 able to reach and the offset of the instruction into the function.
15656 Having built the table we then group the fixes together to form
15657 tables that are as large as possible (subject to addressing
15658 constraints) and emit each table of constants after the last
15659 barrier that is within range of all the instructions in the group.
15660 If a group does not contain a barrier, then we forcibly create one
15661 by inserting a jump instruction into the flow. Once the table has
15662 been inserted, the insns are then modified to reference the
15663 relevant entry in the pool.
15665 Possible enhancements to the algorithm (not implemented) are:
15667 1) For some processors and object formats, there may be benefit in
15668 aligning the pools to the start of cache lines; this alignment
15669 would need to be taken into account when calculating addressability
15670 of a pool. */
15672 /* These typedefs are located at the start of this file, so that
15673 they can be used in the prototypes there. This comment is to
15674 remind readers of that fact so that the following structures
15675 can be understood more easily.
15677 typedef struct minipool_node Mnode;
15678 typedef struct minipool_fixup Mfix; */
15680 struct minipool_node
15682 /* Doubly linked chain of entries. */
15683 Mnode * next;
15684 Mnode * prev;
15685 /* The maximum offset into the code at which this entry can be placed. While
15686 pushing fixes for forward references, all entries are sorted in order
15687 of increasing max_address. */
15688 HOST_WIDE_INT max_address;
15689 /* Similarly for an entry inserted for a backwards ref. */
15690 HOST_WIDE_INT min_address;
15691 /* The number of fixes referencing this entry. This can become zero
15692 if we "unpush" an entry. In this case we ignore the entry when we
15693 come to emit the code. */
15694 int refcount;
15695 /* The offset from the start of the minipool. */
15696 HOST_WIDE_INT offset;
15697 /* The value in the table. */
15698 rtx value;
15699 /* The mode of value. */
15700 machine_mode mode;
15701 /* The size of the value. With iWMMXt enabled
15702 sizes > 4 also imply an alignment of 8 bytes. */
15703 int fix_size;
15706 struct minipool_fixup
15708 Mfix * next;
15709 rtx_insn * insn;
15710 HOST_WIDE_INT address;
15711 rtx * loc;
15712 machine_mode mode;
15713 int fix_size;
15714 rtx value;
15715 Mnode * minipool;
15716 HOST_WIDE_INT forwards;
15717 HOST_WIDE_INT backwards;
15720 /* Fixes less than a word need padding out to a word boundary. */
15721 #define MINIPOOL_FIX_SIZE(mode) \
15722 (GET_MODE_SIZE ((mode)) >= 4 ? GET_MODE_SIZE ((mode)) : 4)
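/* For example, MINIPOOL_FIX_SIZE (QImode) and MINIPOOL_FIX_SIZE (HImode)
   both evaluate to 4, while MINIPOOL_FIX_SIZE (DImode) is 8.  */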
15724 static Mnode * minipool_vector_head;
15725 static Mnode * minipool_vector_tail;
15726 static rtx_code_label *minipool_vector_label;
15727 static int minipool_pad;
15729 /* The linked list of all minipool fixes required for this function. */
15730 Mfix * minipool_fix_head;
15731 Mfix * minipool_fix_tail;
15732 /* The fix entry for the current minipool, once it has been placed. */
15733 Mfix * minipool_barrier;
15735 #ifndef JUMP_TABLES_IN_TEXT_SECTION
15736 #define JUMP_TABLES_IN_TEXT_SECTION 0
15737 #endif
15739 static HOST_WIDE_INT
15740 get_jump_table_size (rtx_jump_table_data *insn)
15742 /* ADDR_VECs only take room if read-only data goes into the text
15743 section. */
15744 if (JUMP_TABLES_IN_TEXT_SECTION || readonly_data_section == text_section)
15746 rtx body = PATTERN (insn);
15747 int elt = GET_CODE (body) == ADDR_DIFF_VEC ? 1 : 0;
15748 HOST_WIDE_INT size;
15749 HOST_WIDE_INT modesize;
15751 modesize = GET_MODE_SIZE (GET_MODE (body));
15752 size = modesize * XVECLEN (body, elt);
15753 switch (modesize)
15755 case 1:
15756 /* Round up size of TBB table to a halfword boundary. */
15757 size = (size + 1) & ~HOST_WIDE_INT_1;
15758 break;
15759 case 2:
15760 /* No padding necessary for TBH. */
15761 break;
15762 case 4:
15763 /* Add two bytes for alignment on Thumb. */
15764 if (TARGET_THUMB)
15765 size += 2;
15766 break;
15767 default:
15768 gcc_unreachable ();
15770 return size;
15773 return 0;
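/* For example, a QImode ADDR_DIFF_VEC (a Thumb-2 TBB table) with five
   entries occupies 5 bytes and is rounded up to 6 here, while an SImode
   table with five entries occupies 20 bytes plus, on Thumb targets, 2 bytes
   of alignment padding.  */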
15776 /* Return the maximum amount of padding that will be inserted before
15777 label LABEL. */
15779 static HOST_WIDE_INT
15780 get_label_padding (rtx label)
15782 HOST_WIDE_INT align, min_insn_size;
15784 align = 1 << label_to_alignment (label);
15785 min_insn_size = TARGET_THUMB ? 2 : 4;
15786 return align > min_insn_size ? align - min_insn_size : 0;
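/* For example, a label aligned to 8 bytes may be preceded by up to
   8 - 2 = 6 bytes of padding on Thumb and up to 8 - 4 = 4 bytes on ARM,
   while word-aligned (or less aligned) labels add no padding on ARM.  */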
15789 /* Move a minipool fix MP from its current location to before MAX_MP.
15790 If MAX_MP is NULL, then MP doesn't need moving, but the addressing
15791 constraints may need updating. */
15792 static Mnode *
15793 move_minipool_fix_forward_ref (Mnode *mp, Mnode *max_mp,
15794 HOST_WIDE_INT max_address)
15796 /* The code below assumes these are different. */
15797 gcc_assert (mp != max_mp);
15799 if (max_mp == NULL)
15801 if (max_address < mp->max_address)
15802 mp->max_address = max_address;
15804 else
15806 if (max_address > max_mp->max_address - mp->fix_size)
15807 mp->max_address = max_mp->max_address - mp->fix_size;
15808 else
15809 mp->max_address = max_address;
15811 /* Unlink MP from its current position. Since max_mp is non-null,
15812 mp->prev must be non-null. */
15813 mp->prev->next = mp->next;
15814 if (mp->next != NULL)
15815 mp->next->prev = mp->prev;
15816 else
15817 minipool_vector_tail = mp->prev;
15819 /* Re-insert it before MAX_MP. */
15820 mp->next = max_mp;
15821 mp->prev = max_mp->prev;
15822 max_mp->prev = mp;
15824 if (mp->prev != NULL)
15825 mp->prev->next = mp;
15826 else
15827 minipool_vector_head = mp;
15830 /* Save the new entry. */
15831 max_mp = mp;
15833 /* Scan over the preceding entries and adjust their addresses as
15834 required. */
15835 while (mp->prev != NULL
15836 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
15838 mp->prev->max_address = mp->max_address - mp->prev->fix_size;
15839 mp = mp->prev;
15842 return max_mp;
15845 /* Add a constant to the minipool for a forward reference. Returns the
15846 node added or NULL if the constant will not fit in this pool. */
15847 static Mnode *
15848 add_minipool_forward_ref (Mfix *fix)
15850 /* If set, max_mp is the first pool_entry that has a lower
15851 constraint than the one we are trying to add. */
15852 Mnode * max_mp = NULL;
15853 HOST_WIDE_INT max_address = fix->address + fix->forwards - minipool_pad;
15854 Mnode * mp;
15856 /* If the minipool starts before the end of FIX->INSN then this FIX
15857 cannot be placed into the current pool. Furthermore, adding the
15858 new constant pool entry may cause the pool to start FIX_SIZE bytes
15859 earlier. */
15860 if (minipool_vector_head &&
15861 (fix->address + get_attr_length (fix->insn)
15862 >= minipool_vector_head->max_address - fix->fix_size))
15863 return NULL;
15865 /* Scan the pool to see if a constant with the same value has
15866 already been added. While we are doing this, also note the
15867 location where we must insert the constant if it doesn't already
15868 exist. */
15869 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
15871 if (GET_CODE (fix->value) == GET_CODE (mp->value)
15872 && fix->mode == mp->mode
15873 && (!LABEL_P (fix->value)
15874 || (CODE_LABEL_NUMBER (fix->value)
15875 == CODE_LABEL_NUMBER (mp->value)))
15876 && rtx_equal_p (fix->value, mp->value))
15878 /* More than one fix references this entry. */
15879 mp->refcount++;
15880 return move_minipool_fix_forward_ref (mp, max_mp, max_address);
15883 /* Note the insertion point if necessary. */
15884 if (max_mp == NULL
15885 && mp->max_address > max_address)
15886 max_mp = mp;
15888 /* If we are inserting an 8-byte aligned quantity and
15889 we have not already found an insertion point, then
15890 make sure that all such 8-byte aligned quantities are
15891 placed at the start of the pool. */
15892 if (ARM_DOUBLEWORD_ALIGN
15893 && max_mp == NULL
15894 && fix->fix_size >= 8
15895 && mp->fix_size < 8)
15897 max_mp = mp;
15898 max_address = mp->max_address;
15902 /* The value is not currently in the minipool, so we need to create
15903 a new entry for it. If MAX_MP is NULL, the entry will be put on
15904 the end of the list since the placement is less constrained than
15905 any existing entry. Otherwise, we insert the new fix before
15906 MAX_MP and, if necessary, adjust the constraints on the other
15907 entries. */
15908 mp = XNEW (Mnode);
15909 mp->fix_size = fix->fix_size;
15910 mp->mode = fix->mode;
15911 mp->value = fix->value;
15912 mp->refcount = 1;
15913 /* Not yet required for a backwards ref. */
15914 mp->min_address = -65536;
15916 if (max_mp == NULL)
15918 mp->max_address = max_address;
15919 mp->next = NULL;
15920 mp->prev = minipool_vector_tail;
15922 if (mp->prev == NULL)
15924 minipool_vector_head = mp;
15925 minipool_vector_label = gen_label_rtx ();
15927 else
15928 mp->prev->next = mp;
15930 minipool_vector_tail = mp;
15932 else
15934 if (max_address > max_mp->max_address - mp->fix_size)
15935 mp->max_address = max_mp->max_address - mp->fix_size;
15936 else
15937 mp->max_address = max_address;
15939 mp->next = max_mp;
15940 mp->prev = max_mp->prev;
15941 max_mp->prev = mp;
15942 if (mp->prev != NULL)
15943 mp->prev->next = mp;
15944 else
15945 minipool_vector_head = mp;
15948 /* Save the new entry. */
15949 max_mp = mp;
15951 /* Scan over the preceding entries and adjust their addresses as
15952 required. */
15953 while (mp->prev != NULL
15954 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
15956 mp->prev->max_address = mp->max_address - mp->prev->fix_size;
15957 mp = mp->prev;
15960 return max_mp;
15963 static Mnode *
15964 move_minipool_fix_backward_ref (Mnode *mp, Mnode *min_mp,
15965 HOST_WIDE_INT min_address)
15967 HOST_WIDE_INT offset;
15969 /* The code below assumes these are different. */
15970 gcc_assert (mp != min_mp);
15972 if (min_mp == NULL)
15974 if (min_address > mp->min_address)
15975 mp->min_address = min_address;
15977 else
15979 /* We will adjust this below if it is too loose. */
15980 mp->min_address = min_address;
15982 /* Unlink MP from its current position. Since min_mp is non-null,
15983 mp->next must be non-null. */
15984 mp->next->prev = mp->prev;
15985 if (mp->prev != NULL)
15986 mp->prev->next = mp->next;
15987 else
15988 minipool_vector_head = mp->next;
15990 /* Reinsert it after MIN_MP. */
15991 mp->prev = min_mp;
15992 mp->next = min_mp->next;
15993 min_mp->next = mp;
15994 if (mp->next != NULL)
15995 mp->next->prev = mp;
15996 else
15997 minipool_vector_tail = mp;
16000 min_mp = mp;
16002 offset = 0;
16003 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
16005 mp->offset = offset;
16006 if (mp->refcount > 0)
16007 offset += mp->fix_size;
16009 if (mp->next && mp->next->min_address < mp->min_address + mp->fix_size)
16010 mp->next->min_address = mp->min_address + mp->fix_size;
16013 return min_mp;
16016 /* Add a constant to the minipool for a backward reference. Returns the
16017 node added or NULL if the constant will not fit in this pool.
16019 Note that the code for inserting a backwards reference can be
16020 somewhat confusing because the calculated offsets for each fix do
16021 not take into account the size of the pool (which is still under
16022 construction). */
16023 static Mnode *
16024 add_minipool_backward_ref (Mfix *fix)
16026 /* If set, min_mp is the last pool_entry that has a lower constraint
16027 than the one we are trying to add. */
16028 Mnode *min_mp = NULL;
16029 /* This can be negative, since it is only a constraint. */
16030 HOST_WIDE_INT min_address = fix->address - fix->backwards;
16031 Mnode *mp;
16033 /* If we can't reach the current pool from this insn, or if we can't
16034 insert this entry at the end of the pool without pushing other
16035 fixes out of range, then we don't try. This ensures that we
16036 can't fail later on. */
16037 if (min_address >= minipool_barrier->address
16038 || (minipool_vector_tail->min_address + fix->fix_size
16039 >= minipool_barrier->address))
16040 return NULL;
16042 /* Scan the pool to see if a constant with the same value has
16043 already been added. While we are doing this, also note the
16044 location where we must insert the constant if it doesn't already
16045 exist. */
16046 for (mp = minipool_vector_tail; mp != NULL; mp = mp->prev)
16048 if (GET_CODE (fix->value) == GET_CODE (mp->value)
16049 && fix->mode == mp->mode
16050 && (!LABEL_P (fix->value)
16051 || (CODE_LABEL_NUMBER (fix->value)
16052 == CODE_LABEL_NUMBER (mp->value)))
16053 && rtx_equal_p (fix->value, mp->value)
16054 /* Check that there is enough slack to move this entry to the
16055 end of the table (this is conservative). */
16056 && (mp->max_address
16057 > (minipool_barrier->address
16058 + minipool_vector_tail->offset
16059 + minipool_vector_tail->fix_size)))
16061 mp->refcount++;
16062 return move_minipool_fix_backward_ref (mp, min_mp, min_address);
16065 if (min_mp != NULL)
16066 mp->min_address += fix->fix_size;
16067 else
16069 /* Note the insertion point if necessary. */
16070 if (mp->min_address < min_address)
16072 /* For now, we do not allow the insertion of nodes requiring 8-byte
16073 alignment anywhere but at the start of the pool. */
16074 if (ARM_DOUBLEWORD_ALIGN
16075 && fix->fix_size >= 8 && mp->fix_size < 8)
16076 return NULL;
16077 else
16078 min_mp = mp;
16080 else if (mp->max_address
16081 < minipool_barrier->address + mp->offset + fix->fix_size)
16083 /* Inserting before this entry would push the fix beyond
16084 its maximum address (which can happen if we have
16085 re-located a forwards fix); force the new fix to come
16086 after it. */
16087 if (ARM_DOUBLEWORD_ALIGN
16088 && fix->fix_size >= 8 && mp->fix_size < 8)
16089 return NULL;
16090 else
16092 min_mp = mp;
16093 min_address = mp->min_address + fix->fix_size;
16096 /* Do not insert a non-8-byte aligned quantity before 8-byte
16097 aligned quantities. */
16098 else if (ARM_DOUBLEWORD_ALIGN
16099 && fix->fix_size < 8
16100 && mp->fix_size >= 8)
16102 min_mp = mp;
16103 min_address = mp->min_address + fix->fix_size;
16108 /* We need to create a new entry. */
16109 mp = XNEW (Mnode);
16110 mp->fix_size = fix->fix_size;
16111 mp->mode = fix->mode;
16112 mp->value = fix->value;
16113 mp->refcount = 1;
16114 mp->max_address = minipool_barrier->address + 65536;
16116 mp->min_address = min_address;
16118 if (min_mp == NULL)
16120 mp->prev = NULL;
16121 mp->next = minipool_vector_head;
16123 if (mp->next == NULL)
16125 minipool_vector_tail = mp;
16126 minipool_vector_label = gen_label_rtx ();
16128 else
16129 mp->next->prev = mp;
16131 minipool_vector_head = mp;
16133 else
16135 mp->next = min_mp->next;
16136 mp->prev = min_mp;
16137 min_mp->next = mp;
16139 if (mp->next != NULL)
16140 mp->next->prev = mp;
16141 else
16142 minipool_vector_tail = mp;
16145 /* Save the new entry. */
16146 min_mp = mp;
16148 if (mp->prev)
16149 mp = mp->prev;
16150 else
16151 mp->offset = 0;
16153 /* Scan over the following entries and adjust their offsets. */
16154 while (mp->next != NULL)
16156 if (mp->next->min_address < mp->min_address + mp->fix_size)
16157 mp->next->min_address = mp->min_address + mp->fix_size;
16159 if (mp->refcount)
16160 mp->next->offset = mp->offset + mp->fix_size;
16161 else
16162 mp->next->offset = mp->offset;
16164 mp = mp->next;
16167 return min_mp;
16170 static void
16171 assign_minipool_offsets (Mfix *barrier)
16173 HOST_WIDE_INT offset = 0;
16174 Mnode *mp;
16176 minipool_barrier = barrier;
16178 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
16180 mp->offset = offset;
16182 if (mp->refcount > 0)
16183 offset += mp->fix_size;
16187 /* Output the literal table */
16188 static void
16189 dump_minipool (rtx_insn *scan)
16191 Mnode * mp;
16192 Mnode * nmp;
16193 int align64 = 0;
16195 if (ARM_DOUBLEWORD_ALIGN)
16196 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
16197 if (mp->refcount > 0 && mp->fix_size >= 8)
16199 align64 = 1;
16200 break;
16203 if (dump_file)
16204 fprintf (dump_file,
16205 ";; Emitting minipool after insn %u; address %ld; align %d (bytes)\n",
16206 INSN_UID (scan), (unsigned long) minipool_barrier->address, align64 ? 8 : 4);
16208 scan = emit_label_after (gen_label_rtx (), scan);
16209 scan = emit_insn_after (align64 ? gen_align_8 () : gen_align_4 (), scan);
16210 scan = emit_label_after (minipool_vector_label, scan);
16212 for (mp = minipool_vector_head; mp != NULL; mp = nmp)
16214 if (mp->refcount > 0)
16216 if (dump_file)
16218 fprintf (dump_file,
16219 ";; Offset %u, min %ld, max %ld ",
16220 (unsigned) mp->offset, (unsigned long) mp->min_address,
16221 (unsigned long) mp->max_address);
16222 arm_print_value (dump_file, mp->value);
16223 fputc ('\n', dump_file);
16226 rtx val = copy_rtx (mp->value);
16228 switch (GET_MODE_SIZE (mp->mode))
16230 #ifdef HAVE_consttable_1
16231 case 1:
16232 scan = emit_insn_after (gen_consttable_1 (val), scan);
16233 break;
16235 #endif
16236 #ifdef HAVE_consttable_2
16237 case 2:
16238 scan = emit_insn_after (gen_consttable_2 (val), scan);
16239 break;
16241 #endif
16242 #ifdef HAVE_consttable_4
16243 case 4:
16244 scan = emit_insn_after (gen_consttable_4 (val), scan);
16245 break;
16247 #endif
16248 #ifdef HAVE_consttable_8
16249 case 8:
16250 scan = emit_insn_after (gen_consttable_8 (val), scan);
16251 break;
16253 #endif
16254 #ifdef HAVE_consttable_16
16255 case 16:
16256 scan = emit_insn_after (gen_consttable_16 (val), scan);
16257 break;
16259 #endif
16260 default:
16261 gcc_unreachable ();
16265 nmp = mp->next;
16266 free (mp);
16269 minipool_vector_head = minipool_vector_tail = NULL;
16270 scan = emit_insn_after (gen_consttable_end (), scan);
16271 scan = emit_barrier_after (scan);
16274 /* Return the cost of forcibly inserting a barrier after INSN. */
16275 static int
16276 arm_barrier_cost (rtx_insn *insn)
16278 /* Basing the location of the pool on the loop depth is preferable,
16279 but at the moment, the basic block information seems to be
16280 corrupt by this stage of the compilation. */
16281 int base_cost = 50;
16282 rtx_insn *next = next_nonnote_insn (insn);
16284 if (next != NULL && LABEL_P (next))
16285 base_cost -= 20;
16287 switch (GET_CODE (insn))
16289 case CODE_LABEL:
16290 /* It will always be better to place the table before the label, rather
16291 than after it. */
16292 return 50;
16294 case INSN:
16295 case CALL_INSN:
16296 return base_cost;
16298 case JUMP_INSN:
16299 return base_cost - 10;
16301 default:
16302 return base_cost + 10;
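/* For example, a JUMP_INSN that is immediately followed by a label costs
   50 - 20 - 10 = 20, making it a strongly preferred insertion point, while
   an ordinary INSN with no following label keeps the base cost of 50.  */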
16306 /* Find the best place in the insn stream in the range
16307 (FIX->address,MAX_ADDRESS) to forcibly insert a minipool barrier.
16308 Create the barrier by inserting a jump and add a new fix entry for
16309 it. */
16310 static Mfix *
16311 create_fix_barrier (Mfix *fix, HOST_WIDE_INT max_address)
16313 HOST_WIDE_INT count = 0;
16314 rtx_barrier *barrier;
16315 rtx_insn *from = fix->insn;
16316 /* The instruction after which we will insert the jump. */
16317 rtx_insn *selected = NULL;
16318 int selected_cost;
16319 /* The address at which the jump instruction will be placed. */
16320 HOST_WIDE_INT selected_address;
16321 Mfix * new_fix;
16322 HOST_WIDE_INT max_count = max_address - fix->address;
16323 rtx_code_label *label = gen_label_rtx ();
16325 selected_cost = arm_barrier_cost (from);
16326 selected_address = fix->address;
16328 while (from && count < max_count)
16330 rtx_jump_table_data *tmp;
16331 int new_cost;
16333 /* This code shouldn't have been called if there was a natural barrier
16334 within range. */
16335 gcc_assert (!BARRIER_P (from));
16337 /* Count the length of this insn. This must stay in sync with the
16338 code that pushes minipool fixes. */
16339 if (LABEL_P (from))
16340 count += get_label_padding (from);
16341 else
16342 count += get_attr_length (from);
16344 /* If there is a jump table, add its length. */
16345 if (tablejump_p (from, NULL, &tmp))
16347 count += get_jump_table_size (tmp);
16349 /* Jump tables aren't in a basic block, so base the cost on
16350 the dispatch insn. If we select this location, we will
16351 still put the pool after the table. */
16352 new_cost = arm_barrier_cost (from);
16354 if (count < max_count
16355 && (!selected || new_cost <= selected_cost))
16357 selected = tmp;
16358 selected_cost = new_cost;
16359 selected_address = fix->address + count;
16362 /* Continue after the dispatch table. */
16363 from = NEXT_INSN (tmp);
16364 continue;
16367 new_cost = arm_barrier_cost (from);
16369 if (count < max_count
16370 && (!selected || new_cost <= selected_cost))
16372 selected = from;
16373 selected_cost = new_cost;
16374 selected_address = fix->address + count;
16377 from = NEXT_INSN (from);
16380 /* Make sure that we found a place to insert the jump. */
16381 gcc_assert (selected);
16383 /* Make sure we do not split a call and its corresponding
16384 CALL_ARG_LOCATION note. */
16385 if (CALL_P (selected))
16387 rtx_insn *next = NEXT_INSN (selected);
16388 if (next && NOTE_P (next)
16389 && NOTE_KIND (next) == NOTE_INSN_CALL_ARG_LOCATION)
16390 selected = next;
16393 /* Create a new JUMP_INSN that branches around a barrier. */
16394 from = emit_jump_insn_after (gen_jump (label), selected);
16395 JUMP_LABEL (from) = label;
16396 barrier = emit_barrier_after (from);
16397 emit_label_after (label, barrier);
16399 /* Create a minipool barrier entry for the new barrier. */
16400 new_fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* new_fix));
16401 new_fix->insn = barrier;
16402 new_fix->address = selected_address;
16403 new_fix->next = fix->next;
16404 fix->next = new_fix;
16406 return new_fix;
16409 /* Record that there is a natural barrier in the insn stream at
16410 ADDRESS. */
16411 static void
16412 push_minipool_barrier (rtx_insn *insn, HOST_WIDE_INT address)
16414 Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));
16416 fix->insn = insn;
16417 fix->address = address;
16419 fix->next = NULL;
16420 if (minipool_fix_head != NULL)
16421 minipool_fix_tail->next = fix;
16422 else
16423 minipool_fix_head = fix;
16425 minipool_fix_tail = fix;
16428 /* Record INSN, which will need fixing up to load a value from the
16429 minipool. ADDRESS is the offset of the insn from the start of the
16430 function; LOC is a pointer to the part of the insn which requires
16431 fixing; VALUE is the constant that must be loaded, which is of type
16432 MODE. */
16433 static void
16434 push_minipool_fix (rtx_insn *insn, HOST_WIDE_INT address, rtx *loc,
16435 machine_mode mode, rtx value)
16437 gcc_assert (!arm_disable_literal_pool);
16438 Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));
16440 fix->insn = insn;
16441 fix->address = address;
16442 fix->loc = loc;
16443 fix->mode = mode;
16444 fix->fix_size = MINIPOOL_FIX_SIZE (mode);
16445 fix->value = value;
16446 fix->forwards = get_attr_pool_range (insn);
16447 fix->backwards = get_attr_neg_pool_range (insn);
16448 fix->minipool = NULL;
16450 /* If an insn doesn't have a range defined for it, then it isn't
16451 expecting to be reworked by this code. Better to stop now than
16452 to generate duff assembly code. */
16453 gcc_assert (fix->forwards || fix->backwards);
16455 /* If an entry requires 8-byte alignment then assume all constant pools
16456 require 4 bytes of padding. Trying to do this later on a per-pool
16457 basis is awkward because existing pool entries have to be modified. */
16458 if (ARM_DOUBLEWORD_ALIGN && fix->fix_size >= 8)
16459 minipool_pad = 4;
16461 if (dump_file)
16463 fprintf (dump_file,
16464 ";; %smode fixup for i%d; addr %lu, range (%ld,%ld): ",
16465 GET_MODE_NAME (mode),
16466 INSN_UID (insn), (unsigned long) address,
16467 -1 * (long)fix->backwards, (long)fix->forwards);
16468 arm_print_value (dump_file, fix->value);
16469 fprintf (dump_file, "\n");
16472 /* Add it to the chain of fixes. */
16473 fix->next = NULL;
16475 if (minipool_fix_head != NULL)
16476 minipool_fix_tail->next = fix;
16477 else
16478 minipool_fix_head = fix;
16480 minipool_fix_tail = fix;
16483 /* Return the maximum allowed cost of synthesizing a 64-bit constant inline:
16484 the largest number of insns for which synthesizing the value is still
16485 preferred over loading it from memory. */
16486 int
16487 arm_max_const_double_inline_cost ()
16489 return ((optimize_size || arm_ld_sched) ? 3 : 4);
16492 /* Return the cost of synthesizing a 64-bit constant VAL inline.
16493 Returns the number of insns needed, or 99 if we don't know how to
16494 do it. */
16495 int
16496 arm_const_double_inline_cost (rtx val)
16498 rtx lowpart, highpart;
16499 machine_mode mode;
16501 mode = GET_MODE (val);
16503 if (mode == VOIDmode)
16504 mode = DImode;
16506 gcc_assert (GET_MODE_SIZE (mode) == 8);
16508 lowpart = gen_lowpart (SImode, val);
16509 highpart = gen_highpart_mode (SImode, mode, val);
16511 gcc_assert (CONST_INT_P (lowpart));
16512 gcc_assert (CONST_INT_P (highpart));
16514 return (arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (lowpart),
16515 NULL_RTX, NULL_RTX, 0, 0)
16516 + arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (highpart),
16517 NULL_RTX, NULL_RTX, 0, 0));
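/* As an illustration, a DImode constant such as 0x0000ff0000000001 costs 2
   here, since both halves (0x0000ff00 and 0x1) are single-instruction ARM
   immediates, whereas a value like 0x12345678abcd4321 costs at least twice
   that because each half needs a multi-insn sequence.  */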
16520 /* Cost of loading a SImode constant. */
16521 static inline int
16522 arm_const_inline_cost (enum rtx_code code, rtx val)
16524 return arm_gen_constant (code, SImode, NULL_RTX, INTVAL (val),
16525 NULL_RTX, NULL_RTX, 1, 0);
16528 /* Return true if it is worthwhile to split a 64-bit constant into two
16529 32-bit operations. This is the case if optimizing for size, or
16530 if we have load delay slots, or if one 32-bit part can be done with
16531 a single data operation. */
16532 bool
16533 arm_const_double_by_parts (rtx val)
16535 machine_mode mode = GET_MODE (val);
16536 rtx part;
16538 if (optimize_size || arm_ld_sched)
16539 return true;
16541 if (mode == VOIDmode)
16542 mode = DImode;
16544 part = gen_highpart_mode (SImode, mode, val);
16546 gcc_assert (CONST_INT_P (part));
16548 if (const_ok_for_arm (INTVAL (part))
16549 || const_ok_for_arm (~INTVAL (part)))
16550 return true;
16552 part = gen_lowpart (SImode, val);
16554 gcc_assert (CONST_INT_P (part));
16556 if (const_ok_for_arm (INTVAL (part))
16557 || const_ok_for_arm (~INTVAL (part)))
16558 return true;
16560 return false;
16563 /* Return true if it is possible to inline both the high and low parts
16564 of a 64-bit constant into 32-bit data processing instructions. */
16565 bool
16566 arm_const_double_by_immediates (rtx val)
16568 machine_mode mode = GET_MODE (val);
16569 rtx part;
16571 if (mode == VOIDmode)
16572 mode = DImode;
16574 part = gen_highpart_mode (SImode, mode, val);
16576 gcc_assert (CONST_INT_P (part));
16578 if (!const_ok_for_arm (INTVAL (part)))
16579 return false;
16581 part = gen_lowpart (SImode, val);
16583 gcc_assert (CONST_INT_P (part));
16585 if (!const_ok_for_arm (INTVAL (part)))
16586 return false;
16588 return true;
16591 /* Scan INSN and note any of its operands that need fixing.
16592 If DO_PUSHES is false we do not actually push any of the fixups
16593 needed. */
16594 static void
16595 note_invalid_constants (rtx_insn *insn, HOST_WIDE_INT address, int do_pushes)
16597 int opno;
16599 extract_constrain_insn (insn);
16601 if (recog_data.n_alternatives == 0)
16602 return;
16604 /* Fill in recog_op_alt with information about the constraints of
16605 this insn. */
16606 preprocess_constraints (insn);
16608 const operand_alternative *op_alt = which_op_alt ();
16609 for (opno = 0; opno < recog_data.n_operands; opno++)
16611 /* Things we need to fix can only occur in inputs. */
16612 if (recog_data.operand_type[opno] != OP_IN)
16613 continue;
16615 /* If this alternative is a memory reference, then any mention
16616 of constants in this alternative is really to fool reload
16617 into allowing us to accept one there. We need to fix them up
16618 now so that we output the right code. */
16619 if (op_alt[opno].memory_ok)
16621 rtx op = recog_data.operand[opno];
16623 if (CONSTANT_P (op))
16625 if (do_pushes)
16626 push_minipool_fix (insn, address, recog_data.operand_loc[opno],
16627 recog_data.operand_mode[opno], op);
16629 else if (MEM_P (op)
16630 && GET_CODE (XEXP (op, 0)) == SYMBOL_REF
16631 && CONSTANT_POOL_ADDRESS_P (XEXP (op, 0)))
16633 if (do_pushes)
16635 rtx cop = avoid_constant_pool_reference (op);
16637 /* Casting the address of something to a mode narrower
16638 than a word can cause avoid_constant_pool_reference()
16639 to return the pool reference itself. That's no good to
16640 us here. Let's just hope that we can use the
16641 constant pool value directly. */
16642 if (op == cop)
16643 cop = get_pool_constant (XEXP (op, 0));
16645 push_minipool_fix (insn, address,
16646 recog_data.operand_loc[opno],
16647 recog_data.operand_mode[opno], cop);
16654 return;
16657 /* This function computes the clear mask and PADDING_BITS_TO_CLEAR for structs
16658 and unions in the context of ARMv8-M Security Extensions. It is used as a
16659 helper function for both 'cmse_nonsecure_call' and 'cmse_nonsecure_entry'
16660 functions. The PADDING_BITS_TO_CLEAR pointer can be the base of either one
16661 or four masks, depending on whether it is being computed for a
16662 'cmse_nonsecure_entry' return value or a 'cmse_nonsecure_call' argument
16663 respectively. The type of the argument, or of a field within an argument,
16664 is passed in ARG_TYPE. The register in which this argument or field starts
16665 is kept in the pointer REGNO and is updated accordingly; the bit at which it
16666 starts is passed in STARTING_BIT; and the last used bit is kept in
16667 LAST_USED_BIT, which is also updated accordingly. */
16669 static unsigned HOST_WIDE_INT
16670 comp_not_to_clear_mask_str_un (tree arg_type, int * regno,
16671 uint32_t * padding_bits_to_clear,
16672 unsigned starting_bit, int * last_used_bit)
16675 unsigned HOST_WIDE_INT not_to_clear_reg_mask = 0;
16677 if (TREE_CODE (arg_type) == RECORD_TYPE)
16679 unsigned current_bit = starting_bit;
16680 tree field;
16681 long int offset, size;
16684 field = TYPE_FIELDS (arg_type);
16685 while (field)
16687 /* The offset within a structure is always an offset from
16688 the start of that structure. Make sure we take that into account
16689 in the calculation of the register-based offset used here. */
16690 offset = starting_bit;
16691 offset += TREE_INT_CST_ELT (DECL_FIELD_BIT_OFFSET (field), 0);
16692 offset %= 32;
16694 /* This is the actual size of the field; for bit-fields this is the
16695 bit-field width and not the container size. */
16696 size = TREE_INT_CST_ELT (DECL_SIZE (field), 0);
16698 if (*last_used_bit != offset)
16700 if (offset < *last_used_bit)
16702 /* This field's offset is before the 'last_used_bit', that
16703 means this field goes on the next register. So we need to
16704 pad the rest of the current register and increase the
16705 register number. */
16706 uint32_t mask;
16707 mask = ((uint32_t)-1) - ((uint32_t) 1 << *last_used_bit);
16708 mask++;
16710 padding_bits_to_clear[*regno] |= mask;
16711 not_to_clear_reg_mask |= HOST_WIDE_INT_1U << *regno;
16712 (*regno)++;
16714 else
16716 /* Otherwise we pad the bits between the last field's end and
16717 the start of the new field. */
16718 uint32_t mask;
16720 mask = ((uint32_t)-1) >> (32 - offset);
16721 mask -= ((uint32_t) 1 << *last_used_bit) - 1;
16722 padding_bits_to_clear[*regno] |= mask;
16724 current_bit = offset;
16727 /* Calculate further padding bits for inner structs/unions too. */
16728 if (RECORD_OR_UNION_TYPE_P (TREE_TYPE (field)))
16730 *last_used_bit = current_bit;
16731 not_to_clear_reg_mask
16732 |= comp_not_to_clear_mask_str_un (TREE_TYPE (field), regno,
16733 padding_bits_to_clear, offset,
16734 last_used_bit);
16736 else
16738 /* Update 'current_bit' with this field's size. If the
16739 'current_bit' lies in a subsequent register, update 'regno' and
16740 reset 'current_bit' to point to the current bit in that new
16741 register. */
16742 current_bit += size;
16743 while (current_bit >= 32)
16745 current_bit-=32;
16746 not_to_clear_reg_mask |= HOST_WIDE_INT_1U << *regno;
16747 (*regno)++;
16749 *last_used_bit = current_bit;
16752 field = TREE_CHAIN (field);
16754 not_to_clear_reg_mask |= HOST_WIDE_INT_1U << *regno;
16756 else if (TREE_CODE (arg_type) == UNION_TYPE)
16758 tree field, field_t;
16759 int i, regno_t, field_size;
16760 int max_reg = -1;
16761 int max_bit = -1;
16762 uint32_t mask;
16763 uint32_t padding_bits_to_clear_res[NUM_ARG_REGS]
16764 = {-1, -1, -1, -1};
16766 /* To compute the padding bits in a union we only consider bits as
16767 padding bits if, for every field in the union, they are either a
16768 padding bit or fall outside that field's size. */
16769 field = TYPE_FIELDS (arg_type);
16770 while (field)
16772 uint32_t padding_bits_to_clear_t[NUM_ARG_REGS]
16773 = {0U, 0U, 0U, 0U};
16774 int last_used_bit_t = *last_used_bit;
16775 regno_t = *regno;
16776 field_t = TREE_TYPE (field);
16778 /* If the field's type is either a record or a union make sure to
16779 compute their padding bits too. */
16780 if (RECORD_OR_UNION_TYPE_P (field_t))
16781 not_to_clear_reg_mask
16782 |= comp_not_to_clear_mask_str_un (field_t, &regno_t,
16783 &padding_bits_to_clear_t[0],
16784 starting_bit, &last_used_bit_t);
16785 else
16787 field_size = TREE_INT_CST_ELT (DECL_SIZE (field), 0);
16788 regno_t = (field_size / 32) + *regno;
16789 last_used_bit_t = (starting_bit + field_size) % 32;
16792 for (i = *regno; i < regno_t; i++)
16794 /* For all but the last register used by this field only keep the
16795 padding bits that were padding bits in this field. */
16796 padding_bits_to_clear_res[i] &= padding_bits_to_clear_t[i];
16799 /* For the last register, keep all padding bits that were padding
16800 bits in this field and any padding bits that are still valid
16801 as padding bits but fall outside of this field's size. */
16802 mask = (((uint32_t) -1) - ((uint32_t) 1 << last_used_bit_t)) + 1;
16803 padding_bits_to_clear_res[regno_t]
16804 &= padding_bits_to_clear_t[regno_t] | mask;
16806 /* Update the maximum size of the fields in terms of registers used
16807 ('max_reg') and the 'last_used_bit' in said register. */
16808 if (max_reg < regno_t)
16810 max_reg = regno_t;
16811 max_bit = last_used_bit_t;
16813 else if (max_reg == regno_t && max_bit < last_used_bit_t)
16814 max_bit = last_used_bit_t;
16816 field = TREE_CHAIN (field);
16819 /* Update the current padding_bits_to_clear using the intersection of the
16820 padding bits of all the fields. */
16821 for (i=*regno; i < max_reg; i++)
16822 padding_bits_to_clear[i] |= padding_bits_to_clear_res[i];
16824 /* Do not keep trailing padding bits, we do not know yet whether this
16825 is the end of the argument. */
16826 mask = ((uint32_t) 1 << max_bit) - 1;
16827 padding_bits_to_clear[max_reg]
16828 |= padding_bits_to_clear_res[max_reg] & mask;
16830 *regno = max_reg;
16831 *last_used_bit = max_bit;
16833 else
16834 /* This function should only be used for structs and unions. */
16835 gcc_unreachable ();
16837 return not_to_clear_reg_mask;
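/* A worked example, assuming the argument 'struct { char a; short b; }'
   starts in r0 at bit 0: field 'a' occupies bits 0-7 and field 'b' bits
   16-31, so the gap at bits 8-15 is recorded in padding_bits_to_clear[0]
   as 0x0000ff00.  The returned mask has bits 0 and 1 set while this helper
   runs, and the caller (compute_not_to_clear_mask below) drops the unused
   trailing register because LAST_USED_BIT ends up as zero.  */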
16840 /* In the context of ARMv8-M Security Extensions, this function is used for both
16841 'cmse_nonsecure_call' and 'cmse_nonsecure_entry' functions to compute what
16842 registers are used when returning or passing arguments, which is then
16843 returned as a mask. It will also compute a mask to indicate padding/unused
16844 bits for each of these registers, and passes this through the
16845 PADDING_BITS_TO_CLEAR pointer. The tree of the argument type is passed in
16846 ARG_TYPE, the rtl representation of the argument is passed in ARG_RTX and
16847 the starting register used to pass this argument or return value is passed
16848 in REGNO. It makes use of 'comp_not_to_clear_mask_str_un' to compute these
16849 for struct and union types. */
16851 static unsigned HOST_WIDE_INT
16852 compute_not_to_clear_mask (tree arg_type, rtx arg_rtx, int regno,
16853 uint32_t * padding_bits_to_clear)
16856 int last_used_bit = 0;
16857 unsigned HOST_WIDE_INT not_to_clear_mask;
16859 if (RECORD_OR_UNION_TYPE_P (arg_type))
16861 not_to_clear_mask
16862 = comp_not_to_clear_mask_str_un (arg_type, &regno,
16863 padding_bits_to_clear, 0,
16864 &last_used_bit);
16867 /* If 'last_used_bit' is not zero, we are still using part of the last
16868 'regno'. In such cases we must clear the trailing bits. Otherwise
16869 regno is not being used at all and we should mark it as to be
16870 cleared. */
16871 if (last_used_bit != 0)
16872 padding_bits_to_clear[regno]
16873 |= ((uint32_t)-1) - ((uint32_t) 1 << last_used_bit) + 1;
16874 else
16875 not_to_clear_mask &= ~(HOST_WIDE_INT_1U << regno);
16877 else
16879 not_to_clear_mask = 0;
16880 /* We are not dealing with structs or unions, so these arguments may
16881 also be passed in floating-point registers. In some cases BLKmode is
16882 used when returning or passing arguments in multiple VFP registers. */
16883 if (GET_MODE (arg_rtx) == BLKmode)
16885 int i, arg_regs;
16886 rtx reg;
16888 /* This should really only occur when dealing with the hard-float
16889 ABI. */
16890 gcc_assert (TARGET_HARD_FLOAT_ABI);
16892 for (i = 0; i < XVECLEN (arg_rtx, 0); i++)
16894 reg = XEXP (XVECEXP (arg_rtx, 0, i), 0);
16895 gcc_assert (REG_P (reg));
16897 not_to_clear_mask |= HOST_WIDE_INT_1U << REGNO (reg);
16899 /* If we are dealing with DF mode, make sure we don't
16900 clear either of the registers it addresses. */
16901 arg_regs = ARM_NUM_REGS (GET_MODE (reg));
16902 if (arg_regs > 1)
16904 unsigned HOST_WIDE_INT mask;
16905 mask = HOST_WIDE_INT_1U << (REGNO (reg) + arg_regs);
16906 mask -= HOST_WIDE_INT_1U << REGNO (reg);
16907 not_to_clear_mask |= mask;
16911 else
16913 /* Otherwise we can rely on the MODE to determine how many registers
16914 are being used by this argument. */
16915 int arg_regs = ARM_NUM_REGS (GET_MODE (arg_rtx));
16916 not_to_clear_mask |= HOST_WIDE_INT_1U << REGNO (arg_rtx);
16917 if (arg_regs > 1)
16919 unsigned HOST_WIDE_INT
16920 mask = HOST_WIDE_INT_1U << (REGNO (arg_rtx) + arg_regs);
16921 mask -= HOST_WIDE_INT_1U << REGNO (arg_rtx);
16922 not_to_clear_mask |= mask;
16927 return not_to_clear_mask;
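/* For example, an 'int' argument passed in r0 produces a mask with only
   bit 0 set, while a 'long long' starting in r2 occupies two registers and
   sets bits 2 and 3.  A hard-float BLKmode value spread over VFP registers
   instead sets the bit of every VFP register it touches.  */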
16930 /* Clear caller-saved registers that are not used to pass arguments before a
16931 cmse_nonsecure_call. Saving, clearing and restoring of callee-saved
16932 registers is done in the __gnu_cmse_nonsecure_call libcall.
16933 See libgcc/config/arm/cmse_nonsecure_call.S. */
16935 static void
16936 cmse_nonsecure_call_clear_caller_saved (void)
16938 basic_block bb;
16940 FOR_EACH_BB_FN (bb, cfun)
16942 rtx_insn *insn;
16944 FOR_BB_INSNS (bb, insn)
16946 uint64_t to_clear_mask, float_mask;
16947 rtx_insn *seq;
16948 rtx pat, call, unspec, reg, cleared_reg, tmp;
16949 unsigned int regno, maxregno;
16950 rtx address;
16951 CUMULATIVE_ARGS args_so_far_v;
16952 cumulative_args_t args_so_far;
16953 tree arg_type, fntype;
16954 bool using_r4, first_param = true;
16955 function_args_iterator args_iter;
16956 uint32_t padding_bits_to_clear[4] = {0U, 0U, 0U, 0U};
16957 uint32_t * padding_bits_to_clear_ptr = &padding_bits_to_clear[0];
16959 if (!NONDEBUG_INSN_P (insn))
16960 continue;
16962 if (!CALL_P (insn))
16963 continue;
16965 pat = PATTERN (insn);
16966 gcc_assert (GET_CODE (pat) == PARALLEL && XVECLEN (pat, 0) > 0);
16967 call = XVECEXP (pat, 0, 0);
16969 /* Get the real call RTX if the insn sets a value, i.e. returns. */
16970 if (GET_CODE (call) == SET)
16971 call = SET_SRC (call);
16973 /* Check if it is a cmse_nonsecure_call. */
16974 unspec = XEXP (call, 0);
16975 if (GET_CODE (unspec) != UNSPEC
16976 || XINT (unspec, 1) != UNSPEC_NONSECURE_MEM)
16977 continue;
16979 /* Determine the caller-saved registers we need to clear. */
16980 to_clear_mask = (1LL << (NUM_ARG_REGS)) - 1;
16981 maxregno = NUM_ARG_REGS - 1;
16982 /* Only look at the caller-saved floating point registers in case of
16983 -mfloat-abi=hard. For -mfloat-abi=softfp we will be using the
16984 lazy store and loads which clear both caller- and callee-saved
16985 registers. */
16986 if (TARGET_HARD_FLOAT_ABI)
16988 float_mask = (1LL << (D7_VFP_REGNUM + 1)) - 1;
16989 float_mask &= ~((1LL << FIRST_VFP_REGNUM) - 1);
16990 to_clear_mask |= float_mask;
16991 maxregno = D7_VFP_REGNUM;
16994 /* Make sure the register used to hold the function address is not
16995 cleared. */
16996 address = RTVEC_ELT (XVEC (unspec, 0), 0);
16997 gcc_assert (MEM_P (address));
16998 gcc_assert (REG_P (XEXP (address, 0)));
16999 to_clear_mask &= ~(1LL << REGNO (XEXP (address, 0)));
17001 /* Set basic block of call insn so that df rescan is performed on
17002 insns inserted here. */
17003 set_block_for_insn (insn, bb);
17004 df_set_flags (DF_DEFER_INSN_RESCAN);
17005 start_sequence ();
17007 /* Make sure the scheduler doesn't schedule other insns beyond
17008 here. */
17009 emit_insn (gen_blockage ());
17011 /* Walk through all arguments and clear registers appropriately. */
17013 fntype = TREE_TYPE (MEM_EXPR (address));
17014 arm_init_cumulative_args (&args_so_far_v, fntype, NULL_RTX,
17015 NULL_TREE);
17016 args_so_far = pack_cumulative_args (&args_so_far_v);
17017 FOREACH_FUNCTION_ARGS (fntype, arg_type, args_iter)
17019 rtx arg_rtx;
17020 machine_mode arg_mode = TYPE_MODE (arg_type);
17022 if (VOID_TYPE_P (arg_type))
17023 continue;
17025 if (!first_param)
17026 arm_function_arg_advance (args_so_far, arg_mode, arg_type,
17027 true);
17029 arg_rtx = arm_function_arg (args_so_far, arg_mode, arg_type,
17030 true);
17031 gcc_assert (REG_P (arg_rtx));
17032 to_clear_mask
17033 &= ~compute_not_to_clear_mask (arg_type, arg_rtx,
17034 REGNO (arg_rtx),
17035 padding_bits_to_clear_ptr);
17037 first_param = false;
17040 /* Clear padding bits where needed. */
17041 cleared_reg = XEXP (address, 0);
17042 reg = gen_rtx_REG (SImode, IP_REGNUM);
17043 using_r4 = false;
17044 for (regno = R0_REGNUM; regno < NUM_ARG_REGS; regno++)
17046 if (padding_bits_to_clear[regno] == 0)
17047 continue;
17049 /* If this is a Thumb-1 target, copy the address of the function
17050 we are calling from 'r4' into 'ip' such that we can use r4 to
17051 clear the unused bits in the arguments. */
17052 if (TARGET_THUMB1 && !using_r4)
17054 using_r4 = true;
17055 reg = cleared_reg;
17056 emit_move_insn (gen_rtx_REG (SImode, IP_REGNUM),
17057 reg);
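/* Load the low 16 bits of the negated padding mask first; the top half,
when non-zero, is installed separately below. */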
17060 tmp = GEN_INT ((((~padding_bits_to_clear[regno]) << 16u) >> 16u));
17061 emit_move_insn (reg, tmp);
17062 /* Also fill the top half of the negated
17063 padding_bits_to_clear. */
17064 if (((~padding_bits_to_clear[regno]) >> 16) > 0)
17066 tmp = GEN_INT ((~padding_bits_to_clear[regno]) >> 16);
17067 emit_insn (gen_rtx_SET (gen_rtx_ZERO_EXTRACT (SImode, reg,
17068 GEN_INT (16),
17069 GEN_INT (16)),
17070 tmp));
17073 emit_insn (gen_andsi3 (gen_rtx_REG (SImode, regno),
17074 gen_rtx_REG (SImode, regno),
17075 reg));
17078 if (using_r4)
17079 emit_move_insn (cleared_reg,
17080 gen_rtx_REG (SImode, IP_REGNUM));
17082 /* We use right shift and left shift to clear the LSB of the address
17083 we jump to instead of using bic, to avoid having to use an extra
17084 register on Thumb-1. */
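/* I.e. shifting the address right by one and then left by one leaves
every bit unchanged except bit 0, which is forced to zero. */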
17085 tmp = gen_rtx_LSHIFTRT (SImode, cleared_reg, const1_rtx);
17086 emit_insn (gen_rtx_SET (cleared_reg, tmp));
17087 tmp = gen_rtx_ASHIFT (SImode, cleared_reg, const1_rtx);
17088 emit_insn (gen_rtx_SET (cleared_reg, tmp));
17090 /* Clear all registers that could leak information before doing the
17091 non-secure call. */
17092 for (regno = R0_REGNUM; regno <= maxregno; regno++)
17094 if (!(to_clear_mask & (1LL << regno)))
17095 continue;
17097 /* If regno is an even vfp register and its successor is also to
17098 be cleared, use vmov. */
17099 if (IS_VFP_REGNUM (regno))
17101 if (TARGET_VFP_DOUBLE
17102 && VFP_REGNO_OK_FOR_DOUBLE (regno)
17103 && to_clear_mask & (1LL << (regno + 1)))
17104 emit_move_insn (gen_rtx_REG (DFmode, regno++),
17105 CONST0_RTX (DFmode));
17106 else
17107 emit_move_insn (gen_rtx_REG (SFmode, regno),
17108 CONST0_RTX (SFmode));
17110 else
17111 emit_move_insn (gen_rtx_REG (SImode, regno), cleared_reg);
17114 seq = get_insns ();
17115 end_sequence ();
17116 emit_insn_before (seq, insn);
17122 /* Rewrite a move insn into a subtract of 0 if the condition codes will
17123 be useful in the next conditional jump insn. */
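/* I.e. a low-register move whose source or destination is later compared
against zero by a cbranchsi4_insn is rewritten as a subtract of 0, whose
flag-setting Thumb-1 form allows the explicit comparison to be dropped
when the insns are output. */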
17125 static void
17126 thumb1_reorg (void)
17128 basic_block bb;
17130 FOR_EACH_BB_FN (bb, cfun)
17132 rtx dest, src;
17133 rtx cmp, op0, op1, set = NULL;
17134 rtx_insn *prev, *insn = BB_END (bb);
17135 bool insn_clobbered = false;
17137 while (insn != BB_HEAD (bb) && !NONDEBUG_INSN_P (insn))
17138 insn = PREV_INSN (insn);
17140 /* Find the last cbranchsi4_insn in basic block BB. */
17141 if (insn == BB_HEAD (bb)
17142 || INSN_CODE (insn) != CODE_FOR_cbranchsi4_insn)
17143 continue;
17145 /* Get the register with which we are comparing. */
17146 cmp = XEXP (SET_SRC (PATTERN (insn)), 0);
17147 op0 = XEXP (cmp, 0);
17148 op1 = XEXP (cmp, 1);
17150 /* Check that comparison is against ZERO. */
17151 if (!CONST_INT_P (op1) || INTVAL (op1) != 0)
17152 continue;
17154 /* Find the first flag setting insn before INSN in basic block BB. */
17155 gcc_assert (insn != BB_HEAD (bb));
17156 for (prev = PREV_INSN (insn);
17157 (!insn_clobbered
17158 && prev != BB_HEAD (bb)
17159 && (NOTE_P (prev)
17160 || DEBUG_INSN_P (prev)
17161 || ((set = single_set (prev)) != NULL
17162 && get_attr_conds (prev) == CONDS_NOCOND)));
17163 prev = PREV_INSN (prev))
17165 if (reg_set_p (op0, prev))
17166 insn_clobbered = true;
17169 /* Skip if op0 is clobbered by an insn other than prev. */
17170 if (insn_clobbered)
17171 continue;
17173 if (!set)
17174 continue;
17176 dest = SET_DEST (set);
17177 src = SET_SRC (set);
17178 if (!low_register_operand (dest, SImode)
17179 || !low_register_operand (src, SImode))
17180 continue;
17182 /* Rewrite move into subtract of 0 if its operand is compared with ZERO
17183 in INSN. Both src and dest of the move insn are checked. */
17184 if (REGNO (op0) == REGNO (src) || REGNO (op0) == REGNO (dest))
17186 dest = copy_rtx (dest);
17187 src = copy_rtx (src);
17188 src = gen_rtx_MINUS (SImode, src, const0_rtx);
17189 PATTERN (prev) = gen_rtx_SET (dest, src);
17190 INSN_CODE (prev) = -1;
17191 /* Set test register in INSN to dest. */
17192 XEXP (cmp, 0) = copy_rtx (dest);
17193 INSN_CODE (insn) = -1;
17198 /* Convert instructions to their cc-clobbering variant if possible, since
17199 that allows us to use smaller encodings. */
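/* For example, adding a clobber of CC to (set (reg r0) (plus (reg r1)
(reg r2))) lets it be output as the 16-bit "adds r0, r1, r2" rather
than the 32-bit "add.w r0, r1, r2". */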
17201 static void
17202 thumb2_reorg (void)
17204 basic_block bb;
17205 regset_head live;
17207 INIT_REG_SET (&live);
17209 /* We are freeing block_for_insn in the toplev to keep compatibility
17210 with old MDEP_REORGS that are not CFG based. Recompute it now. */
17211 compute_bb_for_insn ();
17212 df_analyze ();
17214 enum Convert_Action {SKIP, CONV, SWAP_CONV};
17216 FOR_EACH_BB_FN (bb, cfun)
17218 if ((current_tune->disparage_flag_setting_t16_encodings
17219 == tune_params::DISPARAGE_FLAGS_ALL)
17220 && optimize_bb_for_speed_p (bb))
17221 continue;
17223 rtx_insn *insn;
17224 Convert_Action action = SKIP;
17225 Convert_Action action_for_partial_flag_setting
17226 = ((current_tune->disparage_flag_setting_t16_encodings
17227 != tune_params::DISPARAGE_FLAGS_NEITHER)
17228 && optimize_bb_for_speed_p (bb))
17229 ? SKIP : CONV;
17231 COPY_REG_SET (&live, DF_LR_OUT (bb));
17232 df_simulate_initialize_backwards (bb, &live);
17233 FOR_BB_INSNS_REVERSE (bb, insn)
17235 if (NONJUMP_INSN_P (insn)
17236 && !REGNO_REG_SET_P (&live, CC_REGNUM)
17237 && GET_CODE (PATTERN (insn)) == SET)
17239 action = SKIP;
17240 rtx pat = PATTERN (insn);
17241 rtx dst = XEXP (pat, 0);
17242 rtx src = XEXP (pat, 1);
17243 rtx op0 = NULL_RTX, op1 = NULL_RTX;
17245 if (UNARY_P (src) || BINARY_P (src))
17246 op0 = XEXP (src, 0);
17248 if (BINARY_P (src))
17249 op1 = XEXP (src, 1);
17251 if (low_register_operand (dst, SImode))
17253 switch (GET_CODE (src))
17255 case PLUS:
17256 /* Adding two registers and storing the result
17257 in the first source is already a 16-bit
17258 operation. */
17259 if (rtx_equal_p (dst, op0)
17260 && register_operand (op1, SImode))
17261 break;
17263 if (low_register_operand (op0, SImode))
17265 /* ADDS <Rd>,<Rn>,<Rm> */
17266 if (low_register_operand (op1, SImode))
17267 action = CONV;
17268 /* ADDS <Rdn>,#<imm8> */
17269 /* SUBS <Rdn>,#<imm8> */
17270 else if (rtx_equal_p (dst, op0)
17271 && CONST_INT_P (op1)
17272 && IN_RANGE (INTVAL (op1), -255, 255))
17273 action = CONV;
17274 /* ADDS <Rd>,<Rn>,#<imm3> */
17275 /* SUBS <Rd>,<Rn>,#<imm3> */
17276 else if (CONST_INT_P (op1)
17277 && IN_RANGE (INTVAL (op1), -7, 7))
17278 action = CONV;
17280 /* ADCS <Rd>, <Rn> */
17281 else if (GET_CODE (XEXP (src, 0)) == PLUS
17282 && rtx_equal_p (XEXP (XEXP (src, 0), 0), dst)
17283 && low_register_operand (XEXP (XEXP (src, 0), 1),
17284 SImode)
17285 && COMPARISON_P (op1)
17286 && cc_register (XEXP (op1, 0), VOIDmode)
17287 && maybe_get_arm_condition_code (op1) == ARM_CS
17288 && XEXP (op1, 1) == const0_rtx)
17289 action = CONV;
17290 break;
17292 case MINUS:
17293 /* RSBS <Rd>,<Rn>,#0
17294 Not handled here: see NEG below. */
17295 /* SUBS <Rd>,<Rn>,#<imm3>
17296 SUBS <Rdn>,#<imm8>
17297 Not handled here: see PLUS above. */
17298 /* SUBS <Rd>,<Rn>,<Rm> */
17299 if (low_register_operand (op0, SImode)
17300 && low_register_operand (op1, SImode))
17301 action = CONV;
17302 break;
17304 case MULT:
17305 /* MULS <Rdm>,<Rn>,<Rdm>
17306 As an exception to the rule, this is only used
17307 when optimizing for size since MULS is slow on all
17308 known implementations. We do not even want to use
17309 MULS in cold code, if optimizing for speed, so we
17310 test the global flag here. */
17311 if (!optimize_size)
17312 break;
17313 /* Fall through. */
17314 case AND:
17315 case IOR:
17316 case XOR:
17317 /* ANDS <Rdn>,<Rm> */
17318 if (rtx_equal_p (dst, op0)
17319 && low_register_operand (op1, SImode))
17320 action = action_for_partial_flag_setting;
17321 else if (rtx_equal_p (dst, op1)
17322 && low_register_operand (op0, SImode))
17323 action = action_for_partial_flag_setting == SKIP
17324 ? SKIP : SWAP_CONV;
17325 break;
17327 case ASHIFTRT:
17328 case ASHIFT:
17329 case LSHIFTRT:
17330 /* ASRS <Rdn>,<Rm> */
17331 /* LSRS <Rdn>,<Rm> */
17332 /* LSLS <Rdn>,<Rm> */
17333 if (rtx_equal_p (dst, op0)
17334 && low_register_operand (op1, SImode))
17335 action = action_for_partial_flag_setting;
17336 /* ASRS <Rd>,<Rm>,#<imm5> */
17337 /* LSRS <Rd>,<Rm>,#<imm5> */
17338 /* LSLS <Rd>,<Rm>,#<imm5> */
17339 else if (low_register_operand (op0, SImode)
17340 && CONST_INT_P (op1)
17341 && IN_RANGE (INTVAL (op1), 0, 31))
17342 action = action_for_partial_flag_setting;
17343 break;
17345 case ROTATERT:
17346 /* RORS <Rdn>,<Rm> */
17347 if (rtx_equal_p (dst, op0)
17348 && low_register_operand (op1, SImode))
17349 action = action_for_partial_flag_setting;
17350 break;
17352 case NOT:
17353 /* MVNS <Rd>,<Rm> */
17354 if (low_register_operand (op0, SImode))
17355 action = action_for_partial_flag_setting;
17356 break;
17358 case NEG:
17359 /* NEGS <Rd>,<Rm> (a.k.a. RSBS) */
17360 if (low_register_operand (op0, SImode))
17361 action = CONV;
17362 break;
17364 case CONST_INT:
17365 /* MOVS <Rd>,#<imm8> */
17366 if (CONST_INT_P (src)
17367 && IN_RANGE (INTVAL (src), 0, 255))
17368 action = action_for_partial_flag_setting;
17369 break;
17371 case REG:
17372 /* MOVS and MOV<c> with registers have different
17373 encodings, so are not relevant here. */
17374 break;
17376 default:
17377 break;
17381 if (action != SKIP)
17383 rtx ccreg = gen_rtx_REG (CCmode, CC_REGNUM);
17384 rtx clobber = gen_rtx_CLOBBER (VOIDmode, ccreg);
17385 rtvec vec;
17387 if (action == SWAP_CONV)
17389 src = copy_rtx (src);
17390 XEXP (src, 0) = op1;
17391 XEXP (src, 1) = op0;
17392 pat = gen_rtx_SET (dst, src);
17393 vec = gen_rtvec (2, pat, clobber);
17395 else /* action == CONV */
17396 vec = gen_rtvec (2, pat, clobber);
17398 PATTERN (insn) = gen_rtx_PARALLEL (VOIDmode, vec);
17399 INSN_CODE (insn) = -1;
17403 if (NONDEBUG_INSN_P (insn))
17404 df_simulate_one_insn_backwards (bb, insn, &live);
17408 CLEAR_REG_SET (&live);
17411 /* GCC puts the pool in the wrong place for ARM, since we can only
17412 load addresses a limited distance around the pc. We do some
17413 special munging to move the constant pool values to the correct
17414 point in the code. */
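/* In outline: scan the function recording every constant-pool reference
(a "fix") and every barrier, group the fixes into minipools that are in
range of all their referencing insns, dump each pool after a suitable
barrier (creating one if necessary), and rewrite the referencing insns
to load from the pool. */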
17415 static void
17416 arm_reorg (void)
17418 rtx_insn *insn;
17419 HOST_WIDE_INT address = 0;
17420 Mfix * fix;
17422 if (use_cmse)
17423 cmse_nonsecure_call_clear_caller_saved ();
17424 if (TARGET_THUMB1)
17425 thumb1_reorg ();
17426 else if (TARGET_THUMB2)
17427 thumb2_reorg ();
17429 /* Ensure all insns that must be split have been split at this point.
17430 Otherwise, the pool placement code below may compute incorrect
17431 insn lengths. Note that when optimizing, all insns have already
17432 been split at this point. */
17433 if (!optimize)
17434 split_all_insns_noflow ();
17436 /* If literal pools are disabled, make sure we do not attempt to create
17437 one; none should be needed. */
17438 if (arm_disable_literal_pool)
17439 return;
17441 minipool_fix_head = minipool_fix_tail = NULL;
17443 /* The first insn must always be a note, or the code below won't
17444 scan it properly. */
17445 insn = get_insns ();
17446 gcc_assert (NOTE_P (insn));
17447 minipool_pad = 0;
17449 /* Scan all the insns and record the operands that will need fixing. */
17450 for (insn = next_nonnote_insn (insn); insn; insn = next_nonnote_insn (insn))
17452 if (BARRIER_P (insn))
17453 push_minipool_barrier (insn, address);
17454 else if (INSN_P (insn))
17456 rtx_jump_table_data *table;
17458 note_invalid_constants (insn, address, true);
17459 address += get_attr_length (insn);
17461 /* If the insn is a vector jump, add the size of the table
17462 and skip the table. */
17463 if (tablejump_p (insn, NULL, &table))
17465 address += get_jump_table_size (table);
17466 insn = table;
17469 else if (LABEL_P (insn))
17470 /* Add the worst-case padding due to alignment. We don't add
17471 the _current_ padding because the minipool insertions
17472 themselves might change it. */
17473 address += get_label_padding (insn);
17476 fix = minipool_fix_head;
17478 /* Now scan the fixups and perform the required changes. */
17479 while (fix)
17481 Mfix * ftmp;
17482 Mfix * fdel;
17483 Mfix * last_added_fix;
17484 Mfix * last_barrier = NULL;
17485 Mfix * this_fix;
17487 /* Skip any further barriers before the next fix. */
17488 while (fix && BARRIER_P (fix->insn))
17489 fix = fix->next;
17491 /* No more fixes. */
17492 if (fix == NULL)
17493 break;
17495 last_added_fix = NULL;
17497 for (ftmp = fix; ftmp; ftmp = ftmp->next)
17499 if (BARRIER_P (ftmp->insn))
17501 if (ftmp->address >= minipool_vector_head->max_address)
17502 break;
17504 last_barrier = ftmp;
17506 else if ((ftmp->minipool = add_minipool_forward_ref (ftmp)) == NULL)
17507 break;
17509 last_added_fix = ftmp; /* Keep track of the last fix added. */
17512 /* If we found a barrier, drop back to that; any fixes that we
17513 could have reached but come after the barrier will now go in
17514 the next mini-pool. */
17515 if (last_barrier != NULL)
17517 /* Reduce the refcount for those fixes that won't go into this
17518 pool after all. */
17519 for (fdel = last_barrier->next;
17520 fdel && fdel != ftmp;
17521 fdel = fdel->next)
17523 fdel->minipool->refcount--;
17524 fdel->minipool = NULL;
17527 ftmp = last_barrier;
17529 else
17531 /* ftmp is the first fix that we can't fit into this pool and
17532 there are no natural barriers that we could use. Insert a
17533 new barrier in the code somewhere between the previous
17534 fix and this one, and arrange to jump around it. */
17535 HOST_WIDE_INT max_address;
17537 /* The last item on the list of fixes must be a barrier, so
17538 we can never run off the end of the list of fixes without
17539 last_barrier being set. */
17540 gcc_assert (ftmp);
17542 max_address = minipool_vector_head->max_address;
17543 /* Check that there isn't another fix that is in range that
17544 we couldn't fit into this pool because the pool was
17545 already too large: we need to put the pool before such an
17546 instruction. The pool itself may come just after the
17547 fix because create_fix_barrier also allows space for a
17548 jump instruction. */
17549 if (ftmp->address < max_address)
17550 max_address = ftmp->address + 1;
17552 last_barrier = create_fix_barrier (last_added_fix, max_address);
17555 assign_minipool_offsets (last_barrier);
17557 while (ftmp)
17559 if (!BARRIER_P (ftmp->insn)
17560 && ((ftmp->minipool = add_minipool_backward_ref (ftmp))
17561 == NULL))
17562 break;
17564 ftmp = ftmp->next;
17567 /* Scan over the fixes we have identified for this pool, fixing them
17568 up and adding the constants to the pool itself. */
17569 for (this_fix = fix; this_fix && ftmp != this_fix;
17570 this_fix = this_fix->next)
17571 if (!BARRIER_P (this_fix->insn))
17573 rtx addr
17574 = plus_constant (Pmode,
17575 gen_rtx_LABEL_REF (VOIDmode,
17576 minipool_vector_label),
17577 this_fix->minipool->offset);
17578 *this_fix->loc = gen_rtx_MEM (this_fix->mode, addr);
17581 dump_minipool (last_barrier->insn);
17582 fix = ftmp;
17585 /* From now on we must synthesize any constants that we can't handle
17586 directly. This can happen if the RTL gets split during final
17587 instruction generation. */
17588 cfun->machine->after_arm_reorg = 1;
17590 /* Free the minipool memory. */
17591 obstack_free (&minipool_obstack, minipool_startobj);
17594 /* Routines to output assembly language. */
17596 /* Return the string representation of the passed-in real value. */
17597 static const char *
17598 fp_const_from_val (REAL_VALUE_TYPE *r)
17600 if (!fp_consts_inited)
17601 init_fp_table ();
17603 gcc_assert (real_equal (r, &value_fp0));
17604 return "0";
17607 /* OPERANDS[0] is the entire list of insns that constitute pop,
17608 OPERANDS[1] is the base register, RETURN_PC is true iff return insn
17609 is in the list, UPDATE is true iff the list contains explicit
17610 update of base register. */
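/* For example, popping r4, r5 and pc with an SP update is emitted as
"pop {r4, r5, pc}", while a non-SP base without writeback uses
"ldm rN, {...}". */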
17611 void
17612 arm_output_multireg_pop (rtx *operands, bool return_pc, rtx cond, bool reverse,
17613 bool update)
17615 int i;
17616 char pattern[100];
17617 int offset;
17618 const char *conditional;
17619 int num_saves = XVECLEN (operands[0], 0);
17620 unsigned int regno;
17621 unsigned int regno_base = REGNO (operands[1]);
17622 bool interrupt_p = IS_INTERRUPT (arm_current_func_type ());
17624 offset = 0;
17625 offset += update ? 1 : 0;
17626 offset += return_pc ? 1 : 0;
17628 /* Is the base register in the list? */
17629 for (i = offset; i < num_saves; i++)
17631 regno = REGNO (XEXP (XVECEXP (operands[0], 0, i), 0));
17632 /* If SP is in the list, then the base register must be SP. */
17633 gcc_assert ((regno != SP_REGNUM) || (regno_base == SP_REGNUM));
17634 /* If base register is in the list, there must be no explicit update. */
17635 if (regno == regno_base)
17636 gcc_assert (!update);
17639 conditional = reverse ? "%?%D0" : "%?%d0";
17640 /* Can't use POP if returning from an interrupt. */
17641 if ((regno_base == SP_REGNUM) && update && !(interrupt_p && return_pc))
17642 sprintf (pattern, "pop%s\t{", conditional);
17643 else
17645 /* Output ldmfd when the base register is SP, otherwise output ldmia.
17646 It's just a convention; their semantics are identical. */
17647 if (regno_base == SP_REGNUM)
17648 sprintf (pattern, "ldmfd%s\t", conditional);
17649 else if (update)
17650 sprintf (pattern, "ldmia%s\t", conditional);
17651 else
17652 sprintf (pattern, "ldm%s\t", conditional);
17654 strcat (pattern, reg_names[regno_base]);
17655 if (update)
17656 strcat (pattern, "!, {");
17657 else
17658 strcat (pattern, ", {");
17661 /* Output the first destination register. */
17662 strcat (pattern,
17663 reg_names[REGNO (XEXP (XVECEXP (operands[0], 0, offset), 0))]);
17665 /* Output the rest of the destination registers. */
17666 for (i = offset + 1; i < num_saves; i++)
17668 strcat (pattern, ", ");
17669 strcat (pattern,
17670 reg_names[REGNO (XEXP (XVECEXP (operands[0], 0, i), 0))]);
17673 strcat (pattern, "}");
17675 if (interrupt_p && return_pc)
17676 strcat (pattern, "^");
17678 output_asm_insn (pattern, &cond);
17682 /* Output the assembly for a store multiple. */
17684 const char *
17685 vfp_output_vstmd (rtx * operands)
17687 char pattern[100];
17688 int p;
17689 int base;
17690 int i;
17691 rtx addr_reg = REG_P (XEXP (operands[0], 0))
17692 ? XEXP (operands[0], 0)
17693 : XEXP (XEXP (operands[0], 0), 0);
17694 bool push_p = REGNO (addr_reg) == SP_REGNUM;
17696 if (push_p)
17697 strcpy (pattern, "vpush%?.64\t{%P1");
17698 else
17699 strcpy (pattern, "vstmdb%?.64\t%m0!, {%P1");
17701 p = strlen (pattern);
17703 gcc_assert (REG_P (operands[1]));
17705 base = (REGNO (operands[1]) - FIRST_VFP_REGNUM) / 2;
17706 for (i = 1; i < XVECLEN (operands[2], 0); i++)
17708 p += sprintf (&pattern[p], ", d%d", base + i);
17710 strcpy (&pattern[p], "}");
17712 output_asm_insn (pattern, operands);
17713 return "";
17717 /* Emit RTL to save block of VFP register pairs to the stack. Returns the
17718 number of bytes pushed. */
17720 static int
17721 vfp_emit_fstmd (int base_reg, int count)
17723 rtx par;
17724 rtx dwarf;
17725 rtx tmp, reg;
17726 int i;
17728 /* Work around the ARM10 VFPr1 bug. Data corruption can occur when exactly two
17729 register pairs are stored by a store multiple insn. We avoid this
17730 by pushing an extra pair. */
17731 if (count == 2 && !arm_arch6)
17733 if (base_reg == LAST_VFP_REGNUM - 3)
17734 base_reg -= 2;
17735 count++;
17738 /* FSTMD may not store more than 16 doubleword registers at once. Split
17739 larger stores into multiple parts (up to a maximum of two, in
17740 practice). */
17741 if (count > 16)
17743 int saved;
17744 /* NOTE: base_reg is an internal register number, so each D register
17745 counts as 2. */
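/* Hence base_reg + 32 below refers to the D register sixteen above
base_reg. */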
17746 saved = vfp_emit_fstmd (base_reg + 32, count - 16);
17747 saved += vfp_emit_fstmd (base_reg, 16);
17748 return saved;
17751 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (count));
17752 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (count + 1));
17754 reg = gen_rtx_REG (DFmode, base_reg);
17755 base_reg += 2;
17757 XVECEXP (par, 0, 0)
17758 = gen_rtx_SET (gen_frame_mem
17759 (BLKmode,
17760 gen_rtx_PRE_MODIFY (Pmode,
17761 stack_pointer_rtx,
17762 plus_constant
17763 (Pmode, stack_pointer_rtx,
17764 - (count * 8)))
17766 gen_rtx_UNSPEC (BLKmode,
17767 gen_rtvec (1, reg),
17768 UNSPEC_PUSH_MULT));
17770 tmp = gen_rtx_SET (stack_pointer_rtx,
17771 plus_constant (Pmode, stack_pointer_rtx, -(count * 8)));
17772 RTX_FRAME_RELATED_P (tmp) = 1;
17773 XVECEXP (dwarf, 0, 0) = tmp;
17775 tmp = gen_rtx_SET (gen_frame_mem (DFmode, stack_pointer_rtx), reg);
17776 RTX_FRAME_RELATED_P (tmp) = 1;
17777 XVECEXP (dwarf, 0, 1) = tmp;
17779 for (i = 1; i < count; i++)
17781 reg = gen_rtx_REG (DFmode, base_reg);
17782 base_reg += 2;
17783 XVECEXP (par, 0, i) = gen_rtx_USE (VOIDmode, reg);
17785 tmp = gen_rtx_SET (gen_frame_mem (DFmode,
17786 plus_constant (Pmode,
17787 stack_pointer_rtx,
17788 i * 8)),
17789 reg);
17790 RTX_FRAME_RELATED_P (tmp) = 1;
17791 XVECEXP (dwarf, 0, i + 1) = tmp;
17794 par = emit_insn (par);
17795 add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
17796 RTX_FRAME_RELATED_P (par) = 1;
17798 return count * 8;
17801 /* Returns true if -mcmse has been passed and the function pointed to by 'addr'
17802 has the cmse_nonsecure_call attribute, and false otherwise. */
17804 bool
17805 detect_cmse_nonsecure_call (tree addr)
17807 if (!addr)
17808 return FALSE;
17810 tree fntype = TREE_TYPE (addr);
17811 if (use_cmse && lookup_attribute ("cmse_nonsecure_call",
17812 TYPE_ATTRIBUTES (fntype)))
17813 return TRUE;
17814 return FALSE;
17818 /* Emit a call instruction with pattern PAT. ADDR is the address of
17819 the call target. */
17821 void
17822 arm_emit_call_insn (rtx pat, rtx addr, bool sibcall)
17824 rtx insn;
17826 insn = emit_call_insn (pat);
17828 /* The PIC register is live on entry to VxWorks PIC PLT entries.
17829 If the call might use such an entry, add a use of the PIC register
17830 to the instruction's CALL_INSN_FUNCTION_USAGE. */
17831 if (TARGET_VXWORKS_RTP
17832 && flag_pic
17833 && !sibcall
17834 && GET_CODE (addr) == SYMBOL_REF
17835 && (SYMBOL_REF_DECL (addr)
17836 ? !targetm.binds_local_p (SYMBOL_REF_DECL (addr))
17837 : !SYMBOL_REF_LOCAL_P (addr)))
17839 require_pic_register ();
17840 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), cfun->machine->pic_reg);
17843 if (TARGET_AAPCS_BASED)
17845 /* For AAPCS, IP and CC can be clobbered by veneers inserted by the
17846 linker. We need to add an IP clobber to allow setting
17847 TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS to true. A CC clobber
17848 is not needed since it's a fixed register. */
17849 rtx *fusage = &CALL_INSN_FUNCTION_USAGE (insn);
17850 clobber_reg (fusage, gen_rtx_REG (word_mode, IP_REGNUM));
17854 /* Output a 'call' insn. */
17855 const char *
17856 output_call (rtx *operands)
17858 gcc_assert (!arm_arch5); /* Patterns should call blx <reg> directly. */
17860 /* Handle calls to lr using ip (which may be clobbered in subr anyway). */
17861 if (REGNO (operands[0]) == LR_REGNUM)
17863 operands[0] = gen_rtx_REG (SImode, IP_REGNUM);
17864 output_asm_insn ("mov%?\t%0, %|lr", operands);
17867 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
17869 if (TARGET_INTERWORK || arm_arch4t)
17870 output_asm_insn ("bx%?\t%0", operands);
17871 else
17872 output_asm_insn ("mov%?\t%|pc, %0", operands);
17874 return "";
17877 /* Output a move from ARM registers to ARM registers of a long double.
17878 OPERANDS[0] is the destination.
17879 OPERANDS[1] is the source. */
17880 const char *
17881 output_mov_long_double_arm_from_arm (rtx *operands)
17883 /* We have to be careful here because the two might overlap. */
17884 int dest_start = REGNO (operands[0]);
17885 int src_start = REGNO (operands[1]);
17886 rtx ops[2];
17887 int i;
17889 if (dest_start < src_start)
17891 for (i = 0; i < 3; i++)
17893 ops[0] = gen_rtx_REG (SImode, dest_start + i);
17894 ops[1] = gen_rtx_REG (SImode, src_start + i);
17895 output_asm_insn ("mov%?\t%0, %1", ops);
17898 else
17900 for (i = 2; i >= 0; i--)
17902 ops[0] = gen_rtx_REG (SImode, dest_start + i);
17903 ops[1] = gen_rtx_REG (SImode, src_start + i);
17904 output_asm_insn ("mov%?\t%0, %1", ops);
17908 return "";
17911 void
17912 arm_emit_movpair (rtx dest, rtx src)
17914 /* If the src is an immediate, simplify it. */
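/* A 32-bit constant is emitted as a pair of 16-bit moves: the low half
with a plain move and the high half, when non-zero, with a zero_extract
store (typically movw followed by movt). */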
17915 if (CONST_INT_P (src))
17917 HOST_WIDE_INT val = INTVAL (src);
17918 emit_set_insn (dest, GEN_INT (val & 0x0000ffff));
17919 if ((val >> 16) & 0x0000ffff)
17921 emit_set_insn (gen_rtx_ZERO_EXTRACT (SImode, dest, GEN_INT (16),
17922 GEN_INT (16)),
17923 GEN_INT ((val >> 16) & 0x0000ffff));
17924 rtx_insn *insn = get_last_insn ();
17925 set_unique_reg_note (insn, REG_EQUAL, copy_rtx (src));
17927 return;
17929 emit_set_insn (dest, gen_rtx_HIGH (SImode, src));
17930 emit_set_insn (dest, gen_rtx_LO_SUM (SImode, dest, src));
17931 rtx_insn *insn = get_last_insn ();
17932 set_unique_reg_note (insn, REG_EQUAL, copy_rtx (src));
17935 /* Output a move between double words. It must be REG<-MEM
17936 or MEM<-REG. */
17937 const char *
17938 output_move_double (rtx *operands, bool emit, int *count)
17940 enum rtx_code code0 = GET_CODE (operands[0]);
17941 enum rtx_code code1 = GET_CODE (operands[1]);
17942 rtx otherops[3];
17943 if (count)
17944 *count = 1;
17946 /* The only case when this might happen is when
17947 you are looking at the length of a DImode instruction
17948 that has an invalid constant in it. */
17949 if (code0 == REG && code1 != MEM)
17951 gcc_assert (!emit);
17952 *count = 2;
17953 return "";
17956 if (code0 == REG)
17958 unsigned int reg0 = REGNO (operands[0]);
17960 otherops[0] = gen_rtx_REG (SImode, 1 + reg0);
17962 gcc_assert (code1 == MEM); /* Constraints should ensure this. */
17964 switch (GET_CODE (XEXP (operands[1], 0)))
17966 case REG:
17968 if (emit)
17970 if (TARGET_LDRD
17971 && !(fix_cm3_ldrd && reg0 == REGNO(XEXP (operands[1], 0))))
17972 output_asm_insn ("ldrd%?\t%0, [%m1]", operands);
17973 else
17974 output_asm_insn ("ldmia%?\t%m1, %M0", operands);
17976 break;
17978 case PRE_INC:
17979 gcc_assert (TARGET_LDRD);
17980 if (emit)
17981 output_asm_insn ("ldrd%?\t%0, [%m1, #8]!", operands);
17982 break;
17984 case PRE_DEC:
17985 if (emit)
17987 if (TARGET_LDRD)
17988 output_asm_insn ("ldrd%?\t%0, [%m1, #-8]!", operands);
17989 else
17990 output_asm_insn ("ldmdb%?\t%m1!, %M0", operands);
17992 break;
17994 case POST_INC:
17995 if (emit)
17997 if (TARGET_LDRD)
17998 output_asm_insn ("ldrd%?\t%0, [%m1], #8", operands);
17999 else
18000 output_asm_insn ("ldmia%?\t%m1!, %M0", operands);
18002 break;
18004 case POST_DEC:
18005 gcc_assert (TARGET_LDRD);
18006 if (emit)
18007 output_asm_insn ("ldrd%?\t%0, [%m1], #-8", operands);
18008 break;
18010 case PRE_MODIFY:
18011 case POST_MODIFY:
18012 /* Autoincrement addressing modes should never have overlapping
18013 base and destination registers, and overlapping index registers
18014 are already prohibited, so this doesn't need to worry about
18015 fix_cm3_ldrd. */
18016 otherops[0] = operands[0];
18017 otherops[1] = XEXP (XEXP (XEXP (operands[1], 0), 1), 0);
18018 otherops[2] = XEXP (XEXP (XEXP (operands[1], 0), 1), 1);
18020 if (GET_CODE (XEXP (operands[1], 0)) == PRE_MODIFY)
18022 if (reg_overlap_mentioned_p (otherops[0], otherops[2]))
18024 /* Registers overlap so split out the increment. */
18025 if (emit)
18027 output_asm_insn ("add%?\t%1, %1, %2", otherops);
18028 output_asm_insn ("ldrd%?\t%0, [%1] @split", otherops);
18030 if (count)
18031 *count = 2;
18033 else
18035 /* Use a single insn if we can.
18036 FIXME: IWMMXT allows offsets larger than ldrd can
18037 handle, fix these up with a pair of ldr. */
18038 if (TARGET_THUMB2
18039 || !CONST_INT_P (otherops[2])
18040 || (INTVAL (otherops[2]) > -256
18041 && INTVAL (otherops[2]) < 256))
18043 if (emit)
18044 output_asm_insn ("ldrd%?\t%0, [%1, %2]!", otherops);
18046 else
18048 if (emit)
18050 output_asm_insn ("ldr%?\t%0, [%1, %2]!", otherops);
18051 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
18053 if (count)
18054 *count = 2;
18059 else
18061 /* Use a single insn if we can.
18062 FIXME: IWMMXT allows offsets larger than ldrd can handle,
18063 fix these up with a pair of ldr. */
18064 if (TARGET_THUMB2
18065 || !CONST_INT_P (otherops[2])
18066 || (INTVAL (otherops[2]) > -256
18067 && INTVAL (otherops[2]) < 256))
18069 if (emit)
18070 output_asm_insn ("ldrd%?\t%0, [%1], %2", otherops);
18072 else
18074 if (emit)
18076 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
18077 output_asm_insn ("ldr%?\t%0, [%1], %2", otherops);
18079 if (count)
18080 *count = 2;
18083 break;
18085 case LABEL_REF:
18086 case CONST:
18087 /* We might be able to use ldrd %0, %1 here. However, the range is
18088 different to ldr/adr, and it is broken on some ARMv7-M
18089 implementations. */
18090 /* Use the second register of the pair to avoid problematic
18091 overlap. */
18092 otherops[1] = operands[1];
18093 if (emit)
18094 output_asm_insn ("adr%?\t%0, %1", otherops);
18095 operands[1] = otherops[0];
18096 if (emit)
18098 if (TARGET_LDRD)
18099 output_asm_insn ("ldrd%?\t%0, [%1]", operands);
18100 else
18101 output_asm_insn ("ldmia%?\t%1, %M0", operands);
18104 if (count)
18105 *count = 2;
18106 break;
18108 /* ??? This needs checking for thumb2. */
18109 default:
18110 if (arm_add_operand (XEXP (XEXP (operands[1], 0), 1),
18111 GET_MODE (XEXP (XEXP (operands[1], 0), 1))))
18113 otherops[0] = operands[0];
18114 otherops[1] = XEXP (XEXP (operands[1], 0), 0);
18115 otherops[2] = XEXP (XEXP (operands[1], 0), 1);
18117 if (GET_CODE (XEXP (operands[1], 0)) == PLUS)
18119 if (CONST_INT_P (otherops[2]) && !TARGET_LDRD)
18121 switch ((int) INTVAL (otherops[2]))
18123 case -8:
18124 if (emit)
18125 output_asm_insn ("ldmdb%?\t%1, %M0", otherops);
18126 return "";
18127 case -4:
18128 if (TARGET_THUMB2)
18129 break;
18130 if (emit)
18131 output_asm_insn ("ldmda%?\t%1, %M0", otherops);
18132 return "";
18133 case 4:
18134 if (TARGET_THUMB2)
18135 break;
18136 if (emit)
18137 output_asm_insn ("ldmib%?\t%1, %M0", otherops);
18138 return "";
18141 otherops[0] = gen_rtx_REG (SImode, REGNO (operands[0]) + 1);
18142 operands[1] = otherops[0];
18143 if (TARGET_LDRD
18144 && (REG_P (otherops[2])
18145 || TARGET_THUMB2
18146 || (CONST_INT_P (otherops[2])
18147 && INTVAL (otherops[2]) > -256
18148 && INTVAL (otherops[2]) < 256)))
18150 if (reg_overlap_mentioned_p (operands[0],
18151 otherops[2]))
18153 /* Swap base and index registers over to
18154 avoid a conflict. */
18155 std::swap (otherops[1], otherops[2]);
18157 /* If both registers conflict, it will usually
18158 have been fixed by a splitter. */
18159 if (reg_overlap_mentioned_p (operands[0], otherops[2])
18160 || (fix_cm3_ldrd && reg0 == REGNO (otherops[1])))
18162 if (emit)
18164 output_asm_insn ("add%?\t%0, %1, %2", otherops);
18165 output_asm_insn ("ldrd%?\t%0, [%1]", operands);
18167 if (count)
18168 *count = 2;
18170 else
18172 otherops[0] = operands[0];
18173 if (emit)
18174 output_asm_insn ("ldrd%?\t%0, [%1, %2]", otherops);
18176 return "";
18179 if (CONST_INT_P (otherops[2]))
18181 if (emit)
18183 if (!(const_ok_for_arm (INTVAL (otherops[2]))))
18184 output_asm_insn ("sub%?\t%0, %1, #%n2", otherops);
18185 else
18186 output_asm_insn ("add%?\t%0, %1, %2", otherops);
18189 else
18191 if (emit)
18192 output_asm_insn ("add%?\t%0, %1, %2", otherops);
18195 else
18197 if (emit)
18198 output_asm_insn ("sub%?\t%0, %1, %2", otherops);
18201 if (count)
18202 *count = 2;
18204 if (TARGET_LDRD)
18205 return "ldrd%?\t%0, [%1]";
18207 return "ldmia%?\t%1, %M0";
18209 else
18211 otherops[1] = adjust_address (operands[1], SImode, 4);
18212 /* Take care of overlapping base/data reg. */
18213 if (reg_mentioned_p (operands[0], operands[1]))
18215 if (emit)
18217 output_asm_insn ("ldr%?\t%0, %1", otherops);
18218 output_asm_insn ("ldr%?\t%0, %1", operands);
18220 if (count)
18221 *count = 2;
18224 else
18226 if (emit)
18228 output_asm_insn ("ldr%?\t%0, %1", operands);
18229 output_asm_insn ("ldr%?\t%0, %1", otherops);
18231 if (count)
18232 *count = 2;
18237 else
18239 /* Constraints should ensure this. */
18240 gcc_assert (code0 == MEM && code1 == REG);
18241 gcc_assert ((REGNO (operands[1]) != IP_REGNUM)
18242 || (TARGET_ARM && TARGET_LDRD));
18244 switch (GET_CODE (XEXP (operands[0], 0)))
18246 case REG:
18247 if (emit)
18249 if (TARGET_LDRD)
18250 output_asm_insn ("strd%?\t%1, [%m0]", operands);
18251 else
18252 output_asm_insn ("stm%?\t%m0, %M1", operands);
18254 break;
18256 case PRE_INC:
18257 gcc_assert (TARGET_LDRD);
18258 if (emit)
18259 output_asm_insn ("strd%?\t%1, [%m0, #8]!", operands);
18260 break;
18262 case PRE_DEC:
18263 if (emit)
18265 if (TARGET_LDRD)
18266 output_asm_insn ("strd%?\t%1, [%m0, #-8]!", operands);
18267 else
18268 output_asm_insn ("stmdb%?\t%m0!, %M1", operands);
18270 break;
18272 case POST_INC:
18273 if (emit)
18275 if (TARGET_LDRD)
18276 output_asm_insn ("strd%?\t%1, [%m0], #8", operands);
18277 else
18278 output_asm_insn ("stm%?\t%m0!, %M1", operands);
18280 break;
18282 case POST_DEC:
18283 gcc_assert (TARGET_LDRD);
18284 if (emit)
18285 output_asm_insn ("strd%?\t%1, [%m0], #-8", operands);
18286 break;
18288 case PRE_MODIFY:
18289 case POST_MODIFY:
18290 otherops[0] = operands[1];
18291 otherops[1] = XEXP (XEXP (XEXP (operands[0], 0), 1), 0);
18292 otherops[2] = XEXP (XEXP (XEXP (operands[0], 0), 1), 1);
18294 /* IWMMXT allows offsets larger than ldrd can handle;
18295 fix these up with a pair of ldr. */
18296 if (!TARGET_THUMB2
18297 && CONST_INT_P (otherops[2])
18298 && (INTVAL (otherops[2]) <= -256
18299 || INTVAL (otherops[2]) >= 256))
18301 if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
18303 if (emit)
18305 output_asm_insn ("str%?\t%0, [%1, %2]!", otherops);
18306 output_asm_insn ("str%?\t%H0, [%1, #4]", otherops);
18308 if (count)
18309 *count = 2;
18311 else
18313 if (emit)
18315 output_asm_insn ("str%?\t%H0, [%1, #4]", otherops);
18316 output_asm_insn ("str%?\t%0, [%1], %2", otherops);
18318 if (count)
18319 *count = 2;
18322 else if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
18324 if (emit)
18325 output_asm_insn ("strd%?\t%0, [%1, %2]!", otherops);
18327 else
18329 if (emit)
18330 output_asm_insn ("strd%?\t%0, [%1], %2", otherops);
18332 break;
18334 case PLUS:
18335 otherops[2] = XEXP (XEXP (operands[0], 0), 1);
18336 if (CONST_INT_P (otherops[2]) && !TARGET_LDRD)
18338 switch ((int) INTVAL (XEXP (XEXP (operands[0], 0), 1)))
18340 case -8:
18341 if (emit)
18342 output_asm_insn ("stmdb%?\t%m0, %M1", operands);
18343 return "";
18345 case -4:
18346 if (TARGET_THUMB2)
18347 break;
18348 if (emit)
18349 output_asm_insn ("stmda%?\t%m0, %M1", operands);
18350 return "";
18352 case 4:
18353 if (TARGET_THUMB2)
18354 break;
18355 if (emit)
18356 output_asm_insn ("stmib%?\t%m0, %M1", operands);
18357 return "";
18360 if (TARGET_LDRD
18361 && (REG_P (otherops[2])
18362 || TARGET_THUMB2
18363 || (CONST_INT_P (otherops[2])
18364 && INTVAL (otherops[2]) > -256
18365 && INTVAL (otherops[2]) < 256)))
18367 otherops[0] = operands[1];
18368 otherops[1] = XEXP (XEXP (operands[0], 0), 0);
18369 if (emit)
18370 output_asm_insn ("strd%?\t%0, [%1, %2]", otherops);
18371 return "";
18373 /* Fall through */
18375 default:
18376 otherops[0] = adjust_address (operands[0], SImode, 4);
18377 otherops[1] = operands[1];
18378 if (emit)
18380 output_asm_insn ("str%?\t%1, %0", operands);
18381 output_asm_insn ("str%?\t%H1, %0", otherops);
18383 if (count)
18384 *count = 2;
18388 return "";
18391 /* Output a move, load or store for quad-word vectors in ARM registers. Only
18392 handles MEMs accepted by neon_vector_mem_operand with TYPE=1. */
18394 const char *
18395 output_move_quad (rtx *operands)
18397 if (REG_P (operands[0]))
18399 /* Load, or reg->reg move. */
18401 if (MEM_P (operands[1]))
18403 switch (GET_CODE (XEXP (operands[1], 0)))
18405 case REG:
18406 output_asm_insn ("ldmia%?\t%m1, %M0", operands);
18407 break;
18409 case LABEL_REF:
18410 case CONST:
18411 output_asm_insn ("adr%?\t%0, %1", operands);
18412 output_asm_insn ("ldmia%?\t%0, %M0", operands);
18413 break;
18415 default:
18416 gcc_unreachable ();
18419 else
18421 rtx ops[2];
18422 int dest, src, i;
18424 gcc_assert (REG_P (operands[1]));
18426 dest = REGNO (operands[0]);
18427 src = REGNO (operands[1]);
18429 /* This seems pretty dumb, but hopefully GCC won't try to do it
18430 very often. */
18431 if (dest < src)
18432 for (i = 0; i < 4; i++)
18434 ops[0] = gen_rtx_REG (SImode, dest + i);
18435 ops[1] = gen_rtx_REG (SImode, src + i);
18436 output_asm_insn ("mov%?\t%0, %1", ops);
18438 else
18439 for (i = 3; i >= 0; i--)
18441 ops[0] = gen_rtx_REG (SImode, dest + i);
18442 ops[1] = gen_rtx_REG (SImode, src + i);
18443 output_asm_insn ("mov%?\t%0, %1", ops);
18447 else
18449 gcc_assert (MEM_P (operands[0]));
18450 gcc_assert (REG_P (operands[1]));
18451 gcc_assert (!reg_overlap_mentioned_p (operands[1], operands[0]));
18453 switch (GET_CODE (XEXP (operands[0], 0)))
18455 case REG:
18456 output_asm_insn ("stm%?\t%m0, %M1", operands);
18457 break;
18459 default:
18460 gcc_unreachable ();
18464 return "";
18467 /* Output a VFP load or store instruction. */
18469 const char *
18470 output_move_vfp (rtx *operands)
18472 rtx reg, mem, addr, ops[2];
18473 int load = REG_P (operands[0]);
18474 int dp = GET_MODE_SIZE (GET_MODE (operands[0])) == 8;
18475 int sp = (!TARGET_VFP_FP16INST
18476 || GET_MODE_SIZE (GET_MODE (operands[0])) == 4);
18477 int integer_p = GET_MODE_CLASS (GET_MODE (operands[0])) == MODE_INT;
18478 const char *templ;
18479 char buff[50];
18480 machine_mode mode;
18482 reg = operands[!load];
18483 mem = operands[load];
18485 mode = GET_MODE (reg);
18487 gcc_assert (REG_P (reg));
18488 gcc_assert (IS_VFP_REGNUM (REGNO (reg)));
18489 gcc_assert ((mode == HFmode && TARGET_HARD_FLOAT)
18490 || mode == SFmode
18491 || mode == DFmode
18492 || mode == HImode
18493 || mode == SImode
18494 || mode == DImode
18495 || (TARGET_NEON && VALID_NEON_DREG_MODE (mode)));
18496 gcc_assert (MEM_P (mem));
18498 addr = XEXP (mem, 0);
18500 switch (GET_CODE (addr))
18502 case PRE_DEC:
18503 templ = "v%smdb%%?.%s\t%%0!, {%%%s1}%s";
18504 ops[0] = XEXP (addr, 0);
18505 ops[1] = reg;
18506 break;
18508 case POST_INC:
18509 templ = "v%smia%%?.%s\t%%0!, {%%%s1}%s";
18510 ops[0] = XEXP (addr, 0);
18511 ops[1] = reg;
18512 break;
18514 default:
18515 templ = "v%sr%%?.%s\t%%%s0, %%1%s";
18516 ops[0] = reg;
18517 ops[1] = mem;
18518 break;
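/* The template expands to, e.g., "vldr%?.64\t%P0, %1" for a DFmode load
with a plain address. */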
18521 sprintf (buff, templ,
18522 load ? "ld" : "st",
18523 dp ? "64" : sp ? "32" : "16",
18524 dp ? "P" : "",
18525 integer_p ? "\t%@ int" : "");
18526 output_asm_insn (buff, ops);
18528 return "";
18531 /* Output a Neon double-word or quad-word load or store, or a load
18532 or store for larger structure modes.
18534 WARNING: The ordering of elements is weird in big-endian mode,
18535 because the EABI requires that vectors stored in memory appear
18536 as though they were stored by a VSTM.
18537 GCC RTL defines element ordering based on in-memory order.
18538 This can be different from the architectural ordering of elements
18539 within a NEON register. The intrinsics defined in arm_neon.h use the
18540 NEON register element ordering, not the GCC RTL element ordering.
18542 For example, the in-memory ordering of a big-endian quadword
18543 vector with 16-bit elements when stored from register pair {d0,d1}
18544 will be (lowest address first, d0[N] is NEON register element N):
18546 [d0[3], d0[2], d0[1], d0[0], d1[7], d1[6], d1[5], d1[4]]
18548 When necessary, quadword registers (dN, dN+1) are moved to ARM
18549 registers from rN in the order:
18551 dN -> (rN+1, rN), dN+1 -> (rN+3, rN+2)
18553 So that STM/LDM can be used on vectors in ARM registers, and the
18554 same memory layout will result as if VSTM/VLDM were used.
18556 Instead of VSTM/VLDM we prefer to use VST1.64/VLD1.64 where
18557 possible, which allows use of appropriate alignment tags.
18558 Note that the choice of "64" is independent of the actual vector
18559 element size; this size simply ensures that the behavior is
18560 equivalent to VSTM/VLDM in both little-endian and big-endian mode.
18562 Due to limitations of those instructions, use of VST1.64/VLD1.64
18563 is not possible if:
18564 - the address contains PRE_DEC, or
18565 - the mode refers to more than 4 double-word registers
18567 In those cases, it would be possible to replace VSTM/VLDM by a
18568 sequence of instructions; this is not currently implemented since
18569 this is not certain to actually improve performance. */
18571 const char *
18572 output_move_neon (rtx *operands)
18574 rtx reg, mem, addr, ops[2];
18575 int regno, nregs, load = REG_P (operands[0]);
18576 const char *templ;
18577 char buff[50];
18578 machine_mode mode;
18580 reg = operands[!load];
18581 mem = operands[load];
18583 mode = GET_MODE (reg);
18585 gcc_assert (REG_P (reg));
18586 regno = REGNO (reg);
18587 nregs = HARD_REGNO_NREGS (regno, mode) / 2;
18588 gcc_assert (VFP_REGNO_OK_FOR_DOUBLE (regno)
18589 || NEON_REGNO_OK_FOR_QUAD (regno));
18590 gcc_assert (VALID_NEON_DREG_MODE (mode)
18591 || VALID_NEON_QREG_MODE (mode)
18592 || VALID_NEON_STRUCT_MODE (mode));
18593 gcc_assert (MEM_P (mem));
18595 addr = XEXP (mem, 0);
18597 /* Strip off const from addresses like (const (plus (...))). */
18598 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS)
18599 addr = XEXP (addr, 0);
18601 switch (GET_CODE (addr))
18603 case POST_INC:
18604 /* We have to use vldm / vstm for too-large modes. */
18605 if (nregs > 4)
18607 templ = "v%smia%%?\t%%0!, %%h1";
18608 ops[0] = XEXP (addr, 0);
18610 else
18612 templ = "v%s1.64\t%%h1, %%A0";
18613 ops[0] = mem;
18615 ops[1] = reg;
18616 break;
18618 case PRE_DEC:
18619 /* We have to use vldm / vstm in this case, since there is no
18620 pre-decrement form of the vld1 / vst1 instructions. */
18621 templ = "v%smdb%%?\t%%0!, %%h1";
18622 ops[0] = XEXP (addr, 0);
18623 ops[1] = reg;
18624 break;
18626 case POST_MODIFY:
18627 /* FIXME: Not currently enabled in neon_vector_mem_operand. */
18628 gcc_unreachable ();
18630 case REG:
18631 /* We have to use vldm / vstm for too-large modes. */
18632 if (nregs > 1)
18634 if (nregs > 4)
18635 templ = "v%smia%%?\t%%m0, %%h1";
18636 else
18637 templ = "v%s1.64\t%%h1, %%A0";
18639 ops[0] = mem;
18640 ops[1] = reg;
18641 break;
18643 /* Fall through. */
18644 case LABEL_REF:
18645 case PLUS:
18647 int i;
18648 int overlap = -1;
18649 for (i = 0; i < nregs; i++)
18651 /* We're only using DImode here because it's a convenient size. */
18652 ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * i);
18653 ops[1] = adjust_address (mem, DImode, 8 * i);
18654 if (reg_overlap_mentioned_p (ops[0], mem))
18656 gcc_assert (overlap == -1);
18657 overlap = i;
18659 else
18661 sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
18662 output_asm_insn (buff, ops);
18665 if (overlap != -1)
18667 ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * overlap);
18668 ops[1] = adjust_address (mem, SImode, 8 * overlap);
18669 sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
18670 output_asm_insn (buff, ops);
18673 return "";
18676 default:
18677 gcc_unreachable ();
18680 sprintf (buff, templ, load ? "ld" : "st");
18681 output_asm_insn (buff, ops);
18683 return "";
18686 /* Compute and return the length of neon_mov<mode>, where <mode> is
18687 one of VSTRUCT modes: EI, OI, CI or XI. */
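/* The values returned below are byte counts; each constituent move or
load/store instruction is 4 bytes long. */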
18688 int
18689 arm_attr_length_move_neon (rtx_insn *insn)
18691 rtx reg, mem, addr;
18692 int load;
18693 machine_mode mode;
18695 extract_insn_cached (insn);
18697 if (REG_P (recog_data.operand[0]) && REG_P (recog_data.operand[1]))
18699 mode = GET_MODE (recog_data.operand[0]);
18700 switch (mode)
18702 case E_EImode:
18703 case E_OImode:
18704 return 8;
18705 case E_CImode:
18706 return 12;
18707 case E_XImode:
18708 return 16;
18709 default:
18710 gcc_unreachable ();
18714 load = REG_P (recog_data.operand[0]);
18715 reg = recog_data.operand[!load];
18716 mem = recog_data.operand[load];
18718 gcc_assert (MEM_P (mem));
18720 mode = GET_MODE (reg);
18721 addr = XEXP (mem, 0);
18723 /* Strip off const from addresses like (const (plus (...))). */
18724 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS)
18725 addr = XEXP (addr, 0);
18727 if (GET_CODE (addr) == LABEL_REF || GET_CODE (addr) == PLUS)
18729 int insns = HARD_REGNO_NREGS (REGNO (reg), mode) / 2;
18730 return insns * 4;
18732 else
18733 return 4;
18736 /* Return nonzero if the offset in the address is an immediate. Otherwise,
18737 return zero. */
18739 int
18740 arm_address_offset_is_imm (rtx_insn *insn)
18742 rtx mem, addr;
18744 extract_insn_cached (insn);
18746 if (REG_P (recog_data.operand[0]))
18747 return 0;
18749 mem = recog_data.operand[0];
18751 gcc_assert (MEM_P (mem));
18753 addr = XEXP (mem, 0);
18755 if (REG_P (addr)
18756 || (GET_CODE (addr) == PLUS
18757 && REG_P (XEXP (addr, 0))
18758 && CONST_INT_P (XEXP (addr, 1))))
18759 return 1;
18760 else
18761 return 0;
18764 /* Output an ADD r, s, #n where n may be too big for one instruction.
18765 If n is zero and the source and destination registers are the same, output nothing. */
18766 const char *
18767 output_add_immediate (rtx *operands)
18769 HOST_WIDE_INT n = INTVAL (operands[2]);
18771 if (n != 0 || REGNO (operands[0]) != REGNO (operands[1]))
18773 if (n < 0)
18774 output_multi_immediate (operands,
18775 "sub%?\t%0, %1, %2", "sub%?\t%0, %0, %2", 2,
18776 -n);
18777 else
18778 output_multi_immediate (operands,
18779 "add%?\t%0, %1, %2", "add%?\t%0, %0, %2", 2,
18783 return "";
18786 /* Output a multiple immediate operation.
18787 OPERANDS is the vector of operands referred to in the output patterns.
18788 INSTR1 is the output pattern to use for the first constant.
18789 INSTR2 is the output pattern to use for subsequent constants.
18790 IMMED_OP is the index of the constant slot in OPERANDS.
18791 N is the constant value. */
18792 static const char *
18793 output_multi_immediate (rtx *operands, const char *instr1, const char *instr2,
18794 int immed_op, HOST_WIDE_INT n)
18796 #if HOST_BITS_PER_WIDE_INT > 32
18797 n &= 0xffffffff;
18798 #endif
18800 if (n == 0)
18802 /* Quick and easy output. */
18803 operands[immed_op] = const0_rtx;
18804 output_asm_insn (instr1, operands);
18806 else
18808 int i;
18809 const char * instr = instr1;
18811 /* Note that n is never zero here (which would give no output). */
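/* For example, n == 0x10001 is split into the chunks #1 and #0x10000 and
so produces two instructions, since each immediate must be an 8-bit
value at an even bit position. */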
18812 for (i = 0; i < 32; i += 2)
18814 if (n & (3 << i))
18816 operands[immed_op] = GEN_INT (n & (255 << i));
18817 output_asm_insn (instr, operands);
18818 instr = instr2;
18819 i += 6;
18824 return "";
18827 /* Return the name of a shifter operation. */
18828 static const char *
18829 arm_shift_nmem(enum rtx_code code)
18831 switch (code)
18833 case ASHIFT:
18834 return ARM_LSL_NAME;
18836 case ASHIFTRT:
18837 return "asr";
18839 case LSHIFTRT:
18840 return "lsr";
18842 case ROTATERT:
18843 return "ror";
18845 default:
18846 abort();
18850 /* Return the appropriate ARM instruction for the operation code.
18851 The returned result should not be overwritten. OP is the rtx of the
18852 operation. SHIFT_FIRST_ARG is TRUE if the first argument of the operator
18853 was shifted. */
18854 const char *
18855 arithmetic_instr (rtx op, int shift_first_arg)
18857 switch (GET_CODE (op))
18859 case PLUS:
18860 return "add";
18862 case MINUS:
18863 return shift_first_arg ? "rsb" : "sub";
18865 case IOR:
18866 return "orr";
18868 case XOR:
18869 return "eor";
18871 case AND:
18872 return "and";
18874 case ASHIFT:
18875 case ASHIFTRT:
18876 case LSHIFTRT:
18877 case ROTATERT:
18878 return arm_shift_nmem(GET_CODE(op));
18880 default:
18881 gcc_unreachable ();
18885 /* Ensure valid constant shifts and return the appropriate shift mnemonic
18886 for the operation code. The returned result should not be overwritten.
18887 OP is the rtx of the shift.
18888 On exit, *AMOUNTP will be -1 if the shift is by a register, or the
18889 constant shift amount otherwise. */
18890 static const char *
18891 shift_op (rtx op, HOST_WIDE_INT *amountp)
18893 const char * mnem;
18894 enum rtx_code code = GET_CODE (op);
18896 switch (code)
18898 case ROTATE:
18899 if (!CONST_INT_P (XEXP (op, 1)))
18901 output_operand_lossage ("invalid shift operand");
18902 return NULL;
18905 code = ROTATERT;
18906 *amountp = 32 - INTVAL (XEXP (op, 1));
18907 mnem = "ror";
18908 break;
18910 case ASHIFT:
18911 case ASHIFTRT:
18912 case LSHIFTRT:
18913 case ROTATERT:
18914 mnem = arm_shift_nmem(code);
18915 if (CONST_INT_P (XEXP (op, 1)))
18917 *amountp = INTVAL (XEXP (op, 1));
18919 else if (REG_P (XEXP (op, 1)))
18921 *amountp = -1;
18922 return mnem;
18924 else
18926 output_operand_lossage ("invalid shift operand");
18927 return NULL;
18929 break;
18931 case MULT:
18932 /* We never have to worry about the amount being other than a
18933 power of 2, since this case can never be reloaded from a reg. */
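/* E.g. a multiplication by 8 is emitted as an LSL by 3. */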
18934 if (!CONST_INT_P (XEXP (op, 1)))
18936 output_operand_lossage ("invalid shift operand");
18937 return NULL;
18940 *amountp = INTVAL (XEXP (op, 1)) & 0xFFFFFFFF;
18942 /* Amount must be a power of two. */
18943 if (*amountp & (*amountp - 1))
18945 output_operand_lossage ("invalid shift operand");
18946 return NULL;
18949 *amountp = exact_log2 (*amountp);
18950 gcc_assert (IN_RANGE (*amountp, 0, 31));
18951 return ARM_LSL_NAME;
18953 default:
18954 output_operand_lossage ("invalid shift operand");
18955 return NULL;
18958 /* This is not 100% correct, but follows from the desire to merge
18959 multiplication by a power of 2 with the recognizer for a
18960 shift. >=32 is not a valid shift for "lsl", so we must try to
18961 output a shift that produces the correct arithmetic result.
18962 Using lsr #32 is identical except for the fact that the carry bit
18963 is not set correctly if we set the flags; but we never use the
18964 carry bit from such an operation, so we can ignore that. */
18965 if (code == ROTATERT)
18966 /* Rotate is just modulo 32. */
18967 *amountp &= 31;
18968 else if (*amountp != (*amountp & 31))
18970 if (code == ASHIFT)
18971 mnem = "lsr";
18972 *amountp = 32;
18975 /* Shifts of 0 are no-ops. */
18976 if (*amountp == 0)
18977 return NULL;
18979 return mnem;
18982 /* Output a .ascii pseudo-op, keeping track of lengths. This is
18983 because /bin/as is horribly restrictive. The judgement about
18984 whether each character is 'printable' (and can be output as
18985 is) or not (and must be printed with an octal escape) must be made
18986 with reference to the *host* character set -- the situation is
18987 similar to that discussed in the comments above pp_c_char in
18988 c-pretty-print.c. */
18990 #define MAX_ASCII_LEN 51
18992 void
18993 output_ascii_pseudo_op (FILE *stream, const unsigned char *p, int len)
18995 int i;
18996 int len_so_far = 0;
18998 fputs ("\t.ascii\t\"", stream);
19000 for (i = 0; i < len; i++)
19002 int c = p[i];
19004 if (len_so_far >= MAX_ASCII_LEN)
19006 fputs ("\"\n\t.ascii\t\"", stream);
19007 len_so_far = 0;
19010 if (ISPRINT (c))
19012 if (c == '\\' || c == '\"')
19014 putc ('\\', stream);
19015 len_so_far++;
19017 putc (c, stream);
19018 len_so_far++;
19020 else
19022 fprintf (stream, "\\%03o", c);
19023 len_so_far += 4;
19027 fputs ("\"\n", stream);
19030 /* Whether a register is callee saved or not. This is necessary because,
19031 when optimizing for size on Thumb-1 targets, high registers are marked as
19032 caller saved despite being callee saved, in order to avoid using them. */
19033 #define callee_saved_reg_p(reg) \
19034 (!call_used_regs[reg] \
19035 || (TARGET_THUMB1 && optimize_size \
19036 && reg >= FIRST_HI_REGNUM && reg <= LAST_HI_REGNUM))
19038 /* Compute the register save mask for registers 0 through 12
19039 inclusive. This code is used by arm_compute_save_core_reg_mask (). */
19041 static unsigned long
19042 arm_compute_save_reg0_reg12_mask (void)
19044 unsigned long func_type = arm_current_func_type ();
19045 unsigned long save_reg_mask = 0;
19046 unsigned int reg;
19048 if (IS_INTERRUPT (func_type))
19050 unsigned int max_reg;
19051 /* Interrupt functions must not corrupt any registers,
19052 even call clobbered ones. If this is a leaf function
19053 we can just examine the registers used by the RTL, but
19054 otherwise we have to assume that whatever function is
19055 called might clobber anything, and so we have to save
19056 all the call-clobbered registers as well. */
19057 if (ARM_FUNC_TYPE (func_type) == ARM_FT_FIQ)
19058 /* FIQ handlers have registers r8 - r12 banked, so
19059 we only need to check r0 - r7. Normal ISRs only
19060 bank r14 and r15, so we must check up to r12.
19061 r13 is the stack pointer which is always preserved,
19062 so we do not need to consider it here. */
19063 max_reg = 7;
19064 else
19065 max_reg = 12;
19067 for (reg = 0; reg <= max_reg; reg++)
19068 if (df_regs_ever_live_p (reg)
19069 || (! crtl->is_leaf && call_used_regs[reg]))
19070 save_reg_mask |= (1 << reg);
19072 /* Also save the pic base register if necessary. */
19073 if (flag_pic
19074 && !TARGET_SINGLE_PIC_BASE
19075 && arm_pic_register != INVALID_REGNUM
19076 && crtl->uses_pic_offset_table)
19077 save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
19079 else if (IS_VOLATILE(func_type))
19081 /* For noreturn functions we historically omitted register saves
19082 altogether. However, this really messes up debugging. As a
19083 compromise, save just the frame pointers. Combined with the link
19084 register saved elsewhere this should be sufficient to get
19085 a backtrace. */
19086 if (frame_pointer_needed)
19087 save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;
19088 if (df_regs_ever_live_p (ARM_HARD_FRAME_POINTER_REGNUM))
19089 save_reg_mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;
19090 if (df_regs_ever_live_p (THUMB_HARD_FRAME_POINTER_REGNUM))
19091 save_reg_mask |= 1 << THUMB_HARD_FRAME_POINTER_REGNUM;
19093 else
19095 /* In the normal case we only need to save those registers
19096 which are call saved and which are used by this function. */
19097 for (reg = 0; reg <= 11; reg++)
19098 if (df_regs_ever_live_p (reg) && callee_saved_reg_p (reg))
19099 save_reg_mask |= (1 << reg);
19101 /* Handle the frame pointer as a special case. */
19102 if (frame_pointer_needed)
19103 save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;
19105 /* If we aren't loading the PIC register,
19106 don't stack it even though it may be live. */
19107 if (flag_pic
19108 && !TARGET_SINGLE_PIC_BASE
19109 && arm_pic_register != INVALID_REGNUM
19110 && (df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM)
19111 || crtl->uses_pic_offset_table))
19112 save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
19114 /* The prologue will copy SP into R0, so save it. */
19115 if (IS_STACKALIGN (func_type))
19116 save_reg_mask |= 1;
19119 /* Save registers so the exception handler can modify them. */
19120 if (crtl->calls_eh_return)
19122 unsigned int i;
19124 for (i = 0; ; i++)
19126 reg = EH_RETURN_DATA_REGNO (i);
19127 if (reg == INVALID_REGNUM)
19128 break;
19129 save_reg_mask |= 1 << reg;
19133 return save_reg_mask;
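/* As an illustration, for a normal ARM-mode function that uses r4 and r5
   and needs a frame pointer (r11), and assuming no PIC base, no stack
   realignment and no EH return, the function above yields
   (1 << 4) | (1 << 5) | (1 << 11) == 0x830.  */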
19136 /* Return true if r3 is live at the start of the function. */
19138 static bool
19139 arm_r3_live_at_start_p (void)
19141 /* Just look at cfg info, which is still close enough to correct at this
19142 point. This gives false positives for broken functions that might use
19143 uninitialized data that happens to be allocated in r3, but who cares? */
19144 return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun)), 3);
19147 /* Compute the number of bytes used to store the static chain register on the
19148 stack, above the stack frame. We need to know this accurately to get the
19149 alignment of the rest of the stack frame correct. */
19151 static int
19152 arm_compute_static_chain_stack_bytes (void)
19154 /* See the defining assertion in arm_expand_prologue. */
19155 if (IS_NESTED (arm_current_func_type ())
19156 && ((TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
19157 || (flag_stack_check == STATIC_BUILTIN_STACK_CHECK
19158 && !df_regs_ever_live_p (LR_REGNUM)))
19159 && arm_r3_live_at_start_p ()
19160 && crtl->args.pretend_args_size == 0)
19161 return 4;
19163 return 0;
19166 /* Compute a bit mask of which core registers need to be
19167 saved on the stack for the current function.
19168 This is used by arm_compute_frame_layout, which may add extra registers. */
19170 static unsigned long
19171 arm_compute_save_core_reg_mask (void)
19173 unsigned int save_reg_mask = 0;
19174 unsigned long func_type = arm_current_func_type ();
19175 unsigned int reg;
19177 if (IS_NAKED (func_type))
19178 /* This should never really happen. */
19179 return 0;
19181 /* If we are creating a stack frame, then we must save the frame pointer,
19182 IP (which will hold the old stack pointer), LR and the PC. */
19183 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
19184 save_reg_mask |=
19185 (1 << ARM_HARD_FRAME_POINTER_REGNUM)
19186 | (1 << IP_REGNUM)
19187 | (1 << LR_REGNUM)
19188 | (1 << PC_REGNUM);
19190 save_reg_mask |= arm_compute_save_reg0_reg12_mask ();
19192 /* Decide if we need to save the link register.
19193 Interrupt routines have their own banked link register,
19194 so they never need to save it.
19195 Otherwise if we do not use the link register we do not need to save
19196 it. If we are pushing other registers onto the stack however, we
19197 can save an instruction in the epilogue by pushing the link register
19198 now and then popping it back into the PC. This incurs extra memory
19199 accesses though, so we only do it when optimizing for size, and only
19200 if we know that we will not need a fancy return sequence. */
19201 if (df_regs_ever_live_p (LR_REGNUM)
19202 || (save_reg_mask
19203 && optimize_size
19204 && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
19205 && !crtl->tail_call_emit
19206 && !crtl->calls_eh_return))
19207 save_reg_mask |= 1 << LR_REGNUM;
19209 if (cfun->machine->lr_save_eliminated)
19210 save_reg_mask &= ~ (1 << LR_REGNUM);
19212 if (TARGET_REALLY_IWMMXT
19213 && ((bit_count (save_reg_mask)
19214 + ARM_NUM_INTS (crtl->args.pretend_args_size +
19215 arm_compute_static_chain_stack_bytes ())
19216 ) % 2) != 0)
19218 /* The total number of registers that are going to be pushed
19219 onto the stack is odd. We need to ensure that the stack
19220 is 64-bit aligned before we start to save iWMMXt registers,
19221 and also before we start to create locals. (A local variable
19222 might be a double or long long which we will load/store using
19223 an iWMMXt instruction). Therefore we need to push another
19224 ARM register, so that the stack will be 64-bit aligned. We
19225 try to avoid using the arg registers (r0 - r3) as they might be
19226 used to pass values in a tail call. */
19227 for (reg = 4; reg <= 12; reg++)
19228 if ((save_reg_mask & (1 << reg)) == 0)
19229 break;
19231 if (reg <= 12)
19232 save_reg_mask |= (1 << reg);
19233 else
19235 cfun->machine->sibcall_blocked = 1;
19236 save_reg_mask |= (1 << 3);
19240 /* We may need to push an additional register for use initializing the
19241 PIC base register. */
19242 if (TARGET_THUMB2 && IS_NESTED (func_type) && flag_pic
19243 && (save_reg_mask & THUMB2_WORK_REGS) == 0)
19245 reg = thumb_find_work_register (1 << 4);
19246 if (!call_used_regs[reg])
19247 save_reg_mask |= (1 << reg);
19250 return save_reg_mask;
19253 /* Compute a bit mask of which core registers need to be
19254 saved on the stack for the current function. */
19255 static unsigned long
19256 thumb1_compute_save_core_reg_mask (void)
19258 unsigned long mask;
19259 unsigned reg;
19261 mask = 0;
19262 for (reg = 0; reg < 12; reg ++)
19263 if (df_regs_ever_live_p (reg) && callee_saved_reg_p (reg))
19264 mask |= 1 << reg;
19266 /* Handle the frame pointer as a special case. */
19267 if (frame_pointer_needed)
19268 mask |= 1 << HARD_FRAME_POINTER_REGNUM;
19270 if (flag_pic
19271 && !TARGET_SINGLE_PIC_BASE
19272 && arm_pic_register != INVALID_REGNUM
19273 && crtl->uses_pic_offset_table)
19274 mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
19276 /* See if we might need r11 for calls to _interwork_r11_call_via_rN(). */
19277 if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
19278 mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;
19280 /* LR will also be pushed if any lo regs are pushed. */
19281 if (mask & 0xff || thumb_force_lr_save ())
19282 mask |= (1 << LR_REGNUM);
19284 /* Make sure we have a low work register if we need one.
19285 We will need one if we are going to push a high register,
19286 but we are not currently intending to push a low register. */
19287 if ((mask & 0xff) == 0
19288 && ((mask & 0x0f00) || TARGET_BACKTRACE))
19290 /* Use thumb_find_work_register to choose which register
19291 we will use. If the register is live then we will
19292 have to push it. Use LAST_LO_REGNUM as our fallback
19293 choice for the register to select. */
19294 reg = thumb_find_work_register (1 << LAST_LO_REGNUM);
19295 /* Make sure the register returned by thumb_find_work_register is
19296 not part of the return value. */
19297 if (reg * UNITS_PER_WORD <= (unsigned) arm_size_return_regs ())
19298 reg = LAST_LO_REGNUM;
19300 if (callee_saved_reg_p (reg))
19301 mask |= 1 << reg;
19304 /* The 504 below is 8 bytes less than 512 because there are two possible
19305 alignment words. We can't tell here if they will be present or not so we
19306 have to play it safe and assume that they are. */
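/* That is, the limit used below is 512 - 2 * UNITS_PER_WORD = 504 bytes.  */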
19307 if ((CALLER_INTERWORKING_SLOT_SIZE +
19308 ROUND_UP_WORD (get_frame_size ()) +
19309 crtl->outgoing_args_size) >= 504)
19311 /* This is the same as the code in thumb1_expand_prologue() which
19312 determines which register to use for stack decrement. */
19313 for (reg = LAST_ARG_REGNUM + 1; reg <= LAST_LO_REGNUM; reg++)
19314 if (mask & (1 << reg))
19315 break;
19317 if (reg > LAST_LO_REGNUM)
19319 /* Make sure we have a register available for stack decrement. */
19320 mask |= 1 << LAST_LO_REGNUM;
19324 return mask;
19328 /* Return the number of bytes required to save VFP registers. */
19329 static int
19330 arm_get_vfp_saved_size (void)
19332 unsigned int regno;
19333 int count;
19334 int saved;
19336 saved = 0;
19337 /* Space for saved VFP registers. */
19338 if (TARGET_HARD_FLOAT)
19340 count = 0;
19341 for (regno = FIRST_VFP_REGNUM;
19342 regno < LAST_VFP_REGNUM;
19343 regno += 2)
19345 if ((!df_regs_ever_live_p (regno) || call_used_regs[regno])
19346 && (!df_regs_ever_live_p (regno + 1) || call_used_regs[regno + 1]))
19348 if (count > 0)
19350 /* Workaround ARM10 VFPr1 bug. */
19351 if (count == 2 && !arm_arch6)
19352 count++;
19353 saved += count * 8;
19355 count = 0;
19357 else
19358 count++;
19360 if (count > 0)
19362 if (count == 2 && !arm_arch6)
19363 count++;
19364 saved += count * 8;
19367 return saved;
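/* For example, if d8-d11 are the only call-saved VFP registers that are
   live, this returns 4 * 8 = 32 bytes; on a core without arm_arch6 a
   contiguous block of exactly two D registers is padded to three (24 bytes)
   because of the ARM10 VFPr1 workaround above.  */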
19371 /* Generate a function exit sequence. If REALLY_RETURN is false, then do
19372 everything bar the final return instruction. If simple_return is true,
19373 then do not output epilogue, because it has already been emitted in RTL. */
19374 const char *
19375 output_return_instruction (rtx operand, bool really_return, bool reverse,
19376 bool simple_return)
19378 char conditional[10];
19379 char instr[100];
19380 unsigned reg;
19381 unsigned long live_regs_mask;
19382 unsigned long func_type;
19383 arm_stack_offsets *offsets;
19385 func_type = arm_current_func_type ();
19387 if (IS_NAKED (func_type))
19388 return "";
19390 if (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN)
19392 /* If this function was declared non-returning, and we have
19393 found a tail call, then we have to trust that the called
19394 function won't return. */
19395 if (really_return)
19397 rtx ops[2];
19399 /* Otherwise, trap an attempted return by aborting. */
19400 ops[0] = operand;
19401 ops[1] = gen_rtx_SYMBOL_REF (Pmode, NEED_PLT_RELOC ? "abort(PLT)"
19402 : "abort");
19403 assemble_external_libcall (ops[1]);
19404 output_asm_insn (reverse ? "bl%D0\t%a1" : "bl%d0\t%a1", ops);
19407 return "";
19410 gcc_assert (!cfun->calls_alloca || really_return);
19412 sprintf (conditional, "%%?%%%c0", reverse ? 'D' : 'd');
19414 cfun->machine->return_used_this_function = 1;
19416 offsets = arm_get_frame_offsets ();
19417 live_regs_mask = offsets->saved_regs_mask;
19419 if (!simple_return && live_regs_mask)
19421 const char * return_reg;
19423 /* If we do not have any special requirements for function exit
19424 (e.g. interworking) then we can load the return address
19425 directly into the PC. Otherwise we must load it into LR. */
19426 if (really_return
19427 && !IS_CMSE_ENTRY (func_type)
19428 && (IS_INTERRUPT (func_type) || !TARGET_INTERWORK))
19429 return_reg = reg_names[PC_REGNUM];
19430 else
19431 return_reg = reg_names[LR_REGNUM];
19433 if ((live_regs_mask & (1 << IP_REGNUM)) == (1 << IP_REGNUM))
19435 /* There are three possible reasons for the IP register
19436 being saved. 1) a stack frame was created, in which case
19437 IP contains the old stack pointer, or 2) an ISR routine
19438 corrupted it, or 3) it was saved to align the stack on
19439 iWMMXt. In case 1, restore IP into SP, otherwise just
19440 restore IP. */
19441 if (frame_pointer_needed)
19443 live_regs_mask &= ~ (1 << IP_REGNUM);
19444 live_regs_mask |= (1 << SP_REGNUM);
19446 else
19447 gcc_assert (IS_INTERRUPT (func_type) || TARGET_REALLY_IWMMXT);
19450 /* On some ARM architectures it is faster to use LDR rather than
19451 LDM to load a single register. On other architectures, the
19452 cost is the same. In 26 bit mode, or for exception handlers,
19453 we have to use LDM to load the PC so that the CPSR is also
19454 restored. */
19455 for (reg = 0; reg <= LAST_ARM_REGNUM; reg++)
19456 if (live_regs_mask == (1U << reg))
19457 break;
19459 if (reg <= LAST_ARM_REGNUM
19460 && (reg != LR_REGNUM
19461 || ! really_return
19462 || ! IS_INTERRUPT (func_type)))
19464 sprintf (instr, "ldr%s\t%%|%s, [%%|sp], #4", conditional,
19465 (reg == LR_REGNUM) ? return_reg : reg_names[reg]);
19467 else
19469 char *p;
19470 int first = 1;
19472 /* Generate the load multiple instruction to restore the
19473 registers. Note we can get here, even if
19474 frame_pointer_needed is true, but only if sp already
19475 points to the base of the saved core registers. */
19476 if (live_regs_mask & (1 << SP_REGNUM))
19478 unsigned HOST_WIDE_INT stack_adjust;
19480 stack_adjust = offsets->outgoing_args - offsets->saved_regs;
19481 gcc_assert (stack_adjust == 0 || stack_adjust == 4);
19483 if (stack_adjust && arm_arch5 && TARGET_ARM)
19484 sprintf (instr, "ldmib%s\t%%|sp, {", conditional);
19485 else
19487 /* If we can't use ldmib (SA110 bug),
19488 then try to pop r3 instead. */
19489 if (stack_adjust)
19490 live_regs_mask |= 1 << 3;
19492 sprintf (instr, "ldmfd%s\t%%|sp, {", conditional);
19495 /* For interrupt returns we have to use an LDM rather than
19496 a POP so that we can use the exception return variant. */
19497 else if (IS_INTERRUPT (func_type))
19498 sprintf (instr, "ldmfd%s\t%%|sp!, {", conditional);
19499 else
19500 sprintf (instr, "pop%s\t{", conditional);
19502 p = instr + strlen (instr);
19504 for (reg = 0; reg <= SP_REGNUM; reg++)
19505 if (live_regs_mask & (1 << reg))
19507 int l = strlen (reg_names[reg]);
19509 if (first)
19510 first = 0;
19511 else
19513 memcpy (p, ", ", 2);
19514 p += 2;
19517 memcpy (p, "%|", 2);
19518 memcpy (p + 2, reg_names[reg], l);
19519 p += l + 2;
19522 if (live_regs_mask & (1 << LR_REGNUM))
19524 sprintf (p, "%s%%|%s}", first ? "" : ", ", return_reg);
19525 /* If returning from an interrupt, restore the CPSR. */
19526 if (IS_INTERRUPT (func_type))
19527 strcat (p, "^");
19529 else
19530 strcpy (p, "}");
19533 output_asm_insn (instr, & operand);
19535 /* See if we need to generate an extra instruction to
19536 perform the actual function return. */
19537 if (really_return
19538 && func_type != ARM_FT_INTERWORKED
19539 && (live_regs_mask & (1 << LR_REGNUM)) != 0)
19541 /* The return has already been handled
19542 by loading the LR into the PC. */
19543 return "";
19547 if (really_return)
19549 switch ((int) ARM_FUNC_TYPE (func_type))
19551 case ARM_FT_ISR:
19552 case ARM_FT_FIQ:
19553 /* ??? This is wrong for unified assembly syntax. */
19554 sprintf (instr, "sub%ss\t%%|pc, %%|lr, #4", conditional);
19555 break;
19557 case ARM_FT_INTERWORKED:
19558 gcc_assert (arm_arch5 || arm_arch4t);
19559 sprintf (instr, "bx%s\t%%|lr", conditional);
19560 break;
19562 case ARM_FT_EXCEPTION:
19563 /* ??? This is wrong for unified assembly syntax. */
19564 sprintf (instr, "mov%ss\t%%|pc, %%|lr", conditional);
19565 break;
19567 default:
19568 if (IS_CMSE_ENTRY (func_type))
19570 /* Check if we have to clear the 'GE bits', which are only used if
19571 parallel add and subtract instructions are available. */
19572 if (TARGET_INT_SIMD)
19573 snprintf (instr, sizeof (instr),
19574 "msr%s\tAPSR_nzcvqg, %%|lr", conditional);
19575 else
19576 snprintf (instr, sizeof (instr),
19577 "msr%s\tAPSR_nzcvq, %%|lr", conditional);
19579 output_asm_insn (instr, & operand);
19580 if (TARGET_HARD_FLOAT && !TARGET_THUMB1)
19582 /* Clear the cumulative exception-status bits (0-4,7) and the
19583 condition code bits (28-31) of the FPSCR. We need to
19584 remember to clear the first scratch register used (IP) and
19585 save and restore the second (r4). */
19586 snprintf (instr, sizeof (instr), "push\t{%%|r4}");
19587 output_asm_insn (instr, & operand);
19588 snprintf (instr, sizeof (instr), "vmrs\t%%|ip, fpscr");
19589 output_asm_insn (instr, & operand);
19590 snprintf (instr, sizeof (instr), "movw\t%%|r4, #65376");
19591 output_asm_insn (instr, & operand);
19592 snprintf (instr, sizeof (instr), "movt\t%%|r4, #4095");
19593 output_asm_insn (instr, & operand);
19594 snprintf (instr, sizeof (instr), "and\t%%|ip, %%|r4");
19595 output_asm_insn (instr, & operand);
19596 snprintf (instr, sizeof (instr), "vmsr\tfpscr, %%|ip");
19597 output_asm_insn (instr, & operand);
19598 snprintf (instr, sizeof (instr), "pop\t{%%|r4}");
19599 output_asm_insn (instr, & operand);
19600 snprintf (instr, sizeof (instr), "mov\t%%|ip, %%|lr");
19601 output_asm_insn (instr, & operand);
19603 snprintf (instr, sizeof (instr), "bxns\t%%|lr");
19605 /* Use bx if it's available. */
19606 else if (arm_arch5 || arm_arch4t)
19607 sprintf (instr, "bx%s\t%%|lr", conditional);
19608 else
19609 sprintf (instr, "mov%s\t%%|pc, %%|lr", conditional);
19610 break;
19613 output_asm_insn (instr, & operand);
19616 return "";
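/* As a rough example, for an ARM-mode function without interworking that
   saved only r4 and LR and is not an interrupt handler, the code above
   prints "pop {r4, pc}" and returns "" without emitting a separate branch.  */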
19619 /* Output in FILE asm statements needed to declare the NAME of the function
19620 defined by its DECL node. */
19622 void
19623 arm_asm_declare_function_name (FILE *file, const char *name, tree decl)
19625 size_t cmse_name_len;
19626 char *cmse_name = 0;
19627 char cmse_prefix[] = "__acle_se_";
19629 /* When compiling with ARMv8-M Security Extensions enabled, we should print an
19630 extra function label for each function with the 'cmse_nonsecure_entry'
19631 attribute. This extra function label should be prepended with
19632 '__acle_se_', telling the linker that it needs to create secure gateway
19633 veneers for this function. */
19634 if (use_cmse && lookup_attribute ("cmse_nonsecure_entry",
19635 DECL_ATTRIBUTES (decl)))
19637 cmse_name_len = sizeof (cmse_prefix) + strlen (name);
19638 cmse_name = XALLOCAVEC (char, cmse_name_len);
19639 snprintf (cmse_name, cmse_name_len, "%s%s", cmse_prefix, name);
19640 targetm.asm_out.globalize_label (file, cmse_name);
19642 ARM_DECLARE_FUNCTION_NAME (file, cmse_name, decl);
19643 ASM_OUTPUT_TYPE_DIRECTIVE (file, cmse_name, "function");
19646 ARM_DECLARE_FUNCTION_NAME (file, name, decl);
19647 ASM_OUTPUT_TYPE_DIRECTIVE (file, name, "function");
19648 ASM_DECLARE_RESULT (file, DECL_RESULT (decl));
19649 ASM_OUTPUT_LABEL (file, name);
19651 if (cmse_name)
19652 ASM_OUTPUT_LABEL (file, cmse_name);
19654 ARM_OUTPUT_FN_UNWIND (file, TRUE);
19657 /* Write the function name into the code section, directly preceding
19658 the function prologue.
19660 Code will be output similar to this:
19662 .ascii "arm_poke_function_name", 0
19663 .align
19665 .word 0xff000000 + (t1 - t0)
19666 arm_poke_function_name
19667 mov ip, sp
19668 stmfd sp!, {fp, ip, lr, pc}
19669 sub fp, ip, #4
19671 When performing a stack backtrace, code can inspect the value
19672 of 'pc' stored at 'fp' + 0. If the trace function then looks
19673 at location pc - 12 and the top 8 bits are set, then we know
19674 that there is a function name embedded immediately preceding this
19675 location, whose length is given by ((pc[-3]) & ~0xff000000).
19677 We assume that pc is declared as a pointer to an unsigned long.
19679 It is of no benefit to output the function name if we are assembling
19680 a leaf function. These function types will not contain a stack
19681 backtrace structure, so it is not possible to determine the
19682 function name. */
19683 void
19684 arm_poke_function_name (FILE *stream, const char *name)
19686 unsigned long alignlength;
19687 unsigned long length;
19688 rtx x;
19690 length = strlen (name) + 1;
19691 alignlength = ROUND_UP_WORD (length);
19693 ASM_OUTPUT_ASCII (stream, name, length);
19694 ASM_OUTPUT_ALIGN (stream, 2);
19695 x = GEN_INT ((unsigned HOST_WIDE_INT) 0xff000000 + alignlength);
19696 assemble_aligned_integer (UNITS_PER_WORD, x);
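/* For instance, arm_poke_function_name (stream, "foo") emits the four bytes
   "foo\0", aligns to a word boundary and then emits the marker word
   0xff000004 (0xff000000 plus the rounded-up name length).  */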
19699 /* Place some comments into the assembler stream
19700 describing the current function. */
19701 static void
19702 arm_output_function_prologue (FILE *f)
19704 unsigned long func_type;
19706 /* Sanity check. */
19707 gcc_assert (!arm_ccfsm_state && !arm_target_insn);
19709 func_type = arm_current_func_type ();
19711 switch ((int) ARM_FUNC_TYPE (func_type))
19713 default:
19714 case ARM_FT_NORMAL:
19715 break;
19716 case ARM_FT_INTERWORKED:
19717 asm_fprintf (f, "\t%@ Function supports interworking.\n");
19718 break;
19719 case ARM_FT_ISR:
19720 asm_fprintf (f, "\t%@ Interrupt Service Routine.\n");
19721 break;
19722 case ARM_FT_FIQ:
19723 asm_fprintf (f, "\t%@ Fast Interrupt Service Routine.\n");
19724 break;
19725 case ARM_FT_EXCEPTION:
19726 asm_fprintf (f, "\t%@ ARM Exception Handler.\n");
19727 break;
19730 if (IS_NAKED (func_type))
19731 asm_fprintf (f, "\t%@ Naked Function: prologue and epilogue provided by programmer.\n");
19733 if (IS_VOLATILE (func_type))
19734 asm_fprintf (f, "\t%@ Volatile: function does not return.\n");
19736 if (IS_NESTED (func_type))
19737 asm_fprintf (f, "\t%@ Nested: function declared inside another function.\n");
19738 if (IS_STACKALIGN (func_type))
19739 asm_fprintf (f, "\t%@ Stack Align: May be called with mis-aligned SP.\n");
19740 if (IS_CMSE_ENTRY (func_type))
19741 asm_fprintf (f, "\t%@ Non-secure entry function: called from non-secure code.\n");
19743 asm_fprintf (f, "\t%@ args = %d, pretend = %d, frame = %wd\n",
19744 crtl->args.size,
19745 crtl->args.pretend_args_size,
19746 (HOST_WIDE_INT) get_frame_size ());
19748 asm_fprintf (f, "\t%@ frame_needed = %d, uses_anonymous_args = %d\n",
19749 frame_pointer_needed,
19750 cfun->machine->uses_anonymous_args);
19752 if (cfun->machine->lr_save_eliminated)
19753 asm_fprintf (f, "\t%@ link register save eliminated.\n");
19755 if (crtl->calls_eh_return)
19756 asm_fprintf (f, "\t@ Calls __builtin_eh_return.\n");
19760 static void
19761 arm_output_function_epilogue (FILE *)
19763 arm_stack_offsets *offsets;
19765 if (TARGET_THUMB1)
19767 int regno;
19769 /* Emit any call-via-reg trampolines that are needed for v4t support
19770 of call_reg and call_value_reg type insns. */
19771 for (regno = 0; regno < LR_REGNUM; regno++)
19773 rtx label = cfun->machine->call_via[regno];
19775 if (label != NULL)
19777 switch_to_section (function_section (current_function_decl));
19778 targetm.asm_out.internal_label (asm_out_file, "L",
19779 CODE_LABEL_NUMBER (label));
19780 asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
19784 /* ??? Probably not safe to set this here, since it assumes that a
19785 function will be emitted as assembly immediately after we generate
19786 RTL for it. This does not happen for inline functions. */
19787 cfun->machine->return_used_this_function = 0;
19789 else /* TARGET_32BIT */
19791 /* We need to take into account any stack-frame rounding. */
19792 offsets = arm_get_frame_offsets ();
19794 gcc_assert (!use_return_insn (FALSE, NULL)
19795 || (cfun->machine->return_used_this_function != 0)
19796 || offsets->saved_regs == offsets->outgoing_args
19797 || frame_pointer_needed);
19801 /* Generate and emit a sequence of insns equivalent to PUSH, but using
19802 STR and STRD.  If an even number of registers is being pushed, an STRD
19803 pattern is created for each register pair.  If an odd number of registers
19804 is pushed, an initial STR is emitted, followed by as many STRD
19805 instructions as are needed.  This works best when the
19806 stack is initially 64-bit aligned (the normal case), since it
19807 ensures that each STRD is also 64-bit aligned. */
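/* For example, with SAVED_REGS_MASK covering {r4, r5, r6} the code below
   emits roughly:
	str	r4, [sp, #-12]!
	strd	r5, r6, [sp, #4]
   together with a DWARF note describing the 12-byte SP adjustment and the
   three individual stores.  */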
19808 static void
19809 thumb2_emit_strd_push (unsigned long saved_regs_mask)
19811 int num_regs = 0;
19812 int i;
19813 int regno;
19814 rtx par = NULL_RTX;
19815 rtx dwarf = NULL_RTX;
19816 rtx tmp;
19817 bool first = true;
19819 num_regs = bit_count (saved_regs_mask);
19821 /* Must be at least one register to save, and can't save SP or PC. */
19822 gcc_assert (num_regs > 0 && num_regs <= 14);
19823 gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
19824 gcc_assert (!(saved_regs_mask & (1 << PC_REGNUM)));
19826 /* Create sequence for DWARF info. All the frame-related data for
19827 debugging is held in this wrapper. */
19828 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_regs + 1));
19830 /* Describe the stack adjustment. */
19831 tmp = gen_rtx_SET (stack_pointer_rtx,
19832 plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
19833 RTX_FRAME_RELATED_P (tmp) = 1;
19834 XVECEXP (dwarf, 0, 0) = tmp;
19836 /* Find the first register. */
19837 for (regno = 0; (saved_regs_mask & (1 << regno)) == 0; regno++)
19840 i = 0;
19842 /* If there's an odd number of registers to push, start off by
19843 pushing a single register. This ensures that subsequent strd
19844 operations are dword aligned (assuming that SP was originally
19845 64-bit aligned). */
19846 if ((num_regs & 1) != 0)
19848 rtx reg, mem, insn;
19850 reg = gen_rtx_REG (SImode, regno);
19851 if (num_regs == 1)
19852 mem = gen_frame_mem (Pmode, gen_rtx_PRE_DEC (Pmode,
19853 stack_pointer_rtx));
19854 else
19855 mem = gen_frame_mem (Pmode,
19856 gen_rtx_PRE_MODIFY
19857 (Pmode, stack_pointer_rtx,
19858 plus_constant (Pmode, stack_pointer_rtx,
19859 -4 * num_regs)));
19861 tmp = gen_rtx_SET (mem, reg);
19862 RTX_FRAME_RELATED_P (tmp) = 1;
19863 insn = emit_insn (tmp);
19864 RTX_FRAME_RELATED_P (insn) = 1;
19865 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
19866 tmp = gen_rtx_SET (gen_frame_mem (Pmode, stack_pointer_rtx), reg);
19867 RTX_FRAME_RELATED_P (tmp) = 1;
19868 i++;
19869 regno++;
19870 XVECEXP (dwarf, 0, i) = tmp;
19871 first = false;
19874 while (i < num_regs)
19875 if (saved_regs_mask & (1 << regno))
19877 rtx reg1, reg2, mem1, mem2;
19878 rtx tmp0, tmp1, tmp2;
19879 int regno2;
19881 /* Find the register to pair with this one. */
19882 for (regno2 = regno + 1; (saved_regs_mask & (1 << regno2)) == 0;
19883 regno2++)
19886 reg1 = gen_rtx_REG (SImode, regno);
19887 reg2 = gen_rtx_REG (SImode, regno2);
19889 if (first)
19891 rtx insn;
19893 first = false;
19894 mem1 = gen_frame_mem (Pmode, plus_constant (Pmode,
19895 stack_pointer_rtx,
19896 -4 * num_regs));
19897 mem2 = gen_frame_mem (Pmode, plus_constant (Pmode,
19898 stack_pointer_rtx,
19899 -4 * (num_regs - 1)));
19900 tmp0 = gen_rtx_SET (stack_pointer_rtx,
19901 plus_constant (Pmode, stack_pointer_rtx,
19902 -4 * (num_regs)));
19903 tmp1 = gen_rtx_SET (mem1, reg1);
19904 tmp2 = gen_rtx_SET (mem2, reg2);
19905 RTX_FRAME_RELATED_P (tmp0) = 1;
19906 RTX_FRAME_RELATED_P (tmp1) = 1;
19907 RTX_FRAME_RELATED_P (tmp2) = 1;
19908 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (3));
19909 XVECEXP (par, 0, 0) = tmp0;
19910 XVECEXP (par, 0, 1) = tmp1;
19911 XVECEXP (par, 0, 2) = tmp2;
19912 insn = emit_insn (par);
19913 RTX_FRAME_RELATED_P (insn) = 1;
19914 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
19916 else
19918 mem1 = gen_frame_mem (Pmode, plus_constant (Pmode,
19919 stack_pointer_rtx,
19920 4 * i));
19921 mem2 = gen_frame_mem (Pmode, plus_constant (Pmode,
19922 stack_pointer_rtx,
19923 4 * (i + 1)));
19924 tmp1 = gen_rtx_SET (mem1, reg1);
19925 tmp2 = gen_rtx_SET (mem2, reg2);
19926 RTX_FRAME_RELATED_P (tmp1) = 1;
19927 RTX_FRAME_RELATED_P (tmp2) = 1;
19928 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
19929 XVECEXP (par, 0, 0) = tmp1;
19930 XVECEXP (par, 0, 1) = tmp2;
19931 emit_insn (par);
19934 /* Create unwind information. This is an approximation. */
19935 tmp1 = gen_rtx_SET (gen_frame_mem (Pmode,
19936 plus_constant (Pmode,
19937 stack_pointer_rtx,
19938 4 * i)),
19939 reg1);
19940 tmp2 = gen_rtx_SET (gen_frame_mem (Pmode,
19941 plus_constant (Pmode,
19942 stack_pointer_rtx,
19943 4 * (i + 1))),
19944 reg2);
19946 RTX_FRAME_RELATED_P (tmp1) = 1;
19947 RTX_FRAME_RELATED_P (tmp2) = 1;
19948 XVECEXP (dwarf, 0, i + 1) = tmp1;
19949 XVECEXP (dwarf, 0, i + 2) = tmp2;
19950 i += 2;
19951 regno = regno2 + 1;
19953 else
19954 regno++;
19956 return;
19959 /* STRD in ARM mode requires consecutive registers. This function emits STRD
19960 whenever possible, otherwise it emits single-word stores. The first store
19961 also allocates stack space for all saved registers, using writeback with
19962 post-addressing mode. All other stores use offset addressing. If no STRD
19963 can be emitted, this function emits a sequence of single-word stores,
19964 and not an STM as before, because single-word stores give the scheduler more
19965 freedom and can be turned into an STM by peephole optimizations. */
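/* For example, with SAVED_REGS_MASK covering {r4, r5, r7} this emits
   roughly:
	strd	r4, r5, [sp, #-12]!
	str	r7, [sp, #8]
   i.e. the first store allocates the whole 12-byte area using writeback and
   the remaining store uses plain offset addressing.  */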
19966 static void
19967 arm_emit_strd_push (unsigned long saved_regs_mask)
19969 int num_regs = 0;
19970 int i, j, dwarf_index = 0;
19971 int offset = 0;
19972 rtx dwarf = NULL_RTX;
19973 rtx insn = NULL_RTX;
19974 rtx tmp, mem;
19976 /* TODO: More efficient code can be emitted by changing the
19977 layout, e.g., first push all pairs that can use STRD to keep the
19978 stack aligned, and then push all other registers. */
19979 for (i = 0; i <= LAST_ARM_REGNUM; i++)
19980 if (saved_regs_mask & (1 << i))
19981 num_regs++;
19983 gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
19984 gcc_assert (!(saved_regs_mask & (1 << PC_REGNUM)));
19985 gcc_assert (num_regs > 0);
19987 /* Create sequence for DWARF info. */
19988 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_regs + 1));
19990 /* For dwarf info, we generate an explicit stack update. */
19991 tmp = gen_rtx_SET (stack_pointer_rtx,
19992 plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
19993 RTX_FRAME_RELATED_P (tmp) = 1;
19994 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
19996 /* Save registers. */
19997 offset = - 4 * num_regs;
19998 j = 0;
19999 while (j <= LAST_ARM_REGNUM)
20000 if (saved_regs_mask & (1 << j))
20002 if ((j % 2 == 0)
20003 && (saved_regs_mask & (1 << (j + 1))))
20005 /* The current register and the next register form a register pair
20006 for which an STRD can be generated. */
20007 if (offset < 0)
20009 /* Allocate stack space for all saved registers. */
20010 tmp = plus_constant (Pmode, stack_pointer_rtx, offset);
20011 tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
20012 mem = gen_frame_mem (DImode, tmp);
20013 offset = 0;
20015 else if (offset > 0)
20016 mem = gen_frame_mem (DImode,
20017 plus_constant (Pmode,
20018 stack_pointer_rtx,
20019 offset));
20020 else
20021 mem = gen_frame_mem (DImode, stack_pointer_rtx);
20023 tmp = gen_rtx_SET (mem, gen_rtx_REG (DImode, j));
20024 RTX_FRAME_RELATED_P (tmp) = 1;
20025 tmp = emit_insn (tmp);
20027 /* Record the first store insn. */
20028 if (dwarf_index == 1)
20029 insn = tmp;
20031 /* Generate dwarf info. */
20032 mem = gen_frame_mem (SImode,
20033 plus_constant (Pmode,
20034 stack_pointer_rtx,
20035 offset));
20036 tmp = gen_rtx_SET (mem, gen_rtx_REG (SImode, j));
20037 RTX_FRAME_RELATED_P (tmp) = 1;
20038 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
20040 mem = gen_frame_mem (SImode,
20041 plus_constant (Pmode,
20042 stack_pointer_rtx,
20043 offset + 4));
20044 tmp = gen_rtx_SET (mem, gen_rtx_REG (SImode, j + 1));
20045 RTX_FRAME_RELATED_P (tmp) = 1;
20046 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
20048 offset += 8;
20049 j += 2;
20051 else
20053 /* Emit a single word store. */
20054 if (offset < 0)
20056 /* Allocate stack space for all saved registers. */
20057 tmp = plus_constant (Pmode, stack_pointer_rtx, offset);
20058 tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
20059 mem = gen_frame_mem (SImode, tmp);
20060 offset = 0;
20062 else if (offset > 0)
20063 mem = gen_frame_mem (SImode,
20064 plus_constant (Pmode,
20065 stack_pointer_rtx,
20066 offset));
20067 else
20068 mem = gen_frame_mem (SImode, stack_pointer_rtx);
20070 tmp = gen_rtx_SET (mem, gen_rtx_REG (SImode, j));
20071 RTX_FRAME_RELATED_P (tmp) = 1;
20072 tmp = emit_insn (tmp);
20074 /* Record the first store insn. */
20075 if (dwarf_index == 1)
20076 insn = tmp;
20078 /* Generate dwarf info. */
20079 mem = gen_frame_mem (SImode,
20080 plus_constant (Pmode,
20081 stack_pointer_rtx,
20082 offset));
20083 tmp = gen_rtx_SET (mem, gen_rtx_REG (SImode, j));
20084 RTX_FRAME_RELATED_P (tmp) = 1;
20085 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
20087 offset += 4;
20088 j += 1;
20091 else
20092 j++;
20094 /* Attach dwarf info to the first insn we generate. */
20095 gcc_assert (insn != NULL_RTX);
20096 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
20097 RTX_FRAME_RELATED_P (insn) = 1;
20100 /* Generate and emit an insn that we will recognize as a push_multi.
20101 Unfortunately, since this insn does not reflect very well the actual
20102 semantics of the operation, we need to annotate the insn for the benefit
20103 of DWARF2 frame unwind information. DWARF_REGS_MASK is a subset of
20104 MASK for registers that should be annotated for DWARF2 frame unwind
20105 information. */
20106 static rtx
20107 emit_multi_reg_push (unsigned long mask, unsigned long dwarf_regs_mask)
20109 int num_regs = 0;
20110 int num_dwarf_regs = 0;
20111 int i, j;
20112 rtx par;
20113 rtx dwarf;
20114 int dwarf_par_index;
20115 rtx tmp, reg;
20117 /* We don't record the PC in the dwarf frame information. */
20118 dwarf_regs_mask &= ~(1 << PC_REGNUM);
20120 for (i = 0; i <= LAST_ARM_REGNUM; i++)
20122 if (mask & (1 << i))
20123 num_regs++;
20124 if (dwarf_regs_mask & (1 << i))
20125 num_dwarf_regs++;
20128 gcc_assert (num_regs && num_regs <= 16);
20129 gcc_assert ((dwarf_regs_mask & ~mask) == 0);
20131 /* For the body of the insn we are going to generate an UNSPEC in
20132 parallel with several USEs. This allows the insn to be recognized
20133 by the push_multi pattern in the arm.md file.
20135 The body of the insn looks something like this:
20137 (parallel [
20138 (set (mem:BLK (pre_modify:SI (reg:SI sp)
20139 (const_int:SI <num>)))
20140 (unspec:BLK [(reg:SI r4)] UNSPEC_PUSH_MULT))
20141 (use (reg:SI XX))
20142 (use (reg:SI YY))
20146 For the frame note however, we try to be more explicit and actually
20147 show each register being stored into the stack frame, plus a (single)
20148 decrement of the stack pointer. We do it this way in order to be
20149 friendly to the stack unwinding code, which only wants to see a single
20150 stack decrement per instruction. The RTL we generate for the note looks
20151 something like this:
20153 (sequence [
20154 (set (reg:SI sp) (plus:SI (reg:SI sp) (const_int -20)))
20155 (set (mem:SI (reg:SI sp)) (reg:SI r4))
20156 (set (mem:SI (plus:SI (reg:SI sp) (const_int 4))) (reg:SI XX))
20157 (set (mem:SI (plus:SI (reg:SI sp) (const_int 8))) (reg:SI YY))
20161 FIXME: In an ideal world the PRE_MODIFY would not exist and
20162 instead we'd have a parallel expression detailing all
20163 the stores to the various memory addresses so that debug
20164 information is more up-to-date. Remember however while writing
20165 this to take care of the constraints with the push instruction.
20167 Note also that this has to be taken care of for the VFP registers.
20169 For more see PR43399. */
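/* As a concrete case, a MASK of {r4, r5, lr} (with DWARF_REGS_MASK equal to
   MASK) yields an insn that assembles to a single push of r4, r5 and lr,
   while the attached note records sp = sp - 12 followed by the three
   individual word stores.  */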
20171 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs));
20172 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_dwarf_regs + 1));
20173 dwarf_par_index = 1;
20175 for (i = 0; i <= LAST_ARM_REGNUM; i++)
20177 if (mask & (1 << i))
20179 reg = gen_rtx_REG (SImode, i);
20181 XVECEXP (par, 0, 0)
20182 = gen_rtx_SET (gen_frame_mem
20183 (BLKmode,
20184 gen_rtx_PRE_MODIFY (Pmode,
20185 stack_pointer_rtx,
20186 plus_constant
20187 (Pmode, stack_pointer_rtx,
20188 -4 * num_regs))
20190 gen_rtx_UNSPEC (BLKmode,
20191 gen_rtvec (1, reg),
20192 UNSPEC_PUSH_MULT));
20194 if (dwarf_regs_mask & (1 << i))
20196 tmp = gen_rtx_SET (gen_frame_mem (SImode, stack_pointer_rtx),
20197 reg);
20198 RTX_FRAME_RELATED_P (tmp) = 1;
20199 XVECEXP (dwarf, 0, dwarf_par_index++) = tmp;
20202 break;
20206 for (j = 1, i++; j < num_regs; i++)
20208 if (mask & (1 << i))
20210 reg = gen_rtx_REG (SImode, i);
20212 XVECEXP (par, 0, j) = gen_rtx_USE (VOIDmode, reg);
20214 if (dwarf_regs_mask & (1 << i))
20217 = gen_rtx_SET (gen_frame_mem
20218 (SImode,
20219 plus_constant (Pmode, stack_pointer_rtx,
20220 4 * j)),
20221 reg);
20222 RTX_FRAME_RELATED_P (tmp) = 1;
20223 XVECEXP (dwarf, 0, dwarf_par_index++) = tmp;
20226 j++;
20230 par = emit_insn (par);
20232 tmp = gen_rtx_SET (stack_pointer_rtx,
20233 plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
20234 RTX_FRAME_RELATED_P (tmp) = 1;
20235 XVECEXP (dwarf, 0, 0) = tmp;
20237 add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
20239 return par;
20242 /* Add a REG_CFA_ADJUST_CFA REG note to INSN.
20243 SIZE is the offset to be adjusted.
20244 DEST and SRC might be stack_pointer_rtx or hard_frame_pointer_rtx. */
20245 static void
20246 arm_add_cfa_adjust_cfa_note (rtx insn, int size, rtx dest, rtx src)
20248 rtx dwarf;
20250 RTX_FRAME_RELATED_P (insn) = 1;
20251 dwarf = gen_rtx_SET (dest, plus_constant (Pmode, src, size));
20252 add_reg_note (insn, REG_CFA_ADJUST_CFA, dwarf);
20255 /* Generate and emit an insn pattern that we will recognize as a pop_multi.
20256 SAVED_REGS_MASK shows which registers need to be restored.
20258 Unfortunately, since this insn does not reflect very well the actual
20259 semantics of the operation, we need to annotate the insn for the benefit
20260 of DWARF2 frame unwind information. */
20261 static void
20262 arm_emit_multi_reg_pop (unsigned long saved_regs_mask)
20264 int num_regs = 0;
20265 int i, j;
20266 rtx par;
20267 rtx dwarf = NULL_RTX;
20268 rtx tmp, reg;
20269 bool return_in_pc = saved_regs_mask & (1 << PC_REGNUM);
20270 int offset_adj;
20271 int emit_update;
20273 offset_adj = return_in_pc ? 1 : 0;
20274 for (i = 0; i <= LAST_ARM_REGNUM; i++)
20275 if (saved_regs_mask & (1 << i))
20276 num_regs++;
20278 gcc_assert (num_regs && num_regs <= 16);
20280 /* If SP is in reglist, then we don't emit SP update insn. */
20281 emit_update = (saved_regs_mask & (1 << SP_REGNUM)) ? 0 : 1;
20283 /* The parallel needs to hold num_regs SETs
20284 and one SET for the stack update. */
20285 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs + emit_update + offset_adj));
20287 if (return_in_pc)
20288 XVECEXP (par, 0, 0) = ret_rtx;
20290 if (emit_update)
20292 /* Increment the stack pointer, based on there being
20293 num_regs 4-byte registers to restore. */
20294 tmp = gen_rtx_SET (stack_pointer_rtx,
20295 plus_constant (Pmode,
20296 stack_pointer_rtx,
20297 4 * num_regs));
20298 RTX_FRAME_RELATED_P (tmp) = 1;
20299 XVECEXP (par, 0, offset_adj) = tmp;
20302 /* Now restore every reg, which may include PC. */
20303 for (j = 0, i = 0; j < num_regs; i++)
20304 if (saved_regs_mask & (1 << i))
20306 reg = gen_rtx_REG (SImode, i);
20307 if ((num_regs == 1) && emit_update && !return_in_pc)
20309 /* Emit single load with writeback. */
20310 tmp = gen_frame_mem (SImode,
20311 gen_rtx_POST_INC (Pmode,
20312 stack_pointer_rtx));
20313 tmp = emit_insn (gen_rtx_SET (reg, tmp));
20314 REG_NOTES (tmp) = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20315 return;
20318 tmp = gen_rtx_SET (reg,
20319 gen_frame_mem
20320 (SImode,
20321 plus_constant (Pmode, stack_pointer_rtx, 4 * j)));
20322 RTX_FRAME_RELATED_P (tmp) = 1;
20323 XVECEXP (par, 0, j + emit_update + offset_adj) = tmp;
20325 /* We need to maintain a sequence for DWARF info too. As dwarf info
20326 should not have PC, skip PC. */
20327 if (i != PC_REGNUM)
20328 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20330 j++;
20333 if (return_in_pc)
20334 par = emit_jump_insn (par);
20335 else
20336 par = emit_insn (par);
20338 REG_NOTES (par) = dwarf;
20339 if (!return_in_pc)
20340 arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD * num_regs,
20341 stack_pointer_rtx, stack_pointer_rtx);
20344 /* Generate and emit an insn pattern that we will recognize as a pop_multi
20345 of NUM_REGS consecutive VFP regs, starting at FIRST_REG.
20347 Unfortunately, since this insn does not reflect very well the actual
20348 semantics of the operation, we need to annotate the insn for the benefit
20349 of DWARF2 frame unwind information. */
20350 static void
20351 arm_emit_vfp_multi_reg_pop (int first_reg, int num_regs, rtx base_reg)
20353 int i, j;
20354 rtx par;
20355 rtx dwarf = NULL_RTX;
20356 rtx tmp, reg;
20358 gcc_assert (num_regs && num_regs <= 32);
20360 /* Workaround ARM10 VFPr1 bug. */
20361 if (num_regs == 2 && !arm_arch6)
20363 if (first_reg == 15)
20364 first_reg--;
20366 num_regs++;
20369 /* We can emit at most 16 D-registers in a single pop_multi instruction, and
20370 there could be up to 32 D-registers to restore.
20371 If there are more than 16 D-registers, make two recursive calls,
20372 each of which emits one pop_multi instruction. */
20373 if (num_regs > 16)
20375 arm_emit_vfp_multi_reg_pop (first_reg, 16, base_reg);
20376 arm_emit_vfp_multi_reg_pop (first_reg + 16, num_regs - 16, base_reg);
20377 return;
20380 /* The parallel needs to hold num_regs SETs
20381 and one SET for the stack update. */
20382 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs + 1));
20384 /* Increment the stack pointer, based on there being
20385 num_regs 8-byte registers to restore. */
20386 tmp = gen_rtx_SET (base_reg, plus_constant (Pmode, base_reg, 8 * num_regs));
20387 RTX_FRAME_RELATED_P (tmp) = 1;
20388 XVECEXP (par, 0, 0) = tmp;
20390 /* Now show every reg that will be restored, using a SET for each. */
20391 for (j = 0, i = first_reg; j < num_regs; i += 2)
20393 reg = gen_rtx_REG (DFmode, i);
20395 tmp = gen_rtx_SET (reg,
20396 gen_frame_mem
20397 (DFmode,
20398 plus_constant (Pmode, base_reg, 8 * j)));
20399 RTX_FRAME_RELATED_P (tmp) = 1;
20400 XVECEXP (par, 0, j + 1) = tmp;
20402 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20404 j++;
20407 par = emit_insn (par);
20408 REG_NOTES (par) = dwarf;
20410 /* Make sure the CFA doesn't stay based on IP_REGNUM, to allow unwinding from FP. */
20411 if (REGNO (base_reg) == IP_REGNUM)
20413 RTX_FRAME_RELATED_P (par) = 1;
20414 add_reg_note (par, REG_CFA_DEF_CFA, hard_frame_pointer_rtx);
20416 else
20417 arm_add_cfa_adjust_cfa_note (par, 2 * UNITS_PER_WORD * num_regs,
20418 base_reg, base_reg);
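/* For instance, popping four D registers from BASE_REG builds a parallel of
   BASE_REG = BASE_REG + 32 plus four DFmode loads at offsets 0, 8, 16 and
   24, which is matched by the VFP pop_multi pattern and typically assembles
   to a single VLDM.  */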
20421 /* Generate and emit a pattern that will be recognized as an LDRD pattern.
20422 If an even number of registers is being popped, an LDRD pattern is created
20423 for each register pair.  If an odd number of registers is popped, the last
20424 register is loaded using an LDR pattern. */
20425 static void
20426 thumb2_emit_ldrd_pop (unsigned long saved_regs_mask)
20428 int num_regs = 0;
20429 int i, j;
20430 rtx par = NULL_RTX;
20431 rtx dwarf = NULL_RTX;
20432 rtx tmp, reg, tmp1;
20433 bool return_in_pc = saved_regs_mask & (1 << PC_REGNUM);
20435 for (i = 0; i <= LAST_ARM_REGNUM; i++)
20436 if (saved_regs_mask & (1 << i))
20437 num_regs++;
20439 gcc_assert (num_regs && num_regs <= 16);
20441 /* We cannot generate ldrd for PC. Hence, reduce the count if PC is
20442 to be popped. So, if num_regs is even, now it will become odd,
20443 and we can generate pop with PC. If num_regs is odd, it will be
20444 even now, and ldr with return can be generated for PC. */
20445 if (return_in_pc)
20446 num_regs--;
20448 gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
20450 /* Var j iterates over all the registers in saved_regs_mask.  Var i gives
20451 the index of a saved register in the stack frame.
20452 A PARALLEL RTX for a register pair is created here, so that the pattern
20453 for LDRD can be matched.  As PC is always the last register to be popped,
20454 and we have already decremented num_regs if PC is in the mask, we don't
20455 have to worry about PC in this loop. */
20456 for (i = 0, j = 0; i < (num_regs - (num_regs % 2)); j++)
20457 if (saved_regs_mask & (1 << j))
20459 /* Create RTX for memory load. */
20460 reg = gen_rtx_REG (SImode, j);
20461 tmp = gen_rtx_SET (reg,
20462 gen_frame_mem (SImode,
20463 plus_constant (Pmode,
20464 stack_pointer_rtx, 4 * i)));
20465 RTX_FRAME_RELATED_P (tmp) = 1;
20467 if (i % 2 == 0)
20469 /* When saved-register index (i) is even, the RTX to be emitted is
20470 yet to be created. Hence create it first. The LDRD pattern we
20471 are generating is :
20472 [ (SET (reg_t0) (MEM (PLUS (SP) (NUM))))
20473 (SET (reg_t1) (MEM (PLUS (SP) (NUM + 4)))) ]
20474 where target registers need not be consecutive. */
20475 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
20476 dwarf = NULL_RTX;
20479 /* ith register is added in PARALLEL RTX. If i is even, the reg_i is
20480 added as 0th element and if i is odd, reg_i is added as 1st element
20481 of LDRD pattern shown above. */
20482 XVECEXP (par, 0, (i % 2)) = tmp;
20483 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20485 if ((i % 2) == 1)
20487 /* When saved-register index (i) is odd, RTXs for both the registers
20488 to be loaded have been generated in the LDRD pattern given above, and the
20489 pattern can be emitted now. */
20490 par = emit_insn (par);
20491 REG_NOTES (par) = dwarf;
20492 RTX_FRAME_RELATED_P (par) = 1;
20495 i++;
20498 /* If the number of registers to be popped is odd and return_in_pc is false,
20499 or the number of registers is even and return_in_pc is true, the last
20500 register is popped using LDR.  It can be PC as well.  Hence, adjust the
20501 stack first and then load with post-increment. */
20503 /* Increment the stack pointer, based on there being
20504 num_regs 4-byte registers to restore. */
20505 tmp = gen_rtx_SET (stack_pointer_rtx,
20506 plus_constant (Pmode, stack_pointer_rtx, 4 * i));
20507 RTX_FRAME_RELATED_P (tmp) = 1;
20508 tmp = emit_insn (tmp);
20509 if (!return_in_pc)
20511 arm_add_cfa_adjust_cfa_note (tmp, UNITS_PER_WORD * i,
20512 stack_pointer_rtx, stack_pointer_rtx);
20515 dwarf = NULL_RTX;
20517 if (((num_regs % 2) == 1 && !return_in_pc)
20518 || ((num_regs % 2) == 0 && return_in_pc))
20520 /* Scan for the single register to be popped. Skip until the saved
20521 register is found. */
20522 for (; (saved_regs_mask & (1 << j)) == 0; j++);
20524 /* Gen LDR with post increment here. */
20525 tmp1 = gen_rtx_MEM (SImode,
20526 gen_rtx_POST_INC (SImode,
20527 stack_pointer_rtx));
20528 set_mem_alias_set (tmp1, get_frame_alias_set ());
20530 reg = gen_rtx_REG (SImode, j);
20531 tmp = gen_rtx_SET (reg, tmp1);
20532 RTX_FRAME_RELATED_P (tmp) = 1;
20533 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20535 if (return_in_pc)
20537 /* If return_in_pc, j must be PC_REGNUM. */
20538 gcc_assert (j == PC_REGNUM);
20539 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
20540 XVECEXP (par, 0, 0) = ret_rtx;
20541 XVECEXP (par, 0, 1) = tmp;
20542 par = emit_jump_insn (par);
20544 else
20546 par = emit_insn (tmp);
20547 REG_NOTES (par) = dwarf;
20548 arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD,
20549 stack_pointer_rtx, stack_pointer_rtx);
20553 else if ((num_regs % 2) == 1 && return_in_pc)
20555 /* There are 2 registers to be popped. So, generate the pattern
20556 pop_multiple_with_stack_update_and_return to pop in PC. */
20557 arm_emit_multi_reg_pop (saved_regs_mask & (~((1 << j) - 1)));
20560 return;
20563 /* LDRD in ARM mode needs consecutive registers as operands. This function
20564 emits LDRD whenever possible; otherwise it emits single-word loads.  It uses
20565 offset addressing and then generates one separate stack update.  This provides
20566 more scheduling freedom, compared to writeback on every load. However,
20567 if the function returns using load into PC directly
20568 (i.e., if PC is in SAVED_REGS_MASK), the stack needs to be updated
20569 before the last load. TODO: Add a peephole optimization to recognize
20570 the new epilogue sequence as an LDM instruction whenever possible.  TODO: Add a
20571 peephole optimization to merge the load at stack-offset zero
20572 with the stack update instruction using load with writeback
20573 in post-index addressing mode. */
20574 static void
20575 arm_emit_ldrd_pop (unsigned long saved_regs_mask)
20577 int j = 0;
20578 int offset = 0;
20579 rtx par = NULL_RTX;
20580 rtx dwarf = NULL_RTX;
20581 rtx tmp, mem;
20583 /* Restore saved registers. */
20584 gcc_assert (!((saved_regs_mask & (1 << SP_REGNUM))));
20585 j = 0;
20586 while (j <= LAST_ARM_REGNUM)
20587 if (saved_regs_mask & (1 << j))
20589 if ((j % 2) == 0
20590 && (saved_regs_mask & (1 << (j + 1)))
20591 && (j + 1) != PC_REGNUM)
20593 /* The current register and the next register form a register pair for
20594 which an LDRD can be generated.  PC is always the last register popped, and
20595 we handle it separately. */
20596 if (offset > 0)
20597 mem = gen_frame_mem (DImode,
20598 plus_constant (Pmode,
20599 stack_pointer_rtx,
20600 offset));
20601 else
20602 mem = gen_frame_mem (DImode, stack_pointer_rtx);
20604 tmp = gen_rtx_SET (gen_rtx_REG (DImode, j), mem);
20605 tmp = emit_insn (tmp);
20606 RTX_FRAME_RELATED_P (tmp) = 1;
20608 /* Generate dwarf info. */
20610 dwarf = alloc_reg_note (REG_CFA_RESTORE,
20611 gen_rtx_REG (SImode, j),
20612 NULL_RTX);
20613 dwarf = alloc_reg_note (REG_CFA_RESTORE,
20614 gen_rtx_REG (SImode, j + 1),
20615 dwarf);
20617 REG_NOTES (tmp) = dwarf;
20619 offset += 8;
20620 j += 2;
20622 else if (j != PC_REGNUM)
20624 /* Emit a single word load. */
20625 if (offset > 0)
20626 mem = gen_frame_mem (SImode,
20627 plus_constant (Pmode,
20628 stack_pointer_rtx,
20629 offset));
20630 else
20631 mem = gen_frame_mem (SImode, stack_pointer_rtx);
20633 tmp = gen_rtx_SET (gen_rtx_REG (SImode, j), mem);
20634 tmp = emit_insn (tmp);
20635 RTX_FRAME_RELATED_P (tmp) = 1;
20637 /* Generate dwarf info. */
20638 REG_NOTES (tmp) = alloc_reg_note (REG_CFA_RESTORE,
20639 gen_rtx_REG (SImode, j),
20640 NULL_RTX);
20642 offset += 4;
20643 j += 1;
20645 else /* j == PC_REGNUM */
20646 j++;
20648 else
20649 j++;
20651 /* Update the stack. */
20652 if (offset > 0)
20654 tmp = gen_rtx_SET (stack_pointer_rtx,
20655 plus_constant (Pmode,
20656 stack_pointer_rtx,
20657 offset));
20658 tmp = emit_insn (tmp);
20659 arm_add_cfa_adjust_cfa_note (tmp, offset,
20660 stack_pointer_rtx, stack_pointer_rtx);
20661 offset = 0;
20664 if (saved_regs_mask & (1 << PC_REGNUM))
20666 /* Only PC is to be popped. */
20667 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
20668 XVECEXP (par, 0, 0) = ret_rtx;
20669 tmp = gen_rtx_SET (gen_rtx_REG (SImode, PC_REGNUM),
20670 gen_frame_mem (SImode,
20671 gen_rtx_POST_INC (SImode,
20672 stack_pointer_rtx)));
20673 RTX_FRAME_RELATED_P (tmp) = 1;
20674 XVECEXP (par, 0, 1) = tmp;
20675 par = emit_jump_insn (par);
20677 /* Generate dwarf info. */
20678 dwarf = alloc_reg_note (REG_CFA_RESTORE,
20679 gen_rtx_REG (SImode, PC_REGNUM),
20680 NULL_RTX);
20681 REG_NOTES (par) = dwarf;
20682 arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD,
20683 stack_pointer_rtx, stack_pointer_rtx);
20687 /* Calculate the size of the return value that is passed in registers. */
20688 static unsigned
20689 arm_size_return_regs (void)
20691 machine_mode mode;
20693 if (crtl->return_rtx != 0)
20694 mode = GET_MODE (crtl->return_rtx);
20695 else
20696 mode = DECL_MODE (DECL_RESULT (current_function_decl));
20698 return GET_MODE_SIZE (mode);
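/* For example, a function returning a 64-bit long long (DImode in r0/r1)
   gives 8 here, while one returning an int gives 4.  */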
20701 /* Return true if the current function needs to save/restore LR. */
20702 static bool
20703 thumb_force_lr_save (void)
20705 return !cfun->machine->lr_save_eliminated
20706 && (!crtl->is_leaf
20707 || thumb_far_jump_used_p ()
20708 || df_regs_ever_live_p (LR_REGNUM));
20711 /* We do not know whether r3 will be available, because
20712 an indirect tail call is happening in this
20713 particular case. */
20714 static bool
20715 is_indirect_tailcall_p (rtx call)
20717 rtx pat = PATTERN (call);
20719 /* Indirect tail call. */
20720 pat = XVECEXP (pat, 0, 0);
20721 if (GET_CODE (pat) == SET)
20722 pat = SET_SRC (pat);
20724 pat = XEXP (XEXP (pat, 0), 0);
20725 return REG_P (pat);
20728 /* Return true if r3 is used by any of the tail call insns in the
20729 current function. */
20730 static bool
20731 any_sibcall_could_use_r3 (void)
20733 edge_iterator ei;
20734 edge e;
20736 if (!crtl->tail_call_emit)
20737 return false;
20738 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
20739 if (e->flags & EDGE_SIBCALL)
20741 rtx_insn *call = BB_END (e->src);
20742 if (!CALL_P (call))
20743 call = prev_nonnote_nondebug_insn (call);
20744 gcc_assert (CALL_P (call) && SIBLING_CALL_P (call));
20745 if (find_regno_fusage (call, USE, 3)
20746 || is_indirect_tailcall_p (call))
20747 return true;
20749 return false;
20753 /* Compute the distance from register FROM to register TO.
20754 These can be the arg pointer (26), the soft frame pointer (25),
20755 the stack pointer (13) or the hard frame pointer (11).
20756 In thumb mode r7 is used as the soft frame pointer, if needed.
20757 Typical stack layout looks like this:
20759 old stack pointer -> | |
20760 ----
20761 | | \
20762 | | saved arguments for
20763 | | vararg functions
20764 | | /
20766 hard FP & arg pointer -> | | \
20767 | | stack
20768 | | frame
20769 | | /
20771 | | \
20772 | | call saved
20773 | | registers
20774 soft frame pointer -> | | /
20776 | | \
20777 | | local
20778 | | variables
20779 locals base pointer -> | | /
20781 | | \
20782 | | outgoing
20783 | | arguments
20784 current stack pointer -> | | /
20787 For a given function some or all of these stack components
20788 may not be needed, giving rise to the possibility of
20789 eliminating some of the registers.
20791 The values returned by this function must reflect the behavior
20792 of arm_expand_prologue () and arm_compute_save_core_reg_mask ().
20794 The sign of the number returned reflects the direction of stack
20795 growth, so the values are positive for all eliminations except
20796 from the soft frame pointer to the hard frame pointer.
20798 SFP may point just inside the local variables block to ensure correct
20799 alignment. */
20802 /* Return cached stack offsets. */
20804 static arm_stack_offsets *
20805 arm_get_frame_offsets (void)
20807 struct arm_stack_offsets *offsets;
20809 offsets = &cfun->machine->stack_offsets;
20811 return offsets;
20815 /* Calculate stack offsets. These are used to calculate register elimination
20816 offsets and in prologue/epilogue code. Also calculates which registers
20817 should be saved. */
20819 static void
20820 arm_compute_frame_layout (void)
20822 struct arm_stack_offsets *offsets;
20823 unsigned long func_type;
20824 int saved;
20825 int core_saved;
20826 HOST_WIDE_INT frame_size;
20827 int i;
20829 offsets = &cfun->machine->stack_offsets;
20831 /* Initially this is the size of the local variables.  It will be translated
20832 into an offset once we have determined the size of preceding data. */
20833 frame_size = ROUND_UP_WORD (get_frame_size ());
20835 /* Space for variadic functions. */
20836 offsets->saved_args = crtl->args.pretend_args_size;
20838 /* In Thumb mode this is incorrect, but never used. */
20839 offsets->frame
20840 = (offsets->saved_args
20841 + arm_compute_static_chain_stack_bytes ()
20842 + (frame_pointer_needed ? 4 : 0));
20844 if (TARGET_32BIT)
20846 unsigned int regno;
20848 offsets->saved_regs_mask = arm_compute_save_core_reg_mask ();
20849 core_saved = bit_count (offsets->saved_regs_mask) * 4;
20850 saved = core_saved;
20852 /* We know that SP will be doubleword aligned on entry, and we must
20853 preserve that condition at any subroutine call. We also require the
20854 soft frame pointer to be doubleword aligned. */
20856 if (TARGET_REALLY_IWMMXT)
20858 /* Check for the call-saved iWMMXt registers. */
20859 for (regno = FIRST_IWMMXT_REGNUM;
20860 regno <= LAST_IWMMXT_REGNUM;
20861 regno++)
20862 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
20863 saved += 8;
20866 func_type = arm_current_func_type ();
20867 /* Space for saved VFP registers. */
20868 if (! IS_VOLATILE (func_type)
20869 && TARGET_HARD_FLOAT)
20870 saved += arm_get_vfp_saved_size ();
20872 else /* TARGET_THUMB1 */
20874 offsets->saved_regs_mask = thumb1_compute_save_core_reg_mask ();
20875 core_saved = bit_count (offsets->saved_regs_mask) * 4;
20876 saved = core_saved;
20877 if (TARGET_BACKTRACE)
20878 saved += 16;
20881 /* Saved registers include the stack frame. */
20882 offsets->saved_regs
20883 = offsets->saved_args + arm_compute_static_chain_stack_bytes () + saved;
20884 offsets->soft_frame = offsets->saved_regs + CALLER_INTERWORKING_SLOT_SIZE;
20886 /* A leaf function does not need any stack alignment if it has nothing
20887 on the stack. */
20888 if (crtl->is_leaf && frame_size == 0
20889 /* However if it calls alloca(), we have a dynamically allocated
20890 block of BIGGEST_ALIGNMENT on stack, so still do stack alignment. */
20891 && ! cfun->calls_alloca)
20893 offsets->outgoing_args = offsets->soft_frame;
20894 offsets->locals_base = offsets->soft_frame;
20895 return;
20898 /* Ensure SFP has the correct alignment. */
20899 if (ARM_DOUBLEWORD_ALIGN
20900 && (offsets->soft_frame & 7))
20902 offsets->soft_frame += 4;
20903 /* Try to align stack by pushing an extra reg. Don't bother doing this
20904 when there is a stack frame as the alignment will be rolled into
20905 the normal stack adjustment. */
20906 if (frame_size + crtl->outgoing_args_size == 0)
20908 int reg = -1;
20910 /* Register r3 is caller-saved. Normally it does not need to be
20911 saved on entry by the prologue. However if we choose to save
20912 it for padding then we may confuse the compiler into thinking
20913 a prologue sequence is required when in fact it is not. This
20914 will occur when shrink-wrapping if r3 is used as a scratch
20915 register and there are no other callee-saved writes.
20917 This situation can be avoided by choosing a callee-saved register
20918 rather than r3 for the padding, whenever such a register is
20919 available and r3 is not mandatory. */
20920 bool prefer_callee_reg_p = false;
20922 /* If it is safe to use r3, then do so. This sometimes
20923 generates better code on Thumb-2 by avoiding the need to
20924 use 32-bit push/pop instructions. */
20925 if (! any_sibcall_could_use_r3 ()
20926 && arm_size_return_regs () <= 12
20927 && (offsets->saved_regs_mask & (1 << 3)) == 0
20928 && (TARGET_THUMB2
20929 || !(TARGET_LDRD && current_tune->prefer_ldrd_strd)))
20931 reg = 3;
20932 if (!TARGET_THUMB2)
20933 prefer_callee_reg_p = true;
20935 if (reg == -1
20936 || prefer_callee_reg_p)
20938 for (i = 4; i <= (TARGET_THUMB1 ? LAST_LO_REGNUM : 11); i++)
20940 /* Avoid fixed registers; they may be changed at
20941 arbitrary times so it's unsafe to restore them
20942 during the epilogue. */
20943 if (!fixed_regs[i]
20944 && (offsets->saved_regs_mask & (1 << i)) == 0)
20946 reg = i;
20947 break;
20952 if (reg != -1)
20954 offsets->saved_regs += 4;
20955 offsets->saved_regs_mask |= (1 << reg);
20960 offsets->locals_base = offsets->soft_frame + frame_size;
20961 offsets->outgoing_args = (offsets->locals_base
20962 + crtl->outgoing_args_size);
20964 if (ARM_DOUBLEWORD_ALIGN)
20966 /* Ensure SP remains doubleword aligned. */
20967 if (offsets->outgoing_args & 7)
20968 offsets->outgoing_args += 4;
20969 gcc_assert (!(offsets->outgoing_args & 7));
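/* A worked example of the layout computed above (illustrative only, assuming
   ARM_DOUBLEWORD_ALIGN, no pretend args, no static chain and a
   CALLER_INTERWORKING_SLOT_SIZE of zero): a 32-bit function that saves
   {r4-r7, lr} (core_saved = 20) and has 16 bytes of locals gets

     saved_args    = 0
     saved_regs    = 0 + 0 + 20 = 20
     soft_frame    = 20, bumped to 24 for doubleword alignment; no spare
                     register is pushed because frame_size plus
                     outgoing_args_size is non-zero, so 4 bytes of padding
                     remain
     locals_base   = 24 + 16 = 40
     outgoing_args = 40 + 0 = 40, already doubleword aligned.  */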
20974 /* Calculate the relative offsets for the different stack pointers. Positive
20975 offsets are in the direction of stack growth. */
20977 HOST_WIDE_INT
20978 arm_compute_initial_elimination_offset (unsigned int from, unsigned int to)
20980 arm_stack_offsets *offsets;
20982 offsets = arm_get_frame_offsets ();
20984 /* OK, now we have enough information to compute the distances.
20985 There must be an entry in these switch tables for each pair
20986 of registers in ELIMINABLE_REGS, even if some of the entries
20987 seem to be redundant or useless. */
20988 switch (from)
20990 case ARG_POINTER_REGNUM:
20991 switch (to)
20993 case THUMB_HARD_FRAME_POINTER_REGNUM:
20994 return 0;
20996 case FRAME_POINTER_REGNUM:
20997 /* This is the reverse of the soft frame pointer
20998 to hard frame pointer elimination below. */
20999 return offsets->soft_frame - offsets->saved_args;
21001 case ARM_HARD_FRAME_POINTER_REGNUM:
21002 /* This is only non-zero in the case where the static chain register
21003 is stored above the frame. */
21004 return offsets->frame - offsets->saved_args - 4;
21006 case STACK_POINTER_REGNUM:
21007 /* If nothing has been pushed on the stack at all
21008 then this will return -4. This *is* correct! */
21009 return offsets->outgoing_args - (offsets->saved_args + 4);
21011 default:
21012 gcc_unreachable ();
21014 gcc_unreachable ();
21016 case FRAME_POINTER_REGNUM:
21017 switch (to)
21019 case THUMB_HARD_FRAME_POINTER_REGNUM:
21020 return 0;
21022 case ARM_HARD_FRAME_POINTER_REGNUM:
21023 /* The hard frame pointer points to the top entry in the
21024 stack frame. The soft frame pointer points to the bottom entry
21025 in the stack frame. If there is no stack frame at all,
21026 then they are identical. */
21028 return offsets->frame - offsets->soft_frame;
21030 case STACK_POINTER_REGNUM:
21031 return offsets->outgoing_args - offsets->soft_frame;
21033 default:
21034 gcc_unreachable ();
21036 gcc_unreachable ();
21038 default:
21039 /* You cannot eliminate from the stack pointer.
21040 In theory you could eliminate from the hard frame
21041 pointer to the stack pointer, but this will never
21042 happen, since if a stack frame is not needed the
21043 hard frame pointer will never be used. */
21044 gcc_unreachable ();
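/* Continuing the illustrative layout sketched above: eliminating
   ARG_POINTER_REGNUM into STACK_POINTER_REGNUM yields
   outgoing_args - (saved_args + 4) = 40 - 4 = 36, and FRAME_POINTER_REGNUM
   into STACK_POINTER_REGNUM yields outgoing_args - soft_frame
   = 40 - 24 = 16.  */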
21048 /* Given FROM and TO register numbers, say whether this elimination is
21049 allowed. Frame pointer elimination is automatically handled.
21051 All eliminations are permissible. Note that ARG_POINTER_REGNUM and
21052 HARD_FRAME_POINTER_REGNUM are in fact the same thing. If we need a frame
21053 pointer, we must eliminate FRAME_POINTER_REGNUM into
21054 HARD_FRAME_POINTER_REGNUM and not into STACK_POINTER_REGNUM or
21055 ARG_POINTER_REGNUM. */
21057 bool
21058 arm_can_eliminate (const int from, const int to)
21060 return ((to == FRAME_POINTER_REGNUM && from == ARG_POINTER_REGNUM) ? false :
21061 (to == STACK_POINTER_REGNUM && frame_pointer_needed) ? false :
21062 (to == ARM_HARD_FRAME_POINTER_REGNUM && TARGET_THUMB) ? false :
21063 (to == THUMB_HARD_FRAME_POINTER_REGNUM && TARGET_ARM) ? false :
21064 true);
21067 /* Emit RTL to save coprocessor registers on function entry. Returns the
21068 number of bytes pushed. */
21070 static int
21071 arm_save_coproc_regs(void)
21073 int saved_size = 0;
21074 unsigned reg;
21075 unsigned start_reg;
21076 rtx insn;
21078 for (reg = LAST_IWMMXT_REGNUM; reg >= FIRST_IWMMXT_REGNUM; reg--)
21079 if (df_regs_ever_live_p (reg) && ! call_used_regs[reg])
21081 insn = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
21082 insn = gen_rtx_MEM (V2SImode, insn);
21083 insn = emit_set_insn (insn, gen_rtx_REG (V2SImode, reg));
21084 RTX_FRAME_RELATED_P (insn) = 1;
21085 saved_size += 8;
21088 if (TARGET_HARD_FLOAT)
21090 start_reg = FIRST_VFP_REGNUM;
21092 for (reg = FIRST_VFP_REGNUM; reg < LAST_VFP_REGNUM; reg += 2)
21094 if ((!df_regs_ever_live_p (reg) || call_used_regs[reg])
21095 && (!df_regs_ever_live_p (reg + 1) || call_used_regs[reg + 1]))
21097 if (start_reg != reg)
21098 saved_size += vfp_emit_fstmd (start_reg,
21099 (reg - start_reg) / 2);
21100 start_reg = reg + 2;
21103 if (start_reg != reg)
21104 saved_size += vfp_emit_fstmd (start_reg,
21105 (reg - start_reg) / 2);
21107 return saved_size;
21111 /* Set the Thumb frame pointer from the stack pointer. */
21113 static void
21114 thumb_set_frame_pointer (arm_stack_offsets *offsets)
21116 HOST_WIDE_INT amount;
21117 rtx insn, dwarf;
21119 amount = offsets->outgoing_args - offsets->locals_base;
21120 if (amount < 1024)
21121 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
21122 stack_pointer_rtx, GEN_INT (amount)));
21123 else
21125 emit_insn (gen_movsi (hard_frame_pointer_rtx, GEN_INT (amount)));
21126 /* Thumb-2 RTL patterns expect sp as the first input. Thumb-1
21127 expects the first two operands to be the same. */
21128 if (TARGET_THUMB2)
21130 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
21131 stack_pointer_rtx,
21132 hard_frame_pointer_rtx));
21134 else
21136 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
21137 hard_frame_pointer_rtx,
21138 stack_pointer_rtx));
21140 dwarf = gen_rtx_SET (hard_frame_pointer_rtx,
21141 plus_constant (Pmode, stack_pointer_rtx, amount));
21142 RTX_FRAME_RELATED_P (dwarf) = 1;
21143 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
21146 RTX_FRAME_RELATED_P (insn) = 1;
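/* For example, when the distance is below 1024 this emits a single
   "add r7, sp, #amount" (r7 being the Thumb hard frame pointer); for larger
   amounts the constant is first moved into the frame pointer and sp is then
   added to it, using whichever operand order Thumb-1 or Thumb-2 expects.  */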
21149 struct scratch_reg {
21150 rtx reg;
21151 bool saved;
21154 /* Return a short-lived scratch register for use as a 2nd scratch register on
21155 function entry after the registers are saved in the prologue. This register
21156 must be released by means of release_scratch_register_on_entry. IP is not
21157 considered since it is always used as the 1st scratch register if available.
21159 REGNO1 is the index number of the 1st scratch register and LIVE_REGS is the
21160 mask of live registers. */
21162 static void
21163 get_scratch_register_on_entry (struct scratch_reg *sr, unsigned int regno1,
21164 unsigned long live_regs)
21166 int regno = -1;
21168 sr->saved = false;
21170 if (regno1 != LR_REGNUM && (live_regs & (1 << LR_REGNUM)) != 0)
21171 regno = LR_REGNUM;
21172 else
21174 unsigned int i;
21176 for (i = 4; i < 11; i++)
21177 if (regno1 != i && (live_regs & (1 << i)) != 0)
21179 regno = i;
21180 break;
21183 if (regno < 0)
21185 /* If IP is used as the 1st scratch register for a nested function,
21186 then either r3 wasn't available or is used to preserve IP. */
21187 if (regno1 == IP_REGNUM && IS_NESTED (arm_current_func_type ()))
21188 regno1 = 3;
21189 regno = (regno1 == 3 ? 2 : 3);
21190 sr->saved
21191 = REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun)),
21192 regno);
21196 sr->reg = gen_rtx_REG (SImode, regno);
21197 if (sr->saved)
21199 rtx addr = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
21200 rtx insn = emit_set_insn (gen_frame_mem (SImode, addr), sr->reg);
21201 rtx x = gen_rtx_SET (stack_pointer_rtx,
21202 plus_constant (Pmode, stack_pointer_rtx, -4));
21203 RTX_FRAME_RELATED_P (insn) = 1;
21204 add_reg_note (insn, REG_FRAME_RELATED_EXPR, x);
21208 /* Release a scratch register obtained from the preceding function. */
21210 static void
21211 release_scratch_register_on_entry (struct scratch_reg *sr)
21213 if (sr->saved)
21215 rtx addr = gen_rtx_POST_INC (Pmode, stack_pointer_rtx);
21216 rtx insn = emit_set_insn (sr->reg, gen_frame_mem (SImode, addr));
21217 rtx x = gen_rtx_SET (stack_pointer_rtx,
21218 plus_constant (Pmode, stack_pointer_rtx, 4));
21219 RTX_FRAME_RELATED_P (insn) = 1;
21220 add_reg_note (insn, REG_FRAME_RELATED_EXPR, x);
21224 #define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)
21226 #if PROBE_INTERVAL > 4096
21227 #error Cannot use indexed addressing mode for stack probing
21228 #endif
21230 /* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
21231 inclusive. These are offsets from the current stack pointer. REGNO1
21232 is the index number of the 1st scratch register and LIVE_REGS is the
21233 mask of live registers. */
21235 static void
21236 arm_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size,
21237 unsigned int regno1, unsigned long live_regs)
21239 rtx reg1 = gen_rtx_REG (Pmode, regno1);
21241 /* See if we have a constant small number of probes to generate. If so,
21242 that's the easy case. */
21243 if (size <= PROBE_INTERVAL)
21245 emit_move_insn (reg1, GEN_INT (first + PROBE_INTERVAL));
21246 emit_set_insn (reg1, gen_rtx_MINUS (Pmode, stack_pointer_rtx, reg1));
21247 emit_stack_probe (plus_constant (Pmode, reg1, PROBE_INTERVAL - size));
21250 /* The run-time loop is made up of 10 insns in the generic case while the
21251 compile-time loop is made up of 4+2*(n-2) insns for n intervals. */
21252 else if (size <= 5 * PROBE_INTERVAL)
21254 HOST_WIDE_INT i, rem;
21256 emit_move_insn (reg1, GEN_INT (first + PROBE_INTERVAL));
21257 emit_set_insn (reg1, gen_rtx_MINUS (Pmode, stack_pointer_rtx, reg1));
21258 emit_stack_probe (reg1);
21260 /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 2 until
21261 it exceeds SIZE. If only two probes are needed, this will not
21262 generate any code. Then probe at FIRST + SIZE. */
21263 for (i = 2 * PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
21265 emit_set_insn (reg1, plus_constant (Pmode, reg1, -PROBE_INTERVAL));
21266 emit_stack_probe (reg1);
21269 rem = size - (i - PROBE_INTERVAL);
21270 if (rem > 4095 || (TARGET_THUMB2 && rem > 255))
21272 emit_set_insn (reg1, plus_constant (Pmode, reg1, -PROBE_INTERVAL));
21273 emit_stack_probe (plus_constant (Pmode, reg1, PROBE_INTERVAL - rem));
21275 else
21276 emit_stack_probe (plus_constant (Pmode, reg1, -rem));
21279 /* Otherwise, do the same as above, but in a loop. Note that we must be
21280 extra careful with variables wrapping around because we might be at
21281 the very top (or the very bottom) of the address space and we have
21282 to be able to handle this case properly; in particular, we use an
21283 equality test for the loop condition. */
21284 else
21286 HOST_WIDE_INT rounded_size;
21287 struct scratch_reg sr;
21289 get_scratch_register_on_entry (&sr, regno1, live_regs);
21291 emit_move_insn (reg1, GEN_INT (first));
21294 /* Step 1: round SIZE to the previous multiple of the interval. */
21296 rounded_size = size & -PROBE_INTERVAL;
21297 emit_move_insn (sr.reg, GEN_INT (rounded_size));
21300 /* Step 2: compute initial and final value of the loop counter. */
21302 /* TEST_ADDR = SP + FIRST. */
21303 emit_set_insn (reg1, gen_rtx_MINUS (Pmode, stack_pointer_rtx, reg1));
21305 /* LAST_ADDR = SP + FIRST + ROUNDED_SIZE. */
21306 emit_set_insn (sr.reg, gen_rtx_MINUS (Pmode, reg1, sr.reg));
21309 /* Step 3: the loop
21311 do
21312 {
21313 TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
21314 probe at TEST_ADDR
21315 }
21316 while (TEST_ADDR != LAST_ADDR)
21318 probes at FIRST + N * PROBE_INTERVAL for values of N from 1
21319 until it is equal to ROUNDED_SIZE. */
21321 emit_insn (gen_probe_stack_range (reg1, reg1, sr.reg));
21324 /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
21325 that SIZE is equal to ROUNDED_SIZE. */
21327 if (size != rounded_size)
21329 HOST_WIDE_INT rem = size - rounded_size;
21331 if (rem > 4095 || (TARGET_THUMB2 && rem > 255))
21333 emit_set_insn (sr.reg,
21334 plus_constant (Pmode, sr.reg, -PROBE_INTERVAL));
21335 emit_stack_probe (plus_constant (Pmode, sr.reg,
21336 PROBE_INTERVAL - rem));
21338 else
21339 emit_stack_probe (plus_constant (Pmode, sr.reg, -rem));
21342 release_scratch_register_on_entry (&sr);
21345 /* Make sure nothing is scheduled before we are done. */
21346 emit_insn (gen_blockage ());
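/* Worked examples, assuming the default PROBE_INTERVAL of 4096 in ARM state:
   - size = 2048: a single probe at FIRST + 2048.
   - size = 10000: probes at FIRST + 4096 and FIRST + 8192, then a final
     probe at FIRST + 10000 (the residual 1808 fits in an immediate).
   - size = 100000: rounded_size = 98304, so the run-time loop probes every
     4096 bytes from FIRST + 4096 up to FIRST + 98304, followed by one last
     probe at FIRST + 100000 for the remaining 1696 bytes.  */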
21349 /* Probe a range of stack addresses from REG1 to REG2 inclusive. These are
21350 absolute addresses. */
21352 const char *
21353 output_probe_stack_range (rtx reg1, rtx reg2)
21355 static int labelno = 0;
21356 char loop_lab[32];
21357 rtx xops[2];
21359 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno++);
21361 /* Loop. */
21362 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
21364 /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL. */
21365 xops[0] = reg1;
21366 xops[1] = GEN_INT (PROBE_INTERVAL);
21367 output_asm_insn ("sub\t%0, %0, %1", xops);
21369 /* Probe at TEST_ADDR. */
21370 output_asm_insn ("str\tr0, [%0, #0]", xops);
21372 /* Test if TEST_ADDR == LAST_ADDR. */
21373 xops[1] = reg2;
21374 output_asm_insn ("cmp\t%0, %1", xops);
21376 /* Branch. */
21377 fputs ("\tbne\t", asm_out_file);
21378 assemble_name_raw (asm_out_file, loop_lab);
21379 fputc ('\n', asm_out_file);
21381 return "";
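/* With r4 and r5 as the two address registers, the loop emitted here looks
   roughly like:

     .LPSRL0:
         sub     r4, r4, #4096
         str     r0, [r4, #0]
         cmp     r4, r5
         bne     .LPSRL0
*/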
21384 /* Generate the prologue instructions for entry into an ARM or Thumb-2
21385 function. */
21386 void
21387 arm_expand_prologue (void)
21389 rtx amount;
21390 rtx insn;
21391 rtx ip_rtx;
21392 unsigned long live_regs_mask;
21393 unsigned long func_type;
21394 int fp_offset = 0;
21395 int saved_pretend_args = 0;
21396 int saved_regs = 0;
21397 unsigned HOST_WIDE_INT args_to_push;
21398 HOST_WIDE_INT size;
21399 arm_stack_offsets *offsets;
21400 bool clobber_ip;
21402 func_type = arm_current_func_type ();
21404 /* Naked functions don't have prologues. */
21405 if (IS_NAKED (func_type))
21407 if (flag_stack_usage_info)
21408 current_function_static_stack_size = 0;
21409 return;
21412 /* Make a copy of crtl->args.pretend_args_size as we may need to modify it locally. */
21413 args_to_push = crtl->args.pretend_args_size;
21415 /* Compute which registers we will have to save onto the stack. */
21416 offsets = arm_get_frame_offsets ();
21417 live_regs_mask = offsets->saved_regs_mask;
21419 ip_rtx = gen_rtx_REG (SImode, IP_REGNUM);
21421 if (IS_STACKALIGN (func_type))
21423 rtx r0, r1;
21425 /* Handle a word-aligned stack pointer. We generate the following:
21427 mov r0, sp
21428 bic r1, r0, #7
21429 mov sp, r1
21430 <save and restore r0 in normal prologue/epilogue>
21431 mov sp, r0
21432 bx lr
21434 The unwinder doesn't need to know about the stack realignment.
21435 Just tell it we saved SP in r0. */
21436 gcc_assert (TARGET_THUMB2 && !arm_arch_notm && args_to_push == 0);
21438 r0 = gen_rtx_REG (SImode, R0_REGNUM);
21439 r1 = gen_rtx_REG (SImode, R1_REGNUM);
21441 insn = emit_insn (gen_movsi (r0, stack_pointer_rtx));
21442 RTX_FRAME_RELATED_P (insn) = 1;
21443 add_reg_note (insn, REG_CFA_REGISTER, NULL);
21445 emit_insn (gen_andsi3 (r1, r0, GEN_INT (~(HOST_WIDE_INT)7)));
21447 /* ??? The CFA changes here, which may cause GDB to conclude that it
21448 has entered a different function. That said, the unwind info is
21449 correct, individually, before and after this instruction because
21450 we've described the save of SP, which will override the default
21451 handling of SP as restoring from the CFA. */
21452 emit_insn (gen_movsi (stack_pointer_rtx, r1));
21455 /* The static chain register is the same as the IP register. If it is
21456 clobbered when creating the frame, we need to save and restore it. */
21457 clobber_ip = IS_NESTED (func_type)
21458 && ((TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
21459 || (flag_stack_check == STATIC_BUILTIN_STACK_CHECK
21460 && !df_regs_ever_live_p (LR_REGNUM)
21461 && arm_r3_live_at_start_p ()));
21463 /* Find somewhere to store IP whilst the frame is being created.
21464 We try the following places in order:
21466 1. The last argument register r3 if it is available.
21467 2. A slot on the stack above the frame if there are no
21468 arguments to push onto the stack.
21469 3. Register r3 again, after pushing the argument registers
21470 onto the stack, if this is a varargs function.
21471 4. The last slot on the stack created for the arguments to
21472 push, if this isn't a varargs function.
21474 Note - we only need to tell the dwarf2 backend about the SP
21475 adjustment in the second variant; the static chain register
21476 doesn't need to be unwound, as it doesn't contain a value
21477 inherited from the caller. */
21478 if (clobber_ip)
21480 if (!arm_r3_live_at_start_p ())
21481 insn = emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
21482 else if (args_to_push == 0)
21484 rtx addr, dwarf;
21486 gcc_assert(arm_compute_static_chain_stack_bytes() == 4);
21487 saved_regs += 4;
21489 addr = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
21490 insn = emit_set_insn (gen_frame_mem (SImode, addr), ip_rtx);
21491 fp_offset = 4;
21493 /* Just tell the dwarf backend that we adjusted SP. */
21494 dwarf = gen_rtx_SET (stack_pointer_rtx,
21495 plus_constant (Pmode, stack_pointer_rtx,
21496 -fp_offset));
21497 RTX_FRAME_RELATED_P (insn) = 1;
21498 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
21500 else
21502 /* Store the args on the stack. */
21503 if (cfun->machine->uses_anonymous_args)
21505 insn = emit_multi_reg_push ((0xf0 >> (args_to_push / 4)) & 0xf,
21506 (0xf0 >> (args_to_push / 4)) & 0xf);
21507 emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
21508 saved_pretend_args = 1;
21510 else
21512 rtx addr, dwarf;
21514 if (args_to_push == 4)
21515 addr = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
21516 else
21517 addr = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx,
21518 plus_constant (Pmode,
21519 stack_pointer_rtx,
21520 -args_to_push));
21522 insn = emit_set_insn (gen_frame_mem (SImode, addr), ip_rtx);
21524 /* Just tell the dwarf backend that we adjusted SP. */
21525 dwarf = gen_rtx_SET (stack_pointer_rtx,
21526 plus_constant (Pmode, stack_pointer_rtx,
21527 -args_to_push));
21528 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
21531 RTX_FRAME_RELATED_P (insn) = 1;
21532 fp_offset = args_to_push;
21533 args_to_push = 0;
21537 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
21539 if (IS_INTERRUPT (func_type))
21541 /* Interrupt functions must not corrupt any registers.
21542 Creating a frame pointer however, corrupts the IP
21543 register, so we must push it first. */
21544 emit_multi_reg_push (1 << IP_REGNUM, 1 << IP_REGNUM);
21546 /* Do not set RTX_FRAME_RELATED_P on this insn.
21547 The dwarf stack unwinding code only wants to see one
21548 stack decrement per function, and this is not it. If
21549 this instruction is labeled as being part of the frame
21550 creation sequence then dwarf2out_frame_debug_expr will
21551 die when it encounters the assignment of IP to FP
21552 later on, since the use of SP here establishes SP as
21553 the CFA register and not IP.
21555 Anyway this instruction is not really part of the stack
21556 frame creation although it is part of the prologue. */
21559 insn = emit_set_insn (ip_rtx,
21560 plus_constant (Pmode, stack_pointer_rtx,
21561 fp_offset));
21562 RTX_FRAME_RELATED_P (insn) = 1;
21565 if (args_to_push)
21567 /* Push the argument registers, or reserve space for them. */
21568 if (cfun->machine->uses_anonymous_args)
21569 insn = emit_multi_reg_push
21570 ((0xf0 >> (args_to_push / 4)) & 0xf,
21571 (0xf0 >> (args_to_push / 4)) & 0xf);
21572 else
21573 insn = emit_insn
21574 (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
21575 GEN_INT (- args_to_push)));
21576 RTX_FRAME_RELATED_P (insn) = 1;
21579 /* If this is an interrupt service routine, and the link register
21580 is going to be pushed, and we're not generating an extra
21581 push of IP (needed when a frame is needed and the frame layout is APCS),
21582 subtracting four from LR now will mean that the function return
21583 can be done with a single instruction. */
21584 if ((func_type == ARM_FT_ISR || func_type == ARM_FT_FIQ)
21585 && (live_regs_mask & (1 << LR_REGNUM)) != 0
21586 && !(frame_pointer_needed && TARGET_APCS_FRAME)
21587 && TARGET_ARM)
21589 rtx lr = gen_rtx_REG (SImode, LR_REGNUM);
21591 emit_set_insn (lr, plus_constant (SImode, lr, -4));
21594 if (live_regs_mask)
21596 unsigned long dwarf_regs_mask = live_regs_mask;
21598 saved_regs += bit_count (live_regs_mask) * 4;
21599 if (optimize_size && !frame_pointer_needed
21600 && saved_regs == offsets->saved_regs - offsets->saved_args)
21602 /* If no coprocessor registers are being pushed and we don't have
21603 to worry about a frame pointer then push extra registers to
21604 create the stack frame. This is done in a way that does not
21605 alter the frame layout, so is independent of the epilogue. */
21606 int n;
21607 int frame;
21608 n = 0;
21609 while (n < 8 && (live_regs_mask & (1 << n)) == 0)
21610 n++;
21611 frame = offsets->outgoing_args - (offsets->saved_args + saved_regs);
21612 if (frame && n * 4 >= frame)
21614 n = frame / 4;
21615 live_regs_mask |= (1 << n) - 1;
21616 saved_regs += frame;
21620 if (TARGET_LDRD
21621 && current_tune->prefer_ldrd_strd
21622 && !optimize_function_for_size_p (cfun))
21624 gcc_checking_assert (live_regs_mask == dwarf_regs_mask);
21625 if (TARGET_THUMB2)
21626 thumb2_emit_strd_push (live_regs_mask);
21627 else if (TARGET_ARM
21628 && !TARGET_APCS_FRAME
21629 && !IS_INTERRUPT (func_type))
21630 arm_emit_strd_push (live_regs_mask);
21631 else
21633 insn = emit_multi_reg_push (live_regs_mask, live_regs_mask);
21634 RTX_FRAME_RELATED_P (insn) = 1;
21637 else
21639 insn = emit_multi_reg_push (live_regs_mask, dwarf_regs_mask);
21640 RTX_FRAME_RELATED_P (insn) = 1;
21644 if (! IS_VOLATILE (func_type))
21645 saved_regs += arm_save_coproc_regs ();
21647 if (frame_pointer_needed && TARGET_ARM)
21649 /* Create the new frame pointer. */
21650 if (TARGET_APCS_FRAME)
21652 insn = GEN_INT (-(4 + args_to_push + fp_offset));
21653 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx, ip_rtx, insn));
21654 RTX_FRAME_RELATED_P (insn) = 1;
21656 else
21658 insn = GEN_INT (saved_regs - (4 + fp_offset));
21659 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
21660 stack_pointer_rtx, insn));
21661 RTX_FRAME_RELATED_P (insn) = 1;
21665 size = offsets->outgoing_args - offsets->saved_args;
21666 if (flag_stack_usage_info)
21667 current_function_static_stack_size = size;
21669 /* If this isn't an interrupt service routine and we have a frame, then do
21670 stack checking. We use IP as the first scratch register, except for the
21671 non-APCS nested functions if LR or r3 are available (see clobber_ip). */
21672 if (!IS_INTERRUPT (func_type)
21673 && flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
21675 unsigned int regno;
21677 if (!IS_NESTED (func_type) || clobber_ip)
21678 regno = IP_REGNUM;
21679 else if (df_regs_ever_live_p (LR_REGNUM))
21680 regno = LR_REGNUM;
21681 else
21682 regno = 3;
21684 if (crtl->is_leaf && !cfun->calls_alloca)
21686 if (size > PROBE_INTERVAL && size > STACK_CHECK_PROTECT)
21687 arm_emit_probe_stack_range (STACK_CHECK_PROTECT,
21688 size - STACK_CHECK_PROTECT,
21689 regno, live_regs_mask);
21691 else if (size > 0)
21692 arm_emit_probe_stack_range (STACK_CHECK_PROTECT, size,
21693 regno, live_regs_mask);
21696 /* Recover the static chain register. */
21697 if (clobber_ip)
21699 if (!arm_r3_live_at_start_p () || saved_pretend_args)
21700 insn = gen_rtx_REG (SImode, 3);
21701 else
21703 insn = plus_constant (Pmode, hard_frame_pointer_rtx, 4);
21704 insn = gen_frame_mem (SImode, insn);
21706 emit_set_insn (ip_rtx, insn);
21707 emit_insn (gen_force_register_use (ip_rtx));
21710 if (offsets->outgoing_args != offsets->saved_args + saved_regs)
21712 /* This add can produce multiple insns for a large constant, so we
21713 need to get tricky. */
21714 rtx_insn *last = get_last_insn ();
21716 amount = GEN_INT (offsets->saved_args + saved_regs
21717 - offsets->outgoing_args);
21719 insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
21720 amount));
21723 last = last ? NEXT_INSN (last) : get_insns ();
21724 RTX_FRAME_RELATED_P (last) = 1;
21726 while (last != insn);
21728 /* If the frame pointer is needed, emit a special barrier that
21729 will prevent the scheduler from moving stores to the frame
21730 before the stack adjustment. */
21731 if (frame_pointer_needed)
21732 emit_insn (gen_stack_tie (stack_pointer_rtx,
21733 hard_frame_pointer_rtx));
21737 if (frame_pointer_needed && TARGET_THUMB2)
21738 thumb_set_frame_pointer (offsets);
21740 if (flag_pic && arm_pic_register != INVALID_REGNUM)
21742 unsigned long mask;
21744 mask = live_regs_mask;
21745 mask &= THUMB2_WORK_REGS;
21746 if (!IS_NESTED (func_type))
21747 mask |= (1 << IP_REGNUM);
21748 arm_load_pic_register (mask);
21751 /* If we are profiling, make sure no instructions are scheduled before
21752 the call to mcount. Similarly if the user has requested no
21753 scheduling in the prolog. Similarly if we want non-call exceptions
21754 using the EABI unwinder, to prevent faulting instructions from being
21755 swapped with a stack adjustment. */
21756 if (crtl->profile || !TARGET_SCHED_PROLOG
21757 || (arm_except_unwind_info (&global_options) == UI_TARGET
21758 && cfun->can_throw_non_call_exceptions))
21759 emit_insn (gen_blockage ());
21761 /* If the link register is being kept alive, with the return address in it,
21762 then make sure that it does not get reused by the ce2 pass. */
21763 if ((live_regs_mask & (1 << LR_REGNUM)) == 0)
21764 cfun->machine->lr_save_eliminated = 1;
21767 /* Print condition code to STREAM. Helper function for arm_print_operand. */
21768 static void
21769 arm_print_condition (FILE *stream)
21771 if (arm_ccfsm_state == 3 || arm_ccfsm_state == 4)
21773 /* Branch conversion is not implemented for Thumb-2. */
21774 if (TARGET_THUMB)
21776 output_operand_lossage ("predicated Thumb instruction");
21777 return;
21779 if (current_insn_predicate != NULL)
21781 output_operand_lossage
21782 ("predicated instruction in conditional sequence");
21783 return;
21786 fputs (arm_condition_codes[arm_current_cc], stream);
21788 else if (current_insn_predicate)
21790 enum arm_cond_code code;
21792 if (TARGET_THUMB1)
21794 output_operand_lossage ("predicated Thumb instruction");
21795 return;
21798 code = get_arm_condition_code (current_insn_predicate);
21799 fputs (arm_condition_codes[code], stream);
21804 /* Globally reserved letters: acln
21805 Punctuation letters currently used: @_|?().!#
21806 Lower case letters currently used: bcdefhimpqtvwxyz
21807 Upper case letters currently used: ABCDFGHJKLMNOPQRSTU
21808 Letters previously used, but now deprecated/obsolete: sVWXYZ.
21810 Note that the global reservation for 'c' is only for CONSTANT_ADDRESS_P.
21812 If CODE is 'd', then X is a condition operand and the instruction
21813 should only be executed if the condition is true.
21814 If CODE is 'D', then X is a condition operand and the instruction
21815 should only be executed if the condition is false: however, if the mode
21816 of the comparison is CCFPEmode, then always execute the instruction -- we
21817 do this because in these circumstances !GE does not necessarily imply LT;
21818 in these cases the instruction pattern will take care to make sure that
21819 an instruction containing %d will follow, thereby undoing the effects of
21820 doing this instruction unconditionally.
21821 If CODE is 'N' then X is a floating point operand that must be negated
21822 before output.
21823 If CODE is 'B' then output a bitwise inverted value of X (a const int).
21824 If X is a REG and CODE is `M', output a ldm/stm style multi-reg. */
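/* A few examples: "%B" of the constant 0 prints -1, "%L" of 0x12345678
   prints 22136 (0x5678), "%b" of 8 prints #3, and "%M" of a DImode value
   held in r0 prints {r0-r1}.  */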
21825 static void
21826 arm_print_operand (FILE *stream, rtx x, int code)
21828 switch (code)
21830 case '@':
21831 fputs (ASM_COMMENT_START, stream);
21832 return;
21834 case '_':
21835 fputs (user_label_prefix, stream);
21836 return;
21838 case '|':
21839 fputs (REGISTER_PREFIX, stream);
21840 return;
21842 case '?':
21843 arm_print_condition (stream);
21844 return;
21846 case '.':
21847 /* The current condition code for a condition code setting instruction.
21848 Preceded by 's' in unified syntax, otherwise followed by 's'. */
21849 fputc('s', stream);
21850 arm_print_condition (stream);
21851 return;
21853 case '!':
21854 /* If the instruction is conditionally executed then print
21855 the current condition code, otherwise print 's'. */
21856 gcc_assert (TARGET_THUMB2);
21857 if (current_insn_predicate)
21858 arm_print_condition (stream);
21859 else
21860 fputc('s', stream);
21861 break;
21863 /* %# is a "break" sequence. It doesn't output anything, but is used to
21864 separate e.g. operand numbers from following text, if that text consists
21865 of further digits which we don't want to be part of the operand
21866 number. */
21867 case '#':
21868 return;
21870 case 'N':
21872 REAL_VALUE_TYPE r;
21873 r = real_value_negate (CONST_DOUBLE_REAL_VALUE (x));
21874 fprintf (stream, "%s", fp_const_from_val (&r));
21876 return;
21878 /* An integer or symbol address without a preceding # sign. */
21879 case 'c':
21880 switch (GET_CODE (x))
21882 case CONST_INT:
21883 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
21884 break;
21886 case SYMBOL_REF:
21887 output_addr_const (stream, x);
21888 break;
21890 case CONST:
21891 if (GET_CODE (XEXP (x, 0)) == PLUS
21892 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF)
21894 output_addr_const (stream, x);
21895 break;
21897 /* Fall through. */
21899 default:
21900 output_operand_lossage ("Unsupported operand for code '%c'", code);
21902 return;
21904 /* An integer that we want to print in HEX. */
21905 case 'x':
21906 switch (GET_CODE (x))
21908 case CONST_INT:
21909 fprintf (stream, "#" HOST_WIDE_INT_PRINT_HEX, INTVAL (x));
21910 break;
21912 default:
21913 output_operand_lossage ("Unsupported operand for code '%c'", code);
21915 return;
21917 case 'B':
21918 if (CONST_INT_P (x))
21920 HOST_WIDE_INT val;
21921 val = ARM_SIGN_EXTEND (~INTVAL (x));
21922 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, val);
21924 else
21926 putc ('~', stream);
21927 output_addr_const (stream, x);
21929 return;
21931 case 'b':
21932 /* Print the log2 of a CONST_INT. */
21934 HOST_WIDE_INT val;
21936 if (!CONST_INT_P (x)
21937 || (val = exact_log2 (INTVAL (x) & 0xffffffff)) < 0)
21938 output_operand_lossage ("Unsupported operand for code '%c'", code);
21939 else
21940 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, val);
21942 return;
21944 case 'L':
21945 /* The low 16 bits of an immediate constant. */
21946 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL(x) & 0xffff);
21947 return;
21949 case 'i':
21950 fprintf (stream, "%s", arithmetic_instr (x, 1));
21951 return;
21953 case 'I':
21954 fprintf (stream, "%s", arithmetic_instr (x, 0));
21955 return;
21957 case 'S':
21959 HOST_WIDE_INT val;
21960 const char *shift;
21962 shift = shift_op (x, &val);
21964 if (shift)
21966 fprintf (stream, ", %s ", shift);
21967 if (val == -1)
21968 arm_print_operand (stream, XEXP (x, 1), 0);
21969 else
21970 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, val);
21973 return;
21975 /* An explanation of the 'Q', 'R' and 'H' register operands:
21977 In a pair of registers containing a DI or DF value the 'Q'
21978 operand returns the register number of the register containing
21979 the least significant part of the value. The 'R' operand returns
21980 the register number of the register containing the most
21981 significant part of the value.
21983 The 'H' operand returns the higher of the two register numbers.
21984 On a run where WORDS_BIG_ENDIAN is true the 'H' operand is the
21985 same as the 'Q' operand, since the most significant part of the
21986 value is held in the lower-numbered register. The reverse is true
21987 on systems where WORDS_BIG_ENDIAN is false.
21989 The purpose of these operands is to distinguish between cases
21990 where the endian-ness of the values is important (for example
21991 when they are added together), and cases where the endian-ness
21992 is irrelevant, but the order of register operations is important.
21993 For example when loading a value from memory into a register
21994 pair, the endian-ness does not matter. Provided that the value
21995 from the lower memory address is put into the lower numbered
21996 register, and the value from the higher address is put into the
21997 higher numbered register, the load will work regardless of whether
21998 the value being loaded is big-wordian or little-wordian. The
21999 order of the two register loads can matter however, if the address
22000 of the memory location is actually held in one of the registers
22001 being overwritten by the load.
22003 The 'Q' and 'R' constraints are also available for 64-bit
22004 constants. */
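/* E.g. for a DI value held in r2/r3 on a little-endian run, 'Q' prints r2
   and 'R' prints r3, while 'H' always prints r3, the higher-numbered
   register of the pair.  */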
22005 case 'Q':
22006 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
22008 rtx part = gen_lowpart (SImode, x);
22009 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, INTVAL (part));
22010 return;
22013 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
22015 output_operand_lossage ("invalid operand for code '%c'", code);
22016 return;
22019 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 1 : 0));
22020 return;
22022 case 'R':
22023 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
22025 machine_mode mode = GET_MODE (x);
22026 rtx part;
22028 if (mode == VOIDmode)
22029 mode = DImode;
22030 part = gen_highpart_mode (SImode, mode, x);
22031 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, INTVAL (part));
22032 return;
22035 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
22037 output_operand_lossage ("invalid operand for code '%c'", code);
22038 return;
22041 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 0 : 1));
22042 return;
22044 case 'H':
22045 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
22047 output_operand_lossage ("invalid operand for code '%c'", code);
22048 return;
22051 asm_fprintf (stream, "%r", REGNO (x) + 1);
22052 return;
22054 case 'J':
22055 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
22057 output_operand_lossage ("invalid operand for code '%c'", code);
22058 return;
22061 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 3 : 2));
22062 return;
22064 case 'K':
22065 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
22067 output_operand_lossage ("invalid operand for code '%c'", code);
22068 return;
22071 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 2 : 3));
22072 return;
22074 case 'm':
22075 asm_fprintf (stream, "%r",
22076 REG_P (XEXP (x, 0))
22077 ? REGNO (XEXP (x, 0)) : REGNO (XEXP (XEXP (x, 0), 0)));
22078 return;
22080 case 'M':
22081 asm_fprintf (stream, "{%r-%r}",
22082 REGNO (x),
22083 REGNO (x) + ARM_NUM_REGS (GET_MODE (x)) - 1);
22084 return;
22086 /* Like 'M', but writing doubleword vector registers, for use by Neon
22087 insns. */
22088 case 'h':
22090 int regno = (REGNO (x) - FIRST_VFP_REGNUM) / 2;
22091 int numregs = ARM_NUM_REGS (GET_MODE (x)) / 2;
22092 if (numregs == 1)
22093 asm_fprintf (stream, "{d%d}", regno);
22094 else
22095 asm_fprintf (stream, "{d%d-d%d}", regno, regno + numregs - 1);
22097 return;
22099 case 'd':
22100 /* CONST_TRUE_RTX means always -- that's the default. */
22101 if (x == const_true_rtx)
22102 return;
22104 if (!COMPARISON_P (x))
22106 output_operand_lossage ("invalid operand for code '%c'", code);
22107 return;
22110 fputs (arm_condition_codes[get_arm_condition_code (x)],
22111 stream);
22112 return;
22114 case 'D':
22115 /* CONST_TRUE_RTX means not always -- i.e. never. We shouldn't ever
22116 want to do that. */
22117 if (x == const_true_rtx)
22119 output_operand_lossage ("instruction never executed");
22120 return;
22122 if (!COMPARISON_P (x))
22124 output_operand_lossage ("invalid operand for code '%c'", code);
22125 return;
22128 fputs (arm_condition_codes[ARM_INVERSE_CONDITION_CODE
22129 (get_arm_condition_code (x))],
22130 stream);
22131 return;
22133 case 's':
22134 case 'V':
22135 case 'W':
22136 case 'X':
22137 case 'Y':
22138 case 'Z':
22139 /* Former Maverick support, removed after GCC-4.7. */
22140 output_operand_lossage ("obsolete Maverick format code '%c'", code);
22141 return;
22143 case 'U':
22144 if (!REG_P (x)
22145 || REGNO (x) < FIRST_IWMMXT_GR_REGNUM
22146 || REGNO (x) > LAST_IWMMXT_GR_REGNUM)
22147 /* Bad value for wCG register number. */
22149 output_operand_lossage ("invalid operand for code '%c'", code);
22150 return;
22153 else
22154 fprintf (stream, "%d", REGNO (x) - FIRST_IWMMXT_GR_REGNUM);
22155 return;
22157 /* Print an iWMMXt control register name. */
22158 case 'w':
22159 if (!CONST_INT_P (x)
22160 || INTVAL (x) < 0
22161 || INTVAL (x) >= 16)
22162 /* Bad value for wC register number. */
22164 output_operand_lossage ("invalid operand for code '%c'", code);
22165 return;
22168 else
22170 static const char * wc_reg_names [16] =
22172 "wCID", "wCon", "wCSSF", "wCASF",
22173 "wC4", "wC5", "wC6", "wC7",
22174 "wCGR0", "wCGR1", "wCGR2", "wCGR3",
22175 "wC12", "wC13", "wC14", "wC15"
22178 fputs (wc_reg_names [INTVAL (x)], stream);
22180 return;
22182 /* Print the high single-precision register of a VFP double-precision
22183 register. */
22184 case 'p':
22186 machine_mode mode = GET_MODE (x);
22187 int regno;
22189 if (GET_MODE_SIZE (mode) != 8 || !REG_P (x))
22191 output_operand_lossage ("invalid operand for code '%c'", code);
22192 return;
22195 regno = REGNO (x);
22196 if (!VFP_REGNO_OK_FOR_DOUBLE (regno))
22198 output_operand_lossage ("invalid operand for code '%c'", code);
22199 return;
22202 fprintf (stream, "s%d", regno - FIRST_VFP_REGNUM + 1);
22204 return;
22206 /* Print a VFP/Neon double precision or quad precision register name. */
22207 case 'P':
22208 case 'q':
22210 machine_mode mode = GET_MODE (x);
22211 int is_quad = (code == 'q');
22212 int regno;
22214 if (GET_MODE_SIZE (mode) != (is_quad ? 16 : 8))
22216 output_operand_lossage ("invalid operand for code '%c'", code);
22217 return;
22220 if (!REG_P (x)
22221 || !IS_VFP_REGNUM (REGNO (x)))
22223 output_operand_lossage ("invalid operand for code '%c'", code);
22224 return;
22227 regno = REGNO (x);
22228 if ((is_quad && !NEON_REGNO_OK_FOR_QUAD (regno))
22229 || (!is_quad && !VFP_REGNO_OK_FOR_DOUBLE (regno)))
22231 output_operand_lossage ("invalid operand for code '%c'", code);
22232 return;
22235 fprintf (stream, "%c%d", is_quad ? 'q' : 'd',
22236 (regno - FIRST_VFP_REGNUM) >> (is_quad ? 2 : 1));
22238 return;
22240 /* These two codes print the low/high doubleword register of a Neon quad
22241 register, respectively. For pair-structure types, they can also print
22242 low/high quadword registers. */
22243 case 'e':
22244 case 'f':
22246 machine_mode mode = GET_MODE (x);
22247 int regno;
22249 if ((GET_MODE_SIZE (mode) != 16
22250 && GET_MODE_SIZE (mode) != 32) || !REG_P (x))
22252 output_operand_lossage ("invalid operand for code '%c'", code);
22253 return;
22256 regno = REGNO (x);
22257 if (!NEON_REGNO_OK_FOR_QUAD (regno))
22259 output_operand_lossage ("invalid operand for code '%c'", code);
22260 return;
22263 if (GET_MODE_SIZE (mode) == 16)
22264 fprintf (stream, "d%d", ((regno - FIRST_VFP_REGNUM) >> 1)
22265 + (code == 'f' ? 1 : 0));
22266 else
22267 fprintf (stream, "q%d", ((regno - FIRST_VFP_REGNUM) >> 2)
22268 + (code == 'f' ? 1 : 0));
22270 return;
22272 /* Print a VFPv3 floating-point constant, represented as an integer
22273 index. */
22274 case 'G':
22276 int index = vfp3_const_double_index (x);
22277 gcc_assert (index != -1);
22278 fprintf (stream, "%d", index);
22280 return;
22282 /* Print bits representing opcode features for Neon.
22284 Bit 0 is 1 for signed, 0 for unsigned. Floats count as signed
22285 and polynomials as unsigned.
22287 Bit 1 is 1 for floats and polynomials, 0 for ordinary integers.
22289 Bit 2 is 1 for rounding functions, 0 otherwise. */
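/* E.g. a bits value of 5 (signed + rounding) makes 'T' print 's', 'F'
   print 'i', 't' print 's' and 'O' print 'r'.  */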
22291 /* Identify the type as 's', 'u', 'p' or 'f'. */
22292 case 'T':
22294 HOST_WIDE_INT bits = INTVAL (x);
22295 fputc ("uspf"[bits & 3], stream);
22297 return;
22299 /* Likewise, but signed and unsigned integers are both 'i'. */
22300 case 'F':
22302 HOST_WIDE_INT bits = INTVAL (x);
22303 fputc ("iipf"[bits & 3], stream);
22305 return;
22307 /* As for 'T', but emit 'u' instead of 'p'. */
22308 case 't':
22310 HOST_WIDE_INT bits = INTVAL (x);
22311 fputc ("usuf"[bits & 3], stream);
22313 return;
22315 /* Bit 2: rounding (vs none). */
22316 case 'O':
22318 HOST_WIDE_INT bits = INTVAL (x);
22319 fputs ((bits & 4) != 0 ? "r" : "", stream);
22321 return;
22323 /* Memory operand for vld1/vst1 instruction. */
22324 case 'A':
22326 rtx addr;
22327 bool postinc = FALSE;
22328 rtx postinc_reg = NULL;
22329 unsigned align, memsize, align_bits;
22331 gcc_assert (MEM_P (x));
22332 addr = XEXP (x, 0);
22333 if (GET_CODE (addr) == POST_INC)
22335 postinc = 1;
22336 addr = XEXP (addr, 0);
22338 if (GET_CODE (addr) == POST_MODIFY)
22340 postinc_reg = XEXP( XEXP (addr, 1), 1);
22341 addr = XEXP (addr, 0);
22343 asm_fprintf (stream, "[%r", REGNO (addr));
22345 /* We know the alignment of this access, so we can emit a hint in the
22346 instruction (for some alignments) as an aid to the memory subsystem
22347 of the target. */
22348 align = MEM_ALIGN (x) >> 3;
22349 memsize = MEM_SIZE (x);
22351 /* Only certain alignment specifiers are supported by the hardware. */
22352 if (memsize == 32 && (align % 32) == 0)
22353 align_bits = 256;
22354 else if ((memsize == 16 || memsize == 32) && (align % 16) == 0)
22355 align_bits = 128;
22356 else if (memsize >= 8 && (align % 8) == 0)
22357 align_bits = 64;
22358 else
22359 align_bits = 0;
22361 if (align_bits != 0)
22362 asm_fprintf (stream, ":%d", align_bits);
22364 asm_fprintf (stream, "]");
22366 if (postinc)
22367 fputs("!", stream);
22368 if (postinc_reg)
22369 asm_fprintf (stream, ", %r", REGNO (postinc_reg));
22371 return;
22373 case 'C':
22375 rtx addr;
22377 gcc_assert (MEM_P (x));
22378 addr = XEXP (x, 0);
22379 gcc_assert (REG_P (addr));
22380 asm_fprintf (stream, "[%r]", REGNO (addr));
22382 return;
22384 /* Translate an S register number into a D register number and element index. */
22385 case 'y':
22387 machine_mode mode = GET_MODE (x);
22388 int regno;
22390 if (GET_MODE_SIZE (mode) != 4 || !REG_P (x))
22392 output_operand_lossage ("invalid operand for code '%c'", code);
22393 return;
22396 regno = REGNO (x);
22397 if (!VFP_REGNO_OK_FOR_SINGLE (regno))
22399 output_operand_lossage ("invalid operand for code '%c'", code);
22400 return;
22403 regno = regno - FIRST_VFP_REGNUM;
22404 fprintf (stream, "d%d[%d]", regno / 2, regno % 2);
22406 return;
22408 case 'v':
22409 gcc_assert (CONST_DOUBLE_P (x));
22410 int result;
22411 result = vfp3_const_double_for_fract_bits (x);
22412 if (result == 0)
22413 result = vfp3_const_double_for_bits (x);
22414 fprintf (stream, "#%d", result);
22415 return;
22417 /* Register specifier for vld1.16/vst1.16. Translate the S register
22418 number into a D register number and element index. */
22419 case 'z':
22421 machine_mode mode = GET_MODE (x);
22422 int regno;
22424 if (GET_MODE_SIZE (mode) != 2 || !REG_P (x))
22426 output_operand_lossage ("invalid operand for code '%c'", code);
22427 return;
22430 regno = REGNO (x);
22431 if (!VFP_REGNO_OK_FOR_SINGLE (regno))
22433 output_operand_lossage ("invalid operand for code '%c'", code);
22434 return;
22437 regno = regno - FIRST_VFP_REGNUM;
22438 fprintf (stream, "d%d[%d]", regno/2, ((regno % 2) ? 2 : 0));
22440 return;
22442 default:
22443 if (x == 0)
22445 output_operand_lossage ("missing operand");
22446 return;
22449 switch (GET_CODE (x))
22451 case REG:
22452 asm_fprintf (stream, "%r", REGNO (x));
22453 break;
22455 case MEM:
22456 output_address (GET_MODE (x), XEXP (x, 0));
22457 break;
22459 case CONST_DOUBLE:
22461 char fpstr[20];
22462 real_to_decimal (fpstr, CONST_DOUBLE_REAL_VALUE (x),
22463 sizeof (fpstr), 0, 1);
22464 fprintf (stream, "#%s", fpstr);
22466 break;
22468 default:
22469 gcc_assert (GET_CODE (x) != NEG);
22470 fputc ('#', stream);
22471 if (GET_CODE (x) == HIGH)
22473 fputs (":lower16:", stream);
22474 x = XEXP (x, 0);
22477 output_addr_const (stream, x);
22478 break;
22483 /* Target hook for printing a memory address. */
22484 static void
22485 arm_print_operand_address (FILE *stream, machine_mode mode, rtx x)
22487 if (TARGET_32BIT)
22489 int is_minus = GET_CODE (x) == MINUS;
22491 if (REG_P (x))
22492 asm_fprintf (stream, "[%r]", REGNO (x));
22493 else if (GET_CODE (x) == PLUS || is_minus)
22495 rtx base = XEXP (x, 0);
22496 rtx index = XEXP (x, 1);
22497 HOST_WIDE_INT offset = 0;
22498 if (!REG_P (base)
22499 || (REG_P (index) && REGNO (index) == SP_REGNUM))
22501 /* Ensure that BASE is a register. */
22502 /* (one of them must be). */
22503 /* Also ensure the SP is not used as an index register. */
22504 std::swap (base, index);
22506 switch (GET_CODE (index))
22508 case CONST_INT:
22509 offset = INTVAL (index);
22510 if (is_minus)
22511 offset = -offset;
22512 asm_fprintf (stream, "[%r, #%wd]",
22513 REGNO (base), offset);
22514 break;
22516 case REG:
22517 asm_fprintf (stream, "[%r, %s%r]",
22518 REGNO (base), is_minus ? "-" : "",
22519 REGNO (index));
22520 break;
22522 case MULT:
22523 case ASHIFTRT:
22524 case LSHIFTRT:
22525 case ASHIFT:
22526 case ROTATERT:
22528 asm_fprintf (stream, "[%r, %s%r",
22529 REGNO (base), is_minus ? "-" : "",
22530 REGNO (XEXP (index, 0)));
22531 arm_print_operand (stream, index, 'S');
22532 fputs ("]", stream);
22533 break;
22536 default:
22537 gcc_unreachable ();
22540 else if (GET_CODE (x) == PRE_INC || GET_CODE (x) == POST_INC
22541 || GET_CODE (x) == PRE_DEC || GET_CODE (x) == POST_DEC)
22543 gcc_assert (REG_P (XEXP (x, 0)));
22545 if (GET_CODE (x) == PRE_DEC || GET_CODE (x) == PRE_INC)
22546 asm_fprintf (stream, "[%r, #%s%d]!",
22547 REGNO (XEXP (x, 0)),
22548 GET_CODE (x) == PRE_DEC ? "-" : "",
22549 GET_MODE_SIZE (mode));
22550 else
22551 asm_fprintf (stream, "[%r], #%s%d",
22552 REGNO (XEXP (x, 0)),
22553 GET_CODE (x) == POST_DEC ? "-" : "",
22554 GET_MODE_SIZE (mode));
22556 else if (GET_CODE (x) == PRE_MODIFY)
22558 asm_fprintf (stream, "[%r, ", REGNO (XEXP (x, 0)));
22559 if (CONST_INT_P (XEXP (XEXP (x, 1), 1)))
22560 asm_fprintf (stream, "#%wd]!",
22561 INTVAL (XEXP (XEXP (x, 1), 1)));
22562 else
22563 asm_fprintf (stream, "%r]!",
22564 REGNO (XEXP (XEXP (x, 1), 1)));
22566 else if (GET_CODE (x) == POST_MODIFY)
22568 asm_fprintf (stream, "[%r], ", REGNO (XEXP (x, 0)));
22569 if (CONST_INT_P (XEXP (XEXP (x, 1), 1)))
22570 asm_fprintf (stream, "#%wd",
22571 INTVAL (XEXP (XEXP (x, 1), 1)));
22572 else
22573 asm_fprintf (stream, "%r",
22574 REGNO (XEXP (XEXP (x, 1), 1)));
22576 else output_addr_const (stream, x);
22578 else
22580 if (REG_P (x))
22581 asm_fprintf (stream, "[%r]", REGNO (x));
22582 else if (GET_CODE (x) == POST_INC)
22583 asm_fprintf (stream, "%r!", REGNO (XEXP (x, 0)));
22584 else if (GET_CODE (x) == PLUS)
22586 gcc_assert (REG_P (XEXP (x, 0)));
22587 if (CONST_INT_P (XEXP (x, 1)))
22588 asm_fprintf (stream, "[%r, #%wd]",
22589 REGNO (XEXP (x, 0)),
22590 INTVAL (XEXP (x, 1)));
22591 else
22592 asm_fprintf (stream, "[%r, %r]",
22593 REGNO (XEXP (x, 0)),
22594 REGNO (XEXP (x, 1)));
22596 else
22597 output_addr_const (stream, x);
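/* Examples of the 32-bit address forms produced here: "[r3]", "[r3, #8]",
   "[r3, -r2]", "[r3, r2, lsl #2]", "[r4, #-4]!" and "[r4], #4".  */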
22601 /* Target hook for indicating whether a punctuation character for
22602 TARGET_PRINT_OPERAND is valid. */
22603 static bool
22604 arm_print_operand_punct_valid_p (unsigned char code)
22606 return (code == '@' || code == '|' || code == '.'
22607 || code == '(' || code == ')' || code == '#'
22608 || (TARGET_32BIT && (code == '?'))
22609 || (TARGET_THUMB2 && (code == '!'))
22610 || (TARGET_THUMB && (code == '_')));
22613 /* Target hook for assembling integer objects. The ARM version needs to
22614 handle word-sized values specially. */
22615 static bool
22616 arm_assemble_integer (rtx x, unsigned int size, int aligned_p)
22618 machine_mode mode;
22620 if (size == UNITS_PER_WORD && aligned_p)
22622 fputs ("\t.word\t", asm_out_file);
22623 output_addr_const (asm_out_file, x);
22625 /* Mark symbols as position independent. We only do this in the
22626 .text segment, not in the .data segment. */
22627 if (NEED_GOT_RELOC && flag_pic && making_const_table &&
22628 (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF))
22630 /* See legitimize_pic_address for an explanation of the
22631 TARGET_VXWORKS_RTP check. */
22632 /* References to weak symbols cannot be resolved locally:
22633 they may be overridden by a non-weak definition at link
22634 time. */
22635 if (!arm_pic_data_is_text_relative
22636 || (GET_CODE (x) == SYMBOL_REF
22637 && (!SYMBOL_REF_LOCAL_P (x)
22638 || (SYMBOL_REF_DECL (x)
22639 ? DECL_WEAK (SYMBOL_REF_DECL (x)) : 0))))
22640 fputs ("(GOT)", asm_out_file);
22641 else
22642 fputs ("(GOTOFF)", asm_out_file);
22644 fputc ('\n', asm_out_file);
22645 return true;
22648 mode = GET_MODE (x);
22650 if (arm_vector_mode_supported_p (mode))
22652 int i, units;
22654 gcc_assert (GET_CODE (x) == CONST_VECTOR);
22656 units = CONST_VECTOR_NUNITS (x);
22657 size = GET_MODE_UNIT_SIZE (mode);
22659 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
22660 for (i = 0; i < units; i++)
22662 rtx elt = CONST_VECTOR_ELT (x, i);
22663 assemble_integer
22664 (elt, size, i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT, 1);
22666 else
22667 for (i = 0; i < units; i++)
22669 rtx elt = CONST_VECTOR_ELT (x, i);
22670 assemble_real
22671 (*CONST_DOUBLE_REAL_VALUE (elt),
22672 as_a <scalar_float_mode> (GET_MODE_INNER (mode)),
22673 i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT);
22676 return true;
22679 return default_assemble_integer (x, size, aligned_p);
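/* For instance, under -fPIC a constant-table reference to a symbol that may
   be overridden at link time comes out as "\t.word\tfoo(GOT)", while a
   locally-binding symbol uses "\t.word\tbar(GOTOFF)".  */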
22682 static void
22683 arm_elf_asm_cdtor (rtx symbol, int priority, bool is_ctor)
22685 section *s;
22687 if (!TARGET_AAPCS_BASED)
22689 (is_ctor ?
22690 default_named_section_asm_out_constructor
22691 : default_named_section_asm_out_destructor) (symbol, priority);
22692 return;
22695 /* Put these in the .init_array section, using a special relocation. */
22696 if (priority != DEFAULT_INIT_PRIORITY)
22698 char buf[18];
22699 sprintf (buf, "%s.%.5u",
22700 is_ctor ? ".init_array" : ".fini_array",
22701 priority);
22702 s = get_section (buf, SECTION_WRITE | SECTION_NOTYPE, NULL_TREE);
22704 else if (is_ctor)
22705 s = ctors_section;
22706 else
22707 s = dtors_section;
22709 switch_to_section (s);
22710 assemble_align (POINTER_SIZE);
22711 fputs ("\t.word\t", asm_out_file);
22712 output_addr_const (asm_out_file, symbol);
22713 fputs ("(target1)\n", asm_out_file);
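/* E.g. a constructor registered with priority 123 on an AAPCS-based target
   is emitted as "\t.word\tsymbol(target1)" into a section named
   ".init_array.00123".  */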
22716 /* Add a function to the list of static constructors. */
22718 static void
22719 arm_elf_asm_constructor (rtx symbol, int priority)
22721 arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/true);
22724 /* Add a function to the list of static destructors. */
22726 static void
22727 arm_elf_asm_destructor (rtx symbol, int priority)
22729 arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/false);
22732 /* A finite state machine takes care of noticing whether or not instructions
22733 can be conditionally executed, and thus decrease execution time and code
22734 size by deleting branch instructions. The fsm is controlled by
22735 final_prescan_insn, and controls the actions of ASM_OUTPUT_OPCODE. */
22737 /* The state of the fsm controlling condition codes are:
22738 0: normal, do nothing special
22739 1: make ASM_OUTPUT_OPCODE not output this instruction
22740 2: make ASM_OUTPUT_OPCODE not output this instruction
22741 3: make instructions conditional
22742 4: make instructions conditional
22744 State transitions (state->state by whom under condition):
22745 0 -> 1 final_prescan_insn if the `target' is a label
22746 0 -> 2 final_prescan_insn if the `target' is an unconditional branch
22747 1 -> 3 ASM_OUTPUT_OPCODE after not having output the conditional branch
22748 2 -> 4 ASM_OUTPUT_OPCODE after not having output the conditional branch
22749 3 -> 0 (*targetm.asm_out.internal_label) if the `target' label is reached
22750 (the target label has CODE_LABEL_NUMBER equal to arm_target_label).
22751 4 -> 0 final_prescan_insn if the `target' unconditional branch is reached
22752 (the target insn is arm_target_insn).
22754 If the jump clobbers the conditions then we use states 2 and 4.
22756 A similar thing can be done with conditional return insns.
22758 XXX In case the `target' is an unconditional branch, this conditionalising
22759 of the instructions always reduces code size, but not always execution
22760 time. But then, I want to reduce the code size to somewhere near what
22761 /bin/cc produces. */
22763 /* In addition to this, state is maintained for Thumb-2 COND_EXEC
22764 instructions. When a COND_EXEC instruction is seen the subsequent
22765 instructions are scanned so that multiple conditional instructions can be
22766 combined into a single IT block. arm_condexec_count and arm_condexec_mask
22767 specify the length and true/false mask for the IT block. These will be
22768 decremented/zeroed by arm_asm_output_opcode as the insns are output. */
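/* As a rough example of the ARM-state machinery, the sequence

     cmp   r0, #0
     beq   .L1
     add   r1, r1, #1
   .L1:

   can be rewritten as "cmp r0, #0" followed by "addne r1, r1, #1", with the
   branch deleted; in Thumb-2 state the conditional add is instead expressed
   as a COND_EXEC insn and covered by an IT block.  */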
22770 /* Returns the index of the ARM condition code string in
22771 `arm_condition_codes', or ARM_NV if the comparison is invalid.
22772 COMPARISON should be an rtx like `(eq (...) (...))'. */
22774 enum arm_cond_code
22775 maybe_get_arm_condition_code (rtx comparison)
22777 machine_mode mode = GET_MODE (XEXP (comparison, 0));
22778 enum arm_cond_code code;
22779 enum rtx_code comp_code = GET_CODE (comparison);
22781 if (GET_MODE_CLASS (mode) != MODE_CC)
22782 mode = SELECT_CC_MODE (comp_code, XEXP (comparison, 0),
22783 XEXP (comparison, 1));
22785 switch (mode)
22787 case E_CC_DNEmode: code = ARM_NE; goto dominance;
22788 case E_CC_DEQmode: code = ARM_EQ; goto dominance;
22789 case E_CC_DGEmode: code = ARM_GE; goto dominance;
22790 case E_CC_DGTmode: code = ARM_GT; goto dominance;
22791 case E_CC_DLEmode: code = ARM_LE; goto dominance;
22792 case E_CC_DLTmode: code = ARM_LT; goto dominance;
22793 case E_CC_DGEUmode: code = ARM_CS; goto dominance;
22794 case E_CC_DGTUmode: code = ARM_HI; goto dominance;
22795 case E_CC_DLEUmode: code = ARM_LS; goto dominance;
22796 case E_CC_DLTUmode: code = ARM_CC;
22798 dominance:
22799 if (comp_code == EQ)
22800 return ARM_INVERSE_CONDITION_CODE (code);
22801 if (comp_code == NE)
22802 return code;
22803 return ARM_NV;
22805 case E_CC_NOOVmode:
22806 switch (comp_code)
22808 case NE: return ARM_NE;
22809 case EQ: return ARM_EQ;
22810 case GE: return ARM_PL;
22811 case LT: return ARM_MI;
22812 default: return ARM_NV;
22815 case E_CC_Zmode:
22816 switch (comp_code)
22818 case NE: return ARM_NE;
22819 case EQ: return ARM_EQ;
22820 default: return ARM_NV;
22823 case E_CC_Nmode:
22824 switch (comp_code)
22826 case NE: return ARM_MI;
22827 case EQ: return ARM_PL;
22828 default: return ARM_NV;
22831 case E_CCFPEmode:
22832 case E_CCFPmode:
22833 /* We can handle all cases except UNEQ and LTGT. */
22834 switch (comp_code)
22836 case GE: return ARM_GE;
22837 case GT: return ARM_GT;
22838 case LE: return ARM_LS;
22839 case LT: return ARM_MI;
22840 case NE: return ARM_NE;
22841 case EQ: return ARM_EQ;
22842 case ORDERED: return ARM_VC;
22843 case UNORDERED: return ARM_VS;
22844 case UNLT: return ARM_LT;
22845 case UNLE: return ARM_LE;
22846 case UNGT: return ARM_HI;
22847 case UNGE: return ARM_PL;
22848 /* UNEQ and LTGT do not have a representation. */
22849 case UNEQ: /* Fall through. */
22850 case LTGT: /* Fall through. */
22851 default: return ARM_NV;
22854 case E_CC_SWPmode:
22855 switch (comp_code)
22857 case NE: return ARM_NE;
22858 case EQ: return ARM_EQ;
22859 case GE: return ARM_LE;
22860 case GT: return ARM_LT;
22861 case LE: return ARM_GE;
22862 case LT: return ARM_GT;
22863 case GEU: return ARM_LS;
22864 case GTU: return ARM_CC;
22865 case LEU: return ARM_CS;
22866 case LTU: return ARM_HI;
22867 default: return ARM_NV;
22870 case E_CC_Cmode:
22871 switch (comp_code)
22873 case LTU: return ARM_CS;
22874 case GEU: return ARM_CC;
22875 case NE: return ARM_CS;
22876 case EQ: return ARM_CC;
22877 default: return ARM_NV;
22880 case E_CC_CZmode:
22881 switch (comp_code)
22883 case NE: return ARM_NE;
22884 case EQ: return ARM_EQ;
22885 case GEU: return ARM_CS;
22886 case GTU: return ARM_HI;
22887 case LEU: return ARM_LS;
22888 case LTU: return ARM_CC;
22889 default: return ARM_NV;
22892 case E_CC_NCVmode:
22893 switch (comp_code)
22895 case GE: return ARM_GE;
22896 case LT: return ARM_LT;
22897 case GEU: return ARM_CS;
22898 case LTU: return ARM_CC;
22899 default: return ARM_NV;
22902 case E_CC_Vmode:
22903 switch (comp_code)
22905 case NE: return ARM_VS;
22906 case EQ: return ARM_VC;
22907 default: return ARM_NV;
22910 case E_CCmode:
22911 switch (comp_code)
22913 case NE: return ARM_NE;
22914 case EQ: return ARM_EQ;
22915 case GE: return ARM_GE;
22916 case GT: return ARM_GT;
22917 case LE: return ARM_LE;
22918 case LT: return ARM_LT;
22919 case GEU: return ARM_CS;
22920 case GTU: return ARM_HI;
22921 case LEU: return ARM_LS;
22922 case LTU: return ARM_CC;
22923 default: return ARM_NV;
22926 default: gcc_unreachable ();
22930 /* Like maybe_get_arm_condition_code, but never return ARM_NV. */
22931 static enum arm_cond_code
22932 get_arm_condition_code (rtx comparison)
22934 enum arm_cond_code code = maybe_get_arm_condition_code (comparison);
22935 gcc_assert (code != ARM_NV);
22936 return code;
22939 /* Implement TARGET_FIXED_CONDITION_CODE_REGS. We only have condition
22940 code registers when not targeting Thumb1. The VFP condition register
22941 only exists when generating hard-float code. */
22942 static bool
22943 arm_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
22945 if (!TARGET_32BIT)
22946 return false;
22948 *p1 = CC_REGNUM;
22949 *p2 = TARGET_HARD_FLOAT ? VFPCC_REGNUM : INVALID_REGNUM;
22950 return true;
22953 /* Tell arm_asm_output_opcode to output IT blocks for conditionally executed
22954 instructions. */
22955 void
22956 thumb2_final_prescan_insn (rtx_insn *insn)
22958 rtx_insn *first_insn = insn;
22959 rtx body = PATTERN (insn);
22960 rtx predicate;
22961 enum arm_cond_code code;
22962 int n;
22963 int mask;
22964 int max;
22966 /* max_insns_skipped in the tune was already taken into account in the
22967 cost model of ifcvt pass when generating COND_EXEC insns. At this stage
22968 just emit the IT blocks as large as we can; it does not make sense to
22969 split them. */
22970 max = MAX_INSN_PER_IT_BLOCK;
22972 /* Remove the previous insn from the count of insns to be output. */
22973 if (arm_condexec_count)
22974 arm_condexec_count--;
22976 /* Nothing to do if we are already inside a conditional block. */
22977 if (arm_condexec_count)
22978 return;
22980 if (GET_CODE (body) != COND_EXEC)
22981 return;
22983 /* Conditional jumps are implemented directly. */
22984 if (JUMP_P (insn))
22985 return;
22987 predicate = COND_EXEC_TEST (body);
22988 arm_current_cc = get_arm_condition_code (predicate);
22990 n = get_attr_ce_count (insn);
22991 arm_condexec_count = 1;
22992 arm_condexec_mask = (1 << n) - 1;
22993 arm_condexec_masklen = n;
22994 /* See if subsequent instructions can be combined into the same block. */
22995 for (;;)
22997 insn = next_nonnote_insn (insn);
22999 /* Jumping into the middle of an IT block is illegal, so a label or
23000 barrier terminates the block. */
23001 if (!NONJUMP_INSN_P (insn) && !JUMP_P (insn))
23002 break;
23004 body = PATTERN (insn);
23005 /* USE and CLOBBER aren't really insns, so just skip them. */
23006 if (GET_CODE (body) == USE
23007 || GET_CODE (body) == CLOBBER)
23008 continue;
23010 /* ??? Recognize conditional jumps, and combine them with IT blocks. */
23011 if (GET_CODE (body) != COND_EXEC)
23012 break;
23013 /* Maximum number of conditionally executed instructions in a block. */
23014 n = get_attr_ce_count (insn);
23015 if (arm_condexec_masklen + n > max)
23016 break;
23018 predicate = COND_EXEC_TEST (body);
23019 code = get_arm_condition_code (predicate);
23020 mask = (1 << n) - 1;
23021 if (arm_current_cc == code)
23022 arm_condexec_mask |= (mask << arm_condexec_masklen);
23023 else if (arm_current_cc != ARM_INVERSE_CONDITION_CODE(code))
23024 break;
23026 arm_condexec_count++;
23027 arm_condexec_masklen += n;
23029 /* A jump must be the last instruction in a conditional block. */
23030 if (JUMP_P (insn))
23031 break;
23033 /* Restore recog_data (getting the attributes of other insns can
23034 destroy this array, but final.c assumes that it remains intact
23035 across this call). */
23036 extract_constrain_insn_cached (first_insn);
23039 void
23040 arm_final_prescan_insn (rtx_insn *insn)
23042 /* BODY will hold the body of INSN. */
23043 rtx body = PATTERN (insn);
23045 /* This will be 1 if trying to repeat the trick, and things need to be
23046 reversed if it appears to fail. */
23047 int reverse = 0;
23049 /* If we start with a return insn, we only succeed if we find another one. */
23050 int seeking_return = 0;
23051 enum rtx_code return_code = UNKNOWN;
23053 /* START_INSN will hold the insn from where we start looking. This is the
23054 first insn after the following code_label if REVERSE is true. */
23055 rtx_insn *start_insn = insn;
23057 /* If in state 4, check if the target branch is reached, in order to
23058 change back to state 0. */
23059 if (arm_ccfsm_state == 4)
23061 if (insn == arm_target_insn)
23063 arm_target_insn = NULL;
23064 arm_ccfsm_state = 0;
23066 return;
23069 /* If in state 3, it is possible to repeat the trick, if this insn is an
23070 unconditional branch to a label, and immediately following this branch
23071 is the previous target label which is only used once, and the label this
23072 branch jumps to is not too far off. */
23073 if (arm_ccfsm_state == 3)
23075 if (simplejump_p (insn))
23077 start_insn = next_nonnote_insn (start_insn);
23078 if (BARRIER_P (start_insn))
23080 /* XXX Isn't this always a barrier? */
23081 start_insn = next_nonnote_insn (start_insn);
23083 if (LABEL_P (start_insn)
23084 && CODE_LABEL_NUMBER (start_insn) == arm_target_label
23085 && LABEL_NUSES (start_insn) == 1)
23086 reverse = TRUE;
23087 else
23088 return;
23090 else if (ANY_RETURN_P (body))
23092 start_insn = next_nonnote_insn (start_insn);
23093 if (BARRIER_P (start_insn))
23094 start_insn = next_nonnote_insn (start_insn);
23095 if (LABEL_P (start_insn)
23096 && CODE_LABEL_NUMBER (start_insn) == arm_target_label
23097 && LABEL_NUSES (start_insn) == 1)
23099 reverse = TRUE;
23100 seeking_return = 1;
23101 return_code = GET_CODE (body);
23103 else
23104 return;
23106 else
23107 return;
23110 gcc_assert (!arm_ccfsm_state || reverse);
23111 if (!JUMP_P (insn))
23112 return;
23114 /* This jump might be paralleled with a clobber of the condition codes;
23115 the jump should always come first. */
23116 if (GET_CODE (body) == PARALLEL && XVECLEN (body, 0) > 0)
23117 body = XVECEXP (body, 0, 0);
23119 if (reverse
23120 || (GET_CODE (body) == SET && GET_CODE (SET_DEST (body)) == PC
23121 && GET_CODE (SET_SRC (body)) == IF_THEN_ELSE))
23123 int insns_skipped;
23124 int fail = FALSE, succeed = FALSE;
23125 /* Flag which part of the IF_THEN_ELSE is the LABEL_REF. */
23126 int then_not_else = TRUE;
23127 rtx_insn *this_insn = start_insn;
23128 rtx label = 0;
23130 /* Register the insn jumped to. */
23131 if (reverse)
23133 if (!seeking_return)
23134 label = XEXP (SET_SRC (body), 0);
23136 else if (GET_CODE (XEXP (SET_SRC (body), 1)) == LABEL_REF)
23137 label = XEXP (XEXP (SET_SRC (body), 1), 0);
23138 else if (GET_CODE (XEXP (SET_SRC (body), 2)) == LABEL_REF)
23140 label = XEXP (XEXP (SET_SRC (body), 2), 0);
23141 then_not_else = FALSE;
23143 else if (ANY_RETURN_P (XEXP (SET_SRC (body), 1)))
23145 seeking_return = 1;
23146 return_code = GET_CODE (XEXP (SET_SRC (body), 1));
23148 else if (ANY_RETURN_P (XEXP (SET_SRC (body), 2)))
23150 seeking_return = 1;
23151 return_code = GET_CODE (XEXP (SET_SRC (body), 2));
23152 then_not_else = FALSE;
23154 else
23155 gcc_unreachable ();
23157 /* See how many insns this branch skips, and what kind of insns. If all
23158 insns are okay, and the label or unconditional branch to the same
23159 label is not too far away, succeed. */
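/* For illustration (a sketch of the transformation this state machine
   enables at output time): a conditional branch around a single
   instruction, e.g.

	cmp	r0, #0
	beq	.L1
	add	r1, r1, #1
   .L1:

   can instead be output as

	cmp	r0, #0
	addne	r1, r1, #1

   with the skipped instruction conditionalised rather than branched
   around.  */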
23160 for (insns_skipped = 0;
23161 !fail && !succeed && insns_skipped++ < max_insns_skipped;)
23163 rtx scanbody;
23165 this_insn = next_nonnote_insn (this_insn);
23166 if (!this_insn)
23167 break;
23169 switch (GET_CODE (this_insn))
23171 case CODE_LABEL:
23172 /* Succeed if it is the target label, otherwise fail since
23173 control falls in from somewhere else. */
23174 if (this_insn == label)
23176 arm_ccfsm_state = 1;
23177 succeed = TRUE;
23179 else
23180 fail = TRUE;
23181 break;
23183 case BARRIER:
23184 /* Succeed if the following insn is the target label.
23185 Otherwise fail.
23186 If return insns are used then the last insn in a function
23187 will be a barrier. */
23188 this_insn = next_nonnote_insn (this_insn);
23189 if (this_insn && this_insn == label)
23191 arm_ccfsm_state = 1;
23192 succeed = TRUE;
23194 else
23195 fail = TRUE;
23196 break;
23198 case CALL_INSN:
23199 /* The AAPCS says that conditional calls should not be
23200 used since they make interworking inefficient (the
23201 linker can't transform BL<cond> into BLX). That's
23202 only a problem if the machine has BLX. */
23203 if (arm_arch5)
23205 fail = TRUE;
23206 break;
23209 /* Succeed if the following insn is the target label, or
23210 if the following two insns are a barrier and the
23211 target label. */
23212 this_insn = next_nonnote_insn (this_insn);
23213 if (this_insn && BARRIER_P (this_insn))
23214 this_insn = next_nonnote_insn (this_insn);
23216 if (this_insn && this_insn == label
23217 && insns_skipped < max_insns_skipped)
23219 arm_ccfsm_state = 1;
23220 succeed = TRUE;
23222 else
23223 fail = TRUE;
23224 break;
23226 case JUMP_INSN:
23227 /* If this is an unconditional branch to the same label, succeed.
23228 If it is to another label, do nothing. If it is conditional,
23229 fail. */
23230 /* XXX Probably, the tests for SET and the PC are
23231 unnecessary. */
23233 scanbody = PATTERN (this_insn);
23234 if (GET_CODE (scanbody) == SET
23235 && GET_CODE (SET_DEST (scanbody)) == PC)
23237 if (GET_CODE (SET_SRC (scanbody)) == LABEL_REF
23238 && XEXP (SET_SRC (scanbody), 0) == label && !reverse)
23240 arm_ccfsm_state = 2;
23241 succeed = TRUE;
23243 else if (GET_CODE (SET_SRC (scanbody)) == IF_THEN_ELSE)
23244 fail = TRUE;
23246 /* Fail if a conditional return is undesirable (e.g. on a
23247 StrongARM), but still allow this if optimizing for size. */
23248 else if (GET_CODE (scanbody) == return_code
23249 && !use_return_insn (TRUE, NULL)
23250 && !optimize_size)
23251 fail = TRUE;
23252 else if (GET_CODE (scanbody) == return_code)
23254 arm_ccfsm_state = 2;
23255 succeed = TRUE;
23257 else if (GET_CODE (scanbody) == PARALLEL)
23259 switch (get_attr_conds (this_insn))
23261 case CONDS_NOCOND:
23262 break;
23263 default:
23264 fail = TRUE;
23265 break;
23268 else
23269 fail = TRUE; /* Unrecognized jump (e.g. epilogue). */
23271 break;
23273 case INSN:
23274 /* Instructions using or affecting the condition codes make it
23275 fail. */
23276 scanbody = PATTERN (this_insn);
23277 if (!(GET_CODE (scanbody) == SET
23278 || GET_CODE (scanbody) == PARALLEL)
23279 || get_attr_conds (this_insn) != CONDS_NOCOND)
23280 fail = TRUE;
23281 break;
23283 default:
23284 break;
23287 if (succeed)
23289 if ((!seeking_return) && (arm_ccfsm_state == 1 || reverse))
23290 arm_target_label = CODE_LABEL_NUMBER (label);
23291 else
23293 gcc_assert (seeking_return || arm_ccfsm_state == 2);
23295 while (this_insn && GET_CODE (PATTERN (this_insn)) == USE)
23297 this_insn = next_nonnote_insn (this_insn);
23298 gcc_assert (!this_insn
23299 || (!BARRIER_P (this_insn)
23300 && !LABEL_P (this_insn)));
23302 if (!this_insn)
23304 /* Oh, dear!  We ran off the end; give up. */
23305 extract_constrain_insn_cached (insn);
23306 arm_ccfsm_state = 0;
23307 arm_target_insn = NULL;
23308 return;
23310 arm_target_insn = this_insn;
23313 /* If REVERSE is true, ARM_CURRENT_CC needs to be inverted from
23314 what it was. */
23315 if (!reverse)
23316 arm_current_cc = get_arm_condition_code (XEXP (SET_SRC (body), 0));
23318 if (reverse || then_not_else)
23319 arm_current_cc = ARM_INVERSE_CONDITION_CODE (arm_current_cc);
23322 /* Restore recog_data (getting the attributes of other insns can
23323 destroy this array, but final.c assumes that it remains intact
23324 across this call). */
23325 extract_constrain_insn_cached (insn);
23329 /* Output IT instructions. */
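/* For illustration (assuming each conditional insn has a ce_count of 1):
   if thumb2_final_prescan_insn saw three COND_EXEC insns predicated
   EQ, EQ, NE, then arm_current_cc == ARM_EQ, arm_condexec_mask == 0b011
   and arm_condexec_masklen == 3, so the code below builds buff == "tte"
   and emits

	itte	eq

   before the first instruction of the block: the first two instructions
   execute when EQ holds and the third when it does not.  */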
23330 void
23331 thumb2_asm_output_opcode (FILE * stream)
23333 char buff[5];
23334 int n;
23336 if (arm_condexec_mask)
23338 for (n = 0; n < arm_condexec_masklen; n++)
23339 buff[n] = (arm_condexec_mask & (1 << n)) ? 't' : 'e';
23340 buff[n] = 0;
23341 asm_fprintf(stream, "i%s\t%s\n\t", buff,
23342 arm_condition_codes[arm_current_cc]);
23343 arm_condexec_mask = 0;
23347 /* Returns true if REGNO is a valid register
23348 for holding a quantity of type MODE. */
23350 arm_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
23352 if (GET_MODE_CLASS (mode) == MODE_CC)
23353 return (regno == CC_REGNUM
23354 || (TARGET_HARD_FLOAT
23355 && regno == VFPCC_REGNUM));
23357 if (regno == CC_REGNUM && GET_MODE_CLASS (mode) != MODE_CC)
23358 return false;
23360 if (TARGET_THUMB1)
23361 /* For the Thumb we only allow values bigger than SImode in
23362 registers 0 - 6, so that there is always a second low
23363 register available to hold the upper part of the value.
23364 We probably ought to ensure that the register is the
23365 start of an even numbered register pair. */
23366 return (ARM_NUM_REGS (mode) < 2) || (regno < LAST_LO_REGNUM);
23368 if (TARGET_HARD_FLOAT && IS_VFP_REGNUM (regno))
23370 if (mode == SFmode || mode == SImode)
23371 return VFP_REGNO_OK_FOR_SINGLE (regno);
23373 if (mode == DFmode)
23374 return VFP_REGNO_OK_FOR_DOUBLE (regno);
23376 if (mode == HFmode)
23377 return VFP_REGNO_OK_FOR_SINGLE (regno);
23379 /* VFP registers can hold HImode values. */
23380 if (mode == HImode)
23381 return VFP_REGNO_OK_FOR_SINGLE (regno);
23383 if (TARGET_NEON)
23384 return (VALID_NEON_DREG_MODE (mode) && VFP_REGNO_OK_FOR_DOUBLE (regno))
23385 || (VALID_NEON_QREG_MODE (mode)
23386 && NEON_REGNO_OK_FOR_QUAD (regno))
23387 || (mode == TImode && NEON_REGNO_OK_FOR_NREGS (regno, 2))
23388 || (mode == EImode && NEON_REGNO_OK_FOR_NREGS (regno, 3))
23389 || (mode == OImode && NEON_REGNO_OK_FOR_NREGS (regno, 4))
23390 || (mode == CImode && NEON_REGNO_OK_FOR_NREGS (regno, 6))
23391 || (mode == XImode && NEON_REGNO_OK_FOR_NREGS (regno, 8));
23393 return FALSE;
23396 if (TARGET_REALLY_IWMMXT)
23398 if (IS_IWMMXT_GR_REGNUM (regno))
23399 return mode == SImode;
23401 if (IS_IWMMXT_REGNUM (regno))
23402 return VALID_IWMMXT_REG_MODE (mode);
23405 /* We allow almost any value to be stored in the general registers.
23406 Restrict doubleword quantities to even register pairs in ARM state
23407 so that we can use ldrd. Do not allow very large Neon structure
23408 opaque modes in general registers; they would use too many. */
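/* For example (illustrative): in ARM state with TARGET_LDRD, a DImode
   value may be allocated to r0:r1 or r2:r3 but not to r1:r2, since the
   check below rejects doubleword values starting at an odd register so
   that ldrd/strd (which need an even first register) remain usable.  */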
23409 if (regno <= LAST_ARM_REGNUM)
23411 if (ARM_NUM_REGS (mode) > 4)
23412 return FALSE;
23414 if (TARGET_THUMB2)
23415 return TRUE;
23417 return !(TARGET_LDRD && GET_MODE_SIZE (mode) > 4 && (regno & 1) != 0);
23420 if (regno == FRAME_POINTER_REGNUM
23421 || regno == ARG_POINTER_REGNUM)
23422 /* We only allow integers in the fake hard registers. */
23423 return GET_MODE_CLASS (mode) == MODE_INT;
23425 return FALSE;
23428 /* Implement MODES_TIEABLE_P. */
23430 bool
23431 arm_modes_tieable_p (machine_mode mode1, machine_mode mode2)
23433 if (GET_MODE_CLASS (mode1) == GET_MODE_CLASS (mode2))
23434 return true;
23436 /* We specifically want to allow elements of "structure" modes to
23437 be tieable to the structure. This more general condition allows
23438 other rarer situations too. */
23439 if (TARGET_NEON
23440 && (VALID_NEON_DREG_MODE (mode1)
23441 || VALID_NEON_QREG_MODE (mode1)
23442 || VALID_NEON_STRUCT_MODE (mode1))
23443 && (VALID_NEON_DREG_MODE (mode2)
23444 || VALID_NEON_QREG_MODE (mode2)
23445 || VALID_NEON_STRUCT_MODE (mode2)))
23446 return true;
23448 return false;
23451 /* For efficiency and historical reasons LO_REGS, HI_REGS and CC_REGS are
23452 not used in arm mode. */
23454 enum reg_class
23455 arm_regno_class (int regno)
23457 if (regno == PC_REGNUM)
23458 return NO_REGS;
23460 if (TARGET_THUMB1)
23462 if (regno == STACK_POINTER_REGNUM)
23463 return STACK_REG;
23464 if (regno == CC_REGNUM)
23465 return CC_REG;
23466 if (regno < 8)
23467 return LO_REGS;
23468 return HI_REGS;
23471 if (TARGET_THUMB2 && regno < 8)
23472 return LO_REGS;
23474 if ( regno <= LAST_ARM_REGNUM
23475 || regno == FRAME_POINTER_REGNUM
23476 || regno == ARG_POINTER_REGNUM)
23477 return TARGET_THUMB2 ? HI_REGS : GENERAL_REGS;
23479 if (regno == CC_REGNUM || regno == VFPCC_REGNUM)
23480 return TARGET_THUMB2 ? CC_REG : NO_REGS;
23482 if (IS_VFP_REGNUM (regno))
23484 if (regno <= D7_VFP_REGNUM)
23485 return VFP_D0_D7_REGS;
23486 else if (regno <= LAST_LO_VFP_REGNUM)
23487 return VFP_LO_REGS;
23488 else
23489 return VFP_HI_REGS;
23492 if (IS_IWMMXT_REGNUM (regno))
23493 return IWMMXT_REGS;
23495 if (IS_IWMMXT_GR_REGNUM (regno))
23496 return IWMMXT_GR_REGS;
23498 return NO_REGS;
23501 /* Handle a special case when computing the offset
23502 of an argument from the frame pointer. */
23504 arm_debugger_arg_offset (int value, rtx addr)
23506 rtx_insn *insn;
23508 /* We are only interested if dbxout_parms() failed to compute the offset. */
23509 if (value != 0)
23510 return 0;
23512 /* We can only cope with the case where the address is held in a register. */
23513 if (!REG_P (addr))
23514 return 0;
23516 /* If we are using the frame pointer to point at the argument, then
23517 an offset of 0 is correct. */
23518 if (REGNO (addr) == (unsigned) HARD_FRAME_POINTER_REGNUM)
23519 return 0;
23521 /* If we are using the stack pointer to point at the
23522 argument, then an offset of 0 is correct. */
23523 /* ??? Check this is consistent with thumb2 frame layout. */
23524 if ((TARGET_THUMB || !frame_pointer_needed)
23525 && REGNO (addr) == SP_REGNUM)
23526 return 0;
23528 /* Oh dear. The argument is pointed to by a register rather
23529 than being held in a register, or being stored at a known
23530 offset from the frame pointer. Since GDB only understands
23531 those two kinds of argument we must translate the address
23532 held in the register into an offset from the frame pointer.
23533 We do this by searching through the insns for the function
23534 looking to see where this register gets its value. If the
23535 register is initialized from the frame pointer plus an offset
23536 then we are in luck and we can continue, otherwise we give up.
23538 This code is exercised by producing debugging information
23539 for a function with arguments like this:
23541 double func (double a, double b, int c, double d) {return d;}
23543 Without this code the stab for parameter 'd' will be set to
23544 an offset of 0 from the frame pointer, rather than 8. */
23546 /* The if() statement says:
23548 If the insn is a normal instruction
23549 and if the insn is setting the value in a register
23550 and if the register being set is the register holding the address of the argument
23551 and if the address is computed by an addition
23552 that involves adding to a register
23553 which is the frame pointer
23554 a constant integer
23556 then... */
23558 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
23560 if ( NONJUMP_INSN_P (insn)
23561 && GET_CODE (PATTERN (insn)) == SET
23562 && REGNO (XEXP (PATTERN (insn), 0)) == REGNO (addr)
23563 && GET_CODE (XEXP (PATTERN (insn), 1)) == PLUS
23564 && REG_P (XEXP (XEXP (PATTERN (insn), 1), 0))
23565 && REGNO (XEXP (XEXP (PATTERN (insn), 1), 0)) == (unsigned) HARD_FRAME_POINTER_REGNUM
23566 && CONST_INT_P (XEXP (XEXP (PATTERN (insn), 1), 1))
23569 value = INTVAL (XEXP (XEXP (PATTERN (insn), 1), 1));
23571 break;
23575 if (value == 0)
23577 debug_rtx (addr);
23578 warning (0, "unable to compute real location of stacked parameter");
23579 value = 8; /* XXX magic hack */
23582 return value;
23585 /* Implement TARGET_PROMOTED_TYPE. */
23587 static tree
23588 arm_promoted_type (const_tree t)
23590 if (SCALAR_FLOAT_TYPE_P (t)
23591 && TYPE_PRECISION (t) == 16
23592 && TYPE_MAIN_VARIANT (t) == arm_fp16_type_node)
23593 return float_type_node;
23594 return NULL_TREE;
23597 /* Implement TARGET_SCALAR_MODE_SUPPORTED_P.
23598 This simply adds HFmode as a supported mode; even though we don't
23599 implement arithmetic on this type directly, it's supported by
23600 optabs conversions, much the way the double-word arithmetic is
23601 special-cased in the default hook. */
23603 static bool
23604 arm_scalar_mode_supported_p (scalar_mode mode)
23606 if (mode == HFmode)
23607 return (arm_fp16_format != ARM_FP16_FORMAT_NONE);
23608 else if (ALL_FIXED_POINT_MODE_P (mode))
23609 return true;
23610 else
23611 return default_scalar_mode_supported_p (mode);
23614 /* Set the value of FLT_EVAL_METHOD.
23615 ISO/IEC TS 18661-3 defines two values that we'd like to make use of:
23617 0: evaluate all operations and constants, whose semantic type has at
23618 most the range and precision of type float, to the range and
23619 precision of float; evaluate all other operations and constants to
23620 the range and precision of the semantic type;
23622 N, where _FloatN is a supported interchange floating type
23623 evaluate all operations and constants, whose semantic type has at
23624 most the range and precision of _FloatN type, to the range and
23625 precision of the _FloatN type; evaluate all other operations and
23626 constants to the range and precision of the semantic type;
23628 If we have the ARMv8.2-A extensions then we support _Float16 in native
23629 precision, so we should set this to 16. Otherwise, we support the type,
23630 but want to evaluate expressions in float precision, so set this to
23631 0. */
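/* Sketch of the intended behaviour: with the ARMv8.2-A FP16 instructions
   (TARGET_VFP_FP16INST) an expression such as a + b on _Float16 operands
   can be evaluated directly in HFmode; without them the operands are
   promoted, the addition is performed in SFmode, and the result is
   converted back to _Float16 only when it is stored.  */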
23633 static enum flt_eval_method
23634 arm_excess_precision (enum excess_precision_type type)
23636 switch (type)
23638 case EXCESS_PRECISION_TYPE_FAST:
23639 case EXCESS_PRECISION_TYPE_STANDARD:
23640 /* We can calculate either in 16-bit range and precision or
23641 32-bit range and precision. Make that decision based on whether
23642 we have native support for the ARMv8.2-A 16-bit floating-point
23643 instructions or not. */
23644 return (TARGET_VFP_FP16INST
23645 ? FLT_EVAL_METHOD_PROMOTE_TO_FLOAT16
23646 : FLT_EVAL_METHOD_PROMOTE_TO_FLOAT);
23647 case EXCESS_PRECISION_TYPE_IMPLICIT:
23648 return FLT_EVAL_METHOD_PROMOTE_TO_FLOAT16;
23649 default:
23650 gcc_unreachable ();
23652 return FLT_EVAL_METHOD_UNPREDICTABLE;
23656 /* Implement TARGET_FLOATN_MODE. Make very sure that we don't provide
23657 _Float16 if we are using anything other than ieee format for 16-bit
23658 floating point. Otherwise, punt to the default implementation. */
23659 static opt_scalar_float_mode
23660 arm_floatn_mode (int n, bool extended)
23662 if (!extended && n == 16)
23664 if (arm_fp16_format == ARM_FP16_FORMAT_IEEE)
23665 return HFmode;
23666 return opt_scalar_float_mode ();
23669 return default_floatn_mode (n, extended);
23673 /* Set up OPERANDS for a register copy from SRC to DEST, taking care
23674 not to early-clobber SRC registers in the process.
23676 We assume that the operands described by SRC and DEST represent a
23677 decomposed copy of OPERANDS[1] into OPERANDS[0]. COUNT is the
23678 number of components into which the copy has been decomposed. */
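/* For illustration (hypothetical register numbers): if a copy has been
   decomposed into two component moves from {d1, d2} into {d2, d3}, the
   ranges overlap and the destination starts above the source, so the
   moves are emitted in reverse order (d3 <- d2 first, then d2 <- d1);
   a copy from {d2, d3} into {d1, d2} is emitted in forward order, which
   is equally safe.  */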
23679 void
23680 neon_disambiguate_copy (rtx *operands, rtx *dest, rtx *src, unsigned int count)
23682 unsigned int i;
23684 if (!reg_overlap_mentioned_p (operands[0], operands[1])
23685 || REGNO (operands[0]) < REGNO (operands[1]))
23687 for (i = 0; i < count; i++)
23689 operands[2 * i] = dest[i];
23690 operands[2 * i + 1] = src[i];
23693 else
23695 for (i = 0; i < count; i++)
23697 operands[2 * i] = dest[count - i - 1];
23698 operands[2 * i + 1] = src[count - i - 1];
23703 /* Split operands into moves from op[1] + op[2] into op[0]. */
23705 void
23706 neon_split_vcombine (rtx operands[3])
23708 unsigned int dest = REGNO (operands[0]);
23709 unsigned int src1 = REGNO (operands[1]);
23710 unsigned int src2 = REGNO (operands[2]);
23711 machine_mode halfmode = GET_MODE (operands[1]);
23712 unsigned int halfregs = HARD_REGNO_NREGS (src1, halfmode);
23713 rtx destlo, desthi;
23715 if (src1 == dest && src2 == dest + halfregs)
23717 /* No-op move. Can't split to nothing; emit something. */
23718 emit_note (NOTE_INSN_DELETED);
23719 return;
23722 /* Preserve register attributes for variable tracking. */
23723 destlo = gen_rtx_REG_offset (operands[0], halfmode, dest, 0);
23724 desthi = gen_rtx_REG_offset (operands[0], halfmode, dest + halfregs,
23725 GET_MODE_SIZE (halfmode));
23727 /* Special case of reversed high/low parts. Use VSWP. */
23728 if (src2 == dest && src1 == dest + halfregs)
23730 rtx x = gen_rtx_SET (destlo, operands[1]);
23731 rtx y = gen_rtx_SET (desthi, operands[2]);
23732 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, x, y)));
23733 return;
23736 if (!reg_overlap_mentioned_p (operands[2], destlo))
23738 /* Try to avoid unnecessary moves if part of the result
23739 is in the right place already. */
23740 if (src1 != dest)
23741 emit_move_insn (destlo, operands[1]);
23742 if (src2 != dest + halfregs)
23743 emit_move_insn (desthi, operands[2]);
23745 else
23747 if (src2 != dest + halfregs)
23748 emit_move_insn (desthi, operands[2]);
23749 if (src1 != dest)
23750 emit_move_insn (destlo, operands[1]);
23754 /* Return the number (counting from 0) of
23755 the least significant set bit in MASK. */
23757 inline static int
23758 number_of_first_bit_set (unsigned mask)
23760 return ctz_hwi (mask);
23763 /* Like emit_multi_reg_push, but allowing for a different set of
23764 registers to be described as saved. MASK is the set of registers
23765 to be saved; REAL_REGS is the set of registers to be described as
23766 saved. If REAL_REGS is 0, only describe the stack adjustment. */
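/* For example, thumb1_expand_prologue below uses REAL_REGS == 0 when
   pushing the anonymous argument registers of a varargs function: the
   registers are pushed, but the unwind information records only the
   resulting stack adjustment.  */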
23768 static rtx_insn *
23769 thumb1_emit_multi_reg_push (unsigned long mask, unsigned long real_regs)
23771 unsigned long regno;
23772 rtx par[10], tmp, reg;
23773 rtx_insn *insn;
23774 int i, j;
23776 /* Build the parallel of the registers actually being stored. */
23777 for (i = 0; mask; ++i, mask &= mask - 1)
23779 regno = ctz_hwi (mask);
23780 reg = gen_rtx_REG (SImode, regno);
23782 if (i == 0)
23783 tmp = gen_rtx_UNSPEC (BLKmode, gen_rtvec (1, reg), UNSPEC_PUSH_MULT);
23784 else
23785 tmp = gen_rtx_USE (VOIDmode, reg);
23787 par[i] = tmp;
23790 tmp = plus_constant (Pmode, stack_pointer_rtx, -4 * i);
23791 tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
23792 tmp = gen_frame_mem (BLKmode, tmp);
23793 tmp = gen_rtx_SET (tmp, par[0]);
23794 par[0] = tmp;
23796 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (i, par));
23797 insn = emit_insn (tmp);
23799 /* Always build the stack adjustment note for unwind info. */
23800 tmp = plus_constant (Pmode, stack_pointer_rtx, -4 * i);
23801 tmp = gen_rtx_SET (stack_pointer_rtx, tmp);
23802 par[0] = tmp;
23804 /* Build the parallel of the registers recorded as saved for unwind. */
23805 for (j = 0; real_regs; ++j, real_regs &= real_regs - 1)
23807 regno = ctz_hwi (real_regs);
23808 reg = gen_rtx_REG (SImode, regno);
23810 tmp = plus_constant (Pmode, stack_pointer_rtx, j * 4);
23811 tmp = gen_frame_mem (SImode, tmp);
23812 tmp = gen_rtx_SET (tmp, reg);
23813 RTX_FRAME_RELATED_P (tmp) = 1;
23814 par[j + 1] = tmp;
23817 if (j == 0)
23818 tmp = par[0];
23819 else
23821 RTX_FRAME_RELATED_P (par[0]) = 1;
23822 tmp = gen_rtx_SEQUENCE (VOIDmode, gen_rtvec_v (j + 1, par));
23825 add_reg_note (insn, REG_FRAME_RELATED_EXPR, tmp);
23827 return insn;
23830 /* Emit code to push or pop registers to or from the stack. F is the
23831 assembly file. MASK is the registers to pop. */
23832 static void
23833 thumb_pop (FILE *f, unsigned long mask)
23835 int regno;
23836 int lo_mask = mask & 0xFF;
23838 gcc_assert (mask);
23840 if (lo_mask == 0 && (mask & (1 << PC_REGNUM)))
23842 /* Special case. Do not generate a POP PC statement here, do it in
23843 thumb_exit() */
23844 thumb_exit (f, -1);
23845 return;
23848 fprintf (f, "\tpop\t{");
23850 /* Look at the low registers first. */
23851 for (regno = 0; regno <= LAST_LO_REGNUM; regno++, lo_mask >>= 1)
23853 if (lo_mask & 1)
23855 asm_fprintf (f, "%r", regno);
23857 if ((lo_mask & ~1) != 0)
23858 fprintf (f, ", ");
23862 if (mask & (1 << PC_REGNUM))
23864 /* Catch popping the PC. */
23865 if (TARGET_INTERWORK || TARGET_BACKTRACE || crtl->calls_eh_return
23866 || IS_CMSE_ENTRY (arm_current_func_type ()))
23868 /* The PC is never popped directly; instead
23869 it is popped into r3 and then BX is used. */
23870 fprintf (f, "}\n");
23872 thumb_exit (f, -1);
23874 return;
23876 else
23878 if (mask & 0xFF)
23879 fprintf (f, ", ");
23881 asm_fprintf (f, "%r", PC_REGNUM);
23885 fprintf (f, "}\n");
23888 /* Generate code to return from a thumb function.
23889 If 'reg_containing_return_addr' is -1, then the return address is
23890 actually on the stack, at the stack pointer. */
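/* For illustration (one simple case): for a void function compiled for
   interworking, with only the return address left on the stack, the code
   below pops it into a free argument register and returns with BX rather
   than popping directly into the PC, typically

	pop	{r0}
	bx	r0
   */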
23891 static void
23892 thumb_exit (FILE *f, int reg_containing_return_addr)
23894 unsigned regs_available_for_popping;
23895 unsigned regs_to_pop;
23896 int pops_needed;
23897 unsigned available;
23898 unsigned required;
23899 machine_mode mode;
23900 int size;
23901 int restore_a4 = FALSE;
23903 /* Compute the registers we need to pop. */
23904 regs_to_pop = 0;
23905 pops_needed = 0;
23907 if (reg_containing_return_addr == -1)
23909 regs_to_pop |= 1 << LR_REGNUM;
23910 ++pops_needed;
23913 if (TARGET_BACKTRACE)
23915 /* Restore the (ARM) frame pointer and stack pointer. */
23916 regs_to_pop |= (1 << ARM_HARD_FRAME_POINTER_REGNUM) | (1 << SP_REGNUM);
23917 pops_needed += 2;
23920 /* If there is nothing to pop then just emit the BX instruction and
23921 return. */
23922 if (pops_needed == 0)
23924 if (crtl->calls_eh_return)
23925 asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);
23927 if (IS_CMSE_ENTRY (arm_current_func_type ()))
23929 asm_fprintf (f, "\tmsr\tAPSR_nzcvq, %r\n",
23930 reg_containing_return_addr);
23931 asm_fprintf (f, "\tbxns\t%r\n", reg_containing_return_addr);
23933 else
23934 asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
23935 return;
23937 /* Otherwise if we are not supporting interworking and we have not created
23938 a backtrace structure and the function was not entered in ARM mode then
23939 just pop the return address straight into the PC. */
23940 else if (!TARGET_INTERWORK
23941 && !TARGET_BACKTRACE
23942 && !is_called_in_ARM_mode (current_function_decl)
23943 && !crtl->calls_eh_return
23944 && !IS_CMSE_ENTRY (arm_current_func_type ()))
23946 asm_fprintf (f, "\tpop\t{%r}\n", PC_REGNUM);
23947 return;
23950 /* Find out how many of the (return) argument registers we can corrupt. */
23951 regs_available_for_popping = 0;
23953 /* If returning via __builtin_eh_return, the bottom three registers
23954 all contain information needed for the return. */
23955 if (crtl->calls_eh_return)
23956 size = 12;
23957 else
23959 /* We can deduce the registers used from the function's
23960 return value. This is more reliable than examining
23961 df_regs_ever_live_p () because that will be set if the register is
23962 ever used in the function, not just if the register is used
23963 to hold a return value. */
23965 if (crtl->return_rtx != 0)
23966 mode = GET_MODE (crtl->return_rtx);
23967 else
23968 mode = DECL_MODE (DECL_RESULT (current_function_decl));
23970 size = GET_MODE_SIZE (mode);
23972 if (size == 0)
23974 /* In a void function we can use any argument register.
23975 In a function that returns a structure on the stack
23976 we can use the second and third argument registers. */
23977 if (mode == VOIDmode)
23978 regs_available_for_popping =
23979 (1 << ARG_REGISTER (1))
23980 | (1 << ARG_REGISTER (2))
23981 | (1 << ARG_REGISTER (3));
23982 else
23983 regs_available_for_popping =
23984 (1 << ARG_REGISTER (2))
23985 | (1 << ARG_REGISTER (3));
23987 else if (size <= 4)
23988 regs_available_for_popping =
23989 (1 << ARG_REGISTER (2))
23990 | (1 << ARG_REGISTER (3));
23991 else if (size <= 8)
23992 regs_available_for_popping =
23993 (1 << ARG_REGISTER (3));
23996 /* Match registers to be popped with registers into which we pop them. */
23997 for (available = regs_available_for_popping,
23998 required = regs_to_pop;
23999 required != 0 && available != 0;
24000 available &= ~(available & - available),
24001 required &= ~(required & - required))
24002 -- pops_needed;
24004 /* If we have any popping registers left over, remove them. */
24005 if (available > 0)
24006 regs_available_for_popping &= ~available;
24008 /* Otherwise if we need another popping register we can use
24009 the fourth argument register. */
24010 else if (pops_needed)
24012 /* If we have not found any free argument registers and
24013 reg a4 contains the return address, we must move it. */
24014 if (regs_available_for_popping == 0
24015 && reg_containing_return_addr == LAST_ARG_REGNUM)
24017 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
24018 reg_containing_return_addr = LR_REGNUM;
24020 else if (size > 12)
24022 /* Register a4 is being used to hold part of the return value,
24023 but we have dire need of a free, low register. */
24024 restore_a4 = TRUE;
24026 asm_fprintf (f, "\tmov\t%r, %r\n",IP_REGNUM, LAST_ARG_REGNUM);
24029 if (reg_containing_return_addr != LAST_ARG_REGNUM)
24031 /* The fourth argument register is available. */
24032 regs_available_for_popping |= 1 << LAST_ARG_REGNUM;
24034 --pops_needed;
24038 /* Pop as many registers as we can. */
24039 thumb_pop (f, regs_available_for_popping);
24041 /* Process the registers we popped. */
24042 if (reg_containing_return_addr == -1)
24044 /* The return address was popped into the lowest numbered register. */
24045 regs_to_pop &= ~(1 << LR_REGNUM);
24047 reg_containing_return_addr =
24048 number_of_first_bit_set (regs_available_for_popping);
24050 /* Remove this register from the mask of available registers, so that
24051 the return address will not be corrupted by further pops. */
24052 regs_available_for_popping &= ~(1 << reg_containing_return_addr);
24055 /* If we popped other registers then handle them here. */
24056 if (regs_available_for_popping)
24058 int frame_pointer;
24060 /* Work out which register currently contains the frame pointer. */
24061 frame_pointer = number_of_first_bit_set (regs_available_for_popping);
24063 /* Move it into the correct place. */
24064 asm_fprintf (f, "\tmov\t%r, %r\n",
24065 ARM_HARD_FRAME_POINTER_REGNUM, frame_pointer);
24067 /* (Temporarily) remove it from the mask of popped registers. */
24068 regs_available_for_popping &= ~(1 << frame_pointer);
24069 regs_to_pop &= ~(1 << ARM_HARD_FRAME_POINTER_REGNUM);
24071 if (regs_available_for_popping)
24073 int stack_pointer;
24075 /* We popped the stack pointer as well,
24076 find the register that contains it. */
24077 stack_pointer = number_of_first_bit_set (regs_available_for_popping);
24079 /* Move it into the stack register. */
24080 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, stack_pointer);
24082 /* At this point we have popped all necessary registers, so
24083 do not worry about restoring regs_available_for_popping
24084 to its correct value:
24086 assert (pops_needed == 0)
24087 assert (regs_available_for_popping == (1 << frame_pointer))
24088 assert (regs_to_pop == (1 << STACK_POINTER)) */
24090 else
24092 /* Since we have just moved the popped value into the frame
24093 pointer, the popping register is available for reuse, and
24094 we know that we still have the stack pointer left to pop. */
24095 regs_available_for_popping |= (1 << frame_pointer);
24099 /* If we still have registers left on the stack, but we no longer have
24100 any registers into which we can pop them, then we must move the return
24101 address into the link register and make available the register that
24102 contained it. */
24103 if (regs_available_for_popping == 0 && pops_needed > 0)
24105 regs_available_for_popping |= 1 << reg_containing_return_addr;
24107 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM,
24108 reg_containing_return_addr);
24110 reg_containing_return_addr = LR_REGNUM;
24113 /* If we have registers left on the stack then pop some more.
24114 We know that at most we will want to pop FP and SP. */
24115 if (pops_needed > 0)
24117 int popped_into;
24118 int move_to;
24120 thumb_pop (f, regs_available_for_popping);
24122 /* We have popped either FP or SP.
24123 Move whichever one it is into the correct register. */
24124 popped_into = number_of_first_bit_set (regs_available_for_popping);
24125 move_to = number_of_first_bit_set (regs_to_pop);
24127 asm_fprintf (f, "\tmov\t%r, %r\n", move_to, popped_into);
24128 --pops_needed;
24131 /* If we still have not popped everything then we must have only
24132 had one register available to us and we are now popping the SP. */
24133 if (pops_needed > 0)
24135 int popped_into;
24137 thumb_pop (f, regs_available_for_popping);
24139 popped_into = number_of_first_bit_set (regs_available_for_popping);
24141 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, popped_into);
24143 assert (regs_to_pop == (1 << STACK_POINTER))
24144 assert (pops_needed == 1)
24148 /* If necessary restore the a4 register. */
24149 if (restore_a4)
24151 if (reg_containing_return_addr != LR_REGNUM)
24153 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
24154 reg_containing_return_addr = LR_REGNUM;
24157 asm_fprintf (f, "\tmov\t%r, %r\n", LAST_ARG_REGNUM, IP_REGNUM);
24160 if (crtl->calls_eh_return)
24161 asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);
24163 /* Return to caller. */
24164 if (IS_CMSE_ENTRY (arm_current_func_type ()))
24166 /* This is for the cases where LR is not being used to contain the return
24167 address. It may therefore contain information that we might not want
24168 to leak, hence it must be cleared. The value in R0 will never be a
24169 secret at this point, so it is safe to use it, see the clearing code
24170 in 'cmse_nonsecure_entry_clear_before_return'. */
24171 if (reg_containing_return_addr != LR_REGNUM)
24172 asm_fprintf (f, "\tmov\tlr, r0\n");
24174 asm_fprintf (f, "\tmsr\tAPSR_nzcvq, %r\n", reg_containing_return_addr);
24175 asm_fprintf (f, "\tbxns\t%r\n", reg_containing_return_addr);
24177 else
24178 asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
24181 /* Scan INSN just before assembler is output for it.
24182 For Thumb-1, we track the status of the condition codes; this
24183 information is used in the cbranchsi4_insn pattern. */
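/* Sketch of the idea: after an insn such as "adds r3, r3, #1", which
   sets the flags from its result, thumb1_cc_op0 and thumb1_cc_op1
   record that the flags reflect the new value of r3 compared against
   zero (CC_NOOVmode, so only EQ/NE/GE/LT are meaningful), allowing a
   later cbranchsi4_insn that tests r3 against 0 to omit the explicit
   compare.  */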
24184 void
24185 thumb1_final_prescan_insn (rtx_insn *insn)
24187 if (flag_print_asm_name)
24188 asm_fprintf (asm_out_file, "%@ 0x%04x\n",
24189 INSN_ADDRESSES (INSN_UID (insn)));
24190 /* Don't overwrite the previous setter when we get to a cbranch. */
24191 if (INSN_CODE (insn) != CODE_FOR_cbranchsi4_insn)
24193 enum attr_conds conds;
24195 if (cfun->machine->thumb1_cc_insn)
24197 if (modified_in_p (cfun->machine->thumb1_cc_op0, insn)
24198 || modified_in_p (cfun->machine->thumb1_cc_op1, insn))
24199 CC_STATUS_INIT;
24201 conds = get_attr_conds (insn);
24202 if (conds == CONDS_SET)
24204 rtx set = single_set (insn);
24205 cfun->machine->thumb1_cc_insn = insn;
24206 cfun->machine->thumb1_cc_op0 = SET_DEST (set);
24207 cfun->machine->thumb1_cc_op1 = const0_rtx;
24208 cfun->machine->thumb1_cc_mode = CC_NOOVmode;
24209 if (INSN_CODE (insn) == CODE_FOR_thumb1_subsi3_insn)
24211 rtx src1 = XEXP (SET_SRC (set), 1);
24212 if (src1 == const0_rtx)
24213 cfun->machine->thumb1_cc_mode = CCmode;
24215 else if (REG_P (SET_DEST (set)) && REG_P (SET_SRC (set)))
24217 /* Record the src register operand instead of dest because
24218 cprop_hardreg pass propagates src. */
24219 cfun->machine->thumb1_cc_op0 = SET_SRC (set);
24222 else if (conds != CONDS_NOCOND)
24223 cfun->machine->thumb1_cc_insn = NULL_RTX;
24226 /* Check if unexpected far jump is used. */
24227 if (cfun->machine->lr_save_eliminated
24228 && get_attr_far_jump (insn) == FAR_JUMP_YES)
24229 internal_error("Unexpected thumb1 far jump");
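/* Return nonzero if VAL (viewed as a 32-bit value) has all of its set
   bits within some contiguous 8-bit field, i.e. it is an 8-bit constant
   shifted left by some amount and so can be built with a move of an
   8-bit immediate followed by a left shift.  */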
24233 thumb_shiftable_const (unsigned HOST_WIDE_INT val)
24235 unsigned HOST_WIDE_INT mask = 0xff;
24236 int i;
24238 val = val & (unsigned HOST_WIDE_INT)0xffffffffu;
24239 if (val == 0) /* XXX */
24240 return 0;
24242 for (i = 0; i < 25; i++)
24243 if ((val & (mask << i)) == val)
24244 return 1;
24246 return 0;
24249 /* Returns nonzero if the current function contains,
24250 or might contain, a far jump. */
24251 static int
24252 thumb_far_jump_used_p (void)
24254 rtx_insn *insn;
24255 bool far_jump = false;
24256 unsigned int func_size = 0;
24258 /* If we have already decided that far jumps may be used,
24259 do not bother checking again, and always return true even if
24260 it turns out that they are not being used. Once we have made
24261 the decision that far jumps are present (and that hence the link
24262 register will be pushed onto the stack) we cannot go back on it. */
24263 if (cfun->machine->far_jump_used)
24264 return 1;
24266 /* If this function is not being called from the prologue/epilogue
24267 generation code then it must be being called from the
24268 INITIAL_ELIMINATION_OFFSET macro. */
24269 if (!(ARM_DOUBLEWORD_ALIGN || reload_completed))
24271 /* In this case we know that we are being asked about the elimination
24272 of the arg pointer register. If that register is not being used,
24273 then there are no arguments on the stack, and we do not have to
24274 worry that a far jump might force the prologue to push the link
24275 register, changing the stack offsets. In this case we can just
24276 return false, since the presence of far jumps in the function will
24277 not affect stack offsets.
24279 If the arg pointer is live (or if it was live, but has now been
24280 eliminated and so set to dead) then we do have to test to see if
24281 the function might contain a far jump. This test can lead to some
24282 false negatives, since before reload is completed, the length of
24283 branch instructions is not known, so gcc defaults to returning their
24284 longest length, which in turn sets the far jump attribute to true.
24286 A false negative will not result in bad code being generated, but it
24287 will result in a needless push and pop of the link register. We
24288 hope that this does not occur too often.
24290 If we need doubleword stack alignment this could affect the other
24291 elimination offsets so we can't risk getting it wrong. */
24292 if (df_regs_ever_live_p (ARG_POINTER_REGNUM))
24293 cfun->machine->arg_pointer_live = 1;
24294 else if (!cfun->machine->arg_pointer_live)
24295 return 0;
24298 /* We should not change far_jump_used during or after reload, as there is
24299 no chance to change stack frame layout. */
24300 if (reload_in_progress || reload_completed)
24301 return 0;
24303 /* Check to see if the function contains a branch
24304 insn with the far jump attribute set. */
24305 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
24307 if (JUMP_P (insn) && get_attr_far_jump (insn) == FAR_JUMP_YES)
24309 far_jump = true;
24311 func_size += get_attr_length (insn);
24314 /* Attribute far_jump will always be true for thumb1 before
24315 the shorten_branch pass, so checking the far_jump attribute before
24316 shorten_branch isn't very useful.
24318 The following heuristic tries to estimate more accurately whether a far
24319 jump may finally be used. The heuristic is very conservative, as there is
24320 no chance to roll back a decision not to use far jumps.
24322 Thumb1 long branch offsets range from -2048 to 2046. In the worst case each
24323 2-byte insn is associated with a 4-byte constant pool entry. Using
24324 function size 2048/3 as the threshold is conservative enough. */
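/* Worked example (illustrative): with func_size == 684 the estimate is
   684 * 3 == 2052 >= 2048, so the function is assumed to need far jumps
   and the link register will be saved; with func_size == 682 the
   estimate is 2046 and the branches are assumed to stay in range.  */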
24325 if (far_jump)
24327 if ((func_size * 3) >= 2048)
24329 /* Record the fact that we have decided that
24330 the function does use far jumps. */
24331 cfun->machine->far_jump_used = 1;
24332 return 1;
24336 return 0;
24339 /* Return nonzero if FUNC must be entered in ARM mode. */
24340 static bool
24341 is_called_in_ARM_mode (tree func)
24343 gcc_assert (TREE_CODE (func) == FUNCTION_DECL);
24345 /* Ignore the problem about functions whose address is taken. */
24346 if (TARGET_CALLEE_INTERWORKING && TREE_PUBLIC (func))
24347 return true;
24349 #ifdef ARM_PE
24350 return lookup_attribute ("interfacearm", DECL_ATTRIBUTES (func)) != NULL_TREE;
24351 #else
24352 return false;
24353 #endif
24356 /* Given the stack offsets and register mask in OFFSETS, decide how
24357 many additional registers to push instead of subtracting a constant
24358 from SP. For epilogues the principle is the same except we use pop.
24359 FOR_PROLOGUE indicates which we're generating. */
24360 static int
24361 thumb1_extra_regs_pushed (arm_stack_offsets *offsets, bool for_prologue)
24363 HOST_WIDE_INT amount;
24364 unsigned long live_regs_mask = offsets->saved_regs_mask;
24365 /* Extract a mask of the ones we can give to the Thumb's push/pop
24366 instruction. */
24367 unsigned long l_mask = live_regs_mask & (for_prologue ? 0x40ff : 0xff);
24368 /* Then count how many other high registers will need to be pushed. */
24369 unsigned long high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
24370 int n_free, reg_base, size;
24372 if (!for_prologue && frame_pointer_needed)
24373 amount = offsets->locals_base - offsets->saved_regs;
24374 else
24375 amount = offsets->outgoing_args - offsets->saved_regs;
24377 /* If the stack frame size is 512 exactly, we can save one load
24378 instruction, which should make this a win even when optimizing
24379 for speed. */
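/* For illustration: Thumb-1 "sub sp, #imm" only reaches 508, so a
   512-byte frame would otherwise need an extra instruction; with one
   free low register this function returns (512 - 508) / 4 == 1, and
   pushing that register shrinks the explicit allocation to 508 bytes,
   which fits in a single instruction.  */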
24380 if (!optimize_size && amount != 512)
24381 return 0;
24383 /* Can't do this if there are high registers to push. */
24384 if (high_regs_pushed != 0)
24385 return 0;
24387 /* Shouldn't do it in the prologue if no registers would normally
24388 be pushed at all. In the epilogue, also allow it if we'll have
24389 a pop insn for the PC. */
24390 if (l_mask == 0
24391 && (for_prologue
24392 || TARGET_BACKTRACE
24393 || (live_regs_mask & 1 << LR_REGNUM) == 0
24394 || TARGET_INTERWORK
24395 || crtl->args.pretend_args_size != 0))
24396 return 0;
24398 /* Don't do this if thumb_expand_prologue wants to emit instructions
24399 between the push and the stack frame allocation. */
24400 if (for_prologue
24401 && ((flag_pic && arm_pic_register != INVALID_REGNUM)
24402 || (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)))
24403 return 0;
24405 reg_base = 0;
24406 n_free = 0;
24407 if (!for_prologue)
24409 size = arm_size_return_regs ();
24410 reg_base = ARM_NUM_INTS (size);
24411 live_regs_mask >>= reg_base;
24414 while (reg_base + n_free < 8 && !(live_regs_mask & 1)
24415 && (for_prologue || call_used_regs[reg_base + n_free]))
24417 live_regs_mask >>= 1;
24418 n_free++;
24421 if (n_free == 0)
24422 return 0;
24423 gcc_assert (amount / 4 * 4 == amount);
24425 if (amount >= 512 && (amount - n_free * 4) < 512)
24426 return (amount - 508) / 4;
24427 if (amount <= n_free * 4)
24428 return amount / 4;
24429 return 0;
24432 /* The bits which aren't usefully expanded as rtl. */
24433 const char *
24434 thumb1_unexpanded_epilogue (void)
24436 arm_stack_offsets *offsets;
24437 int regno;
24438 unsigned long live_regs_mask = 0;
24439 int high_regs_pushed = 0;
24440 int extra_pop;
24441 int had_to_push_lr;
24442 int size;
24444 if (cfun->machine->return_used_this_function != 0)
24445 return "";
24447 if (IS_NAKED (arm_current_func_type ()))
24448 return "";
24450 offsets = arm_get_frame_offsets ();
24451 live_regs_mask = offsets->saved_regs_mask;
24452 high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
24454 /* We can deduce the registers used from the function's return value.
24455 This is more reliable than examining df_regs_ever_live_p () because that
24456 will be set if the register is ever used in the function, not just if
24457 the register is used to hold a return value. */
24458 size = arm_size_return_regs ();
24460 extra_pop = thumb1_extra_regs_pushed (offsets, false);
24461 if (extra_pop > 0)
24463 unsigned long extra_mask = (1 << extra_pop) - 1;
24464 live_regs_mask |= extra_mask << ARM_NUM_INTS (size);
24467 /* The prolog may have pushed some high registers to use as
24468 work registers. e.g. the testsuite file:
24469 gcc/testsuite/gcc/gcc.c-torture/execute/complex-2.c
24470 compiles to produce:
24471 push {r4, r5, r6, r7, lr}
24472 mov r7, r9
24473 mov r6, r8
24474 push {r6, r7}
24475 as part of the prolog. We have to undo that pushing here. */
24477 if (high_regs_pushed)
24479 unsigned long mask = live_regs_mask & 0xff;
24480 int next_hi_reg;
24482 /* The available low registers depend on the size of the value we are
24483 returning. */
24484 if (size <= 12)
24485 mask |= 1 << 3;
24486 if (size <= 8)
24487 mask |= 1 << 2;
24489 if (mask == 0)
24490 /* Oh dear! We have no low registers into which we can pop
24491 high registers! */
24492 internal_error
24493 ("no low registers available for popping high registers");
24495 for (next_hi_reg = 8; next_hi_reg < 13; next_hi_reg++)
24496 if (live_regs_mask & (1 << next_hi_reg))
24497 break;
24499 while (high_regs_pushed)
24501 /* Find lo register(s) into which the high register(s) can
24502 be popped. */
24503 for (regno = 0; regno <= LAST_LO_REGNUM; regno++)
24505 if (mask & (1 << regno))
24506 high_regs_pushed--;
24507 if (high_regs_pushed == 0)
24508 break;
24511 mask &= (2 << regno) - 1; /* A noop if regno == 8 */
24513 /* Pop the values into the low register(s). */
24514 thumb_pop (asm_out_file, mask);
24516 /* Move the value(s) into the high registers. */
24517 for (regno = 0; regno <= LAST_LO_REGNUM; regno++)
24519 if (mask & (1 << regno))
24521 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", next_hi_reg,
24522 regno);
24524 for (next_hi_reg++; next_hi_reg < 13; next_hi_reg++)
24525 if (live_regs_mask & (1 << next_hi_reg))
24526 break;
24530 live_regs_mask &= ~0x0f00;
24533 had_to_push_lr = (live_regs_mask & (1 << LR_REGNUM)) != 0;
24534 live_regs_mask &= 0xff;
24536 if (crtl->args.pretend_args_size == 0 || TARGET_BACKTRACE)
24538 /* Pop the return address into the PC. */
24539 if (had_to_push_lr)
24540 live_regs_mask |= 1 << PC_REGNUM;
24542 /* Either no argument registers were pushed or a backtrace
24543 structure was created which includes an adjusted stack
24544 pointer, so just pop everything. */
24545 if (live_regs_mask)
24546 thumb_pop (asm_out_file, live_regs_mask);
24548 /* We have either just popped the return address into the
24549 PC or it was kept in LR for the entire function.
24550 Note that thumb_pop has already called thumb_exit if the
24551 PC was in the list. */
24552 if (!had_to_push_lr)
24553 thumb_exit (asm_out_file, LR_REGNUM);
24555 else
24557 /* Pop everything but the return address. */
24558 if (live_regs_mask)
24559 thumb_pop (asm_out_file, live_regs_mask);
24561 if (had_to_push_lr)
24563 if (size > 12)
24565 /* We have no free low regs, so save one. */
24566 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", IP_REGNUM,
24567 LAST_ARG_REGNUM);
24570 /* Get the return address into a temporary register. */
24571 thumb_pop (asm_out_file, 1 << LAST_ARG_REGNUM);
24573 if (size > 12)
24575 /* Move the return address to lr. */
24576 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LR_REGNUM,
24577 LAST_ARG_REGNUM);
24578 /* Restore the low register. */
24579 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LAST_ARG_REGNUM,
24580 IP_REGNUM);
24581 regno = LR_REGNUM;
24583 else
24584 regno = LAST_ARG_REGNUM;
24586 else
24587 regno = LR_REGNUM;
24589 /* Remove the argument registers that were pushed onto the stack. */
24590 asm_fprintf (asm_out_file, "\tadd\t%r, %r, #%d\n",
24591 SP_REGNUM, SP_REGNUM,
24592 crtl->args.pretend_args_size);
24594 thumb_exit (asm_out_file, regno);
24597 return "";
24600 /* Functions to save and restore machine-specific function data. */
24601 static struct machine_function *
24602 arm_init_machine_status (void)
24604 struct machine_function *machine;
24605 machine = ggc_cleared_alloc<machine_function> ();
24607 #if ARM_FT_UNKNOWN != 0
24608 machine->func_type = ARM_FT_UNKNOWN;
24609 #endif
24610 return machine;
24613 /* Return an RTX indicating where the return address to the
24614 calling function can be found. */
24616 arm_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
24618 if (count != 0)
24619 return NULL_RTX;
24621 return get_hard_reg_initial_val (Pmode, LR_REGNUM);
24624 /* Do anything needed before RTL is emitted for each function. */
24625 void
24626 arm_init_expanders (void)
24628 /* Arrange to initialize and mark the machine per-function status. */
24629 init_machine_status = arm_init_machine_status;
24631 /* This is to stop the combine pass optimizing away the alignment
24632 adjustment of va_arg. */
24633 /* ??? It is claimed that this should not be necessary. */
24634 if (cfun)
24635 mark_reg_pointer (arg_pointer_rtx, PARM_BOUNDARY);
24638 /* Check whether FUNC is to be compiled in a different (ARM/Thumb) mode. */
24640 bool
24641 arm_change_mode_p (tree func)
24643 if (TREE_CODE (func) != FUNCTION_DECL)
24644 return false;
24646 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (func);
24648 if (!callee_tree)
24649 callee_tree = target_option_default_node;
24651 struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);
24652 int flags = callee_opts->x_target_flags;
24654 return (TARGET_THUMB_P (flags) != TARGET_THUMB);
24657 /* Like arm_compute_initial_elimination_offset. Simpler because there
24658 isn't an ABI-specified frame pointer for Thumb. Instead, we set it
24659 to point at the base of the local variables after static stack
24660 space for a function has been allocated. */
24662 HOST_WIDE_INT
24663 thumb_compute_initial_elimination_offset (unsigned int from, unsigned int to)
24665 arm_stack_offsets *offsets;
24667 offsets = arm_get_frame_offsets ();
24669 switch (from)
24671 case ARG_POINTER_REGNUM:
24672 switch (to)
24674 case STACK_POINTER_REGNUM:
24675 return offsets->outgoing_args - offsets->saved_args;
24677 case FRAME_POINTER_REGNUM:
24678 return offsets->soft_frame - offsets->saved_args;
24680 case ARM_HARD_FRAME_POINTER_REGNUM:
24681 return offsets->saved_regs - offsets->saved_args;
24683 case THUMB_HARD_FRAME_POINTER_REGNUM:
24684 return offsets->locals_base - offsets->saved_args;
24686 default:
24687 gcc_unreachable ();
24689 break;
24691 case FRAME_POINTER_REGNUM:
24692 switch (to)
24694 case STACK_POINTER_REGNUM:
24695 return offsets->outgoing_args - offsets->soft_frame;
24697 case ARM_HARD_FRAME_POINTER_REGNUM:
24698 return offsets->saved_regs - offsets->soft_frame;
24700 case THUMB_HARD_FRAME_POINTER_REGNUM:
24701 return offsets->locals_base - offsets->soft_frame;
24703 default:
24704 gcc_unreachable ();
24706 break;
24708 default:
24709 gcc_unreachable ();
24713 /* Generate the function's prologue. */
24715 void
24716 thumb1_expand_prologue (void)
24718 rtx_insn *insn;
24720 HOST_WIDE_INT amount;
24721 HOST_WIDE_INT size;
24722 arm_stack_offsets *offsets;
24723 unsigned long func_type;
24724 int regno;
24725 unsigned long live_regs_mask;
24726 unsigned long l_mask;
24727 unsigned high_regs_pushed = 0;
24728 bool lr_needs_saving;
24730 func_type = arm_current_func_type ();
24732 /* Naked functions don't have prologues. */
24733 if (IS_NAKED (func_type))
24735 if (flag_stack_usage_info)
24736 current_function_static_stack_size = 0;
24737 return;
24740 if (IS_INTERRUPT (func_type))
24742 error ("interrupt Service Routines cannot be coded in Thumb mode");
24743 return;
24746 if (is_called_in_ARM_mode (current_function_decl))
24747 emit_insn (gen_prologue_thumb1_interwork ());
24749 offsets = arm_get_frame_offsets ();
24750 live_regs_mask = offsets->saved_regs_mask;
24751 lr_needs_saving = live_regs_mask & (1 << LR_REGNUM);
24753 /* Extract a mask of the ones we can give to the Thumb's push instruction. */
24754 l_mask = live_regs_mask & 0x40ff;
24755 /* Then count how many other high registers will need to be pushed. */
24756 high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
24758 if (crtl->args.pretend_args_size)
24760 rtx x = GEN_INT (-crtl->args.pretend_args_size);
24762 if (cfun->machine->uses_anonymous_args)
24764 int num_pushes = ARM_NUM_INTS (crtl->args.pretend_args_size);
24765 unsigned long mask;
24767 mask = 1ul << (LAST_ARG_REGNUM + 1);
24768 mask -= 1ul << (LAST_ARG_REGNUM + 1 - num_pushes);
24770 insn = thumb1_emit_multi_reg_push (mask, 0);
24772 else
24774 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
24775 stack_pointer_rtx, x));
24777 RTX_FRAME_RELATED_P (insn) = 1;
24780 if (TARGET_BACKTRACE)
24782 HOST_WIDE_INT offset = 0;
24783 unsigned work_register;
24784 rtx work_reg, x, arm_hfp_rtx;
24786 /* We have been asked to create a stack backtrace structure.
24787 The code looks like this:
24789 0 .align 2
24790 0 func:
24791 0 sub SP, #16 Reserve space for 4 registers.
24792 2 push {R7} Push low registers.
24793 4 add R7, SP, #20 Get the stack pointer before the push.
24794 6 str R7, [SP, #8] Store the stack pointer
24795 (before reserving the space).
24796 8 mov R7, PC Get hold of the start of this code + 12.
24797 10 str R7, [SP, #16] Store it.
24798 12 mov R7, FP Get hold of the current frame pointer.
24799 14 str R7, [SP, #4] Store it.
24800 16 mov R7, LR Get hold of the current return address.
24801 18 str R7, [SP, #12] Store it.
24802 20 add R7, SP, #16 Point at the start of the
24803 backtrace structure.
24804 22 mov FP, R7 Put this value into the frame pointer. */
24806 work_register = thumb_find_work_register (live_regs_mask);
24807 work_reg = gen_rtx_REG (SImode, work_register);
24808 arm_hfp_rtx = gen_rtx_REG (SImode, ARM_HARD_FRAME_POINTER_REGNUM);
24810 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
24811 stack_pointer_rtx, GEN_INT (-16)));
24812 RTX_FRAME_RELATED_P (insn) = 1;
24814 if (l_mask)
24816 insn = thumb1_emit_multi_reg_push (l_mask, l_mask);
24817 RTX_FRAME_RELATED_P (insn) = 1;
24818 lr_needs_saving = false;
24820 offset = bit_count (l_mask) * UNITS_PER_WORD;
24823 x = GEN_INT (offset + 16 + crtl->args.pretend_args_size);
24824 emit_insn (gen_addsi3 (work_reg, stack_pointer_rtx, x));
24826 x = plus_constant (Pmode, stack_pointer_rtx, offset + 4);
24827 x = gen_frame_mem (SImode, x);
24828 emit_move_insn (x, work_reg);
24830 /* Make sure that the instruction fetching the PC is in the right place
24831 to calculate "start of backtrace creation code + 12". */
24832 /* ??? The stores using the common WORK_REG ought to be enough to
24833 prevent the scheduler from doing anything weird. Failing that
24834 we could always move all of the following into an UNSPEC_VOLATILE. */
24835 if (l_mask)
24837 x = gen_rtx_REG (SImode, PC_REGNUM);
24838 emit_move_insn (work_reg, x);
24840 x = plus_constant (Pmode, stack_pointer_rtx, offset + 12);
24841 x = gen_frame_mem (SImode, x);
24842 emit_move_insn (x, work_reg);
24844 emit_move_insn (work_reg, arm_hfp_rtx);
24846 x = plus_constant (Pmode, stack_pointer_rtx, offset);
24847 x = gen_frame_mem (SImode, x);
24848 emit_move_insn (x, work_reg);
24850 else
24852 emit_move_insn (work_reg, arm_hfp_rtx);
24854 x = plus_constant (Pmode, stack_pointer_rtx, offset);
24855 x = gen_frame_mem (SImode, x);
24856 emit_move_insn (x, work_reg);
24858 x = gen_rtx_REG (SImode, PC_REGNUM);
24859 emit_move_insn (work_reg, x);
24861 x = plus_constant (Pmode, stack_pointer_rtx, offset + 12);
24862 x = gen_frame_mem (SImode, x);
24863 emit_move_insn (x, work_reg);
24866 x = gen_rtx_REG (SImode, LR_REGNUM);
24867 emit_move_insn (work_reg, x);
24869 x = plus_constant (Pmode, stack_pointer_rtx, offset + 8);
24870 x = gen_frame_mem (SImode, x);
24871 emit_move_insn (x, work_reg);
24873 x = GEN_INT (offset + 12);
24874 emit_insn (gen_addsi3 (work_reg, stack_pointer_rtx, x));
24876 emit_move_insn (arm_hfp_rtx, work_reg);
24878 /* Optimization: If we are not pushing any low registers but we are going
24879 to push some high registers then delay our first push. This will just
24880 be a push of LR and we can combine it with the push of the first high
24881 register. */
24882 else if ((l_mask & 0xff) != 0
24883 || (high_regs_pushed == 0 && lr_needs_saving))
24885 unsigned long mask = l_mask;
24886 mask |= (1 << thumb1_extra_regs_pushed (offsets, true)) - 1;
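/* (1 << n) - 1 selects r0..r(n-1), where n is the number of extra low
   registers thumb1_extra_regs_pushed considers worth pushing here,
   presumably so that part of the stack allocation is folded into this push
   (the later explicit SP adjustment is reduced by 4 bytes per extra
   register).  */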
24887 insn = thumb1_emit_multi_reg_push (mask, mask);
24888 RTX_FRAME_RELATED_P (insn) = 1;
24889 lr_needs_saving = false;
24892 if (high_regs_pushed)
24894 unsigned pushable_regs;
24895 unsigned next_hi_reg;
24896 unsigned arg_regs_num = TARGET_AAPCS_BASED ? crtl->args.info.aapcs_ncrn
24897 : crtl->args.info.nregs;
24898 unsigned arg_regs_mask = (1 << arg_regs_num) - 1;
24900 for (next_hi_reg = 12; next_hi_reg > LAST_LO_REGNUM; next_hi_reg--)
24901 if (live_regs_mask & (1 << next_hi_reg))
24902 break;
24904 /* Here we need to mask out registers used for passing arguments,
24905 even if they could otherwise be pushed: using them to stash the high
24906 registers could clobber argument values that are still live. */
24907 pushable_regs = l_mask & (~arg_regs_mask);
24908 if (lr_needs_saving)
24909 pushable_regs &= ~(1 << LR_REGNUM);
24911 if (pushable_regs == 0)
24912 pushable_regs = 1 << thumb_find_work_register (live_regs_mask);
24914 while (high_regs_pushed > 0)
24916 unsigned long real_regs_mask = 0;
24917 unsigned long push_mask = 0;
24919 for (regno = LR_REGNUM; regno >= 0; regno --)
24921 if (pushable_regs & (1 << regno))
24923 emit_move_insn (gen_rtx_REG (SImode, regno),
24924 gen_rtx_REG (SImode, next_hi_reg));
24926 high_regs_pushed --;
24927 real_regs_mask |= (1 << next_hi_reg);
24928 push_mask |= (1 << regno);
24930 if (high_regs_pushed)
24932 for (next_hi_reg --; next_hi_reg > LAST_LO_REGNUM;
24933 next_hi_reg --)
24934 if (live_regs_mask & (1 << next_hi_reg))
24935 break;
24937 else
24938 break;
24942 /* If we had to find a work register and we have not yet
24943 saved the LR then add it to the list of regs to push. */
24944 if (lr_needs_saving)
24946 push_mask |= 1 << LR_REGNUM;
24947 real_regs_mask |= 1 << LR_REGNUM;
24948 lr_needs_saving = false;
24951 insn = thumb1_emit_multi_reg_push (push_mask, real_regs_mask);
24952 RTX_FRAME_RELATED_P (insn) = 1;
24956 /* Load the pic register before setting the frame pointer,
24957 so we can use r7 as a temporary work register. */
24958 if (flag_pic && arm_pic_register != INVALID_REGNUM)
24959 arm_load_pic_register (live_regs_mask);
24961 if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
24962 emit_move_insn (gen_rtx_REG (Pmode, ARM_HARD_FRAME_POINTER_REGNUM),
24963 stack_pointer_rtx);
24965 size = offsets->outgoing_args - offsets->saved_args;
24966 if (flag_stack_usage_info)
24967 current_function_static_stack_size = size;
24969 /* If we have a frame, then do stack checking. FIXME: not implemented. */
24970 if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK && size)
24971 sorry ("-fstack-check=specific for Thumb-1");
24973 amount = offsets->outgoing_args - offsets->saved_regs;
24974 amount -= 4 * thumb1_extra_regs_pushed (offsets, true);
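/* Thumb-1 can adjust SP with a single add/sub sp, #imm only for immediates
   up to 508 (a 7-bit value scaled by 4), hence the 512 threshold below for
   the word-aligned AMOUNT.  */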
24975 if (amount)
24977 if (amount < 512)
24979 insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
24980 GEN_INT (- amount)));
24981 RTX_FRAME_RELATED_P (insn) = 1;
24983 else
24985 rtx reg, dwarf;
24987 /* The stack decrement is too big for an immediate value in a single
24988 insn. In theory we could issue multiple subtracts, but after
24989 three of them it becomes more space efficient to place the full
24990 value in the constant pool and load into a register. (Also the
24991 ARM debugger really likes to see only one stack decrement per
24992 function). So instead we look for a scratch register into which
24993 we can load the decrement, and then we subtract this from the
24994 stack pointer. Unfortunately on the thumb the only available
24995 scratch registers are the argument registers, and we cannot use
24996 these as they may hold arguments to the function. Instead we
24997 attempt to locate a call preserved register which is used by this
24998 function. If we can find one, then we know that it will have
24999 been pushed at the start of the prologue and so we can corrupt
25000 it now. */
25001 for (regno = LAST_ARG_REGNUM + 1; regno <= LAST_LO_REGNUM; regno++)
25002 if (live_regs_mask & (1 << regno))
25003 break;
25005 gcc_assert(regno <= LAST_LO_REGNUM);
25007 reg = gen_rtx_REG (SImode, regno);
25009 emit_insn (gen_movsi (reg, GEN_INT (- amount)));
25011 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
25012 stack_pointer_rtx, reg));
25014 dwarf = gen_rtx_SET (stack_pointer_rtx,
25015 plus_constant (Pmode, stack_pointer_rtx,
25016 -amount));
25017 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
25018 RTX_FRAME_RELATED_P (insn) = 1;
25022 if (frame_pointer_needed)
25023 thumb_set_frame_pointer (offsets);
25025 /* If we are profiling, make sure no instructions are scheduled before
25026 the call to mcount. Similarly if the user has requested no
25027 scheduling in the prolog. Similarly if we want non-call exceptions
25028 using the EABI unwinder, to prevent faulting instructions from being
25029 swapped with a stack adjustment. */
25030 if (crtl->profile || !TARGET_SCHED_PROLOG
25031 || (arm_except_unwind_info (&global_options) == UI_TARGET
25032 && cfun->can_throw_non_call_exceptions))
25033 emit_insn (gen_blockage ());
25035 cfun->machine->lr_save_eliminated = !thumb_force_lr_save ();
25036 if (live_regs_mask & 0xff)
25037 cfun->machine->lr_save_eliminated = 0;
25040 /* Clear caller saved registers not used to pass return values and leaked
25041 condition flags before exiting a cmse_nonsecure_entry function. */
25043 void
25044 cmse_nonsecure_entry_clear_before_return (void)
25046 uint64_t to_clear_mask[2];
25047 uint32_t padding_bits_to_clear = 0;
25048 uint32_t * padding_bits_to_clear_ptr = &padding_bits_to_clear;
25049 int regno, maxregno = IP_REGNUM;
25050 tree result_type;
25051 rtx result_rtl;
25053 to_clear_mask[0] = (1ULL << (NUM_ARG_REGS)) - 1;
25054 to_clear_mask[0] |= (1ULL << IP_REGNUM);
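/* to_clear_mask is a two-word bitmap: register REGNO maps to bit
   REGNO % 64 of to_clear_mask[REGNO / 64].  */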
25056 /* If we are not dealing with -mfloat-abi=soft we will need to clear VFP
25057 registers. We also check that TARGET_HARD_FLOAT and !TARGET_THUMB1 hold
25058 to make sure the instructions used to clear them are present. */
25059 if (TARGET_HARD_FLOAT && !TARGET_THUMB1)
25061 uint64_t float_mask = (1ULL << (D7_VFP_REGNUM + 1)) - 1;
25062 maxregno = LAST_VFP_REGNUM;
25064 float_mask &= ~((1ULL << FIRST_VFP_REGNUM) - 1);
25065 to_clear_mask[0] |= float_mask;
25067 float_mask = (1ULL << (maxregno - 63)) - 1;
25068 to_clear_mask[1] = float_mask;
25070 /* Make sure we don't clear the two scratch registers used to clear the
25071 relevant FPSCR bits in output_return_instruction. */
25072 emit_use (gen_rtx_REG (SImode, IP_REGNUM));
25073 to_clear_mask[0] &= ~(1ULL << IP_REGNUM);
25074 emit_use (gen_rtx_REG (SImode, 4));
25075 to_clear_mask[0] &= ~(1ULL << 4);
25078 /* If the user has defined registers to be caller saved, these are no longer
25079 restored by the function before returning and must thus be cleared for
25080 security purposes. */
25081 for (regno = NUM_ARG_REGS; regno < LAST_VFP_REGNUM; regno++)
25083 /* We do not touch registers that can be used to pass arguments as per
25084 the AAPCS, since these should never be made callee-saved by user
25085 options. */
25086 if (IN_RANGE (regno, FIRST_VFP_REGNUM, D7_VFP_REGNUM))
25087 continue;
25088 if (IN_RANGE (regno, IP_REGNUM, PC_REGNUM))
25089 continue;
25090 if (call_used_regs[regno])
25091 to_clear_mask[regno / 64] |= (1ULL << (regno % 64));
25094 /* Make sure we do not clear the registers used to return the result in. */
25095 result_type = TREE_TYPE (DECL_RESULT (current_function_decl));
25096 if (!VOID_TYPE_P (result_type))
25098 result_rtl = arm_function_value (result_type, current_function_decl, 0);
25100 /* No need to check that we return in registers, because we don't
25101 support returning on stack yet. */
25102 to_clear_mask[0]
25103 &= ~compute_not_to_clear_mask (result_type, result_rtl, 0,
25104 padding_bits_to_clear_ptr);
25107 if (padding_bits_to_clear != 0)
25109 rtx reg_rtx;
25110 /* padding_bits_to_clear is nonzero, so we are returning a composite
25111 type, which only uses r0. Make sure that r1-r3 are cleared too;
25112 we will use r1 as a scratch register. */
25113 gcc_assert ((to_clear_mask[0] & 0xe) == 0xe);
25115 reg_rtx = gen_rtx_REG (SImode, R1_REGNUM);
25117 /* Fill the lower half of the negated padding_bits_to_clear. */
25118 emit_move_insn (reg_rtx,
25119 GEN_INT ((((~padding_bits_to_clear) << 16u) >> 16u)));
25121 /* Also fill the top half of the negated padding_bits_to_clear. */
25122 if (((~padding_bits_to_clear) >> 16) > 0)
25123 emit_insn (gen_rtx_SET (gen_rtx_ZERO_EXTRACT (SImode, reg_rtx,
25124 GEN_INT (16),
25125 GEN_INT (16)),
25126 GEN_INT ((~padding_bits_to_clear) >> 16)));
25128 emit_insn (gen_andsi3 (gen_rtx_REG (SImode, R0_REGNUM),
25129 gen_rtx_REG (SImode, R0_REGNUM),
25130 reg_rtx));
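/* At this point r1 holds ~padding_bits_to_clear (low half from the move
   above, high half from the 16-bit insert), so the AND leaves
   r0 &= ~padding_bits_to_clear, zeroing any padding bits of the returned
   composite value.  */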
25133 for (regno = R0_REGNUM; regno <= maxregno; regno++)
25135 if (!(to_clear_mask[regno / 64] & (1ULL << (regno % 64))))
25136 continue;
25138 if (IS_VFP_REGNUM (regno))
25140 /* If regno is an even vfp register and its successor is also to
25141 be cleared, use vmov. */
25142 if (TARGET_VFP_DOUBLE
25143 && VFP_REGNO_OK_FOR_DOUBLE (regno)
25144 && to_clear_mask[regno / 64] & (1ULL << ((regno % 64) + 1)))
25146 emit_move_insn (gen_rtx_REG (DFmode, regno),
25147 CONST1_RTX (DFmode));
25148 emit_use (gen_rtx_REG (DFmode, regno));
25149 regno++;
25151 else
25153 emit_move_insn (gen_rtx_REG (SFmode, regno),
25154 CONST1_RTX (SFmode));
25155 emit_use (gen_rtx_REG (SFmode, regno));
25158 else
25160 if (TARGET_THUMB1)
25162 if (regno == R0_REGNUM)
25163 emit_move_insn (gen_rtx_REG (SImode, regno),
25164 const0_rtx);
25165 else
25166 /* R0 has either been cleared above (see the code before this
25167 loop) or it holds a return value; either way it does not contain
25168 secret information, so copying it here is safe. */
25169 emit_move_insn (gen_rtx_REG (SImode, regno),
25170 gen_rtx_REG (SImode, R0_REGNUM));
25171 emit_use (gen_rtx_REG (SImode, regno));
25173 else
25175 emit_move_insn (gen_rtx_REG (SImode, regno),
25176 gen_rtx_REG (SImode, LR_REGNUM));
25177 emit_use (gen_rtx_REG (SImode, regno));
25183 /* Generate the pattern *pop_multiple_with_stack_update_and_return if a
25184 single POP instruction can be generated. LR should be replaced by PC.
25185 All the required checks are already done by USE_RETURN_INSN (); all we
25186 really need to decide here is whether a single register or multiple
25187 registers are being restored. */
25188 void
25189 thumb2_expand_return (bool simple_return)
25191 int i, num_regs;
25192 unsigned long saved_regs_mask;
25193 arm_stack_offsets *offsets;
25195 offsets = arm_get_frame_offsets ();
25196 saved_regs_mask = offsets->saved_regs_mask;
25198 for (i = 0, num_regs = 0; i <= LAST_ARM_REGNUM; i++)
25199 if (saved_regs_mask & (1 << i))
25200 num_regs++;
25202 if (!simple_return && saved_regs_mask)
25204 /* TODO: Verify that this path is never taken for cmse_nonsecure_entry
25205 functions or adapt code to handle according to ACLE. This path should
25206 not be reachable for cmse_nonsecure_entry functions though we prefer
25207 to assert it for now to ensure that future code changes do not silently
25208 change this behavior. */
25209 gcc_assert (!IS_CMSE_ENTRY (arm_current_func_type ()));
25210 if (num_regs == 1)
25212 rtx par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
25213 rtx reg = gen_rtx_REG (SImode, PC_REGNUM);
25214 rtx addr = gen_rtx_MEM (SImode,
25215 gen_rtx_POST_INC (SImode,
25216 stack_pointer_rtx));
25217 set_mem_alias_set (addr, get_frame_alias_set ());
25218 XVECEXP (par, 0, 0) = ret_rtx;
25219 XVECEXP (par, 0, 1) = gen_rtx_SET (reg, addr);
25220 RTX_FRAME_RELATED_P (XVECEXP (par, 0, 1)) = 1;
25221 emit_jump_insn (par);
25223 else
25225 saved_regs_mask &= ~ (1 << LR_REGNUM);
25226 saved_regs_mask |= (1 << PC_REGNUM);
25227 arm_emit_multi_reg_pop (saved_regs_mask);
25230 else
25232 if (IS_CMSE_ENTRY (arm_current_func_type ()))
25233 cmse_nonsecure_entry_clear_before_return ();
25234 emit_jump_insn (simple_return_rtx);
25238 void
25239 thumb1_expand_epilogue (void)
25241 HOST_WIDE_INT amount;
25242 arm_stack_offsets *offsets;
25243 int regno;
25245 /* Naked functions don't have epilogues. */
25246 if (IS_NAKED (arm_current_func_type ()))
25247 return;
25249 offsets = arm_get_frame_offsets ();
25250 amount = offsets->outgoing_args - offsets->saved_regs;
25252 if (frame_pointer_needed)
25254 emit_insn (gen_movsi (stack_pointer_rtx, hard_frame_pointer_rtx));
25255 amount = offsets->locals_base - offsets->saved_regs;
25257 amount -= 4 * thumb1_extra_regs_pushed (offsets, false);
25259 gcc_assert (amount >= 0);
25260 if (amount)
25262 emit_insn (gen_blockage ());
25264 if (amount < 512)
25265 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
25266 GEN_INT (amount)));
25267 else
25269 /* r3 is always free in the epilogue. */
25270 rtx reg = gen_rtx_REG (SImode, LAST_ARG_REGNUM);
25272 emit_insn (gen_movsi (reg, GEN_INT (amount)));
25273 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, reg));
25277 /* Emit a USE (stack_pointer_rtx), so that
25278 the stack adjustment will not be deleted. */
25279 emit_insn (gen_force_register_use (stack_pointer_rtx));
25281 if (crtl->profile || !TARGET_SCHED_PROLOG)
25282 emit_insn (gen_blockage ());
25284 /* Emit a clobber for each insn that will be restored in the epilogue,
25285 so that flow2 will get register lifetimes correct. */
25286 for (regno = 0; regno < 13; regno++)
25287 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
25288 emit_clobber (gen_rtx_REG (SImode, regno));
25290 if (! df_regs_ever_live_p (LR_REGNUM))
25291 emit_use (gen_rtx_REG (SImode, LR_REGNUM));
25293 /* Clear all caller-saved regs that are not used to return. */
25294 if (IS_CMSE_ENTRY (arm_current_func_type ()))
25295 cmse_nonsecure_entry_clear_before_return ();
25298 /* Epilogue code for APCS frame. */
25299 static void
25300 arm_expand_epilogue_apcs_frame (bool really_return)
25302 unsigned long func_type;
25303 unsigned long saved_regs_mask;
25304 int num_regs = 0;
25305 int i;
25306 int floats_from_frame = 0;
25307 arm_stack_offsets *offsets;
25309 gcc_assert (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM);
25310 func_type = arm_current_func_type ();
25312 /* Get frame offsets for ARM. */
25313 offsets = arm_get_frame_offsets ();
25314 saved_regs_mask = offsets->saved_regs_mask;
25316 /* Find the offset of the floating-point save area in the frame. */
25317 floats_from_frame
25318 = (offsets->saved_args
25319 + arm_compute_static_chain_stack_bytes ()
25320 - offsets->frame);
25322 /* Compute how many core registers saved and how far away the floats are. */
25323 for (i = 0; i <= LAST_ARM_REGNUM; i++)
25324 if (saved_regs_mask & (1 << i))
25326 num_regs++;
25327 floats_from_frame += 4;
25330 if (TARGET_HARD_FLOAT)
25332 int start_reg;
25333 rtx ip_rtx = gen_rtx_REG (SImode, IP_REGNUM);
25335 /* The offset is from IP_REGNUM. */
25336 int saved_size = arm_get_vfp_saved_size ();
25337 if (saved_size > 0)
25339 rtx_insn *insn;
25340 floats_from_frame += saved_size;
25341 insn = emit_insn (gen_addsi3 (ip_rtx,
25342 hard_frame_pointer_rtx,
25343 GEN_INT (-floats_from_frame)));
25344 arm_add_cfa_adjust_cfa_note (insn, -floats_from_frame,
25345 ip_rtx, hard_frame_pointer_rtx);
25348 /* Generate VFP register multi-pop. */
25349 start_reg = FIRST_VFP_REGNUM;
25351 for (i = FIRST_VFP_REGNUM; i < LAST_VFP_REGNUM; i += 2)
25352 /* Look for a case where a reg does not need restoring. */
25353 if ((!df_regs_ever_live_p (i) || call_used_regs[i])
25354 && (!df_regs_ever_live_p (i + 1)
25355 || call_used_regs[i + 1]))
25357 if (start_reg != i)
25358 arm_emit_vfp_multi_reg_pop (start_reg,
25359 (i - start_reg) / 2,
25360 gen_rtx_REG (SImode,
25361 IP_REGNUM));
25362 start_reg = i + 2;
25365 /* Restore the remaining regs that we have discovered (or possibly
25366 even all of them, if the conditional in the for loop never
25367 fired). */
25368 if (start_reg != i)
25369 arm_emit_vfp_multi_reg_pop (start_reg,
25370 (i - start_reg) / 2,
25371 gen_rtx_REG (SImode, IP_REGNUM));
25374 if (TARGET_IWMMXT)
25376 /* The frame pointer is guaranteed to be non-double-word aligned, as
25377 it is set to double-word-aligned old_stack_pointer - 4. */
25378 rtx_insn *insn;
25379 int lrm_count = (num_regs % 2) ? (num_regs + 2) : (num_regs + 1);
25381 for (i = LAST_IWMMXT_REGNUM; i >= FIRST_IWMMXT_REGNUM; i--)
25382 if (df_regs_ever_live_p (i) && !call_used_regs[i])
25384 rtx addr = gen_frame_mem (V2SImode,
25385 plus_constant (Pmode, hard_frame_pointer_rtx,
25386 - lrm_count * 4));
25387 insn = emit_insn (gen_movsi (gen_rtx_REG (V2SImode, i), addr));
25388 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
25389 gen_rtx_REG (V2SImode, i),
25390 NULL_RTX);
25391 lrm_count += 2;
25395 /* saved_regs_mask should contain IP, which holds the old stack pointer
25396 from when the activation record was created. Since SP and IP are adjacent
25397 registers, we can restore that value directly into SP. */
25398 gcc_assert (saved_regs_mask & (1 << IP_REGNUM));
25399 saved_regs_mask &= ~(1 << IP_REGNUM);
25400 saved_regs_mask |= (1 << SP_REGNUM);
25402 /* There are two registers left in saved_regs_mask - LR and PC. We
25403 only need to restore LR (the return address), but to
25404 save time we can load it directly into PC, unless we need a
25405 special function exit sequence, or we are not really returning. */
25406 if (really_return
25407 && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
25408 && !crtl->calls_eh_return)
25409 /* Delete LR from the register mask, so that LR on
25410 the stack is loaded into the PC in the register mask. */
25411 saved_regs_mask &= ~(1 << LR_REGNUM);
25412 else
25413 saved_regs_mask &= ~(1 << PC_REGNUM);
25415 num_regs = bit_count (saved_regs_mask);
25416 if ((offsets->outgoing_args != (1 + num_regs)) || cfun->calls_alloca)
25418 rtx_insn *insn;
25419 emit_insn (gen_blockage ());
25420 /* Unwind the stack to just below the saved registers. */
25421 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
25422 hard_frame_pointer_rtx,
25423 GEN_INT (- 4 * num_regs)));
25425 arm_add_cfa_adjust_cfa_note (insn, - 4 * num_regs,
25426 stack_pointer_rtx, hard_frame_pointer_rtx);
25429 arm_emit_multi_reg_pop (saved_regs_mask);
25431 if (IS_INTERRUPT (func_type))
25433 /* Interrupt handlers will have pushed the
25434 IP onto the stack, so restore it now. */
25435 rtx_insn *insn;
25436 rtx addr = gen_rtx_MEM (SImode,
25437 gen_rtx_POST_INC (SImode,
25438 stack_pointer_rtx));
25439 set_mem_alias_set (addr, get_frame_alias_set ());
25440 insn = emit_insn (gen_movsi (gen_rtx_REG (SImode, IP_REGNUM), addr));
25441 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
25442 gen_rtx_REG (SImode, IP_REGNUM),
25443 NULL_RTX);
25446 if (!really_return || (saved_regs_mask & (1 << PC_REGNUM)))
25447 return;
25449 if (crtl->calls_eh_return)
25450 emit_insn (gen_addsi3 (stack_pointer_rtx,
25451 stack_pointer_rtx,
25452 gen_rtx_REG (SImode, ARM_EH_STACKADJ_REGNUM)));
25454 if (IS_STACKALIGN (func_type))
25455 /* Restore the original stack pointer. Before the prologue, the stack was
25456 realigned and the original stack pointer was saved in r0. For details,
25457 see the comment in arm_expand_prologue. */
25458 emit_insn (gen_movsi (stack_pointer_rtx, gen_rtx_REG (SImode, R0_REGNUM)));
25460 emit_jump_insn (simple_return_rtx);
25463 /* Generate RTL to represent ARM epilogue. Really_return is true if the
25464 function is not a sibcall. */
25465 void
25466 arm_expand_epilogue (bool really_return)
25468 unsigned long func_type;
25469 unsigned long saved_regs_mask;
25470 int num_regs = 0;
25471 int i;
25472 int amount;
25473 arm_stack_offsets *offsets;
25475 func_type = arm_current_func_type ();
25477 /* Naked functions don't have epilogues. Hence, generate a return pattern and
25478 let output_return_instruction take care of any instruction emission. */
25479 if (IS_NAKED (func_type)
25480 || (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN))
25482 if (really_return)
25483 emit_jump_insn (simple_return_rtx);
25484 return;
25487 /* If we are throwing an exception, then we really must be doing a
25488 return, so we can't tail-call. */
25489 gcc_assert (!crtl->calls_eh_return || really_return);
25491 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
25493 arm_expand_epilogue_apcs_frame (really_return);
25494 return;
25497 /* Get frame offsets for ARM. */
25498 offsets = arm_get_frame_offsets ();
25499 saved_regs_mask = offsets->saved_regs_mask;
25500 num_regs = bit_count (saved_regs_mask);
25502 if (frame_pointer_needed)
25504 rtx_insn *insn;
25505 /* Restore stack pointer if necessary. */
25506 if (TARGET_ARM)
25508 /* In ARM mode, frame pointer points to first saved register.
25509 Restore stack pointer to last saved register. */
25510 amount = offsets->frame - offsets->saved_regs;
25512 /* Force out any pending memory operations that reference stacked data
25513 before stack de-allocation occurs. */
25514 emit_insn (gen_blockage ());
25515 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
25516 hard_frame_pointer_rtx,
25517 GEN_INT (amount)));
25518 arm_add_cfa_adjust_cfa_note (insn, amount,
25519 stack_pointer_rtx,
25520 hard_frame_pointer_rtx);
25522 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is not
25523 deleted. */
25524 emit_insn (gen_force_register_use (stack_pointer_rtx));
25526 else
25528 /* In Thumb-2 mode, the frame pointer points to the last saved
25529 register. */
25530 amount = offsets->locals_base - offsets->saved_regs;
25531 if (amount)
25533 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
25534 hard_frame_pointer_rtx,
25535 GEN_INT (amount)));
25536 arm_add_cfa_adjust_cfa_note (insn, amount,
25537 hard_frame_pointer_rtx,
25538 hard_frame_pointer_rtx);
25541 /* Force out any pending memory operations that reference stacked data
25542 before stack de-allocation occurs. */
25543 emit_insn (gen_blockage ());
25544 insn = emit_insn (gen_movsi (stack_pointer_rtx,
25545 hard_frame_pointer_rtx));
25546 arm_add_cfa_adjust_cfa_note (insn, 0,
25547 stack_pointer_rtx,
25548 hard_frame_pointer_rtx);
25549 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is not
25550 deleted. */
25551 emit_insn (gen_force_register_use (stack_pointer_rtx));
25554 else
25556 /* Pop off outgoing args and local frame to adjust stack pointer to
25557 last saved register. */
25558 amount = offsets->outgoing_args - offsets->saved_regs;
25559 if (amount)
25561 rtx_insn *tmp;
25562 /* Force out any pending memory operations that reference stacked data
25563 before stack de-allocation occurs. */
25564 emit_insn (gen_blockage ());
25565 tmp = emit_insn (gen_addsi3 (stack_pointer_rtx,
25566 stack_pointer_rtx,
25567 GEN_INT (amount)));
25568 arm_add_cfa_adjust_cfa_note (tmp, amount,
25569 stack_pointer_rtx, stack_pointer_rtx);
25570 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is
25571 not deleted. */
25572 emit_insn (gen_force_register_use (stack_pointer_rtx));
25576 if (TARGET_HARD_FLOAT)
25578 /* Generate VFP register multi-pop. */
25579 int end_reg = LAST_VFP_REGNUM + 1;
25581 /* Scan the registers in reverse order. We need to match
25582 any groupings made in the prologue and generate matching
25583 vldm operations; the groups must match because, unlike
25584 pop, vldm can only restore consecutive registers. */
25585 for (i = LAST_VFP_REGNUM - 1; i >= FIRST_VFP_REGNUM; i -= 2)
25586 /* Look for a case where a reg does not need restoring. */
25587 if ((!df_regs_ever_live_p (i) || call_used_regs[i])
25588 && (!df_regs_ever_live_p (i + 1)
25589 || call_used_regs[i + 1]))
25591 /* Restore the regs discovered so far (from reg+2 to
25592 end_reg). */
25593 if (end_reg > i + 2)
25594 arm_emit_vfp_multi_reg_pop (i + 2,
25595 (end_reg - (i + 2)) / 2,
25596 stack_pointer_rtx);
25597 end_reg = i;
25600 /* Restore the remaining regs that we have discovered (or possibly
25601 even all of them, if the conditional in the for loop never
25602 fired). */
25603 if (end_reg > i + 2)
25604 arm_emit_vfp_multi_reg_pop (i + 2,
25605 (end_reg - (i + 2)) / 2,
25606 stack_pointer_rtx);
25609 if (TARGET_IWMMXT)
25610 for (i = FIRST_IWMMXT_REGNUM; i <= LAST_IWMMXT_REGNUM; i++)
25611 if (df_regs_ever_live_p (i) && !call_used_regs[i])
25613 rtx_insn *insn;
25614 rtx addr = gen_rtx_MEM (V2SImode,
25615 gen_rtx_POST_INC (SImode,
25616 stack_pointer_rtx));
25617 set_mem_alias_set (addr, get_frame_alias_set ());
25618 insn = emit_insn (gen_movsi (gen_rtx_REG (V2SImode, i), addr));
25619 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
25620 gen_rtx_REG (V2SImode, i),
25621 NULL_RTX);
25622 arm_add_cfa_adjust_cfa_note (insn, UNITS_PER_WORD,
25623 stack_pointer_rtx, stack_pointer_rtx);
25626 if (saved_regs_mask)
25628 rtx insn;
25629 bool return_in_pc = false;
25631 if (ARM_FUNC_TYPE (func_type) != ARM_FT_INTERWORKED
25632 && (TARGET_ARM || ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL)
25633 && !IS_CMSE_ENTRY (func_type)
25634 && !IS_STACKALIGN (func_type)
25635 && really_return
25636 && crtl->args.pretend_args_size == 0
25637 && saved_regs_mask & (1 << LR_REGNUM)
25638 && !crtl->calls_eh_return)
25640 saved_regs_mask &= ~(1 << LR_REGNUM);
25641 saved_regs_mask |= (1 << PC_REGNUM);
25642 return_in_pc = true;
25645 if (num_regs == 1 && (!IS_INTERRUPT (func_type) || !return_in_pc))
25647 for (i = 0; i <= LAST_ARM_REGNUM; i++)
25648 if (saved_regs_mask & (1 << i))
25650 rtx addr = gen_rtx_MEM (SImode,
25651 gen_rtx_POST_INC (SImode,
25652 stack_pointer_rtx));
25653 set_mem_alias_set (addr, get_frame_alias_set ());
25655 if (i == PC_REGNUM)
25657 insn = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
25658 XVECEXP (insn, 0, 0) = ret_rtx;
25659 XVECEXP (insn, 0, 1) = gen_rtx_SET (gen_rtx_REG (SImode, i),
25660 addr);
25661 RTX_FRAME_RELATED_P (XVECEXP (insn, 0, 1)) = 1;
25662 insn = emit_jump_insn (insn);
25664 else
25666 insn = emit_insn (gen_movsi (gen_rtx_REG (SImode, i),
25667 addr));
25668 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
25669 gen_rtx_REG (SImode, i),
25670 NULL_RTX);
25671 arm_add_cfa_adjust_cfa_note (insn, UNITS_PER_WORD,
25672 stack_pointer_rtx,
25673 stack_pointer_rtx);
25677 else
25679 if (TARGET_LDRD
25680 && current_tune->prefer_ldrd_strd
25681 && !optimize_function_for_size_p (cfun))
25683 if (TARGET_THUMB2)
25684 thumb2_emit_ldrd_pop (saved_regs_mask);
25685 else if (TARGET_ARM && !IS_INTERRUPT (func_type))
25686 arm_emit_ldrd_pop (saved_regs_mask);
25687 else
25688 arm_emit_multi_reg_pop (saved_regs_mask);
25690 else
25691 arm_emit_multi_reg_pop (saved_regs_mask);
25694 if (return_in_pc)
25695 return;
25698 amount
25699 = crtl->args.pretend_args_size + arm_compute_static_chain_stack_bytes();
25700 if (amount)
25702 int i, j;
25703 rtx dwarf = NULL_RTX;
25704 rtx_insn *tmp =
25705 emit_insn (gen_addsi3 (stack_pointer_rtx,
25706 stack_pointer_rtx,
25707 GEN_INT (amount)));
25709 RTX_FRAME_RELATED_P (tmp) = 1;
25711 if (cfun->machine->uses_anonymous_args)
25713 /* Restore the pretend args. See arm_expand_prologue for how the
25714 pretend args are saved on the stack. */
25715 int num_regs = crtl->args.pretend_args_size / 4;
25716 saved_regs_mask = (0xf0 >> num_regs) & 0xf;
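/* For example, with 8 bytes of pretend args num_regs == 2 and
   (0xf0 >> 2) & 0xf == 0xc, i.e. {r2, r3}: the argument registers whose
   contents were saved as pretend args.  */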
25717 for (j = 0, i = 0; j < num_regs; i++)
25718 if (saved_regs_mask & (1 << i))
25720 rtx reg = gen_rtx_REG (SImode, i);
25721 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
25722 j++;
25724 REG_NOTES (tmp) = dwarf;
25726 arm_add_cfa_adjust_cfa_note (tmp, amount,
25727 stack_pointer_rtx, stack_pointer_rtx);
25730 /* Clear all caller-saved regs that are not used to return. */
25731 if (IS_CMSE_ENTRY (arm_current_func_type ()))
25733 /* CMSE_ENTRY always returns. */
25734 gcc_assert (really_return);
25735 cmse_nonsecure_entry_clear_before_return ();
25738 if (!really_return)
25739 return;
25741 if (crtl->calls_eh_return)
25742 emit_insn (gen_addsi3 (stack_pointer_rtx,
25743 stack_pointer_rtx,
25744 gen_rtx_REG (SImode, ARM_EH_STACKADJ_REGNUM)));
25746 if (IS_STACKALIGN (func_type))
25747 /* Restore the original stack pointer. Before the prologue, the stack was
25748 realigned and the original stack pointer was saved in r0. For details,
25749 see the comment in arm_expand_prologue. */
25750 emit_insn (gen_movsi (stack_pointer_rtx, gen_rtx_REG (SImode, R0_REGNUM)));
25752 emit_jump_insn (simple_return_rtx);
25755 /* Implementation of insn prologue_thumb1_interwork. This is the first
25756 "instruction" of a function called in ARM mode. Swap to thumb mode. */
25758 const char *
25759 thumb1_output_interwork (void)
25761 const char * name;
25762 FILE *f = asm_out_file;
25764 gcc_assert (MEM_P (DECL_RTL (current_function_decl)));
25765 gcc_assert (GET_CODE (XEXP (DECL_RTL (current_function_decl), 0))
25766 == SYMBOL_REF);
25767 name = XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0);
25769 /* Generate code sequence to switch us into Thumb mode. */
25770 /* The .code 32 directive has already been emitted by
25771 ASM_DECLARE_FUNCTION_NAME. */
25772 asm_fprintf (f, "\torr\t%r, %r, #1\n", IP_REGNUM, PC_REGNUM);
25773 asm_fprintf (f, "\tbx\t%r\n", IP_REGNUM);
25775 /* Generate a label, so that the debugger will notice the
25776 change in instruction sets. This label is also used by
25777 the assembler to bypass the ARM code when this function
25778 is called from a Thumb encoded function elsewhere in the
25779 same file. Hence the definition of STUB_NAME here must
25780 agree with the definition in gas/config/tc-arm.c. */
25782 #define STUB_NAME ".real_start_of"
25784 fprintf (f, "\t.code\t16\n");
25785 #ifdef ARM_PE
25786 if (arm_dllexport_name_p (name))
25787 name = arm_strip_name_encoding (name);
25788 #endif
25789 asm_fprintf (f, "\t.globl %s%U%s\n", STUB_NAME, name);
25790 fprintf (f, "\t.thumb_func\n");
25791 asm_fprintf (f, "%s%U%s:\n", STUB_NAME, name);
25793 return "";
25796 /* Handle the case of a double word load into a low register from
25797 a computed memory address. The computed address may involve a
25798 register which is overwritten by the load. */
25799 const char *
25800 thumb_load_double_from_address (rtx *operands)
25802 rtx addr;
25803 rtx base;
25804 rtx offset;
25805 rtx arg1;
25806 rtx arg2;
25808 gcc_assert (REG_P (operands[0]));
25809 gcc_assert (MEM_P (operands[1]));
25811 /* Get the memory address. */
25812 addr = XEXP (operands[1], 0);
25814 /* Work out how the memory address is computed. */
25815 switch (GET_CODE (addr))
25817 case REG:
25818 operands[2] = adjust_address (operands[1], SImode, 4);
25820 if (REGNO (operands[0]) == REGNO (addr))
25822 output_asm_insn ("ldr\t%H0, %2", operands);
25823 output_asm_insn ("ldr\t%0, %1", operands);
25825 else
25827 output_asm_insn ("ldr\t%0, %1", operands);
25828 output_asm_insn ("ldr\t%H0, %2", operands);
25830 break;
25832 case CONST:
25833 /* Compute <address> + 4 for the high order load. */
25834 operands[2] = adjust_address (operands[1], SImode, 4);
25836 output_asm_insn ("ldr\t%0, %1", operands);
25837 output_asm_insn ("ldr\t%H0, %2", operands);
25838 break;
25840 case PLUS:
25841 arg1 = XEXP (addr, 0);
25842 arg2 = XEXP (addr, 1);
25844 if (CONSTANT_P (arg1))
25845 base = arg2, offset = arg1;
25846 else
25847 base = arg1, offset = arg2;
25849 gcc_assert (REG_P (base));
25851 /* Catch the case of <address> = <reg> + <reg> */
25852 if (REG_P (offset))
25854 int reg_offset = REGNO (offset);
25855 int reg_base = REGNO (base);
25856 int reg_dest = REGNO (operands[0]);
25858 /* Add the base and offset registers together into the
25859 higher destination register. */
25860 asm_fprintf (asm_out_file, "\tadd\t%r, %r, %r",
25861 reg_dest + 1, reg_base, reg_offset);
25863 /* Load the lower destination register from the address in
25864 the higher destination register. */
25865 asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #0]",
25866 reg_dest, reg_dest + 1);
25868 /* Load the higher destination register from its own address
25869 plus 4. */
25870 asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #4]",
25871 reg_dest + 1, reg_dest + 1);
25873 else
25875 /* Compute <address> + 4 for the high order load. */
25876 operands[2] = adjust_address (operands[1], SImode, 4);
25878 /* If the computed address is held in the low order register
25879 then load the high order register first, otherwise always
25880 load the low order register first. */
25881 if (REGNO (operands[0]) == REGNO (base))
25883 output_asm_insn ("ldr\t%H0, %2", operands);
25884 output_asm_insn ("ldr\t%0, %1", operands);
25886 else
25888 output_asm_insn ("ldr\t%0, %1", operands);
25889 output_asm_insn ("ldr\t%H0, %2", operands);
25892 break;
25894 case LABEL_REF:
25895 /* With no registers to worry about we can just load the value
25896 directly. */
25897 operands[2] = adjust_address (operands[1], SImode, 4);
25899 output_asm_insn ("ldr\t%H0, %2", operands);
25900 output_asm_insn ("ldr\t%0, %1", operands);
25901 break;
25903 default:
25904 gcc_unreachable ();
25907 return "";
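/* The function below emits an ldmia/stmia pair that copies N words from the
   address register in operands[1] to the one in operands[0],
   post-incrementing both. The std::swap calls sort the transfer registers
   into ascending order, the canonical form for ldmia/stmia register
   lists.  */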
25910 const char *
25911 thumb_output_move_mem_multiple (int n, rtx *operands)
25913 switch (n)
25915 case 2:
25916 if (REGNO (operands[4]) > REGNO (operands[5]))
25917 std::swap (operands[4], operands[5]);
25919 output_asm_insn ("ldmia\t%1!, {%4, %5}", operands);
25920 output_asm_insn ("stmia\t%0!, {%4, %5}", operands);
25921 break;
25923 case 3:
25924 if (REGNO (operands[4]) > REGNO (operands[5]))
25925 std::swap (operands[4], operands[5]);
25926 if (REGNO (operands[5]) > REGNO (operands[6]))
25927 std::swap (operands[5], operands[6]);
25928 if (REGNO (operands[4]) > REGNO (operands[5]))
25929 std::swap (operands[4], operands[5]);
25931 output_asm_insn ("ldmia\t%1!, {%4, %5, %6}", operands);
25932 output_asm_insn ("stmia\t%0!, {%4, %5, %6}", operands);
25933 break;
25935 default:
25936 gcc_unreachable ();
25939 return "";
25942 /* Output a call-via instruction for thumb state. */
25943 const char *
25944 thumb_call_via_reg (rtx reg)
25946 int regno = REGNO (reg);
25947 rtx *labelp;
25949 gcc_assert (regno < LR_REGNUM);
25951 /* If we are in the normal text section we can use a single instance
25952 per compilation unit. If we are doing function sections, then we need
25953 an entry per section, since we can't rely on reachability. */
25954 if (in_section == text_section)
25956 thumb_call_reg_needed = 1;
25958 if (thumb_call_via_label[regno] == NULL)
25959 thumb_call_via_label[regno] = gen_label_rtx ();
25960 labelp = thumb_call_via_label + regno;
25962 else
25964 if (cfun->machine->call_via[regno] == NULL)
25965 cfun->machine->call_via[regno] = gen_label_rtx ();
25966 labelp = cfun->machine->call_via + regno;
25969 output_asm_insn ("bl\t%a0", labelp);
25970 return "";
25973 /* Routines for generating rtl. */
25974 void
25975 thumb_expand_movmemqi (rtx *operands)
25977 rtx out = copy_to_mode_reg (SImode, XEXP (operands[0], 0));
25978 rtx in = copy_to_mode_reg (SImode, XEXP (operands[1], 0));
25979 HOST_WIDE_INT len = INTVAL (operands[2]);
25980 HOST_WIDE_INT offset = 0;
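/* Copy strategy: use the 12-byte and 8-byte block-move patterns
   (movmem12b/movmem8b) while enough bytes remain, then finish the tail with
   single word, halfword and byte moves.  */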
25982 while (len >= 12)
25984 emit_insn (gen_movmem12b (out, in, out, in));
25985 len -= 12;
25988 if (len >= 8)
25990 emit_insn (gen_movmem8b (out, in, out, in));
25991 len -= 8;
25994 if (len >= 4)
25996 rtx reg = gen_reg_rtx (SImode);
25997 emit_insn (gen_movsi (reg, gen_rtx_MEM (SImode, in)));
25998 emit_insn (gen_movsi (gen_rtx_MEM (SImode, out), reg));
25999 len -= 4;
26000 offset += 4;
26003 if (len >= 2)
26005 rtx reg = gen_reg_rtx (HImode);
26006 emit_insn (gen_movhi (reg, gen_rtx_MEM (HImode,
26007 plus_constant (Pmode, in,
26008 offset))));
26009 emit_insn (gen_movhi (gen_rtx_MEM (HImode, plus_constant (Pmode, out,
26010 offset)),
26011 reg));
26012 len -= 2;
26013 offset += 2;
26016 if (len)
26018 rtx reg = gen_reg_rtx (QImode);
26019 emit_insn (gen_movqi (reg, gen_rtx_MEM (QImode,
26020 plus_constant (Pmode, in,
26021 offset))));
26022 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, out,
26023 offset)),
26024 reg));
26028 void
26029 thumb_reload_out_hi (rtx *operands)
26031 emit_insn (gen_thumb_movhi_clobber (operands[0], operands[1], operands[2]));
26034 /* Return the length of a function name prefix
26035 that starts with the character 'c'. */
26036 static int
26037 arm_get_strip_length (int c)
26039 switch (c)
26041 ARM_NAME_ENCODING_LENGTHS
26042 default: return 0;
26046 /* Return a pointer to a function's name with any
26047 and all prefix encodings stripped from it. */
26048 const char *
26049 arm_strip_name_encoding (const char *name)
26051 int skip;
26053 while ((skip = arm_get_strip_length (* name)))
26054 name += skip;
26056 return name;
26059 /* If there is a '*' anywhere in the name's prefix, then
26060 emit the stripped name verbatim, otherwise prepend an
26061 underscore if leading underscores are being used. */
26062 void
26063 arm_asm_output_labelref (FILE *stream, const char *name)
26065 int skip;
26066 int verbatim = 0;
26068 while ((skip = arm_get_strip_length (* name)))
26070 verbatim |= (*name == '*');
26071 name += skip;
26074 if (verbatim)
26075 fputs (name, stream);
26076 else
26077 asm_fprintf (stream, "%U%s", name);
26080 /* This function is used to emit an EABI tag and its associated value.
26081 We emit the numerical value of the tag in case the assembler does not
26082 support textual tags (e.g. gas prior to 2.20). If requested we include
26083 the tag name in a comment so that anyone reading the assembler output
26084 will know which tag is being set.
26086 This function is not static because arm-c.c needs it too. */
26088 void
26089 arm_emit_eabi_attribute (const char *name, int num, int val)
26091 asm_fprintf (asm_out_file, "\t.eabi_attribute %d, %d", num, val);
26092 if (flag_verbose_asm || flag_debug_asm)
26093 asm_fprintf (asm_out_file, "\t%s %s", ASM_COMMENT_START, name);
26094 asm_fprintf (asm_out_file, "\n");
26097 /* This function is used to print CPU tuning information as comment
26098 in assembler file. Pointers are not printed for now. */
26100 void
26101 arm_print_tune_info (void)
26103 asm_fprintf (asm_out_file, "\t" ASM_COMMENT_START ".tune parameters\n");
26104 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "constant_limit:\t%d\n",
26105 current_tune->constant_limit);
26106 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26107 "max_insns_skipped:\t%d\n", current_tune->max_insns_skipped);
26108 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26109 "prefetch.num_slots:\t%d\n", current_tune->prefetch.num_slots);
26110 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26111 "prefetch.l1_cache_size:\t%d\n",
26112 current_tune->prefetch.l1_cache_size);
26113 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26114 "prefetch.l1_cache_line_size:\t%d\n",
26115 current_tune->prefetch.l1_cache_line_size);
26116 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26117 "prefer_constant_pool:\t%d\n",
26118 (int) current_tune->prefer_constant_pool);
26119 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26120 "branch_cost:\t(s:speed, p:predictable)\n");
26121 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "\t\ts&p\tcost\n");
26122 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "\t\t00\t%d\n",
26123 current_tune->branch_cost (false, false));
26124 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "\t\t01\t%d\n",
26125 current_tune->branch_cost (false, true));
26126 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "\t\t10\t%d\n",
26127 current_tune->branch_cost (true, false));
26128 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "\t\t11\t%d\n",
26129 current_tune->branch_cost (true, true));
26130 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26131 "prefer_ldrd_strd:\t%d\n",
26132 (int) current_tune->prefer_ldrd_strd);
26133 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26134 "logical_op_non_short_circuit:\t[%d,%d]\n",
26135 (int) current_tune->logical_op_non_short_circuit_thumb,
26136 (int) current_tune->logical_op_non_short_circuit_arm);
26137 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26138 "prefer_neon_for_64bits:\t%d\n",
26139 (int) current_tune->prefer_neon_for_64bits);
26140 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26141 "disparage_flag_setting_t16_encodings:\t%d\n",
26142 (int) current_tune->disparage_flag_setting_t16_encodings);
26143 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26144 "string_ops_prefer_neon:\t%d\n",
26145 (int) current_tune->string_ops_prefer_neon);
26146 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26147 "max_insns_inline_memset:\t%d\n",
26148 current_tune->max_insns_inline_memset);
26149 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "fusible_ops:\t%u\n",
26150 current_tune->fusible_ops);
26151 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "sched_autopref:\t%d\n",
26152 (int) current_tune->sched_autopref);
26155 /* Print .arch and .arch_extension directives corresponding to the
26156 current architecture configuration. */
26157 static void
26158 arm_print_asm_arch_directives ()
26160 const arch_option *arch
26161 = arm_parse_arch_option_name (all_architectures, "-march",
26162 arm_active_target.arch_name);
26163 auto_sbitmap opt_bits (isa_num_bits);
26165 gcc_assert (arch);
26167 asm_fprintf (asm_out_file, "\t.arch %s\n", arm_active_target.arch_name);
26168 if (!arch->common.extensions)
26169 return;
26171 for (const struct cpu_arch_extension *opt = arch->common.extensions;
26172 opt->name != NULL;
26173 opt++)
26175 if (!opt->remove)
26177 arm_initialize_isa (opt_bits, opt->isa_bits);
26179 /* If every feature bit of this option is set in the target
26180 ISA specification, print out the option name. However,
26181 don't print anything if all the bits are part of the
26182 FPU specification. */
26183 if (bitmap_subset_p (opt_bits, arm_active_target.isa)
26184 && !bitmap_subset_p (opt_bits, isa_all_fpubits))
26185 asm_fprintf (asm_out_file, "\t.arch_extension %s\n", opt->name);
26190 static void
26191 arm_file_start (void)
26193 int val;
26195 if (TARGET_BPABI)
26197 /* We don't have a specified CPU. Use the architecture to
26198 generate the tags.
26200 Note: it might be better to do this unconditionally, then the
26201 assembler would not need to know about all new CPU names as
26202 they are added. */
26203 if (!arm_active_target.core_name)
26205 /* armv7ve doesn't support any extensions. */
26206 if (strcmp (arm_active_target.arch_name, "armv7ve") == 0)
26208 /* Keep backward compatibility for assemblers
26209 which don't support armv7ve. */
26210 asm_fprintf (asm_out_file, "\t.arch armv7-a\n");
26211 asm_fprintf (asm_out_file, "\t.arch_extension virt\n");
26212 asm_fprintf (asm_out_file, "\t.arch_extension idiv\n");
26213 asm_fprintf (asm_out_file, "\t.arch_extension sec\n");
26214 asm_fprintf (asm_out_file, "\t.arch_extension mp\n");
26216 else
26217 arm_print_asm_arch_directives ();
26219 else if (strncmp (arm_active_target.core_name, "generic", 7) == 0)
26220 asm_fprintf (asm_out_file, "\t.arch %s\n",
26221 arm_active_target.core_name + 8);
26222 else
26224 const char* truncated_name
26225 = arm_rewrite_selected_cpu (arm_active_target.core_name);
26226 asm_fprintf (asm_out_file, "\t.cpu %s\n", truncated_name);
26229 if (print_tune_info)
26230 arm_print_tune_info ();
26232 if (! TARGET_SOFT_FLOAT)
26234 if (TARGET_HARD_FLOAT && TARGET_VFP_SINGLE)
26235 arm_emit_eabi_attribute ("Tag_ABI_HardFP_use", 27, 1);
26237 if (TARGET_HARD_FLOAT_ABI)
26238 arm_emit_eabi_attribute ("Tag_ABI_VFP_args", 28, 1);
26241 /* Some of these attributes only apply when the corresponding features
26242 are used. However we don't have any easy way of figuring this out.
26243 Conservatively record the setting that would have been used. */
26245 if (flag_rounding_math)
26246 arm_emit_eabi_attribute ("Tag_ABI_FP_rounding", 19, 1);
26248 if (!flag_unsafe_math_optimizations)
26250 arm_emit_eabi_attribute ("Tag_ABI_FP_denormal", 20, 1);
26251 arm_emit_eabi_attribute ("Tag_ABI_FP_exceptions", 21, 1);
26253 if (flag_signaling_nans)
26254 arm_emit_eabi_attribute ("Tag_ABI_FP_user_exceptions", 22, 1);
26256 arm_emit_eabi_attribute ("Tag_ABI_FP_number_model", 23,
26257 flag_finite_math_only ? 1 : 3);
26259 arm_emit_eabi_attribute ("Tag_ABI_align8_needed", 24, 1);
26260 arm_emit_eabi_attribute ("Tag_ABI_align8_preserved", 25, 1);
26261 arm_emit_eabi_attribute ("Tag_ABI_enum_size", 26,
26262 flag_short_enums ? 1 : 2);
26264 /* Tag_ABI_optimization_goals. */
26265 if (optimize_size)
26266 val = 4;
26267 else if (optimize >= 2)
26268 val = 2;
26269 else if (optimize)
26270 val = 1;
26271 else
26272 val = 6;
26273 arm_emit_eabi_attribute ("Tag_ABI_optimization_goals", 30, val);
26275 arm_emit_eabi_attribute ("Tag_CPU_unaligned_access", 34,
26276 unaligned_access);
26278 if (arm_fp16_format)
26279 arm_emit_eabi_attribute ("Tag_ABI_FP_16bit_format", 38,
26280 (int) arm_fp16_format);
26282 if (arm_lang_output_object_attributes_hook)
26283 arm_lang_output_object_attributes_hook();
26286 default_file_start ();
26289 static void
26290 arm_file_end (void)
26292 int regno;
26294 if (NEED_INDICATE_EXEC_STACK)
26295 /* Add .note.GNU-stack. */
26296 file_end_indicate_exec_stack ();
26298 if (! thumb_call_reg_needed)
26299 return;
26301 switch_to_section (text_section);
26302 asm_fprintf (asm_out_file, "\t.code 16\n");
26303 ASM_OUTPUT_ALIGN (asm_out_file, 1);
26305 for (regno = 0; regno < LR_REGNUM; regno++)
26307 rtx label = thumb_call_via_label[regno];
26309 if (label != 0)
26311 targetm.asm_out.internal_label (asm_out_file, "L",
26312 CODE_LABEL_NUMBER (label));
26313 asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
26318 #ifndef ARM_PE
26319 /* Symbols in the text segment can be accessed without indirecting via the
26320 constant pool; it may take an extra binary operation, but this is still
26321 faster than indirecting via memory. Don't do this when not optimizing,
26322 since we won't be calculating all of the offsets necessary to do this
26323 simplification. */
26325 static void
26326 arm_encode_section_info (tree decl, rtx rtl, int first)
26328 if (optimize > 0 && TREE_CONSTANT (decl))
26329 SYMBOL_REF_FLAG (XEXP (rtl, 0)) = 1;
26331 default_encode_section_info (decl, rtl, first);
26333 #endif /* !ARM_PE */
26335 static void
26336 arm_internal_label (FILE *stream, const char *prefix, unsigned long labelno)
26338 if (arm_ccfsm_state == 3 && (unsigned) arm_target_label == labelno
26339 && !strcmp (prefix, "L"))
26341 arm_ccfsm_state = 0;
26342 arm_target_insn = NULL;
26344 default_internal_label (stream, prefix, labelno);
26347 /* Output code to add DELTA to the first argument, and then jump
26348 to FUNCTION. Used for C++ multiple inheritance. */
26350 static void
26351 arm_thumb1_mi_thunk (FILE *file, tree, HOST_WIDE_INT delta,
26352 HOST_WIDE_INT, tree function)
26354 static int thunk_label = 0;
26355 char label[256];
26356 char labelpc[256];
26357 int mi_delta = delta;
26358 const char *const mi_op = mi_delta < 0 ? "sub" : "add";
26359 int shift = 0;
26360 int this_regno = (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function)
26361 ? 1 : 0);
26362 if (mi_delta < 0)
26363 mi_delta = - mi_delta;
26365 final_start_function (emit_barrier (), file, 1);
26367 if (TARGET_THUMB1)
26369 int labelno = thunk_label++;
26370 ASM_GENERATE_INTERNAL_LABEL (label, "LTHUMBFUNC", labelno);
26371 /* Thunks are entered in arm mode when available. */
26372 if (TARGET_THUMB1_ONLY)
26374 /* push r3 so we can use it as a temporary. */
26375 /* TODO: Omit this save if r3 is not used. */
26376 fputs ("\tpush {r3}\n", file);
26377 fputs ("\tldr\tr3, ", file);
26379 else
26381 fputs ("\tldr\tr12, ", file);
26383 assemble_name (file, label);
26384 fputc ('\n', file);
26385 if (flag_pic)
26387 /* If we are generating PIC, the ldr instruction below loads
26388 "(target - 7) - .LTHUNKPCn" into r12. The pc reads as
26389 the address of the add + 8, so we have:
26391 r12 = (target - 7) - .LTHUNKPCn + (.LTHUNKPCn + 8)
26392 = target + 1.
26394 Note that we have "+ 1" because some versions of GNU ld
26395 don't set the low bit of the result for R_ARM_REL32
26396 relocations against thumb function symbols.
26397 On ARMv6M this is +4, not +8. */
26398 ASM_GENERATE_INTERNAL_LABEL (labelpc, "LTHUNKPC", labelno);
26399 assemble_name (file, labelpc);
26400 fputs (":\n", file);
26401 if (TARGET_THUMB1_ONLY)
26403 /* This is 2 insns after the start of the thunk, so we know it
26404 is 4-byte aligned. */
26405 fputs ("\tadd\tr3, pc, r3\n", file);
26406 fputs ("\tmov r12, r3\n", file);
26408 else
26409 fputs ("\tadd\tr12, pc, r12\n", file);
26411 else if (TARGET_THUMB1_ONLY)
26412 fputs ("\tmov r12, r3\n", file);
26414 if (TARGET_THUMB1_ONLY)
26416 if (mi_delta > 255)
26418 fputs ("\tldr\tr3, ", file);
26419 assemble_name (file, label);
26420 fputs ("+4\n", file);
26421 asm_fprintf (file, "\t%ss\t%r, %r, r3\n",
26422 mi_op, this_regno, this_regno);
26424 else if (mi_delta != 0)
26426 /* Thumb1 unified syntax requires s suffix in instruction name when
26427 one of the operands is immediate. */
26428 asm_fprintf (file, "\t%ss\t%r, %r, #%d\n",
26429 mi_op, this_regno, this_regno,
26430 mi_delta);
26433 else
26435 /* TODO: Use movw/movt for large constants when available. */
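/* The loop below splits MI_DELTA into add/sub instructions whose immediates
   are 8-bit values at even bit positions (valid ARM rotated immediates).
   For example, a delta of 0x1234 is emitted as an add of #0x234 followed by
   an add of #0x1000.  */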
26436 while (mi_delta != 0)
26438 if ((mi_delta & (3 << shift)) == 0)
26439 shift += 2;
26440 else
26442 asm_fprintf (file, "\t%s\t%r, %r, #%d\n",
26443 mi_op, this_regno, this_regno,
26444 mi_delta & (0xff << shift));
26445 mi_delta &= ~(0xff << shift);
26446 shift += 8;
26450 if (TARGET_THUMB1)
26452 if (TARGET_THUMB1_ONLY)
26453 fputs ("\tpop\t{r3}\n", file);
26455 fprintf (file, "\tbx\tr12\n");
26456 ASM_OUTPUT_ALIGN (file, 2);
26457 assemble_name (file, label);
26458 fputs (":\n", file);
26459 if (flag_pic)
26461 /* Output ".word .LTHUNKn-[3,7]-.LTHUNKPCn". */
26462 rtx tem = XEXP (DECL_RTL (function), 0);
26463 /* For TARGET_THUMB1_ONLY the thunk is in Thumb mode, so the PC
26464 pipeline offset is four rather than eight. Adjust the offset
26465 accordingly. */
26466 tem = plus_constant (GET_MODE (tem), tem,
26467 TARGET_THUMB1_ONLY ? -3 : -7);
26468 tem = gen_rtx_MINUS (GET_MODE (tem),
26469 tem,
26470 gen_rtx_SYMBOL_REF (Pmode,
26471 ggc_strdup (labelpc)));
26472 assemble_integer (tem, 4, BITS_PER_WORD, 1);
26474 else
26475 /* Output ".word .LTHUNKn". */
26476 assemble_integer (XEXP (DECL_RTL (function), 0), 4, BITS_PER_WORD, 1);
26478 if (TARGET_THUMB1_ONLY && mi_delta > 255)
26479 assemble_integer (GEN_INT(mi_delta), 4, BITS_PER_WORD, 1);
26481 else
26483 fputs ("\tb\t", file);
26484 assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
26485 if (NEED_PLT_RELOC)
26486 fputs ("(PLT)", file);
26487 fputc ('\n', file);
26490 final_end_function ();
26493 /* MI thunk handling for TARGET_32BIT. */
26495 static void
26496 arm32_output_mi_thunk (FILE *file, tree, HOST_WIDE_INT delta,
26497 HOST_WIDE_INT vcall_offset, tree function)
26499 /* On ARM, this_regno is R0 or R1 depending on
26500 whether the function returns an aggregate or not. */
26502 int this_regno = (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)),
26503 function)
26504 ? R1_REGNUM : R0_REGNUM);
26506 rtx temp = gen_rtx_REG (Pmode, IP_REGNUM);
26507 rtx this_rtx = gen_rtx_REG (Pmode, this_regno);
26508 reload_completed = 1;
26509 emit_note (NOTE_INSN_PROLOGUE_END);
26511 /* Add DELTA to THIS_RTX. */
26512 if (delta != 0)
26513 arm_split_constant (PLUS, Pmode, NULL_RTX,
26514 delta, this_rtx, this_rtx, false);
26516 /* Add *(*THIS_RTX + VCALL_OFFSET) to THIS_RTX. */
26517 if (vcall_offset != 0)
26519 /* Load *THIS_RTX. */
26520 emit_move_insn (temp, gen_rtx_MEM (Pmode, this_rtx));
26521 /* Compute *THIS_RTX + VCALL_OFFSET. */
26522 arm_split_constant (PLUS, Pmode, NULL_RTX, vcall_offset, temp, temp,
26523 false);
26524 /* Compute *(*THIS_RTX + VCALL_OFFSET). */
26525 emit_move_insn (temp, gen_rtx_MEM (Pmode, temp));
26526 emit_insn (gen_add3_insn (this_rtx, this_rtx, temp));
26529 /* Generate a tail call to the target function. */
26530 if (!TREE_USED (function))
26532 assemble_external (function);
26533 TREE_USED (function) = 1;
26535 rtx funexp = XEXP (DECL_RTL (function), 0);
26536 funexp = gen_rtx_MEM (FUNCTION_MODE, funexp);
26537 rtx_insn * insn = emit_call_insn (gen_sibcall (funexp, const0_rtx, NULL_RTX));
26538 SIBLING_CALL_P (insn) = 1;
26540 insn = get_insns ();
26541 shorten_branches (insn);
26542 final_start_function (insn, file, 1);
26543 final (insn, file, 1);
26544 final_end_function ();
26546 /* Stop pretending this is a post-reload pass. */
26547 reload_completed = 0;
26550 /* Output code to add DELTA to the first argument, and then jump
26551 to FUNCTION. Used for C++ multiple inheritance. */
26553 static void
26554 arm_output_mi_thunk (FILE *file, tree thunk, HOST_WIDE_INT delta,
26555 HOST_WIDE_INT vcall_offset, tree function)
26557 if (TARGET_32BIT)
26558 arm32_output_mi_thunk (file, thunk, delta, vcall_offset, function);
26559 else
26560 arm_thumb1_mi_thunk (file, thunk, delta, vcall_offset, function);
26564 arm_emit_vector_const (FILE *file, rtx x)
26566 int i;
26567 const char * pattern;
26569 gcc_assert (GET_CODE (x) == CONST_VECTOR);
26571 switch (GET_MODE (x))
26573 case E_V2SImode: pattern = "%08x"; break;
26574 case E_V4HImode: pattern = "%04x"; break;
26575 case E_V8QImode: pattern = "%02x"; break;
26576 default: gcc_unreachable ();
26579 fprintf (file, "0x");
26580 for (i = CONST_VECTOR_NUNITS (x); i--;)
26582 rtx element;
26584 element = CONST_VECTOR_ELT (x, i);
26585 fprintf (file, pattern, INTVAL (element));
26588 return 1;
26591 /* Emit a fp16 constant appropriately padded to occupy a 4-byte word.
26592 HFmode constant pool entries are actually loaded with ldr. */
26593 void
26594 arm_emit_fp16_const (rtx c)
26596 long bits;
26598 bits = real_to_target (NULL, CONST_DOUBLE_REAL_VALUE (c), HFmode);
26599 if (WORDS_BIG_ENDIAN)
26600 assemble_zeros (2);
26601 assemble_integer (GEN_INT (bits), 2, BITS_PER_WORD, 1);
26602 if (!WORDS_BIG_ENDIAN)
26603 assemble_zeros (2);
26606 const char *
26607 arm_output_load_gr (rtx *operands)
26609 rtx reg;
26610 rtx offset;
26611 rtx wcgr;
26612 rtx sum;
26614 if (!MEM_P (operands [1])
26615 || GET_CODE (sum = XEXP (operands [1], 0)) != PLUS
26616 || !REG_P (reg = XEXP (sum, 0))
26617 || !CONST_INT_P (offset = XEXP (sum, 1))
26618 || ((INTVAL (offset) < 1024) && (INTVAL (offset) > -1024)))
26619 return "wldrw%?\t%0, %1";
26621 /* Fix up an out-of-range load of a GR register. */
26622 output_asm_insn ("str%?\t%0, [sp, #-4]!\t@ Start of GR load expansion", & reg);
26623 wcgr = operands[0];
26624 operands[0] = reg;
26625 output_asm_insn ("ldr%?\t%0, %1", operands);
26627 operands[0] = wcgr;
26628 operands[1] = reg;
26629 output_asm_insn ("tmcr%?\t%0, %1", operands);
26630 output_asm_insn ("ldr%?\t%0, [sp], #4\t@ End of GR load expansion", & reg);
26632 return "";
26635 /* Worker function for TARGET_SETUP_INCOMING_VARARGS.
26637 On the ARM, PRETEND_SIZE is set in order to have the prologue push the last
26638 named arg and all anonymous args onto the stack.
26639 XXX I know the prologue shouldn't be pushing registers, but it is faster
26640 that way. */
26642 static void
26643 arm_setup_incoming_varargs (cumulative_args_t pcum_v,
26644 machine_mode mode,
26645 tree type,
26646 int *pretend_size,
26647 int second_time ATTRIBUTE_UNUSED)
26649 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
26650 int nregs;
26652 cfun->machine->uses_anonymous_args = 1;
26653 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
26655 nregs = pcum->aapcs_ncrn;
26656 if (nregs & 1)
26658 int res = arm_needs_doubleword_align (mode, type);
26659 if (res < 0 && warn_psabi)
26660 inform (input_location, "parameter passing for argument of "
26661 "type %qT changed in GCC 7.1", type);
26662 else if (res > 0)
26663 nregs++;
26666 else
26667 nregs = pcum->nregs;
26669 if (nregs < NUM_ARG_REGS)
26670 *pretend_size = (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
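/* A worked example (illustrative only, with NUM_ARG_REGS == 4 and
   UNITS_PER_WORD == 4): for a function such as  int f (int a, ...)  one
   core register is consumed by the named argument, so nregs == 1 and
   *pretend_size becomes (4 - 1) * 4 == 12, i.e. the prologue pushes
   r1-r3 so that the anonymous arguments end up contiguous with any
   arguments already passed on the stack.  */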
26673 /* We can't rely on the caller doing the proper promotion when
26674 using APCS or ATPCS. */
26676 static bool
26677 arm_promote_prototypes (const_tree t ATTRIBUTE_UNUSED)
26679 return !TARGET_AAPCS_BASED;
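/* Implement TARGET_PROMOTE_FUNCTION_MODE.  Integer arguments and return
   values narrower than a word are passed and returned in SImode.  */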
26682 static machine_mode
26683 arm_promote_function_mode (const_tree type ATTRIBUTE_UNUSED,
26684 machine_mode mode,
26685 int *punsignedp ATTRIBUTE_UNUSED,
26686 const_tree fntype ATTRIBUTE_UNUSED,
26687 int for_return ATTRIBUTE_UNUSED)
26689 if (GET_MODE_CLASS (mode) == MODE_INT
26690 && GET_MODE_SIZE (mode) < 4)
26691 return SImode;
26693 return mode;
26697 static bool
26698 arm_default_short_enums (void)
26700 return ARM_DEFAULT_SHORT_ENUMS;
26704 /* AAPCS requires that anonymous bitfields affect structure alignment. */
26706 static bool
26707 arm_align_anon_bitfield (void)
26709 return TARGET_AAPCS_BASED;
26713 /* The generic C++ ABI says 64-bit (long long). The EABI says 32-bit. */
26715 static tree
26716 arm_cxx_guard_type (void)
26718 return TARGET_AAPCS_BASED ? integer_type_node : long_long_integer_type_node;
26722 /* The EABI says test the least significant bit of a guard variable. */
26724 static bool
26725 arm_cxx_guard_mask_bit (void)
26727 return TARGET_AAPCS_BASED;
26731 /* The EABI specifies that all array cookies are 8 bytes long. */
26733 static tree
26734 arm_get_cookie_size (tree type)
26736 tree size;
26738 if (!TARGET_AAPCS_BASED)
26739 return default_cxx_get_cookie_size (type);
26741 size = build_int_cst (sizetype, 8);
26742 return size;
26746 /* The EABI says that array cookies should also contain the element size. */
26748 static bool
26749 arm_cookie_has_size (void)
26751 return TARGET_AAPCS_BASED;
26755 /* The EABI says constructors and destructors should return a pointer to
26756 the object constructed/destroyed. */
26758 static bool
26759 arm_cxx_cdtor_returns_this (void)
26761 return TARGET_AAPCS_BASED;
26764 /* The EABI says that an inline function may never be the key
26765 method. */
26767 static bool
26768 arm_cxx_key_method_may_be_inline (void)
26770 return !TARGET_AAPCS_BASED;
26773 static void
26774 arm_cxx_determine_class_data_visibility (tree decl)
26776 if (!TARGET_AAPCS_BASED
26777 || !TARGET_DLLIMPORT_DECL_ATTRIBUTES)
26778 return;
26780 /* In general, \S 3.2.5.5 of the ARM EABI requires that class data
26781 is exported. However, on systems without dynamic vague linkage,
26782 \S 3.2.5.6 says that COMDAT class data has hidden linkage. */
26783 if (!TARGET_ARM_DYNAMIC_VAGUE_LINKAGE_P && DECL_COMDAT (decl))
26784 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
26785 else
26786 DECL_VISIBILITY (decl) = VISIBILITY_DEFAULT;
26787 DECL_VISIBILITY_SPECIFIED (decl) = 1;
26790 static bool
26791 arm_cxx_class_data_always_comdat (void)
26793 /* \S 3.2.5.4 of the ARM C++ ABI says that class data only have
26794 vague linkage if the class has no key function. */
26795 return !TARGET_AAPCS_BASED;
26799 /* The EABI says __aeabi_atexit should be used to register static
26800 destructors. */
26802 static bool
26803 arm_cxx_use_aeabi_atexit (void)
26805 return TARGET_AAPCS_BASED;
26809 void
26810 arm_set_return_address (rtx source, rtx scratch)
26812 arm_stack_offsets *offsets;
26813 HOST_WIDE_INT delta;
26814 rtx addr;
26815 unsigned long saved_regs;
26817 offsets = arm_get_frame_offsets ();
26818 saved_regs = offsets->saved_regs_mask;
26820 if ((saved_regs & (1 << LR_REGNUM)) == 0)
26821 emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
26822 else
26824 if (frame_pointer_needed)
26825 addr = plus_constant (Pmode, hard_frame_pointer_rtx, -4);
26826 else
26828 /* LR will be the first saved register. */
26829 delta = offsets->outgoing_args - (offsets->frame + 4);
26832 if (delta >= 4096)
26834 emit_insn (gen_addsi3 (scratch, stack_pointer_rtx,
26835 GEN_INT (delta & ~4095)));
26836 addr = scratch;
26837 delta &= 4095;
26839 else
26840 addr = stack_pointer_rtx;
26842 addr = plus_constant (Pmode, addr, delta);
26844 /* The store needs to be marked as frame related in order to prevent
26845 DSE from deleting it as dead if it is based on fp. */
26846 rtx insn = emit_move_insn (gen_frame_mem (Pmode, addr), source);
26847 RTX_FRAME_RELATED_P (insn) = 1;
26848 add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (Pmode, LR_REGNUM));
26853 void
26854 thumb_set_return_address (rtx source, rtx scratch)
26856 arm_stack_offsets *offsets;
26857 HOST_WIDE_INT delta;
26858 HOST_WIDE_INT limit;
26859 int reg;
26860 rtx addr;
26861 unsigned long mask;
26863 emit_use (source);
26865 offsets = arm_get_frame_offsets ();
26866 mask = offsets->saved_regs_mask;
26867 if (mask & (1 << LR_REGNUM))
26869 limit = 1024;
26870 /* Find the saved regs. */
26871 if (frame_pointer_needed)
26873 delta = offsets->soft_frame - offsets->saved_args;
26874 reg = THUMB_HARD_FRAME_POINTER_REGNUM;
26875 if (TARGET_THUMB1)
26876 limit = 128;
26878 else
26880 delta = offsets->outgoing_args - offsets->saved_args;
26881 reg = SP_REGNUM;
26883 /* Allow for the stack frame. */
26884 if (TARGET_THUMB1 && TARGET_BACKTRACE)
26885 delta -= 16;
26886 /* The link register is always the first saved register. */
26887 delta -= 4;
26889 /* Construct the address. */
26890 addr = gen_rtx_REG (SImode, reg);
26891 if (delta > limit)
26893 emit_insn (gen_movsi (scratch, GEN_INT (delta)));
26894 emit_insn (gen_addsi3 (scratch, scratch, stack_pointer_rtx));
26895 addr = scratch;
26897 else
26898 addr = plus_constant (Pmode, addr, delta);
26900 /* The store needs to be marked as frame related in order to prevent
26901 DSE from deleting it as dead if it is based on fp. */
26902 rtx insn = emit_move_insn (gen_frame_mem (Pmode, addr), source);
26903 RTX_FRAME_RELATED_P (insn) = 1;
26904 add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (Pmode, LR_REGNUM));
26906 else
26907 emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
26910 /* Implements target hook vector_mode_supported_p. */
26911 bool
26912 arm_vector_mode_supported_p (machine_mode mode)
26914 /* Neon also supports V2SImode, etc. listed in the clause below. */
26915 if (TARGET_NEON && (mode == V2SFmode || mode == V4SImode || mode == V8HImode
26916 || mode == V4HFmode || mode == V16QImode || mode == V4SFmode
26917 || mode == V2DImode || mode == V8HFmode))
26918 return true;
26920 if ((TARGET_NEON || TARGET_IWMMXT)
26921 && ((mode == V2SImode)
26922 || (mode == V4HImode)
26923 || (mode == V8QImode)))
26924 return true;
26926 if (TARGET_INT_SIMD && (mode == V4UQQmode || mode == V4QQmode
26927 || mode == V2UHQmode || mode == V2HQmode || mode == V2UHAmode
26928 || mode == V2HAmode))
26929 return true;
26931 return false;
26934 /* Implements target hook array_mode_supported_p. */
26936 static bool
26937 arm_array_mode_supported_p (machine_mode mode,
26938 unsigned HOST_WIDE_INT nelems)
26940 if (TARGET_NEON
26941 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
26942 && (nelems >= 2 && nelems <= 4))
26943 return true;
26945 return false;
26948 /* Use the option -mvectorize-with-neon-double to override the use of quadword
26949 registers when autovectorizing for Neon, at least until multiple vector
26950 widths are supported properly by the middle-end. */
26952 static machine_mode
26953 arm_preferred_simd_mode (scalar_mode mode)
26955 if (TARGET_NEON)
26956 switch (mode)
26958 case E_SFmode:
26959 return TARGET_NEON_VECTORIZE_DOUBLE ? V2SFmode : V4SFmode;
26960 case E_SImode:
26961 return TARGET_NEON_VECTORIZE_DOUBLE ? V2SImode : V4SImode;
26962 case E_HImode:
26963 return TARGET_NEON_VECTORIZE_DOUBLE ? V4HImode : V8HImode;
26964 case E_QImode:
26965 return TARGET_NEON_VECTORIZE_DOUBLE ? V8QImode : V16QImode;
26966 case E_DImode:
26967 if (!TARGET_NEON_VECTORIZE_DOUBLE)
26968 return V2DImode;
26969 break;
26971 default:;
26974 if (TARGET_REALLY_IWMMXT)
26975 switch (mode)
26977 case E_SImode:
26978 return V2SImode;
26979 case E_HImode:
26980 return V4HImode;
26981 case E_QImode:
26982 return V8QImode;
26984 default:;
26987 return word_mode;
26990 /* Implement TARGET_CLASS_LIKELY_SPILLED_P.
26992 We need to define this for LO_REGS on Thumb-1. Otherwise we can end up
26993 using r0-r4 for function arguments, r7 for the stack frame and don't have
26994 enough left over to do doubleword arithmetic. For Thumb-2 all the
26995 potentially problematic instructions accept high registers so this is not
26996 necessary. Care needs to be taken to avoid adding new Thumb-2 patterns
26997 that require many low registers. */
26998 static bool
26999 arm_class_likely_spilled_p (reg_class_t rclass)
27001 if ((TARGET_THUMB1 && rclass == LO_REGS)
27002 || rclass == CC_REG)
27003 return true;
27005 return false;
27008 /* Implements target hook small_register_classes_for_mode_p. */
27009 bool
27010 arm_small_register_classes_for_mode_p (machine_mode mode ATTRIBUTE_UNUSED)
27012 return TARGET_THUMB1;
27015 /* Implement TARGET_SHIFT_TRUNCATION_MASK. SImode shifts use normal
27016 ARM insns and therefore guarantee that the shift count is modulo 256.
27017 DImode shifts (those implemented by lib1funcs.S or by optabs.c)
27018 guarantee no particular behavior for out-of-range counts. */
27020 static unsigned HOST_WIDE_INT
27021 arm_shift_truncation_mask (machine_mode mode)
27023 return mode == SImode ? 255 : 0;
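/* For example (illustrative only): because the SImode mask is 255, the
   middle-end may drop a redundant masking of the shift count, e.g.
   simplify (ashift:SI x (and:SI n (const_int 255))) to
   (ashift:SI x n); the DImode mask of 0 forbids any such assumption.  */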
27027 /* Map internal gcc register numbers to DWARF2 register numbers. */
27029 unsigned int
27030 arm_dbx_register_number (unsigned int regno)
27032 if (regno < 16)
27033 return regno;
27035 if (IS_VFP_REGNUM (regno))
27037 /* See comment in arm_dwarf_register_span. */
27038 if (VFP_REGNO_OK_FOR_SINGLE (regno))
27039 return 64 + regno - FIRST_VFP_REGNUM;
27040 else
27041 return 256 + (regno - FIRST_VFP_REGNUM) / 2;
27044 if (IS_IWMMXT_GR_REGNUM (regno))
27045 return 104 + regno - FIRST_IWMMXT_GR_REGNUM;
27047 if (IS_IWMMXT_REGNUM (regno))
27048 return 112 + regno - FIRST_IWMMXT_REGNUM;
27050 return DWARF_FRAME_REGISTERS;
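/* A few illustrative mappings; the regno arithmetic below assumes GCC's
   usual internal layout in which s0-s31 occupy FIRST_VFP_REGNUM onwards
   and d16-d31 follow, two regnos each.  */
#if 0
  gcc_checking_assert (arm_dbx_register_number (0) == 0);	/* r0  */
  gcc_checking_assert (arm_dbx_register_number (FIRST_VFP_REGNUM + 5)
		       == 69);					/* s5  */
  gcc_checking_assert (arm_dbx_register_number (FIRST_VFP_REGNUM + 32)
		       == 272);					/* d16 */
#endif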
27053 /* Dwarf models VFPv3 registers as 32 64-bit registers.
27054 GCC models them as 64 32-bit registers, so we need to describe this to
27055 the DWARF generation code. Other registers can use the default. */
27056 static rtx
27057 arm_dwarf_register_span (rtx rtl)
27059 machine_mode mode;
27060 unsigned regno;
27061 rtx parts[16];
27062 int nregs;
27063 int i;
27065 regno = REGNO (rtl);
27066 if (!IS_VFP_REGNUM (regno))
27067 return NULL_RTX;
27069 /* XXX FIXME: The EABI defines two VFP register ranges:
27070 64-95: Legacy VFPv2 numbering for S0-S31 (obsolescent)
27071 256-287: D0-D31
27072 The recommended encoding for S0-S31 is a DW_OP_bit_piece of the
27073 corresponding D register. Until GDB supports this, we shall use the
27074 legacy encodings. We also use these encodings for D0-D15 for
27075 compatibility with older debuggers. */
27076 mode = GET_MODE (rtl);
27077 if (GET_MODE_SIZE (mode) < 8)
27078 return NULL_RTX;
27080 if (VFP_REGNO_OK_FOR_SINGLE (regno))
27082 nregs = GET_MODE_SIZE (mode) / 4;
27083 for (i = 0; i < nregs; i += 2)
27084 if (TARGET_BIG_END)
27086 parts[i] = gen_rtx_REG (SImode, regno + i + 1);
27087 parts[i + 1] = gen_rtx_REG (SImode, regno + i);
27089 else
27091 parts[i] = gen_rtx_REG (SImode, regno + i);
27092 parts[i + 1] = gen_rtx_REG (SImode, regno + i + 1);
27095 else
27097 nregs = GET_MODE_SIZE (mode) / 8;
27098 for (i = 0; i < nregs; i++)
27099 parts[i] = gen_rtx_REG (DImode, regno + i);
27102 return gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (nregs , parts));
27105 #if ARM_UNWIND_INFO
27106 /* Emit unwind directives for a store-multiple instruction or stack pointer
27107 push during alignment.
27108 These should only ever be generated by the function prologue code, so
27109 expect them to have a particular form.
27110 The store-multiple instruction sometimes pushes pc as the last register,
27111 although it should not be tracked in the unwind information; for -Os it
27112 sometimes pushes some dummy registers before the first register that needs
27113 to be tracked in the unwind information.  Such dummy registers are there just
27114 to avoid a separate stack adjustment, and will not be restored in the
27115 epilogue. */
27117 static void
27118 arm_unwind_emit_sequence (FILE * asm_out_file, rtx p)
27120 int i;
27121 HOST_WIDE_INT offset;
27122 HOST_WIDE_INT nregs;
27123 int reg_size;
27124 unsigned reg;
27125 unsigned lastreg;
27126 unsigned padfirst = 0, padlast = 0;
27127 rtx e;
27129 e = XVECEXP (p, 0, 0);
27130 gcc_assert (GET_CODE (e) == SET);
27132 /* First insn will adjust the stack pointer. */
27133 gcc_assert (GET_CODE (e) == SET
27134 && REG_P (SET_DEST (e))
27135 && REGNO (SET_DEST (e)) == SP_REGNUM
27136 && GET_CODE (SET_SRC (e)) == PLUS);
27138 offset = -INTVAL (XEXP (SET_SRC (e), 1));
27139 nregs = XVECLEN (p, 0) - 1;
27140 gcc_assert (nregs);
27142 reg = REGNO (SET_SRC (XVECEXP (p, 0, 1)));
27143 if (reg < 16)
27145 /* For -Os dummy registers can be pushed at the beginning to
27146 avoid separate stack pointer adjustment. */
27147 e = XVECEXP (p, 0, 1);
27148 e = XEXP (SET_DEST (e), 0);
27149 if (GET_CODE (e) == PLUS)
27150 padfirst = INTVAL (XEXP (e, 1));
27151 gcc_assert (padfirst == 0 || optimize_size);
27152 /* The function prologue may also push pc, but does not annotate it, as it is
27153 never restored.  We turn this into a stack pointer adjustment.  */
27154 e = XVECEXP (p, 0, nregs);
27155 e = XEXP (SET_DEST (e), 0);
27156 if (GET_CODE (e) == PLUS)
27157 padlast = offset - INTVAL (XEXP (e, 1)) - 4;
27158 else
27159 padlast = offset - 4;
27160 gcc_assert (padlast == 0 || padlast == 4);
27161 if (padlast == 4)
27162 fprintf (asm_out_file, "\t.pad #4\n");
27163 reg_size = 4;
27164 fprintf (asm_out_file, "\t.save {");
27166 else if (IS_VFP_REGNUM (reg))
27168 reg_size = 8;
27169 fprintf (asm_out_file, "\t.vsave {");
27171 else
27172 /* Unknown register type. */
27173 gcc_unreachable ();
27175 /* If the stack increment doesn't match the size of the saved registers,
27176 something has gone horribly wrong. */
27177 gcc_assert (offset == padfirst + nregs * reg_size + padlast);
27179 offset = padfirst;
27180 lastreg = 0;
27181 /* The remaining insns will describe the stores. */
27182 for (i = 1; i <= nregs; i++)
27184 /* Expect (set (mem <addr>) (reg)).
27185 Where <addr> is (reg:SP) or (plus (reg:SP) (const_int)). */
27186 e = XVECEXP (p, 0, i);
27187 gcc_assert (GET_CODE (e) == SET
27188 && MEM_P (SET_DEST (e))
27189 && REG_P (SET_SRC (e)));
27191 reg = REGNO (SET_SRC (e));
27192 gcc_assert (reg >= lastreg);
27194 if (i != 1)
27195 fprintf (asm_out_file, ", ");
27196 /* We can't use %r for vfp because we need to use the
27197 double precision register names. */
27198 if (IS_VFP_REGNUM (reg))
27199 asm_fprintf (asm_out_file, "d%d", (reg - FIRST_VFP_REGNUM) / 2);
27200 else
27201 asm_fprintf (asm_out_file, "%r", reg);
27203 if (flag_checking)
27205 /* Check that the addresses are consecutive. */
27206 e = XEXP (SET_DEST (e), 0);
27207 if (GET_CODE (e) == PLUS)
27208 gcc_assert (REG_P (XEXP (e, 0))
27209 && REGNO (XEXP (e, 0)) == SP_REGNUM
27210 && CONST_INT_P (XEXP (e, 1))
27211 && offset == INTVAL (XEXP (e, 1)));
27212 else
27213 gcc_assert (i == 1
27214 && REG_P (e)
27215 && REGNO (e) == SP_REGNUM);
27216 offset += reg_size;
27219 fprintf (asm_out_file, "}\n");
27220 if (padfirst)
27221 fprintf (asm_out_file, "\t.pad #%d\n", padfirst);
27224 /* Emit unwind directives for a SET. */
27226 static void
27227 arm_unwind_emit_set (FILE * asm_out_file, rtx p)
27229 rtx e0;
27230 rtx e1;
27231 unsigned reg;
27233 e0 = XEXP (p, 0);
27234 e1 = XEXP (p, 1);
27235 switch (GET_CODE (e0))
27237 case MEM:
27238 /* Pushing a single register. */
27239 if (GET_CODE (XEXP (e0, 0)) != PRE_DEC
27240 || !REG_P (XEXP (XEXP (e0, 0), 0))
27241 || REGNO (XEXP (XEXP (e0, 0), 0)) != SP_REGNUM)
27242 abort ();
27244 asm_fprintf (asm_out_file, "\t.save ");
27245 if (IS_VFP_REGNUM (REGNO (e1)))
27246 asm_fprintf(asm_out_file, "{d%d}\n",
27247 (REGNO (e1) - FIRST_VFP_REGNUM) / 2);
27248 else
27249 asm_fprintf(asm_out_file, "{%r}\n", REGNO (e1));
27250 break;
27252 case REG:
27253 if (REGNO (e0) == SP_REGNUM)
27255 /* A stack increment. */
27256 if (GET_CODE (e1) != PLUS
27257 || !REG_P (XEXP (e1, 0))
27258 || REGNO (XEXP (e1, 0)) != SP_REGNUM
27259 || !CONST_INT_P (XEXP (e1, 1)))
27260 abort ();
27262 asm_fprintf (asm_out_file, "\t.pad #%wd\n",
27263 -INTVAL (XEXP (e1, 1)));
27265 else if (REGNO (e0) == HARD_FRAME_POINTER_REGNUM)
27267 HOST_WIDE_INT offset;
27269 if (GET_CODE (e1) == PLUS)
27271 if (!REG_P (XEXP (e1, 0))
27272 || !CONST_INT_P (XEXP (e1, 1)))
27273 abort ();
27274 reg = REGNO (XEXP (e1, 0));
27275 offset = INTVAL (XEXP (e1, 1));
27276 asm_fprintf (asm_out_file, "\t.setfp %r, %r, #%wd\n",
27277 HARD_FRAME_POINTER_REGNUM, reg,
27278 offset);
27280 else if (REG_P (e1))
27282 reg = REGNO (e1);
27283 asm_fprintf (asm_out_file, "\t.setfp %r, %r\n",
27284 HARD_FRAME_POINTER_REGNUM, reg);
27286 else
27287 abort ();
27289 else if (REG_P (e1) && REGNO (e1) == SP_REGNUM)
27291 /* Move from sp to reg. */
27292 asm_fprintf (asm_out_file, "\t.movsp %r\n", REGNO (e0));
27294 else if (GET_CODE (e1) == PLUS
27295 && REG_P (XEXP (e1, 0))
27296 && REGNO (XEXP (e1, 0)) == SP_REGNUM
27297 && CONST_INT_P (XEXP (e1, 1)))
27299 /* Set reg to offset from sp. */
27300 asm_fprintf (asm_out_file, "\t.movsp %r, #%d\n",
27301 REGNO (e0), (int)INTVAL(XEXP (e1, 1)));
27303 else
27304 abort ();
27305 break;
27307 default:
27308 abort ();
27313 /* Emit unwind directives for the given insn. */
27315 static void
27316 arm_unwind_emit (FILE * asm_out_file, rtx_insn *insn)
27318 rtx note, pat;
27319 bool handled_one = false;
27321 if (arm_except_unwind_info (&global_options) != UI_TARGET)
27322 return;
27324 if (!(flag_unwind_tables || crtl->uses_eh_lsda)
27325 && (TREE_NOTHROW (current_function_decl)
27326 || crtl->all_throwers_are_sibcalls))
27327 return;
27329 if (NOTE_P (insn) || !RTX_FRAME_RELATED_P (insn))
27330 return;
27332 for (note = REG_NOTES (insn); note ; note = XEXP (note, 1))
27334 switch (REG_NOTE_KIND (note))
27336 case REG_FRAME_RELATED_EXPR:
27337 pat = XEXP (note, 0);
27338 goto found;
27340 case REG_CFA_REGISTER:
27341 pat = XEXP (note, 0);
27342 if (pat == NULL)
27344 pat = PATTERN (insn);
27345 if (GET_CODE (pat) == PARALLEL)
27346 pat = XVECEXP (pat, 0, 0);
27349 /* Only emitted for IS_STACKALIGN re-alignment. */
27351 rtx dest, src;
27352 unsigned reg;
27354 src = SET_SRC (pat);
27355 dest = SET_DEST (pat);
27357 gcc_assert (src == stack_pointer_rtx);
27358 reg = REGNO (dest);
27359 asm_fprintf (asm_out_file, "\t.unwind_raw 0, 0x%x @ vsp = r%d\n",
27360 reg + 0x90, reg);
27362 handled_one = true;
27363 break;
27365 /* The INSN is generated in the epilogue.  It is set as RTX_FRAME_RELATED_P
27366 to get correct DWARF information for shrink-wrapping.  We should not
27367 emit unwind information for it because these notes are used either for
27368 pretend arguments or to adjust sp and restore registers from the
27369 stack.  */
27370 case REG_CFA_DEF_CFA:
27371 case REG_CFA_ADJUST_CFA:
27372 case REG_CFA_RESTORE:
27373 return;
27375 case REG_CFA_EXPRESSION:
27376 case REG_CFA_OFFSET:
27377 /* ??? Only handling here what we actually emit. */
27378 gcc_unreachable ();
27380 default:
27381 break;
27384 if (handled_one)
27385 return;
27386 pat = PATTERN (insn);
27387 found:
27389 switch (GET_CODE (pat))
27391 case SET:
27392 arm_unwind_emit_set (asm_out_file, pat);
27393 break;
27395 case SEQUENCE:
27396 /* Store multiple. */
27397 arm_unwind_emit_sequence (asm_out_file, pat);
27398 break;
27400 default:
27401 abort();
27406 /* Output a reference from a function exception table to the type_info
27407 object X. The EABI specifies that the symbol should be relocated by
27408 an R_ARM_TARGET2 relocation. */
27410 static bool
27411 arm_output_ttype (rtx x)
27413 fputs ("\t.word\t", asm_out_file);
27414 output_addr_const (asm_out_file, x);
27415 /* Use special relocations for symbol references. */
27416 if (!CONST_INT_P (x))
27417 fputs ("(TARGET2)", asm_out_file);
27418 fputc ('\n', asm_out_file);
27420 return TRUE;
27423 /* Implement TARGET_ASM_EMIT_EXCEPT_PERSONALITY. */
27425 static void
27426 arm_asm_emit_except_personality (rtx personality)
27428 fputs ("\t.personality\t", asm_out_file);
27429 output_addr_const (asm_out_file, personality);
27430 fputc ('\n', asm_out_file);
27432 #endif /* ARM_UNWIND_INFO */
27434 /* Implement TARGET_ASM_INITIALIZE_SECTIONS. */
27436 static void
27437 arm_asm_init_sections (void)
27439 #if ARM_UNWIND_INFO
27440 exception_section = get_unnamed_section (0, output_section_asm_op,
27441 "\t.handlerdata");
27442 #endif /* ARM_UNWIND_INFO */
27444 #ifdef OBJECT_FORMAT_ELF
27445 if (target_pure_code)
27446 text_section->unnamed.data = "\t.section .text,\"0x20000006\",%progbits";
27447 #endif
27450 /* Output unwind directives for the start/end of a function. */
27452 void
27453 arm_output_fn_unwind (FILE * f, bool prologue)
27455 if (arm_except_unwind_info (&global_options) != UI_TARGET)
27456 return;
27458 if (prologue)
27459 fputs ("\t.fnstart\n", f);
27460 else
27462 /* If this function will never be unwound, then mark it as such.
27463 The same condition is used in arm_unwind_emit to suppress
27464 the frame annotations. */
27465 if (!(flag_unwind_tables || crtl->uses_eh_lsda)
27466 && (TREE_NOTHROW (current_function_decl)
27467 || crtl->all_throwers_are_sibcalls))
27468 fputs("\t.cantunwind\n", f);
27470 fputs ("\t.fnend\n", f);
27474 static bool
27475 arm_emit_tls_decoration (FILE *fp, rtx x)
27477 enum tls_reloc reloc;
27478 rtx val;
27480 val = XVECEXP (x, 0, 0);
27481 reloc = (enum tls_reloc) INTVAL (XVECEXP (x, 0, 1));
27483 output_addr_const (fp, val);
27485 switch (reloc)
27487 case TLS_GD32:
27488 fputs ("(tlsgd)", fp);
27489 break;
27490 case TLS_LDM32:
27491 fputs ("(tlsldm)", fp);
27492 break;
27493 case TLS_LDO32:
27494 fputs ("(tlsldo)", fp);
27495 break;
27496 case TLS_IE32:
27497 fputs ("(gottpoff)", fp);
27498 break;
27499 case TLS_LE32:
27500 fputs ("(tpoff)", fp);
27501 break;
27502 case TLS_DESCSEQ:
27503 fputs ("(tlsdesc)", fp);
27504 break;
27505 default:
27506 gcc_unreachable ();
27509 switch (reloc)
27511 case TLS_GD32:
27512 case TLS_LDM32:
27513 case TLS_IE32:
27514 case TLS_DESCSEQ:
27515 fputs (" + (. - ", fp);
27516 output_addr_const (fp, XVECEXP (x, 0, 2));
27517 /* For DESCSEQ the 3rd operand encodes thumbness, and is added.  */
27518 fputs (reloc == TLS_DESCSEQ ? " + " : " - ", fp);
27519 output_addr_const (fp, XVECEXP (x, 0, 3));
27520 fputc (')', fp);
27521 break;
27522 default:
27523 break;
27526 return TRUE;
27529 /* ARM implementation of TARGET_ASM_OUTPUT_DWARF_DTPREL. */
27531 static void
27532 arm_output_dwarf_dtprel (FILE *file, int size, rtx x)
27534 gcc_assert (size == 4);
27535 fputs ("\t.word\t", file);
27536 output_addr_const (file, x);
27537 fputs ("(tlsldo)", file);
27540 /* Implement TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
27542 static bool
27543 arm_output_addr_const_extra (FILE *fp, rtx x)
27545 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
27546 return arm_emit_tls_decoration (fp, x);
27547 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_PIC_LABEL)
27549 char label[256];
27550 int labelno = INTVAL (XVECEXP (x, 0, 0));
27552 ASM_GENERATE_INTERNAL_LABEL (label, "LPIC", labelno);
27553 assemble_name_raw (fp, label);
27555 return TRUE;
27557 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_GOTSYM_OFF)
27559 assemble_name (fp, "_GLOBAL_OFFSET_TABLE_");
27560 if (GOT_PCREL)
27561 fputs ("+.", fp);
27562 fputs ("-(", fp);
27563 output_addr_const (fp, XVECEXP (x, 0, 0));
27564 fputc (')', fp);
27565 return TRUE;
27567 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_SYMBOL_OFFSET)
27569 output_addr_const (fp, XVECEXP (x, 0, 0));
27570 if (GOT_PCREL)
27571 fputs ("+.", fp);
27572 fputs ("-(", fp);
27573 output_addr_const (fp, XVECEXP (x, 0, 1));
27574 fputc (')', fp);
27575 return TRUE;
27577 else if (GET_CODE (x) == CONST_VECTOR)
27578 return arm_emit_vector_const (fp, x);
27580 return FALSE;
27583 /* Output assembly for a shift instruction.
27584 SET_FLAGS determines how the instruction modifies the condition codes.
27585 0 - Do not set condition codes.
27586 1 - Set condition codes.
27587 2 - Use smallest instruction. */
27588 const char *
27589 arm_output_shift(rtx * operands, int set_flags)
27591 char pattern[100];
27592 static const char flag_chars[3] = {'?', '.', '!'};
27593 const char *shift;
27594 HOST_WIDE_INT val;
27595 char c;
27597 c = flag_chars[set_flags];
27598 shift = shift_op(operands[3], &val);
27599 if (shift)
27601 if (val != -1)
27602 operands[2] = GEN_INT(val);
27603 sprintf (pattern, "%s%%%c\t%%0, %%1, %%2", shift, c);
27605 else
27606 sprintf (pattern, "mov%%%c\t%%0, %%1", c);
27608 output_asm_insn (pattern, operands);
27609 return "";
27612 /* Output assembly for a WMMX immediate shift instruction. */
27613 const char *
27614 arm_output_iwmmxt_shift_immediate (const char *insn_name, rtx *operands, bool wror_or_wsra)
27616 int shift = INTVAL (operands[2]);
27617 char templ[50];
27618 machine_mode opmode = GET_MODE (operands[0]);
27620 gcc_assert (shift >= 0);
27622 /* If the shift value in the register versions is > 63 (for the D qualifier),
27623 31 (for the W qualifier) or 15 (for the H qualifier), handle it specially below. */
27624 if (((opmode == V4HImode) && (shift > 15))
27625 || ((opmode == V2SImode) && (shift > 31))
27626 || ((opmode == DImode) && (shift > 63)))
27628 if (wror_or_wsra)
27630 sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, 32);
27631 output_asm_insn (templ, operands);
27632 if (opmode == DImode)
27634 sprintf (templ, "%s\t%%0, %%0, #%d", insn_name, 32);
27635 output_asm_insn (templ, operands);
27638 else
27640 /* The destination register will contain all zeros. */
27641 sprintf (templ, "wzero\t%%0");
27642 output_asm_insn (templ, operands);
27644 return "";
27647 if ((opmode == DImode) && (shift > 32))
27649 sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, 32);
27650 output_asm_insn (templ, operands);
27651 sprintf (templ, "%s\t%%0, %%0, #%d", insn_name, shift - 32);
27652 output_asm_insn (templ, operands);
27654 else
27656 sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, shift);
27657 output_asm_insn (templ, operands);
27659 return "";
27662 /* Output assembly for a WMMX tinsr instruction. */
27663 const char *
27664 arm_output_iwmmxt_tinsr (rtx *operands)
27666 int mask = INTVAL (operands[3]);
27667 int i;
27668 char templ[50];
27669 int units = mode_nunits[GET_MODE (operands[0])];
27670 gcc_assert ((mask & (mask - 1)) == 0);
27671 for (i = 0; i < units; ++i)
27673 if ((mask & 0x01) == 1)
27675 break;
27677 mask >>= 1;
27679 gcc_assert (i < units);
27681 switch (GET_MODE (operands[0]))
27683 case E_V8QImode:
27684 sprintf (templ, "tinsrb%%?\t%%0, %%2, #%d", i);
27685 break;
27686 case E_V4HImode:
27687 sprintf (templ, "tinsrh%%?\t%%0, %%2, #%d", i);
27688 break;
27689 case E_V2SImode:
27690 sprintf (templ, "tinsrw%%?\t%%0, %%2, #%d", i);
27691 break;
27692 default:
27693 gcc_unreachable ();
27694 break;
27696 output_asm_insn (templ, operands);
27698 return "";
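/* For example (illustrative only): with a V4HImode destination and a mask
   operand of 0x4 (only bit 2 set), the loop above selects lane 2 and the
   insn printed is "tinsrh <wRd>, <Rn>, #2".  */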
27701 /* Output a Thumb-1 casesi dispatch sequence. */
27702 const char *
27703 thumb1_output_casesi (rtx *operands)
27705 rtx diff_vec = PATTERN (NEXT_INSN (as_a <rtx_insn *> (operands[0])));
27707 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
27709 switch (GET_MODE(diff_vec))
27711 case E_QImode:
27712 return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ?
27713 "bl\t%___gnu_thumb1_case_uqi" : "bl\t%___gnu_thumb1_case_sqi");
27714 case E_HImode:
27715 return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ?
27716 "bl\t%___gnu_thumb1_case_uhi" : "bl\t%___gnu_thumb1_case_shi");
27717 case E_SImode:
27718 return "bl\t%___gnu_thumb1_case_si";
27719 default:
27720 gcc_unreachable ();
27724 /* Output a Thumb-2 casesi instruction. */
27725 const char *
27726 thumb2_output_casesi (rtx *operands)
27728 rtx diff_vec = PATTERN (NEXT_INSN (as_a <rtx_insn *> (operands[2])));
27730 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
27732 output_asm_insn ("cmp\t%0, %1", operands);
27733 output_asm_insn ("bhi\t%l3", operands);
27734 switch (GET_MODE(diff_vec))
27736 case E_QImode:
27737 return "tbb\t[%|pc, %0]";
27738 case E_HImode:
27739 return "tbh\t[%|pc, %0, lsl #1]";
27740 case E_SImode:
27741 if (flag_pic)
27743 output_asm_insn ("adr\t%4, %l2", operands);
27744 output_asm_insn ("ldr\t%5, [%4, %0, lsl #2]", operands);
27745 output_asm_insn ("add\t%4, %4, %5", operands);
27746 return "bx\t%4";
27748 else
27750 output_asm_insn ("adr\t%4, %l2", operands);
27751 return "ldr\t%|pc, [%4, %0, lsl #2]";
27753 default:
27754 gcc_unreachable ();
27758 /* Implement TARGET_SCHED_ISSUE_RATE. Lookup the issue rate in the
27759 per-core tuning structs. */
27760 static int
27761 arm_issue_rate (void)
27763 return current_tune->issue_rate;
27766 /* Return how many instructions should scheduler lookahead to choose the
27767 best one. */
27768 static int
27769 arm_first_cycle_multipass_dfa_lookahead (void)
27771 int issue_rate = arm_issue_rate ();
27773 return issue_rate > 1 && !sched_fusion ? issue_rate : 0;
27776 /* Enable modeling of L2 auto-prefetcher. */
27777 static int
27778 arm_first_cycle_multipass_dfa_lookahead_guard (rtx_insn *insn, int ready_index)
27780 return autopref_multipass_dfa_lookahead_guard (insn, ready_index);
27783 const char *
27784 arm_mangle_type (const_tree type)
27786 /* The ARM ABI documents (10th October 2008) say that "__va_list"
27787 has to be mangled as if it is in the "std" namespace. */
27788 if (TARGET_AAPCS_BASED
27789 && lang_hooks.types_compatible_p (CONST_CAST_TREE (type), va_list_type))
27790 return "St9__va_list";
27792 /* Half-precision float. */
27793 if (TREE_CODE (type) == REAL_TYPE && TYPE_PRECISION (type) == 16)
27794 return "Dh";
27796 /* Try mangling as a Neon type, TYPE_NAME is non-NULL if this is a
27797 builtin type. */
27798 if (TYPE_NAME (type) != NULL)
27799 return arm_mangle_builtin_type (type);
27801 /* Use the default mangling. */
27802 return NULL;
27805 /* Order of allocation of core registers for Thumb: this allocation is
27806 written over the corresponding initial entries of the array
27807 initialized with REG_ALLOC_ORDER. We allocate all low registers
27808 first. Saving and restoring a low register is usually cheaper than
27809 using a call-clobbered high register. */
27811 static const int thumb_core_reg_alloc_order[] =
27813 3, 2, 1, 0, 4, 5, 6, 7,
27814 12, 14, 8, 9, 10, 11
27817 /* Adjust register allocation order when compiling for Thumb. */
27819 void
27820 arm_order_regs_for_local_alloc (void)
27822 const int arm_reg_alloc_order[] = REG_ALLOC_ORDER;
27823 memcpy(reg_alloc_order, arm_reg_alloc_order, sizeof (reg_alloc_order));
27824 if (TARGET_THUMB)
27825 memcpy (reg_alloc_order, thumb_core_reg_alloc_order,
27826 sizeof (thumb_core_reg_alloc_order));
27829 /* Implement TARGET_FRAME_POINTER_REQUIRED. */
27831 bool
27832 arm_frame_pointer_required (void)
27834 if (SUBTARGET_FRAME_POINTER_REQUIRED)
27835 return true;
27837 /* If the function receives nonlocal gotos, it needs to save the frame
27838 pointer in the nonlocal_goto_save_area object. */
27839 if (cfun->has_nonlocal_label)
27840 return true;
27842 /* The frame pointer is required for non-leaf APCS frames. */
27843 if (TARGET_ARM && TARGET_APCS_FRAME && !crtl->is_leaf)
27844 return true;
27846 /* If we are probing the stack in the prologue, we will have a faulting
27847 instruction prior to the stack adjustment and this requires a frame
27848 pointer if we want to catch the exception using the EABI unwinder. */
27849 if (!IS_INTERRUPT (arm_current_func_type ())
27850 && flag_stack_check == STATIC_BUILTIN_STACK_CHECK
27851 && arm_except_unwind_info (&global_options) == UI_TARGET
27852 && cfun->can_throw_non_call_exceptions)
27854 HOST_WIDE_INT size = get_frame_size ();
27856 /* That's irrelevant if there is no stack adjustment. */
27857 if (size <= 0)
27858 return false;
27860 /* That's relevant only if there is a stack probe. */
27861 if (crtl->is_leaf && !cfun->calls_alloca)
27863 /* We don't have the final size of the frame so adjust. */
27864 size += 32 * UNITS_PER_WORD;
27865 if (size > PROBE_INTERVAL && size > STACK_CHECK_PROTECT)
27866 return true;
27868 else
27869 return true;
27872 return false;
27875 /* Only thumb1 can't support conditional execution, so return true if
27876 the target is not thumb1. */
27877 static bool
27878 arm_have_conditional_execution (void)
27880 return !TARGET_THUMB1;
27883 /* The AAPCS sets the maximum alignment of a vector to 64 bits. */
27884 static HOST_WIDE_INT
27885 arm_vector_alignment (const_tree type)
27887 HOST_WIDE_INT align = tree_to_shwi (TYPE_SIZE (type));
27889 if (TARGET_AAPCS_BASED)
27890 align = MIN (align, 64);
27892 return align;
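/* Implement the autovectorize_vector_sizes target hook: return a bitmask
   of the vector sizes, in bytes, that the auto-vectorizer may try (16 | 8
   for Neon quad- and double-word vectors), or 0, meaning use only the
   preferred SIMD mode, when -mvectorize-with-neon-double is given.  */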
27895 static unsigned int
27896 arm_autovectorize_vector_sizes (void)
27898 return TARGET_NEON_VECTORIZE_DOUBLE ? 0 : (16 | 8);
27901 static bool
27902 arm_vector_alignment_reachable (const_tree type, bool is_packed)
27904 /* Vectors which aren't in packed structures will not be less aligned than
27905 the natural alignment of their element type, so this is safe. */
27906 if (TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access)
27907 return !is_packed;
27909 return default_builtin_vector_alignment_reachable (type, is_packed);
27912 static bool
27913 arm_builtin_support_vector_misalignment (machine_mode mode,
27914 const_tree type, int misalignment,
27915 bool is_packed)
27917 if (TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access)
27919 HOST_WIDE_INT align = TYPE_ALIGN_UNIT (type);
27921 if (is_packed)
27922 return align == 1;
27924 /* If the misalignment is unknown, we should be able to handle the access
27925 so long as it is not to a member of a packed data structure. */
27926 if (misalignment == -1)
27927 return true;
27929 /* Return true if the misalignment is a multiple of the natural alignment
27930 of the vector's element type. This is probably always going to be
27931 true in practice, since we've already established that this isn't a
27932 packed access. */
27933 return ((misalignment % align) == 0);
27936 return default_builtin_support_vector_misalignment (mode, type, misalignment,
27937 is_packed);
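/* Implement TARGET_CONDITIONAL_REGISTER_USAGE.  Adjust the fixed and
   call-used register sets according to the selected options (Thumb-1,
   VFP, iWMMXt, PIC, APCS frame, interworking).  */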
27940 static void
27941 arm_conditional_register_usage (void)
27943 int regno;
27945 if (TARGET_THUMB1 && optimize_size)
27947 /* When optimizing for size on Thumb-1, it's better not
27948 to use the HI regs, because of the overhead of
27949 stacking them. */
27950 for (regno = FIRST_HI_REGNUM; regno <= LAST_HI_REGNUM; ++regno)
27951 fixed_regs[regno] = call_used_regs[regno] = 1;
27954 /* The link register can be clobbered by any branch insn,
27955 but we have no way to track that at present, so mark
27956 it as unavailable. */
27957 if (TARGET_THUMB1)
27958 fixed_regs[LR_REGNUM] = call_used_regs[LR_REGNUM] = 1;
27960 if (TARGET_32BIT && TARGET_HARD_FLOAT)
27962 /* VFPv3 registers are disabled when earlier VFP
27963 versions are selected due to the definition of
27964 LAST_VFP_REGNUM. */
27965 for (regno = FIRST_VFP_REGNUM;
27966 regno <= LAST_VFP_REGNUM; ++ regno)
27968 fixed_regs[regno] = 0;
27969 call_used_regs[regno] = regno < FIRST_VFP_REGNUM + 16
27970 || regno >= FIRST_VFP_REGNUM + 32;
27974 if (TARGET_REALLY_IWMMXT)
27976 regno = FIRST_IWMMXT_GR_REGNUM;
27977 /* The 2002/10/09 revision of the XScale ABI has wCG0
27978 and wCG1 as call-preserved registers. The 2002/11/21
27979 revision changed this so that all wCG registers are
27980 scratch registers. */
27981 for (regno = FIRST_IWMMXT_GR_REGNUM;
27982 regno <= LAST_IWMMXT_GR_REGNUM; ++ regno)
27983 fixed_regs[regno] = 0;
27984 /* The XScale ABI has wR0 - wR9 as scratch registers,
27985 the rest as call-preserved registers. */
27986 for (regno = FIRST_IWMMXT_REGNUM;
27987 regno <= LAST_IWMMXT_REGNUM; ++ regno)
27989 fixed_regs[regno] = 0;
27990 call_used_regs[regno] = regno < FIRST_IWMMXT_REGNUM + 10;
27994 if ((unsigned) PIC_OFFSET_TABLE_REGNUM != INVALID_REGNUM)
27996 fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
27997 call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
27999 else if (TARGET_APCS_STACK)
28001 fixed_regs[10] = 1;
28002 call_used_regs[10] = 1;
28004 /* -mcaller-super-interworking reserves r11 for calls to
28005 _interwork_r11_call_via_rN(). Making the register global
28006 is an easy way of ensuring that it remains valid for all
28007 calls. */
28008 if (TARGET_APCS_FRAME || TARGET_CALLER_INTERWORKING
28009 || TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME)
28011 fixed_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
28012 call_used_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
28013 if (TARGET_CALLER_INTERWORKING)
28014 global_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
28016 SUBTARGET_CONDITIONAL_REGISTER_USAGE
28019 static reg_class_t
28020 arm_preferred_rename_class (reg_class_t rclass)
28022 /* Thumb-2 instructions using LO_REGS may be smaller than instructions
28023 using GENERAL_REGS.  During the register rename pass we therefore prefer
28024 LO_REGS, which can reduce code size.  */
28025 if (TARGET_THUMB2 && rclass == GENERAL_REGS)
28026 return LO_REGS;
28027 else
28028 return NO_REGS;
28031 /* Compute the attribute "length" of insn "*push_multi".
28032 So this function MUST be kept in sync with that insn pattern. */
28034 arm_attr_length_push_multi(rtx parallel_op, rtx first_op)
28036 int i, regno, hi_reg;
28037 int num_saves = XVECLEN (parallel_op, 0);
28039 /* ARM mode. */
28040 if (TARGET_ARM)
28041 return 4;
28042 /* Thumb1 mode. */
28043 if (TARGET_THUMB1)
28044 return 2;
28046 /* Thumb2 mode. */
28047 regno = REGNO (first_op);
28048 /* For PUSH/STM in Thumb2 mode, the 16-bit encoding can only be used when the
28049 register list fits in its 8-bit register_list field.  Normally this means
28050 every register in the list must be in LO_REGS, that is (R0-R7).  If any
28051 HI_REGS register is used, the 32-bit encoding is required.  The one
28052 exception is PUSH, where LR may also appear in the 16-bit encoding. */
28053 hi_reg = (REGNO_REG_CLASS (regno) == HI_REGS) && (regno != LR_REGNUM);
28054 for (i = 1; i < num_saves && !hi_reg; i++)
28056 regno = REGNO (XEXP (XVECEXP (parallel_op, 0, i), 0));
28057 hi_reg |= (REGNO_REG_CLASS (regno) == HI_REGS) && (regno != LR_REGNUM);
28060 if (!hi_reg)
28061 return 2;
28062 return 4;
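/* Some illustrative cases (register lists as written in assembly):
	ARM mode		  -> 4		Thumb-1 -> 2
	Thumb-2 push {r4-r7}	  -> 2	(all LO_REGS)
	Thumb-2 push {r4-r7, lr}  -> 2	(LR is the permitted high register)
	Thumb-2 push {r4, r8}	  -> 4	(r8 forces the 32-bit encoding)  */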
28065 /* Compute the attribute "length" of insn. Currently, this function is used
28066 for "*load_multiple_with_writeback", "*pop_multiple_with_return" and
28067 "*pop_multiple_with_writeback_and_return". OPERANDS is the toplevel PARALLEL
28068 rtx, RETURN_PC is true if OPERANDS contains a return insn.  WRITE_BACK_P is
28069 true if OPERANDS contains an insn that explicitly updates the base register. */
28072 arm_attr_length_pop_multi (rtx *operands, bool return_pc, bool write_back_p)
28074 /* ARM mode. */
28075 if (TARGET_ARM)
28076 return 4;
28077 /* Thumb1 mode. */
28078 if (TARGET_THUMB1)
28079 return 2;
28081 rtx parallel_op = operands[0];
28083 /* Start at the last element of the PARALLEL. */
28084 unsigned indx = XVECLEN (parallel_op, 0) - 1;
28085 /* Start with the number of the base register. */
28085 unsigned regno = REGNO (operands[1]);
28086 /* Skip return and write back pattern.
28087 We only need register pop pattern for later analysis. */
28088 unsigned first_indx = 0;
28089 first_indx += return_pc ? 1 : 0;
28090 first_indx += write_back_p ? 1 : 0;
28092 /* A pop operation can be done through LDM or POP.  If the base register is SP
28093 and write-back is used, then the LDM is an alias of POP. */
28094 bool pop_p = (regno == SP_REGNUM && write_back_p);
28095 bool ldm_p = !pop_p;
28097 /* Check base register for LDM. */
28098 if (ldm_p && REGNO_REG_CLASS (regno) == HI_REGS)
28099 return 4;
28101 /* Check each register in the list. */
28102 for (; indx >= first_indx; indx--)
28104 regno = REGNO (XEXP (XVECEXP (parallel_op, 0, indx), 0));
28105 /* For POP, PC in HI_REGS can be used with 16-bit encoding. See similar
28106 comment in arm_attr_length_push_multi. */
28107 if (REGNO_REG_CLASS (regno) == HI_REGS
28108 && (regno != PC_REGNUM || ldm_p))
28109 return 4;
28112 return 2;
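/* Some illustrative cases:
	pop {r4-r7, pc}    (base sp, write-back) -> 2	(PC is allowed for POP)
	ldmia r8!, {r4-r7}			  -> 4	(high base register)  */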
28115 /* Compute the number of instructions emitted by output_move_double. */
28117 arm_count_output_move_double_insns (rtx *operands)
28119 int count;
28120 rtx ops[2];
28121 /* output_move_double may modify the operands array, so call it
28122 here on a copy of the array. */
28123 ops[0] = operands[0];
28124 ops[1] = operands[1];
28125 output_move_double (ops, false, &count);
28126 return count;
28130 vfp3_const_double_for_fract_bits (rtx operand)
28132 REAL_VALUE_TYPE r0;
28134 if (!CONST_DOUBLE_P (operand))
28135 return 0;
28137 r0 = *CONST_DOUBLE_REAL_VALUE (operand);
28138 if (exact_real_inverse (DFmode, &r0)
28139 && !REAL_VALUE_NEGATIVE (r0))
28141 if (exact_real_truncate (DFmode, &r0))
28143 HOST_WIDE_INT value = real_to_integer (&r0);
28144 value = value & 0xffffffff;
28145 if ((value != 0) && ( (value & (value - 1)) == 0))
28147 int ret = exact_log2 (value);
28148 gcc_assert (IN_RANGE (ret, 0, 31));
28149 return ret;
28153 return 0;
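/* For example (illustrative only): a CONST_DOUBLE of 0.125 has the exact
   inverse 8.0 == 2^3, so 3 is returned and can be used as the number of
   fraction bits in a fixed-point vcvt; 0.3 has no exact power-of-two
   inverse and yields 0.  */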
28156 /* If X is a CONST_DOUBLE with a value that is a power of 2 whose
28157 log2 is in [1, 32], return that log2. Otherwise return -1.
28158 This is used in the patterns for vcvt.s32.f32 floating-point to
28159 fixed-point conversions. */
28162 vfp3_const_double_for_bits (rtx x)
28164 const REAL_VALUE_TYPE *r;
28166 if (!CONST_DOUBLE_P (x))
28167 return -1;
28169 r = CONST_DOUBLE_REAL_VALUE (x);
28171 if (REAL_VALUE_NEGATIVE (*r)
28172 || REAL_VALUE_ISNAN (*r)
28173 || REAL_VALUE_ISINF (*r)
28174 || !real_isinteger (r, SFmode))
28175 return -1;
28177 HOST_WIDE_INT hwint = exact_log2 (real_to_integer (r));
28179 /* The exact_log2 above will have returned -1 if this is
28180 not an exact log2. */
28181 if (!IN_RANGE (hwint, 1, 32))
28182 return -1;
28184 return hwint;
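/* For example (illustrative only): 65536.0 == 2^16 yields 16, while 1.0
   (log2 == 0, outside [1, 32]) and 3.0 (not a power of two) both
   yield -1.  */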
28188 /* Emit a memory barrier around an atomic sequence according to MODEL. */
28190 static void
28191 arm_pre_atomic_barrier (enum memmodel model)
28193 if (need_atomic_barrier_p (model, true))
28194 emit_insn (gen_memory_barrier ());
28197 static void
28198 arm_post_atomic_barrier (enum memmodel model)
28200 if (need_atomic_barrier_p (model, false))
28201 emit_insn (gen_memory_barrier ());
28204 /* Emit the load-exclusive and store-exclusive instructions.
28205 Use acquire and release versions if necessary. */
28207 static void
28208 arm_emit_load_exclusive (machine_mode mode, rtx rval, rtx mem, bool acq)
28210 rtx (*gen) (rtx, rtx);
28212 if (acq)
28214 switch (mode)
28216 case E_QImode: gen = gen_arm_load_acquire_exclusiveqi; break;
28217 case E_HImode: gen = gen_arm_load_acquire_exclusivehi; break;
28218 case E_SImode: gen = gen_arm_load_acquire_exclusivesi; break;
28219 case E_DImode: gen = gen_arm_load_acquire_exclusivedi; break;
28220 default:
28221 gcc_unreachable ();
28224 else
28226 switch (mode)
28228 case E_QImode: gen = gen_arm_load_exclusiveqi; break;
28229 case E_HImode: gen = gen_arm_load_exclusivehi; break;
28230 case E_SImode: gen = gen_arm_load_exclusivesi; break;
28231 case E_DImode: gen = gen_arm_load_exclusivedi; break;
28232 default:
28233 gcc_unreachable ();
28237 emit_insn (gen (rval, mem));
28240 static void
28241 arm_emit_store_exclusive (machine_mode mode, rtx bval, rtx rval,
28242 rtx mem, bool rel)
28244 rtx (*gen) (rtx, rtx, rtx);
28246 if (rel)
28248 switch (mode)
28250 case E_QImode: gen = gen_arm_store_release_exclusiveqi; break;
28251 case E_HImode: gen = gen_arm_store_release_exclusivehi; break;
28252 case E_SImode: gen = gen_arm_store_release_exclusivesi; break;
28253 case E_DImode: gen = gen_arm_store_release_exclusivedi; break;
28254 default:
28255 gcc_unreachable ();
28258 else
28260 switch (mode)
28262 case E_QImode: gen = gen_arm_store_exclusiveqi; break;
28263 case E_HImode: gen = gen_arm_store_exclusivehi; break;
28264 case E_SImode: gen = gen_arm_store_exclusivesi; break;
28265 case E_DImode: gen = gen_arm_store_exclusivedi; break;
28266 default:
28267 gcc_unreachable ();
28271 emit_insn (gen (bval, rval, mem));
28274 /* Mark the previous jump instruction as unlikely. */
28276 static void
28277 emit_unlikely_jump (rtx insn)
28279 rtx_insn *jump = emit_jump_insn (insn);
28280 add_reg_br_prob_note (jump, profile_probability::very_unlikely ());
28283 /* Expand a compare and swap pattern. */
28285 void
28286 arm_expand_compare_and_swap (rtx operands[])
28288 rtx bval, bdst, rval, mem, oldval, newval, is_weak, mod_s, mod_f, x;
28289 machine_mode mode;
28290 rtx (*gen) (rtx, rtx, rtx, rtx, rtx, rtx, rtx, rtx);
28292 bval = operands[0];
28293 rval = operands[1];
28294 mem = operands[2];
28295 oldval = operands[3];
28296 newval = operands[4];
28297 is_weak = operands[5];
28298 mod_s = operands[6];
28299 mod_f = operands[7];
28300 mode = GET_MODE (mem);
28302 /* Normally the succ memory model must be stronger than fail, but in the
28303 unlikely event of fail being ACQUIRE and succ being RELEASE we need to
28304 promote succ to ACQ_REL so that we don't lose the acquire semantics. */
28306 if (TARGET_HAVE_LDACQ
28307 && is_mm_acquire (memmodel_from_int (INTVAL (mod_f)))
28308 && is_mm_release (memmodel_from_int (INTVAL (mod_s))))
28309 mod_s = GEN_INT (MEMMODEL_ACQ_REL);
28311 switch (mode)
28313 case E_QImode:
28314 case E_HImode:
28315 /* For narrow modes, we're going to perform the comparison in SImode,
28316 so do the zero-extension now. */
28317 rval = gen_reg_rtx (SImode);
28318 oldval = convert_modes (SImode, mode, oldval, true);
28319 /* FALLTHRU */
28321 case E_SImode:
28322 /* Force the value into a register if needed. We waited until after
28323 the zero-extension above to do this properly. */
28324 if (!arm_add_operand (oldval, SImode))
28325 oldval = force_reg (SImode, oldval);
28326 break;
28328 case E_DImode:
28329 if (!cmpdi_operand (oldval, mode))
28330 oldval = force_reg (mode, oldval);
28331 break;
28333 default:
28334 gcc_unreachable ();
28337 if (TARGET_THUMB1)
28339 switch (mode)
28341 case E_QImode: gen = gen_atomic_compare_and_swapt1qi_1; break;
28342 case E_HImode: gen = gen_atomic_compare_and_swapt1hi_1; break;
28343 case E_SImode: gen = gen_atomic_compare_and_swapt1si_1; break;
28344 case E_DImode: gen = gen_atomic_compare_and_swapt1di_1; break;
28345 default:
28346 gcc_unreachable ();
28349 else
28351 switch (mode)
28353 case E_QImode: gen = gen_atomic_compare_and_swap32qi_1; break;
28354 case E_HImode: gen = gen_atomic_compare_and_swap32hi_1; break;
28355 case E_SImode: gen = gen_atomic_compare_and_swap32si_1; break;
28356 case E_DImode: gen = gen_atomic_compare_and_swap32di_1; break;
28357 default:
28358 gcc_unreachable ();
28362 bdst = TARGET_THUMB1 ? bval : gen_rtx_REG (CC_Zmode, CC_REGNUM);
28363 emit_insn (gen (bdst, rval, mem, oldval, newval, is_weak, mod_s, mod_f));
28365 if (mode == QImode || mode == HImode)
28366 emit_move_insn (operands[1], gen_lowpart (mode, rval));
28368 /* In all cases, we arrange for success to be signaled by Z set.
28369 This arrangement allows for the boolean result to be used directly
28370 in a subsequent branch, post optimization. For Thumb-1 targets, the
28371 boolean negation of the result is also stored in bval because the Thumb-1
28372 backend lacks dependency tracking for the CC flag, as flag setting is not
28373 represented at the RTL level. */
28374 if (TARGET_THUMB1)
28375 emit_insn (gen_cstoresi_eq0_thumb1 (bval, bdst));
28376 else
28378 x = gen_rtx_EQ (SImode, bdst, const0_rtx);
28379 emit_insn (gen_rtx_SET (bval, x));
28383 /* Split a compare and swap pattern. It is IMPLEMENTATION DEFINED whether
28384 another memory store between the load-exclusive and store-exclusive can
28385 reset the monitor from Exclusive to Open state. This means we must wait
28386 until after reload to split the pattern, lest we get a register spill in
28387 the middle of the atomic sequence. Success of the compare and swap is
28388 indicated by the Z flag being set for 32-bit targets and by neg_bval being zero
28389 for Thumb-1 targets (i.e. the negation of the boolean value returned by the
28390 atomic_compare_and_swap<mode> standard pattern in operand 0). */
28392 void
28393 arm_split_compare_and_swap (rtx operands[])
28395 rtx rval, mem, oldval, newval, neg_bval;
28396 machine_mode mode;
28397 enum memmodel mod_s, mod_f;
28398 bool is_weak;
28399 rtx_code_label *label1, *label2;
28400 rtx x, cond;
28402 rval = operands[1];
28403 mem = operands[2];
28404 oldval = operands[3];
28405 newval = operands[4];
28406 is_weak = (operands[5] != const0_rtx);
28407 mod_s = memmodel_from_int (INTVAL (operands[6]));
28408 mod_f = memmodel_from_int (INTVAL (operands[7]));
28409 neg_bval = TARGET_THUMB1 ? operands[0] : operands[8];
28410 mode = GET_MODE (mem);
28412 bool is_armv8_sync = arm_arch8 && is_mm_sync (mod_s);
28414 bool use_acquire = TARGET_HAVE_LDACQ
28415 && !(is_mm_relaxed (mod_s) || is_mm_consume (mod_s)
28416 || is_mm_release (mod_s));
28418 bool use_release = TARGET_HAVE_LDACQ
28419 && !(is_mm_relaxed (mod_s) || is_mm_consume (mod_s)
28420 || is_mm_acquire (mod_s));
28422 /* For ARMv8, the load-acquire is too weak for __sync memory orders. Instead,
28423 a full barrier is emitted after the store-release. */
28424 if (is_armv8_sync)
28425 use_acquire = false;
28427 /* Checks whether a barrier is needed and emits one accordingly. */
28428 if (!(use_acquire || use_release))
28429 arm_pre_atomic_barrier (mod_s);
28431 label1 = NULL;
28432 if (!is_weak)
28434 label1 = gen_label_rtx ();
28435 emit_label (label1);
28437 label2 = gen_label_rtx ();
28439 arm_emit_load_exclusive (mode, rval, mem, use_acquire);
28441 /* Z is set to 0 for 32-bit targets (resp. rval set to 1) if oldval != rval,
28442 as required to communicate with arm_expand_compare_and_swap. */
28443 if (TARGET_32BIT)
28445 cond = arm_gen_compare_reg (NE, rval, oldval, neg_bval);
28446 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
28447 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
28448 gen_rtx_LABEL_REF (Pmode, label2), pc_rtx);
28449 emit_unlikely_jump (gen_rtx_SET (pc_rtx, x));
28451 else
28453 emit_move_insn (neg_bval, const1_rtx);
28454 cond = gen_rtx_NE (VOIDmode, rval, oldval);
28455 if (thumb1_cmpneg_operand (oldval, SImode))
28456 emit_unlikely_jump (gen_cbranchsi4_scratch (neg_bval, rval, oldval,
28457 label2, cond));
28458 else
28459 emit_unlikely_jump (gen_cbranchsi4_insn (cond, rval, oldval, label2));
28462 arm_emit_store_exclusive (mode, neg_bval, mem, newval, use_release);
28464 /* Weak or strong, we want EQ to be true for success, so that we
28465 match the flags that we got from the compare above. */
28466 if (TARGET_32BIT)
28468 cond = gen_rtx_REG (CCmode, CC_REGNUM);
28469 x = gen_rtx_COMPARE (CCmode, neg_bval, const0_rtx);
28470 emit_insn (gen_rtx_SET (cond, x));
28473 if (!is_weak)
28475 /* Z is set to boolean value of !neg_bval, as required to communicate
28476 with arm_expand_compare_and_swap. */
28477 x = gen_rtx_NE (VOIDmode, neg_bval, const0_rtx);
28478 emit_unlikely_jump (gen_cbranchsi4 (x, neg_bval, const0_rtx, label1));
28481 if (!is_mm_relaxed (mod_f))
28482 emit_label (label2);
28484 /* Checks whether a barrier is needed and emits one accordingly. */
28485 if (is_armv8_sync
28486 || !(use_acquire || use_release))
28487 arm_post_atomic_barrier (mod_s);
28489 if (is_mm_relaxed (mod_f))
28490 emit_label (label2);
28493 /* Split an atomic operation pattern. Operation is given by CODE and is one
28494 of PLUS, MINUS, IOR, XOR, SET (for an exchange operation) or NOT (for a nand
28495 operation). Operation is performed on the content at MEM and on VALUE
28496 following the memory model MODEL_RTX. The content at MEM before and after
28497 the operation is returned in OLD_OUT and NEW_OUT respectively while the
28498 success of the operation is returned in COND. Using a scratch register or
28499 an operand register for these determines what result is returned for that
28500 pattern. */
28502 void
28503 arm_split_atomic_op (enum rtx_code code, rtx old_out, rtx new_out, rtx mem,
28504 rtx value, rtx model_rtx, rtx cond)
28506 enum memmodel model = memmodel_from_int (INTVAL (model_rtx));
28507 machine_mode mode = GET_MODE (mem);
28508 machine_mode wmode = (mode == DImode ? DImode : SImode);
28509 rtx_code_label *label;
28510 bool all_low_regs, bind_old_new;
28511 rtx x;
28513 bool is_armv8_sync = arm_arch8 && is_mm_sync (model);
28515 bool use_acquire = TARGET_HAVE_LDACQ
28516 && !(is_mm_relaxed (model) || is_mm_consume (model)
28517 || is_mm_release (model));
28519 bool use_release = TARGET_HAVE_LDACQ
28520 && !(is_mm_relaxed (model) || is_mm_consume (model)
28521 || is_mm_acquire (model));
28523 /* For ARMv8, a load-acquire is too weak for __sync memory orders. Instead,
28524 a full barrier is emitted after the store-release. */
28525 if (is_armv8_sync)
28526 use_acquire = false;
28528 /* Checks whether a barrier is needed and emits one accordingly. */
28529 if (!(use_acquire || use_release))
28530 arm_pre_atomic_barrier (model);
28532 label = gen_label_rtx ();
28533 emit_label (label);
28535 if (new_out)
28536 new_out = gen_lowpart (wmode, new_out);
28537 if (old_out)
28538 old_out = gen_lowpart (wmode, old_out);
28539 else
28540 old_out = new_out;
28541 value = simplify_gen_subreg (wmode, value, mode, 0);
28543 arm_emit_load_exclusive (mode, old_out, mem, use_acquire);
28545 /* Does the operation require destination and first operand to use the same
28546 register? This is decided by register constraints of relevant insn
28547 patterns in thumb1.md. */
28548 gcc_assert (!new_out || REG_P (new_out));
28549 all_low_regs = REG_P (value) && REGNO_REG_CLASS (REGNO (value)) == LO_REGS
28550 && new_out && REGNO_REG_CLASS (REGNO (new_out)) == LO_REGS
28551 && REGNO_REG_CLASS (REGNO (old_out)) == LO_REGS;
28552 bind_old_new =
28553 (TARGET_THUMB1
28554 && code != SET
28555 && code != MINUS
28556 && (code != PLUS || (!all_low_regs && !satisfies_constraint_L (value))));
28558 /* We want to return the old value while putting the result of the operation
28559 in the same register as the old value so copy the old value over to the
28560 destination register and use that register for the operation. */
28561 if (old_out && bind_old_new)
28563 emit_move_insn (new_out, old_out);
28564 old_out = new_out;
28567 switch (code)
28569 case SET:
28570 new_out = value;
28571 break;
28573 case NOT:
28574 x = gen_rtx_AND (wmode, old_out, value);
28575 emit_insn (gen_rtx_SET (new_out, x));
28576 x = gen_rtx_NOT (wmode, new_out);
28577 emit_insn (gen_rtx_SET (new_out, x));
28578 break;
28580 case MINUS:
28581 if (CONST_INT_P (value))
28583 value = GEN_INT (-INTVAL (value));
28584 code = PLUS;
28586 /* FALLTHRU */
28588 case PLUS:
28589 if (mode == DImode)
28591 /* DImode plus/minus need to clobber flags. */
28592 /* The adddi3 and subdi3 patterns are incorrectly written so that
28593 they require matching operands, even when we could easily support
28594 three operands. Thankfully, this can be fixed up post-splitting,
28595 as the individual add+adc patterns do accept three operands and
28596 post-reload cprop can make these moves go away. */
28597 emit_move_insn (new_out, old_out);
28598 if (code == PLUS)
28599 x = gen_adddi3 (new_out, new_out, value);
28600 else
28601 x = gen_subdi3 (new_out, new_out, value);
28602 emit_insn (x);
28603 break;
28605 /* FALLTHRU */
28607 default:
28608 x = gen_rtx_fmt_ee (code, wmode, old_out, value);
28609 emit_insn (gen_rtx_SET (new_out, x));
28610 break;
28613 arm_emit_store_exclusive (mode, cond, mem, gen_lowpart (mode, new_out),
28614 use_release);
28616 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
28617 emit_unlikely_jump (gen_cbranchsi4 (x, cond, const0_rtx, label));
28619 /* Checks whether a barrier is needed and emits one accordingly. */
28620 if (is_armv8_sync
28621 || !(use_acquire || use_release))
28622 arm_post_atomic_barrier (model);
28625 #define MAX_VECT_LEN 16
28627 struct expand_vec_perm_d
28629 rtx target, op0, op1;
28630 unsigned char perm[MAX_VECT_LEN];
28631 machine_mode vmode;
28632 unsigned char nelt;
28633 bool one_vector_p;
28634 bool testing_p;
28637 /* Generate a variable permutation. */
28639 static void
28640 arm_expand_vec_perm_1 (rtx target, rtx op0, rtx op1, rtx sel)
28642 machine_mode vmode = GET_MODE (target);
28643 bool one_vector_p = rtx_equal_p (op0, op1);
28645 gcc_checking_assert (vmode == V8QImode || vmode == V16QImode);
28646 gcc_checking_assert (GET_MODE (op0) == vmode);
28647 gcc_checking_assert (GET_MODE (op1) == vmode);
28648 gcc_checking_assert (GET_MODE (sel) == vmode);
28649 gcc_checking_assert (TARGET_NEON);
28651 if (one_vector_p)
28653 if (vmode == V8QImode)
28654 emit_insn (gen_neon_vtbl1v8qi (target, op0, sel));
28655 else
28656 emit_insn (gen_neon_vtbl1v16qi (target, op0, sel));
28658 else
28660 rtx pair;
28662 if (vmode == V8QImode)
28664 pair = gen_reg_rtx (V16QImode);
28665 emit_insn (gen_neon_vcombinev8qi (pair, op0, op1));
28666 pair = gen_lowpart (TImode, pair);
28667 emit_insn (gen_neon_vtbl2v8qi (target, pair, sel));
28669 else
28671 pair = gen_reg_rtx (OImode);
28672 emit_insn (gen_neon_vcombinev16qi (pair, op0, op1));
28673 emit_insn (gen_neon_vtbl2v16qi (target, pair, sel));
28678 void
28679 arm_expand_vec_perm (rtx target, rtx op0, rtx op1, rtx sel)
28681 machine_mode vmode = GET_MODE (target);
28682 unsigned int i, nelt = GET_MODE_NUNITS (vmode);
28683 bool one_vector_p = rtx_equal_p (op0, op1);
28684 rtx rmask[MAX_VECT_LEN], mask;
28686 /* TODO: ARM's VTBL indexing is little-endian. In order to handle GCC's
28687 numbering of elements for big-endian, we must reverse the order. */
28688 gcc_checking_assert (!BYTES_BIG_ENDIAN);
28690 /* The VTBL instruction does not use a modulo index, so we must take care
28691 of that ourselves. */
28692 mask = GEN_INT (one_vector_p ? nelt - 1 : 2 * nelt - 1);
28693 for (i = 0; i < nelt; ++i)
28694 rmask[i] = mask;
28695 mask = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rmask));
28696 sel = expand_simple_binop (vmode, AND, sel, mask, NULL, 0, OPTAB_LIB_WIDEN);
28698 arm_expand_vec_perm_1 (target, op0, op1, sel);
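/* Illustrative sketch, not part of the original file: a scalar model of
   the selector masking performed above.  VTBL does not reduce indexes
   modulo the table size (out-of-range lanes read as zero), so the
   wrap-around semantics of VEC_PERM_EXPR are obtained by ANDing the
   selector with MASK before expanding the permutation.  */
static void
example_vec_perm_model (unsigned char *out, const unsigned char *op0,
                        const unsigned char *op1, const unsigned char *sel,
                        unsigned int nelt, int one_vector_p)
{
  unsigned int mask = one_vector_p ? nelt - 1 : 2 * nelt - 1;
  for (unsigned int i = 0; i < nelt; i++)
    {
      unsigned int idx = sel[i] & mask;    /* the emitted AND with MASK  */
      out[i] = idx < nelt ? op0[idx] : op1[idx - nelt];
    }
}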
28701 /* Map lane ordering between architectural lane order, and GCC lane order,
28702 taking into account ABI. See comment above output_move_neon for details. */
28704 static int
28705 neon_endian_lane_map (machine_mode mode, int lane)
28707 if (BYTES_BIG_ENDIAN)
28709 int nelems = GET_MODE_NUNITS (mode);
28710 /* Reverse lane order. */
28711 lane = (nelems - 1 - lane);
28712 /* Reverse D register order, to match ABI. */
28713 if (GET_MODE_SIZE (mode) == 16)
28714 lane = lane ^ (nelems / 2);
28716 return lane;
28719 /* Some permutations index into pairs of vectors, this is a helper function
28720 to map indexes into those pairs of vectors. */
28722 static int
28723 neon_pair_endian_lane_map (machine_mode mode, int lane)
28725 int nelem = GET_MODE_NUNITS (mode);
28726 if (BYTES_BIG_ENDIAN)
28727 lane =
28728 neon_endian_lane_map (mode, lane & (nelem - 1)) + (lane & nelem);
28729 return lane;
28732 /* Generate or test for an insn that supports a constant permutation. */
28734 /* Recognize patterns for the VUZP insns. */
28736 static bool
28737 arm_evpc_neon_vuzp (struct expand_vec_perm_d *d)
28739 unsigned int i, odd, mask, nelt = d->nelt;
28740 rtx out0, out1, in0, in1;
28741 rtx (*gen)(rtx, rtx, rtx, rtx);
28742 int first_elem;
28743 int swap_nelt;
28745 if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
28746 return false;
28748 /* arm_expand_vec_perm_const_1 () helpfully swaps the operands for the
28749 big-endian pattern on 64-bit vectors, so we correct for that. */
28750 swap_nelt = BYTES_BIG_ENDIAN && !d->one_vector_p
28751 && GET_MODE_SIZE (d->vmode) == 8 ? d->nelt : 0;
28753 first_elem = d->perm[neon_endian_lane_map (d->vmode, 0)] ^ swap_nelt;
28755 if (first_elem == neon_endian_lane_map (d->vmode, 0))
28756 odd = 0;
28757 else if (first_elem == neon_endian_lane_map (d->vmode, 1))
28758 odd = 1;
28759 else
28760 return false;
28761 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
28763 for (i = 0; i < nelt; i++)
28765 unsigned elt =
28766 (neon_pair_endian_lane_map (d->vmode, i) * 2 + odd) & mask;
28767 if ((d->perm[i] ^ swap_nelt) != neon_pair_endian_lane_map (d->vmode, elt))
28768 return false;
28771 /* Success! */
28772 if (d->testing_p)
28773 return true;
28775 switch (d->vmode)
28777 case E_V16QImode: gen = gen_neon_vuzpv16qi_internal; break;
28778 case E_V8QImode: gen = gen_neon_vuzpv8qi_internal; break;
28779 case E_V8HImode: gen = gen_neon_vuzpv8hi_internal; break;
28780 case E_V4HImode: gen = gen_neon_vuzpv4hi_internal; break;
28781 case E_V8HFmode: gen = gen_neon_vuzpv8hf_internal; break;
28782 case E_V4HFmode: gen = gen_neon_vuzpv4hf_internal; break;
28783 case E_V4SImode: gen = gen_neon_vuzpv4si_internal; break;
28784 case E_V2SImode: gen = gen_neon_vuzpv2si_internal; break;
28785 case E_V2SFmode: gen = gen_neon_vuzpv2sf_internal; break;
28786 case E_V4SFmode: gen = gen_neon_vuzpv4sf_internal; break;
28787 default:
28788 gcc_unreachable ();
28791 in0 = d->op0;
28792 in1 = d->op1;
28793 if (swap_nelt != 0)
28794 std::swap (in0, in1);
28796 out0 = d->target;
28797 out1 = gen_reg_rtx (d->vmode);
28798 if (odd)
28799 std::swap (out0, out1);
28801 emit_insn (gen (out0, in0, in1, out1));
28802 return true;
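/* Illustrative sketch (little-endian case, not part of the original
   file): the selector shape arm_evpc_neon_vuzp recognizes.  For a
   two-operand V8HI permute, ODD = 0 gives { 0, 2, 4, 6, 8, 10, 12, 14 }
   (the even lanes of op0:op1) and ODD = 1 gives the odd lanes.  */
static void
example_vuzp_selector (unsigned char *perm, unsigned int nelt,
                       unsigned int odd, int one_vector_p)
{
  unsigned int mask = one_vector_p ? nelt - 1 : 2 * nelt - 1;
  for (unsigned int i = 0; i < nelt; i++)
    perm[i] = (2 * i + odd) & mask;
}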
28805 /* Recognize patterns for the VZIP insns. */
28807 static bool
28808 arm_evpc_neon_vzip (struct expand_vec_perm_d *d)
28810 unsigned int i, high, mask, nelt = d->nelt;
28811 rtx out0, out1, in0, in1;
28812 rtx (*gen)(rtx, rtx, rtx, rtx);
28813 int first_elem;
28814 bool is_swapped;
28816 if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
28817 return false;
28819 is_swapped = BYTES_BIG_ENDIAN;
28821 first_elem = d->perm[neon_endian_lane_map (d->vmode, 0) ^ is_swapped];
28823 high = nelt / 2;
28824 if (first_elem == neon_endian_lane_map (d->vmode, high))
28826 else if (first_elem == neon_endian_lane_map (d->vmode, 0))
28827 high = 0;
28828 else
28829 return false;
28830 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
28832 for (i = 0; i < nelt / 2; i++)
28834 unsigned elt =
28835 neon_pair_endian_lane_map (d->vmode, i + high) & mask;
28836 if (d->perm[neon_pair_endian_lane_map (d->vmode, 2 * i + is_swapped)]
28837 != elt)
28838 return false;
28839 elt =
28840 neon_pair_endian_lane_map (d->vmode, i + nelt + high) & mask;
28841 if (d->perm[neon_pair_endian_lane_map (d->vmode, 2 * i + !is_swapped)]
28842 != elt)
28843 return false;
28846 /* Success! */
28847 if (d->testing_p)
28848 return true;
28850 switch (d->vmode)
28852 case E_V16QImode: gen = gen_neon_vzipv16qi_internal; break;
28853 case E_V8QImode: gen = gen_neon_vzipv8qi_internal; break;
28854 case E_V8HImode: gen = gen_neon_vzipv8hi_internal; break;
28855 case E_V4HImode: gen = gen_neon_vzipv4hi_internal; break;
28856 case E_V8HFmode: gen = gen_neon_vzipv8hf_internal; break;
28857 case E_V4HFmode: gen = gen_neon_vzipv4hf_internal; break;
28858 case E_V4SImode: gen = gen_neon_vzipv4si_internal; break;
28859 case E_V2SImode: gen = gen_neon_vzipv2si_internal; break;
28860 case E_V2SFmode: gen = gen_neon_vzipv2sf_internal; break;
28861 case E_V4SFmode: gen = gen_neon_vzipv4sf_internal; break;
28862 default:
28863 gcc_unreachable ();
28866 in0 = d->op0;
28867 in1 = d->op1;
28868 if (is_swapped)
28869 std::swap (in0, in1);
28871 out0 = d->target;
28872 out1 = gen_reg_rtx (d->vmode);
28873 if (high)
28874 std::swap (out0, out1);
28876 emit_insn (gen (out0, in0, in1, out1));
28877 return true;
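/* Illustrative sketch (little-endian case, not part of the original
   file): the selector shape arm_evpc_neon_vzip recognizes.  For two
   V8HI operands, HIGH = 0 yields { 0, 8, 1, 9, 2, 10, 3, 11 } (the
   interleave of the low halves) and HIGH = nelt/2 yields the
   interleave of the high halves.  */
static void
example_vzip_selector (unsigned char *perm, unsigned int nelt,
                       unsigned int high, int one_vector_p)
{
  unsigned int mask = one_vector_p ? nelt - 1 : 2 * nelt - 1;
  for (unsigned int i = 0; i < nelt / 2; i++)
    {
      perm[2 * i] = (i + high) & mask;
      perm[2 * i + 1] = (i + nelt + high) & mask;
    }
}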
28880 /* Recognize patterns for the VREV insns. */
28882 static bool
28883 arm_evpc_neon_vrev (struct expand_vec_perm_d *d)
28885 unsigned int i, j, diff, nelt = d->nelt;
28886 rtx (*gen)(rtx, rtx);
28888 if (!d->one_vector_p)
28889 return false;
28891 diff = d->perm[0];
28892 switch (diff)
28894 case 7:
28895 switch (d->vmode)
28897 case E_V16QImode: gen = gen_neon_vrev64v16qi; break;
28898 case E_V8QImode: gen = gen_neon_vrev64v8qi; break;
28899 default:
28900 return false;
28902 break;
28903 case 3:
28904 switch (d->vmode)
28906 case E_V16QImode: gen = gen_neon_vrev32v16qi; break;
28907 case E_V8QImode: gen = gen_neon_vrev32v8qi; break;
28908 case E_V8HImode: gen = gen_neon_vrev64v8hi; break;
28909 case E_V4HImode: gen = gen_neon_vrev64v4hi; break;
28910 case E_V8HFmode: gen = gen_neon_vrev64v8hf; break;
28911 case E_V4HFmode: gen = gen_neon_vrev64v4hf; break;
28912 default:
28913 return false;
28915 break;
28916 case 1:
28917 switch (d->vmode)
28919 case E_V16QImode: gen = gen_neon_vrev16v16qi; break;
28920 case E_V8QImode: gen = gen_neon_vrev16v8qi; break;
28921 case E_V8HImode: gen = gen_neon_vrev32v8hi; break;
28922 case E_V4HImode: gen = gen_neon_vrev32v4hi; break;
28923 case E_V4SImode: gen = gen_neon_vrev64v4si; break;
28924 case E_V2SImode: gen = gen_neon_vrev64v2si; break;
28925 case E_V4SFmode: gen = gen_neon_vrev64v4sf; break;
28926 case E_V2SFmode: gen = gen_neon_vrev64v2sf; break;
28927 default:
28928 return false;
28930 break;
28931 default:
28932 return false;
28935 for (i = 0; i < nelt ; i += diff + 1)
28936 for (j = 0; j <= diff; j += 1)
28938 /* This is guaranteed to be true because the value of diff
28939 is 7, 3 or 1, and we have enough elements left to
28940 generate the permutation. Getting a vector mask whose
28941 diff value is anything other than these implies that
28942 something is wrong by the time we get here. */
28943 gcc_assert (i + j < nelt);
28944 if (d->perm[i + j] != i + diff - j)
28945 return false;
28948 /* Success! */
28949 if (d->testing_p)
28950 return true;
28952 emit_insn (gen (d->target, d->op0));
28953 return true;
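/* Illustrative sketch, not part of the original file: the selectors
   matched by arm_evpc_neon_vrev reverse elements within fixed-size
   groups.  DIFF = 1 gives { 1, 0, 3, 2, ... }, DIFF = 3 gives
   { 3, 2, 1, 0, 7, 6, 5, 4, ... } and DIFF = 7 reverses each group of
   eight elements.  This mirrors the checking loop above.  */
static void
example_vrev_selector (unsigned char *perm, unsigned int nelt,
                       unsigned int diff)
{
  for (unsigned int i = 0; i < nelt; i += diff + 1)
    for (unsigned int j = 0; j <= diff; j++)
      perm[i + j] = i + diff - j;
}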
28956 /* Recognize patterns for the VTRN insns. */
28958 static bool
28959 arm_evpc_neon_vtrn (struct expand_vec_perm_d *d)
28961 unsigned int i, odd, mask, nelt = d->nelt;
28962 rtx out0, out1, in0, in1;
28963 rtx (*gen)(rtx, rtx, rtx, rtx);
28965 if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
28966 return false;
28968 /* Note that these are little-endian tests. Adjust for big-endian later. */
28969 if (d->perm[0] == 0)
28970 odd = 0;
28971 else if (d->perm[0] == 1)
28972 odd = 1;
28973 else
28974 return false;
28975 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
28977 for (i = 0; i < nelt; i += 2)
28979 if (d->perm[i] != i + odd)
28980 return false;
28981 if (d->perm[i + 1] != ((i + nelt + odd) & mask))
28982 return false;
28985 /* Success! */
28986 if (d->testing_p)
28987 return true;
28989 switch (d->vmode)
28991 case E_V16QImode: gen = gen_neon_vtrnv16qi_internal; break;
28992 case E_V8QImode: gen = gen_neon_vtrnv8qi_internal; break;
28993 case E_V8HImode: gen = gen_neon_vtrnv8hi_internal; break;
28994 case E_V4HImode: gen = gen_neon_vtrnv4hi_internal; break;
28995 case E_V8HFmode: gen = gen_neon_vtrnv8hf_internal; break;
28996 case E_V4HFmode: gen = gen_neon_vtrnv4hf_internal; break;
28997 case E_V4SImode: gen = gen_neon_vtrnv4si_internal; break;
28998 case E_V2SImode: gen = gen_neon_vtrnv2si_internal; break;
28999 case E_V2SFmode: gen = gen_neon_vtrnv2sf_internal; break;
29000 case E_V4SFmode: gen = gen_neon_vtrnv4sf_internal; break;
29001 default:
29002 gcc_unreachable ();
29005 in0 = d->op0;
29006 in1 = d->op1;
29007 if (BYTES_BIG_ENDIAN)
29009 std::swap (in0, in1);
29010 odd = !odd;
29013 out0 = d->target;
29014 out1 = gen_reg_rtx (d->vmode);
29015 if (odd)
29016 std::swap (out0, out1);
29018 emit_insn (gen (out0, in0, in1, out1));
29019 return true;
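/* Illustrative sketch (little-endian case, not part of the original
   file): the selector shape arm_evpc_neon_vtrn recognizes.  For two
   V8HI operands, ODD = 0 yields { 0, 8, 2, 10, 4, 12, 6, 14 } and
   ODD = 1 yields the transpose of the odd lanes.  */
static void
example_vtrn_selector (unsigned char *perm, unsigned int nelt,
                       unsigned int odd, int one_vector_p)
{
  unsigned int mask = one_vector_p ? nelt - 1 : 2 * nelt - 1;
  for (unsigned int i = 0; i < nelt; i += 2)
    {
      perm[i] = i + odd;
      perm[i + 1] = (i + nelt + odd) & mask;
    }
}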
29022 /* Recognize patterns for the VEXT insns. */
29024 static bool
29025 arm_evpc_neon_vext (struct expand_vec_perm_d *d)
29027 unsigned int i, nelt = d->nelt;
29028 rtx (*gen) (rtx, rtx, rtx, rtx);
29029 rtx offset;
29031 unsigned int location;
29033 unsigned int next = d->perm[0] + 1;
29035 /* TODO: Handle GCC's numbering of elements for big-endian. */
29036 if (BYTES_BIG_ENDIAN)
29037 return false;
29039 /* Check if the extracted indexes are increasing by one. */
29040 for (i = 1; i < nelt; next++, i++)
29042 /* If we hit the most significant element of the 2nd vector in
29043 the previous iteration, no need to test further. */
29044 if (next == 2 * nelt)
29045 return false;
29047 /* If we are operating on only one vector: it could be a
29048 rotation. If there are only two elements of size < 64, let
29049 arm_evpc_neon_vrev catch it. */
29050 if (d->one_vector_p && (next == nelt))
29052 if ((nelt == 2) && (d->vmode != V2DImode))
29053 return false;
29054 else
29055 next = 0;
29058 if (d->perm[i] != next)
29059 return false;
29062 location = d->perm[0];
29064 switch (d->vmode)
29066 case E_V16QImode: gen = gen_neon_vextv16qi; break;
29067 case E_V8QImode: gen = gen_neon_vextv8qi; break;
29068 case E_V4HImode: gen = gen_neon_vextv4hi; break;
29069 case E_V8HImode: gen = gen_neon_vextv8hi; break;
29070 case E_V2SImode: gen = gen_neon_vextv2si; break;
29071 case E_V4SImode: gen = gen_neon_vextv4si; break;
29072 case E_V4HFmode: gen = gen_neon_vextv4hf; break;
29073 case E_V8HFmode: gen = gen_neon_vextv8hf; break;
29074 case E_V2SFmode: gen = gen_neon_vextv2sf; break;
29075 case E_V4SFmode: gen = gen_neon_vextv4sf; break;
29076 case E_V2DImode: gen = gen_neon_vextv2di; break;
29077 default:
29078 return false;
29081 /* Success! */
29082 if (d->testing_p)
29083 return true;
29085 offset = GEN_INT (location);
29086 emit_insn (gen (d->target, d->op0, d->op1, offset));
29087 return true;
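/* Illustrative sketch (little-endian case, not part of the original
   file): the selector shape arm_evpc_neon_vext recognizes is a run of
   consecutive indexes starting at LOCATION, e.g. { 3, 4, 5, 6 } for
   V4SI extracts the last element of op0 followed by the first three
   elements of op1.  */
static void
example_vext_selector (unsigned char *perm, unsigned int nelt,
                       unsigned int location, int one_vector_p)
{
  unsigned int limit = one_vector_p ? nelt : 2 * nelt;
  for (unsigned int i = 0; i < nelt; i++)
    perm[i] = (location + i) % limit;
}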
29090 /* The NEON VTBL instruction is a fully variable permutation that's even
29091 stronger than what we expose via VEC_PERM_EXPR. What it doesn't do
29092 is mask the index operand as VEC_PERM_EXPR requires. Therefore we
29093 can do slightly better by expanding this as a constant where we don't
29094 have to apply a mask. */
29096 static bool
29097 arm_evpc_neon_vtbl (struct expand_vec_perm_d *d)
29099 rtx rperm[MAX_VECT_LEN], sel;
29100 machine_mode vmode = d->vmode;
29101 unsigned int i, nelt = d->nelt;
29103 /* TODO: ARM's VTBL indexing is little-endian. In order to handle GCC's
29104 numbering of elements for big-endian, we must reverse the order. */
29105 if (BYTES_BIG_ENDIAN)
29106 return false;
29108 if (d->testing_p)
29109 return true;
29111 /* Generic code will try constant permutation twice. Once with the
29112 original mode and again with the elements lowered to QImode.
29113 So wait and don't do the selector expansion ourselves. */
29114 if (vmode != V8QImode && vmode != V16QImode)
29115 return false;
29117 for (i = 0; i < nelt; ++i)
29118 rperm[i] = GEN_INT (d->perm[i]);
29119 sel = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rperm));
29120 sel = force_reg (vmode, sel);
29122 arm_expand_vec_perm_1 (d->target, d->op0, d->op1, sel);
29123 return true;
29126 static bool
29127 arm_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
29129 /* Check if the input mask matches vext before reordering the
29130 operands. */
29131 if (TARGET_NEON)
29132 if (arm_evpc_neon_vext (d))
29133 return true;
29135 /* The pattern matching functions above are written to look for a small
29136 number to begin the sequence (0, 1, N/2). If we begin with an index
29137 from the second operand, we can swap the operands. */
29138 if (d->perm[0] >= d->nelt)
29140 unsigned i, nelt = d->nelt;
29142 for (i = 0; i < nelt; ++i)
29143 d->perm[i] = (d->perm[i] + nelt) & (2 * nelt - 1);
29145 std::swap (d->op0, d->op1);
29148 if (TARGET_NEON)
29150 if (arm_evpc_neon_vuzp (d))
29151 return true;
29152 if (arm_evpc_neon_vzip (d))
29153 return true;
29154 if (arm_evpc_neon_vrev (d))
29155 return true;
29156 if (arm_evpc_neon_vtrn (d))
29157 return true;
29158 return arm_evpc_neon_vtbl (d);
29160 return false;
29163 /* Expand a vec_perm_const pattern. */
29165 bool
29166 arm_expand_vec_perm_const (rtx target, rtx op0, rtx op1, rtx sel)
29168 struct expand_vec_perm_d d;
29169 int i, nelt, which;
29171 d.target = target;
29172 d.op0 = op0;
29173 d.op1 = op1;
29175 d.vmode = GET_MODE (target);
29176 gcc_assert (VECTOR_MODE_P (d.vmode));
29177 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
29178 d.testing_p = false;
29180 for (i = which = 0; i < nelt; ++i)
29182 rtx e = XVECEXP (sel, 0, i);
29183 int ei = INTVAL (e) & (2 * nelt - 1);
29184 which |= (ei < nelt ? 1 : 2);
29185 d.perm[i] = ei;
29188 switch (which)
29190 default:
29191 gcc_unreachable();
29193 case 3:
29194 d.one_vector_p = false;
29195 if (!rtx_equal_p (op0, op1))
29196 break;
29198 /* The elements of PERM do not suggest that only the first operand
29199 is used, but both operands are identical. Allow easier matching
29200 of the permutation by folding the permutation into the single
29201 input vector. */
29202 /* FALLTHRU */
29203 case 2:
29204 for (i = 0; i < nelt; ++i)
29205 d.perm[i] &= nelt - 1;
29206 d.op0 = op1;
29207 d.one_vector_p = true;
29208 break;
29210 case 1:
29211 d.op1 = op0;
29212 d.one_vector_p = true;
29213 break;
29216 return arm_expand_vec_perm_const_1 (&d);
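/* Illustrative, user-level example (an assumption, not from the original
   file; compiled for NEON, little-endian): a __builtin_shuffle with a
   constant selector reaches this expander through VEC_PERM_EXPR.  The
   { 0, 2, 4, 6 } mask below has the shape the VUZP recognizer above
   matches; the exact instruction chosen is up to the matchers.  */
typedef unsigned short example_v4hi __attribute__ ((vector_size (8)));

static example_v4hi
example_deinterleave_even (example_v4hi a, example_v4hi b)
{
  const example_v4hi sel = { 0, 2, 4, 6 };
  return __builtin_shuffle (a, b, sel);
}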
29219 /* Implement TARGET_VECTORIZE_VEC_PERM_CONST_OK. */
29221 static bool
29222 arm_vectorize_vec_perm_const_ok (machine_mode vmode,
29223 const unsigned char *sel)
29225 struct expand_vec_perm_d d;
29226 unsigned int i, nelt, which;
29227 bool ret;
29229 d.vmode = vmode;
29230 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
29231 d.testing_p = true;
29232 memcpy (d.perm, sel, nelt);
29234 /* Categorize the set of elements in the selector. */
29235 for (i = which = 0; i < nelt; ++i)
29237 unsigned char e = d.perm[i];
29238 gcc_assert (e < 2 * nelt);
29239 which |= (e < nelt ? 1 : 2);
29242 /* For all elements from second vector, fold the elements to first. */
29243 if (which == 2)
29244 for (i = 0; i < nelt; ++i)
29245 d.perm[i] -= nelt;
29247 /* Check whether the mask can be applied to the vector type. */
29248 d.one_vector_p = (which != 3);
29250 d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
29251 d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
29252 if (!d.one_vector_p)
29253 d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);
29255 start_sequence ();
29256 ret = arm_expand_vec_perm_const_1 (&d);
29257 end_sequence ();
29259 return ret;
29262 bool
29263 arm_autoinc_modes_ok_p (machine_mode mode, enum arm_auto_incmodes code)
29265 /* If we are soft float and either have ldrd or the mode is
29266 no wider than a word, then all auto increment forms are ok. */
29267 if (TARGET_SOFT_FLOAT && (TARGET_LDRD || GET_MODE_SIZE (mode) <= 4))
29268 return true;
29270 switch (code)
29272 /* Post increment and Pre Decrement are supported for all
29273 instruction forms except for vector forms. */
29274 case ARM_POST_INC:
29275 case ARM_PRE_DEC:
29276 if (VECTOR_MODE_P (mode))
29278 if (code != ARM_PRE_DEC)
29279 return true;
29280 else
29281 return false;
29284 return true;
29286 case ARM_POST_DEC:
29287 case ARM_PRE_INC:
29288 /* Without LDRD and mode size greater than
29289 word size, there is no point in auto-incrementing
29290 because ldm and stm will not have these forms. */
29291 if (!TARGET_LDRD && GET_MODE_SIZE (mode) > 4)
29292 return false;
29294 /* Vector and floating point modes do not support
29295 these auto increment forms. */
29296 if (FLOAT_MODE_P (mode) || VECTOR_MODE_P (mode))
29297 return false;
29299 return true;
29301 default:
29302 return false;
29306 return false;
29309 /* The default expansion of general 64-bit shifts in core-regs is suboptimal,
29310 on ARM, since we know that shifts by negative amounts are no-ops.
29311 Additionally, the default expansion code is not available or suitable
29312 for post-reload insn splits (this can occur when the register allocator
29313 chooses not to do a shift in NEON).
29315 This function is used in both initial expand and post-reload splits, and
29316 handles all kinds of 64-bit shifts.
29318 Input requirements:
29319 - It is safe for the input and output to be the same register, but
29320 early-clobber rules apply for the shift amount and scratch registers.
29321 - Shift by register requires both scratch registers. In all other cases
29322 the scratch registers may be NULL.
29323 - Ashiftrt by a register also clobbers the CC register. */
29324 void
29325 arm_emit_coreregs_64bit_shift (enum rtx_code code, rtx out, rtx in,
29326 rtx amount, rtx scratch1, rtx scratch2)
29328 rtx out_high = gen_highpart (SImode, out);
29329 rtx out_low = gen_lowpart (SImode, out);
29330 rtx in_high = gen_highpart (SImode, in);
29331 rtx in_low = gen_lowpart (SImode, in);
29333 /* Terminology:
29334 in = the register pair containing the input value.
29335 out = the destination register pair.
29336 up = the high- or low-part of each pair.
29337 down = the opposite part to "up".
29338 In a shift, we can consider bits to shift from "up"-stream to
29339 "down"-stream, so in a left-shift "up" is the low-part and "down"
29340 is the high-part of each register pair. */
29342 rtx out_up = code == ASHIFT ? out_low : out_high;
29343 rtx out_down = code == ASHIFT ? out_high : out_low;
29344 rtx in_up = code == ASHIFT ? in_low : in_high;
29345 rtx in_down = code == ASHIFT ? in_high : in_low;
29347 gcc_assert (code == ASHIFT || code == ASHIFTRT || code == LSHIFTRT);
29348 gcc_assert (out
29349 && (REG_P (out) || GET_CODE (out) == SUBREG)
29350 && GET_MODE (out) == DImode);
29351 gcc_assert (in
29352 && (REG_P (in) || GET_CODE (in) == SUBREG)
29353 && GET_MODE (in) == DImode);
29354 gcc_assert (amount
29355 && (((REG_P (amount) || GET_CODE (amount) == SUBREG)
29356 && GET_MODE (amount) == SImode)
29357 || CONST_INT_P (amount)));
29358 gcc_assert (scratch1 == NULL
29359 || (GET_CODE (scratch1) == SCRATCH)
29360 || (GET_MODE (scratch1) == SImode
29361 && REG_P (scratch1)));
29362 gcc_assert (scratch2 == NULL
29363 || (GET_CODE (scratch2) == SCRATCH)
29364 || (GET_MODE (scratch2) == SImode
29365 && REG_P (scratch2)));
29366 gcc_assert (!REG_P (out) || !REG_P (amount)
29367 || !HARD_REGISTER_P (out)
29368 || (REGNO (out) != REGNO (amount)
29369 && REGNO (out) + 1 != REGNO (amount)));
29371 /* Macros to make following code more readable. */
29372 #define SUB_32(DEST,SRC) \
29373 gen_addsi3 ((DEST), (SRC), GEN_INT (-32))
29374 #define RSB_32(DEST,SRC) \
29375 gen_subsi3 ((DEST), GEN_INT (32), (SRC))
29376 #define SUB_S_32(DEST,SRC) \
29377 gen_addsi3_compare0 ((DEST), (SRC), \
29378 GEN_INT (-32))
29379 #define SET(DEST,SRC) \
29380 gen_rtx_SET ((DEST), (SRC))
29381 #define SHIFT(CODE,SRC,AMOUNT) \
29382 gen_rtx_fmt_ee ((CODE), SImode, (SRC), (AMOUNT))
29383 #define LSHIFT(CODE,SRC,AMOUNT) \
29384 gen_rtx_fmt_ee ((CODE) == ASHIFT ? ASHIFT : LSHIFTRT, \
29385 SImode, (SRC), (AMOUNT))
29386 #define REV_LSHIFT(CODE,SRC,AMOUNT) \
29387 gen_rtx_fmt_ee ((CODE) == ASHIFT ? LSHIFTRT : ASHIFT, \
29388 SImode, (SRC), (AMOUNT))
29389 #define ORR(A,B) \
29390 gen_rtx_IOR (SImode, (A), (B))
29391 #define BRANCH(COND,LABEL) \
29392 gen_arm_cond_branch ((LABEL), \
29393 gen_rtx_ ## COND (CCmode, cc_reg, \
29394 const0_rtx), \
29395 cc_reg)
29397 /* Shifts by register and shifts by constant are handled separately. */
29398 if (CONST_INT_P (amount))
29400 /* We have a shift-by-constant. */
29402 /* First, handle out-of-range shift amounts.
29403 In both cases we try to match the result an ARM instruction in a
29404 shift-by-register would give. This helps reduce execution
29405 differences between optimization levels, but it won't stop other
29406 parts of the compiler doing different things. This is "undefined
29407 behavior", in any case. */
29408 if (INTVAL (amount) <= 0)
29409 emit_insn (gen_movdi (out, in));
29410 else if (INTVAL (amount) >= 64)
29412 if (code == ASHIFTRT)
29414 rtx const31_rtx = GEN_INT (31);
29415 emit_insn (SET (out_down, SHIFT (code, in_up, const31_rtx)));
29416 emit_insn (SET (out_up, SHIFT (code, in_up, const31_rtx)));
29418 else
29419 emit_insn (gen_movdi (out, const0_rtx));
29422 /* Now handle valid shifts. */
29423 else if (INTVAL (amount) < 32)
29425 /* Shifts by a constant less than 32. */
29426 rtx reverse_amount = GEN_INT (32 - INTVAL (amount));
29428 /* Clearing the out register in DImode first avoids lots
29429 of spilling and results in less stack usage.
29430 Later this redundant insn is completely removed.
29431 Do that only if "in" and "out" are different registers. */
29432 if (REG_P (out) && REG_P (in) && REGNO (out) != REGNO (in))
29433 emit_insn (SET (out, const0_rtx));
29434 emit_insn (SET (out_down, LSHIFT (code, in_down, amount)));
29435 emit_insn (SET (out_down,
29436 ORR (REV_LSHIFT (code, in_up, reverse_amount),
29437 out_down)));
29438 emit_insn (SET (out_up, SHIFT (code, in_up, amount)));
29440 else
29442 /* Shifts by a constant greater than 31. */
29443 rtx adj_amount = GEN_INT (INTVAL (amount) - 32);
29445 if (REG_P (out) && REG_P (in) && REGNO (out) != REGNO (in))
29446 emit_insn (SET (out, const0_rtx));
29447 emit_insn (SET (out_down, SHIFT (code, in_up, adj_amount)));
29448 if (code == ASHIFTRT)
29449 emit_insn (gen_ashrsi3 (out_up, in_up,
29450 GEN_INT (31)));
29451 else
29452 emit_insn (SET (out_up, const0_rtx));
29455 else
29457 /* We have a shift-by-register. */
29458 rtx cc_reg = gen_rtx_REG (CC_NOOVmode, CC_REGNUM);
29460 /* This alternative requires the scratch registers. */
29461 gcc_assert (scratch1 && REG_P (scratch1));
29462 gcc_assert (scratch2 && REG_P (scratch2));
29464 /* We will need the values "amount-32" and "32-amount" later.
29465 Swapping them around now allows the later code to be more general. */
29466 switch (code)
29468 case ASHIFT:
29469 emit_insn (SUB_32 (scratch1, amount));
29470 emit_insn (RSB_32 (scratch2, amount));
29471 break;
29472 case ASHIFTRT:
29473 emit_insn (RSB_32 (scratch1, amount));
29474 /* Also set CC = amount > 32. */
29475 emit_insn (SUB_S_32 (scratch2, amount));
29476 break;
29477 case LSHIFTRT:
29478 emit_insn (RSB_32 (scratch1, amount));
29479 emit_insn (SUB_32 (scratch2, amount));
29480 break;
29481 default:
29482 gcc_unreachable ();
29485 /* Emit code like this:
29487 arithmetic-left:
29488 out_down = in_down << amount;
29489 out_down = (in_up << (amount - 32)) | out_down;
29490 out_down = ((unsigned)in_up >> (32 - amount)) | out_down;
29491 out_up = in_up << amount;
29493 arithmetic-right:
29494 out_down = in_down >> amount;
29495 out_down = (in_up << (32 - amount)) | out_down;
29496 if (amount < 32)
29497 out_down = ((signed)in_up >> (amount - 32)) | out_down;
29498 out_up = in_up << amount;
29500 logical-right:
29501 out_down = in_down >> amount;
29502 out_down = (in_up << (32 - amount)) | out_down;
29503 if (amount < 32)
29504 out_down = ((unsigned)in_up >> (amount - 32)) | out_down;
29505 out_up = in_up << amount;
29507 The ARM and Thumb2 variants are the same but implemented slightly
29508 differently. If this were only called during expand we could just
29509 use the Thumb2 case and let combine do the right thing, but this
29510 can also be called from post-reload splitters. */
29512 emit_insn (SET (out_down, LSHIFT (code, in_down, amount)));
29514 if (!TARGET_THUMB2)
29516 /* Emit code for ARM mode. */
29517 emit_insn (SET (out_down,
29518 ORR (SHIFT (ASHIFT, in_up, scratch1), out_down)));
29519 if (code == ASHIFTRT)
29521 rtx_code_label *done_label = gen_label_rtx ();
29522 emit_jump_insn (BRANCH (LT, done_label));
29523 emit_insn (SET (out_down, ORR (SHIFT (ASHIFTRT, in_up, scratch2),
29524 out_down)));
29525 emit_label (done_label);
29527 else
29528 emit_insn (SET (out_down, ORR (SHIFT (LSHIFTRT, in_up, scratch2),
29529 out_down)));
29531 else
29533 /* Emit code for Thumb2 mode.
29534 Thumb2 can't do shift and or in one insn. */
29535 emit_insn (SET (scratch1, SHIFT (ASHIFT, in_up, scratch1)));
29536 emit_insn (gen_iorsi3 (out_down, out_down, scratch1));
29538 if (code == ASHIFTRT)
29540 rtx_code_label *done_label = gen_label_rtx ();
29541 emit_jump_insn (BRANCH (LT, done_label));
29542 emit_insn (SET (scratch2, SHIFT (ASHIFTRT, in_up, scratch2)));
29543 emit_insn (SET (out_down, ORR (out_down, scratch2)));
29544 emit_label (done_label);
29546 else
29548 emit_insn (SET (scratch2, SHIFT (LSHIFTRT, in_up, scratch2)));
29549 emit_insn (gen_iorsi3 (out_down, out_down, scratch2));
29553 emit_insn (SET (out_up, SHIFT (code, in_up, amount)));
29556 #undef SUB_32
29557 #undef RSB_32
29558 #undef SUB_S_32
29559 #undef SET
29560 #undef SHIFT
29561 #undef LSHIFT
29562 #undef REV_LSHIFT
29563 #undef ORR
29564 #undef BRANCH
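/* Illustrative sketch, not part of the original file: a plain C model of
   the three-term combination used above for a 64-bit logical right shift
   in core registers.  The emitted RTL relies on the ARM shift-by-register
   semantics (amounts of 32 or more yield zero), so the C model makes the
   same clamping explicit.  */
static unsigned long long
example_lshrdi3_model (unsigned int in_low, unsigned int in_high,
                       unsigned int amount)
{
  /* out_down = in_down >> amount
              | in_up << (32 - amount)
              | in_up >> (amount - 32);  (each term is 0 when its shift
                                          amount is outside [0, 31])  */
  unsigned int out_low = (amount < 32 ? in_low >> amount : 0)
                         | (amount > 0 && amount < 32
                            ? in_high << (32 - amount) : 0)
                         | (amount >= 32 && amount < 64
                            ? in_high >> (amount - 32) : 0);
  unsigned int out_high = amount < 32 ? in_high >> amount : 0;

  return ((unsigned long long) out_high << 32) | out_low;
}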
29567 /* Returns true if the pattern is a valid symbolic address, which is either a
29568 symbol_ref or (symbol_ref + addend).
29570 According to the ARM ELF ABI, the initial addend of REL-type relocations
29571 processing MOVW and MOVT instructions is formed by interpreting the 16-bit
29572 literal field of the instruction as a 16-bit signed value in the range
29573 -32768 <= A < 32768. */
29575 bool
29576 arm_valid_symbolic_address_p (rtx addr)
29578 rtx xop0, xop1 = NULL_RTX;
29579 rtx tmp = addr;
29581 if (GET_CODE (tmp) == SYMBOL_REF || GET_CODE (tmp) == LABEL_REF)
29582 return true;
29584 /* (const (plus: symbol_ref const_int)) */
29585 if (GET_CODE (addr) == CONST)
29586 tmp = XEXP (addr, 0);
29588 if (GET_CODE (tmp) == PLUS)
29590 xop0 = XEXP (tmp, 0);
29591 xop1 = XEXP (tmp, 1);
29593 if (GET_CODE (xop0) == SYMBOL_REF && CONST_INT_P (xop1))
29594 return IN_RANGE (INTVAL (xop1), -0x8000, 0x7fff);
29597 return false;
29600 /* Returns true if *COMPARISON is a valid comparison operation and puts
29601 the operands into a form that is valid. */
29602 bool
29603 arm_validize_comparison (rtx *comparison, rtx * op1, rtx * op2)
29605 enum rtx_code code = GET_CODE (*comparison);
29606 int code_int;
29607 machine_mode mode = (GET_MODE (*op1) == VOIDmode)
29608 ? GET_MODE (*op2) : GET_MODE (*op1);
29610 gcc_assert (GET_MODE (*op1) != VOIDmode || GET_MODE (*op2) != VOIDmode);
29612 if (code == UNEQ || code == LTGT)
29613 return false;
29615 code_int = (int)code;
29616 arm_canonicalize_comparison (&code_int, op1, op2, 0);
29617 PUT_CODE (*comparison, (enum rtx_code)code_int);
29619 switch (mode)
29621 case E_SImode:
29622 if (!arm_add_operand (*op1, mode))
29623 *op1 = force_reg (mode, *op1);
29624 if (!arm_add_operand (*op2, mode))
29625 *op2 = force_reg (mode, *op2);
29626 return true;
29628 case E_DImode:
29629 if (!cmpdi_operand (*op1, mode))
29630 *op1 = force_reg (mode, *op1);
29631 if (!cmpdi_operand (*op2, mode))
29632 *op2 = force_reg (mode, *op2);
29633 return true;
29635 case E_HFmode:
29636 if (!TARGET_VFP_FP16INST)
29637 break;
29638 /* FP16 comparisons are done in SF mode. */
29639 mode = SFmode;
29640 *op1 = convert_to_mode (mode, *op1, 1);
29641 *op2 = convert_to_mode (mode, *op2, 1);
29642 /* Fall through. */
29643 case E_SFmode:
29644 case E_DFmode:
29645 if (!vfp_compare_operand (*op1, mode))
29646 *op1 = force_reg (mode, *op1);
29647 if (!vfp_compare_operand (*op2, mode))
29648 *op2 = force_reg (mode, *op2);
29649 return true;
29650 default:
29651 break;
29654 return false;
29658 /* Maximum number of instructions to set block of memory. */
29659 static int
29660 arm_block_set_max_insns (void)
29662 if (optimize_function_for_size_p (cfun))
29663 return 4;
29664 else
29665 return current_tune->max_insns_inline_memset;
29668 /* Return TRUE if it's profitable to set block of memory for
29669 non-vectorized case. VAL is the value to set the memory
29670 with. LENGTH is the number of bytes to set. ALIGN is the
29671 alignment of the destination memory in bytes. UNALIGNED_P
29672 is TRUE if we can only set the memory with instructions
29673 meeting alignment requirements. USE_STRD_P is TRUE if we
29674 can use strd to set the memory. */
29675 static bool
29676 arm_block_set_non_vect_profit_p (rtx val,
29677 unsigned HOST_WIDE_INT length,
29678 unsigned HOST_WIDE_INT align,
29679 bool unaligned_p, bool use_strd_p)
29681 int num = 0;
29682 /* For leftovers in bytes of 0-7, we can set the memory block using
29683 strb/strh/str with minimum instruction number. */
29684 const int leftover[8] = {0, 1, 1, 2, 1, 2, 2, 3};
29686 if (unaligned_p)
29688 num = arm_const_inline_cost (SET, val);
29689 num += length / align + length % align;
29691 else if (use_strd_p)
29693 num = arm_const_double_inline_cost (val);
29694 num += (length >> 3) + leftover[length & 7];
29696 else
29698 num = arm_const_inline_cost (SET, val);
29699 num += (length >> 2) + leftover[length & 3];
29702 /* We may be able to combine last pair STRH/STRB into a single STR
29703 by shifting one byte back. */
29704 if (unaligned_access && length > 3 && (length & 3) == 3)
29705 num--;
29707 return (num <= arm_block_set_max_insns ());
29710 /* Return TRUE if it's profitable to set block of memory for
29711 vectorized case. LENGTH is the number of bytes to set.
29712 ALIGN is the alignment of destination memory in bytes.
29713 MODE is the vector mode used to set the memory. */
29714 static bool
29715 arm_block_set_vect_profit_p (unsigned HOST_WIDE_INT length,
29716 unsigned HOST_WIDE_INT align,
29717 machine_mode mode)
29719 int num;
29720 bool unaligned_p = ((align & 3) != 0);
29721 unsigned int nelt = GET_MODE_NUNITS (mode);
29723 /* Instruction loading constant value. */
29724 num = 1;
29725 /* Instructions storing the memory. */
29726 num += (length + nelt - 1) / nelt;
29727 /* Instructions adjusting the address expression. We only need to
29728 adjust the address expression if the block is 4-byte aligned and the
29729 leftover bytes can only be stored by a misaligned store instruction. */
29730 if (!unaligned_p && (length & 3) != 0)
29731 num++;
29733 /* Store the first 16 bytes using vst1:v16qi for the aligned case. */
29734 if (!unaligned_p && mode == V16QImode)
29735 num--;
29737 return (num <= arm_block_set_max_insns ());
29740 /* Set a block of memory using vectorization instructions for the
29741 unaligned case. We fill the first LENGTH bytes of the memory
29742 area starting from DSTBASE with byte constant VALUE. ALIGN is
29743 the alignment requirement of memory. Return TRUE if succeeded. */
29744 static bool
29745 arm_block_set_unaligned_vect (rtx dstbase,
29746 unsigned HOST_WIDE_INT length,
29747 unsigned HOST_WIDE_INT value,
29748 unsigned HOST_WIDE_INT align)
29750 unsigned int i, j, nelt_v16, nelt_v8, nelt_mode;
29751 rtx dst, mem;
29752 rtx val_elt, val_vec, reg;
29753 rtx rval[MAX_VECT_LEN];
29754 rtx (*gen_func) (rtx, rtx);
29755 machine_mode mode;
29756 unsigned HOST_WIDE_INT v = value;
29757 unsigned int offset = 0;
29758 gcc_assert ((align & 0x3) != 0);
29759 nelt_v8 = GET_MODE_NUNITS (V8QImode);
29760 nelt_v16 = GET_MODE_NUNITS (V16QImode);
29761 if (length >= nelt_v16)
29763 mode = V16QImode;
29764 gen_func = gen_movmisalignv16qi;
29766 else
29768 mode = V8QImode;
29769 gen_func = gen_movmisalignv8qi;
29771 nelt_mode = GET_MODE_NUNITS (mode);
29772 gcc_assert (length >= nelt_mode);
29773 /* Skip if it isn't profitable. */
29774 if (!arm_block_set_vect_profit_p (length, align, mode))
29775 return false;
29777 dst = copy_addr_to_reg (XEXP (dstbase, 0));
29778 mem = adjust_automodify_address (dstbase, mode, dst, offset);
29780 v = sext_hwi (v, BITS_PER_WORD);
29781 val_elt = GEN_INT (v);
29782 for (j = 0; j < nelt_mode; j++)
29783 rval[j] = val_elt;
29785 reg = gen_reg_rtx (mode);
29786 val_vec = gen_rtx_CONST_VECTOR (mode, gen_rtvec_v (nelt_mode, rval));
29787 /* Emit instruction loading the constant value. */
29788 emit_move_insn (reg, val_vec);
29790 /* Handle nelt_mode bytes in a vector. */
29791 for (i = 0; (i + nelt_mode <= length); i += nelt_mode)
29793 emit_insn ((*gen_func) (mem, reg));
29794 if (i + 2 * nelt_mode <= length)
29796 emit_insn (gen_add2_insn (dst, GEN_INT (nelt_mode)));
29797 offset += nelt_mode;
29798 mem = adjust_automodify_address (dstbase, mode, dst, offset);
29802 /* If at least nelt_v8 bytes are left over, we must be in
29803 V16QImode. */
29804 gcc_assert ((i + nelt_v8) > length || mode == V16QImode);
29806 /* Handle (8, 16) bytes leftover. */
29807 if (i + nelt_v8 < length)
29809 emit_insn (gen_add2_insn (dst, GEN_INT (length - i)));
29810 offset += length - i;
29811 mem = adjust_automodify_address (dstbase, mode, dst, offset);
29813 /* We are shifting bytes back, set the alignment accordingly. */
29814 if ((length & 1) != 0 && align >= 2)
29815 set_mem_align (mem, BITS_PER_UNIT);
29817 emit_insn (gen_movmisalignv16qi (mem, reg));
29819 /* Handle (0, 8] bytes leftover. */
29820 else if (i < length && i + nelt_v8 >= length)
29822 if (mode == V16QImode)
29823 reg = gen_lowpart (V8QImode, reg);
29825 emit_insn (gen_add2_insn (dst, GEN_INT ((length - i)
29826 + (nelt_mode - nelt_v8))));
29827 offset += (length - i) + (nelt_mode - nelt_v8);
29828 mem = adjust_automodify_address (dstbase, V8QImode, dst, offset);
29830 /* We are shifting bytes back, set the alignment accordingly. */
29831 if ((length & 1) != 0 && align >= 2)
29832 set_mem_align (mem, BITS_PER_UNIT);
29834 emit_insn (gen_movmisalignv8qi (mem, reg));
29837 return true;
29840 /* Set a block of memory using vectorization instructions for the
29841 aligned case. We fill the first LENGTH bytes of the memory area
29842 starting from DSTBASE with byte constant VALUE. ALIGN is the
29843 alignment requirement of memory. Return TRUE if succeeded. */
29844 static bool
29845 arm_block_set_aligned_vect (rtx dstbase,
29846 unsigned HOST_WIDE_INT length,
29847 unsigned HOST_WIDE_INT value,
29848 unsigned HOST_WIDE_INT align)
29850 unsigned int i, j, nelt_v8, nelt_v16, nelt_mode;
29851 rtx dst, addr, mem;
29852 rtx val_elt, val_vec, reg;
29853 rtx rval[MAX_VECT_LEN];
29854 machine_mode mode;
29855 unsigned HOST_WIDE_INT v = value;
29856 unsigned int offset = 0;
29858 gcc_assert ((align & 0x3) == 0);
29859 nelt_v8 = GET_MODE_NUNITS (V8QImode);
29860 nelt_v16 = GET_MODE_NUNITS (V16QImode);
29861 if (length >= nelt_v16 && unaligned_access && !BYTES_BIG_ENDIAN)
29862 mode = V16QImode;
29863 else
29864 mode = V8QImode;
29866 nelt_mode = GET_MODE_NUNITS (mode);
29867 gcc_assert (length >= nelt_mode);
29868 /* Skip if it isn't profitable. */
29869 if (!arm_block_set_vect_profit_p (length, align, mode))
29870 return false;
29872 dst = copy_addr_to_reg (XEXP (dstbase, 0));
29874 v = sext_hwi (v, BITS_PER_WORD);
29875 val_elt = GEN_INT (v);
29876 for (j = 0; j < nelt_mode; j++)
29877 rval[j] = val_elt;
29879 reg = gen_reg_rtx (mode);
29880 val_vec = gen_rtx_CONST_VECTOR (mode, gen_rtvec_v (nelt_mode, rval));
29881 /* Emit instruction loading the constant value. */
29882 emit_move_insn (reg, val_vec);
29884 i = 0;
29885 /* Handle first 16 bytes specially using vst1:v16qi instruction. */
29886 if (mode == V16QImode)
29888 mem = adjust_automodify_address (dstbase, mode, dst, offset);
29889 emit_insn (gen_movmisalignv16qi (mem, reg));
29890 i += nelt_mode;
29891 /* Handle (8, 16) bytes leftover using vst1:v16qi again. */
29892 if (i + nelt_v8 < length && i + nelt_v16 > length)
29894 emit_insn (gen_add2_insn (dst, GEN_INT (length - nelt_mode)));
29895 offset += length - nelt_mode;
29896 mem = adjust_automodify_address (dstbase, mode, dst, offset);
29897 /* We are shifting bytes back, set the alignment accordingly. */
29898 if ((length & 0x3) == 0)
29899 set_mem_align (mem, BITS_PER_UNIT * 4);
29900 else if ((length & 0x1) == 0)
29901 set_mem_align (mem, BITS_PER_UNIT * 2);
29902 else
29903 set_mem_align (mem, BITS_PER_UNIT);
29905 emit_insn (gen_movmisalignv16qi (mem, reg));
29906 return true;
29908 /* Fall through for bytes leftover. */
29909 mode = V8QImode;
29910 nelt_mode = GET_MODE_NUNITS (mode);
29911 reg = gen_lowpart (V8QImode, reg);
29914 /* Handle 8 bytes in a vector. */
29915 for (; (i + nelt_mode <= length); i += nelt_mode)
29917 addr = plus_constant (Pmode, dst, i);
29918 mem = adjust_automodify_address (dstbase, mode, addr, offset + i);
29919 emit_move_insn (mem, reg);
29922 /* Handle single word leftover by shifting 4 bytes back. We can
29923 use aligned access for this case. */
29924 if (i + UNITS_PER_WORD == length)
29926 addr = plus_constant (Pmode, dst, i - UNITS_PER_WORD);
29927 offset += i - UNITS_PER_WORD;
29928 mem = adjust_automodify_address (dstbase, mode, addr, offset);
29929 /* We are shifting 4 bytes back, set the alignment accordingly. */
29930 if (align > UNITS_PER_WORD)
29931 set_mem_align (mem, BITS_PER_UNIT * UNITS_PER_WORD);
29933 emit_move_insn (mem, reg);
29935 /* Handle (0, 4), (4, 8) bytes leftover by shifting bytes back.
29936 We have to use unaligned access for this case. */
29937 else if (i < length)
29939 emit_insn (gen_add2_insn (dst, GEN_INT (length - nelt_mode)));
29940 offset += length - nelt_mode;
29941 mem = adjust_automodify_address (dstbase, mode, dst, offset);
29942 /* We are shifting bytes back, set the alignment accordingly. */
29943 if ((length & 1) == 0)
29944 set_mem_align (mem, BITS_PER_UNIT * 2);
29945 else
29946 set_mem_align (mem, BITS_PER_UNIT);
29948 emit_insn (gen_movmisalignv8qi (mem, reg));
29951 return true;
29954 /* Set a block of memory using plain strh/strb instructions, only
29955 using instructions allowed by ALIGN on the processor. We fill the
29956 first LENGTH bytes of the memory area starting from DSTBASE
29957 with byte constant VALUE. ALIGN is the alignment requirement
29958 of memory. */
29959 static bool
29960 arm_block_set_unaligned_non_vect (rtx dstbase,
29961 unsigned HOST_WIDE_INT length,
29962 unsigned HOST_WIDE_INT value,
29963 unsigned HOST_WIDE_INT align)
29965 unsigned int i;
29966 rtx dst, addr, mem;
29967 rtx val_exp, val_reg, reg;
29968 machine_mode mode;
29969 HOST_WIDE_INT v = value;
29971 gcc_assert (align == 1 || align == 2);
29973 if (align == 2)
29974 v |= (value << BITS_PER_UNIT);
29976 v = sext_hwi (v, BITS_PER_WORD);
29977 val_exp = GEN_INT (v);
29978 /* Skip if it isn't profitable. */
29979 if (!arm_block_set_non_vect_profit_p (val_exp, length,
29980 align, true, false))
29981 return false;
29983 dst = copy_addr_to_reg (XEXP (dstbase, 0));
29984 mode = (align == 2 ? HImode : QImode);
29985 val_reg = force_reg (SImode, val_exp);
29986 reg = gen_lowpart (mode, val_reg);
29988 for (i = 0; (i + GET_MODE_SIZE (mode) <= length); i += GET_MODE_SIZE (mode))
29990 addr = plus_constant (Pmode, dst, i);
29991 mem = adjust_automodify_address (dstbase, mode, addr, i);
29992 emit_move_insn (mem, reg);
29995 /* Handle single byte leftover. */
29996 if (i + 1 == length)
29998 reg = gen_lowpart (QImode, val_reg);
29999 addr = plus_constant (Pmode, dst, i);
30000 mem = adjust_automodify_address (dstbase, QImode, addr, i);
30001 emit_move_insn (mem, reg);
30002 i++;
30005 gcc_assert (i == length);
30006 return true;
30009 /* Set a block of memory using plain strd/str/strh/strb instructions,
30010 to permit unaligned copies on processors which support unaligned
30011 semantics for those instructions. We fill the first LENGTH bytes
30012 of the memory area starting from DSTBASE with byte constant VALUE.
30013 ALIGN is the alignment requirement of memory. */
30014 static bool
30015 arm_block_set_aligned_non_vect (rtx dstbase,
30016 unsigned HOST_WIDE_INT length,
30017 unsigned HOST_WIDE_INT value,
30018 unsigned HOST_WIDE_INT align)
30020 unsigned int i;
30021 rtx dst, addr, mem;
30022 rtx val_exp, val_reg, reg;
30023 unsigned HOST_WIDE_INT v;
30024 bool use_strd_p;
30026 use_strd_p = (length >= 2 * UNITS_PER_WORD && (align & 3) == 0
30027 && TARGET_LDRD && current_tune->prefer_ldrd_strd);
30029 v = (value | (value << 8) | (value << 16) | (value << 24));
30030 if (length < UNITS_PER_WORD)
30031 v &= (0xFFFFFFFF >> (UNITS_PER_WORD - length) * BITS_PER_UNIT);
30033 if (use_strd_p)
30034 v |= (v << BITS_PER_WORD);
30035 else
30036 v = sext_hwi (v, BITS_PER_WORD);
30038 val_exp = GEN_INT (v);
30039 /* Skip if it isn't profitable. */
30040 if (!arm_block_set_non_vect_profit_p (val_exp, length,
30041 align, false, use_strd_p))
30043 if (!use_strd_p)
30044 return false;
30046 /* Try without strd. */
30047 v = (v >> BITS_PER_WORD);
30048 v = sext_hwi (v, BITS_PER_WORD);
30049 val_exp = GEN_INT (v);
30050 use_strd_p = false;
30051 if (!arm_block_set_non_vect_profit_p (val_exp, length,
30052 align, false, use_strd_p))
30053 return false;
30056 i = 0;
30057 dst = copy_addr_to_reg (XEXP (dstbase, 0));
30058 /* Handle double words using strd if possible. */
30059 if (use_strd_p)
30061 val_reg = force_reg (DImode, val_exp);
30062 reg = val_reg;
30063 for (; (i + 8 <= length); i += 8)
30065 addr = plus_constant (Pmode, dst, i);
30066 mem = adjust_automodify_address (dstbase, DImode, addr, i);
30067 emit_move_insn (mem, reg);
30070 else
30071 val_reg = force_reg (SImode, val_exp);
30073 /* Handle words. */
30074 reg = (use_strd_p ? gen_lowpart (SImode, val_reg) : val_reg);
30075 for (; (i + 4 <= length); i += 4)
30077 addr = plus_constant (Pmode, dst, i);
30078 mem = adjust_automodify_address (dstbase, SImode, addr, i);
30079 if ((align & 3) == 0)
30080 emit_move_insn (mem, reg);
30081 else
30082 emit_insn (gen_unaligned_storesi (mem, reg));
30085 /* Merge last pair of STRH and STRB into a STR if possible. */
30086 if (unaligned_access && i > 0 && (i + 3) == length)
30088 addr = plus_constant (Pmode, dst, i - 1);
30089 mem = adjust_automodify_address (dstbase, SImode, addr, i - 1);
30090 /* We are shifting one byte back, set the alignment accordingly. */
30091 if ((align & 1) == 0)
30092 set_mem_align (mem, BITS_PER_UNIT);
30094 /* Most likely this is an unaligned access, and we can't tell at
30095 compilation time. */
30096 emit_insn (gen_unaligned_storesi (mem, reg));
30097 return true;
30100 /* Handle half word leftover. */
30101 if (i + 2 <= length)
30103 reg = gen_lowpart (HImode, val_reg);
30104 addr = plus_constant (Pmode, dst, i);
30105 mem = adjust_automodify_address (dstbase, HImode, addr, i);
30106 if ((align & 1) == 0)
30107 emit_move_insn (mem, reg);
30108 else
30109 emit_insn (gen_unaligned_storehi (mem, reg));
30111 i += 2;
30114 /* Handle single byte leftover. */
30115 if (i + 1 == length)
30117 reg = gen_lowpart (QImode, val_reg);
30118 addr = plus_constant (Pmode, dst, i);
30119 mem = adjust_automodify_address (dstbase, QImode, addr, i);
30120 emit_move_insn (mem, reg);
30123 return true;
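/* Illustrative sketch, not part of the original file: how the single set
   byte is replicated into the 32-bit (and, for STRD, 64-bit) store value
   used above, e.g. value 0x5A becomes 0x5A5A5A5A.  */
static unsigned long long
example_replicate_set_byte (unsigned char value, int use_strd_p)
{
  unsigned long long v = value;

  v |= v << 8;
  v |= v << 16;                 /* 32-bit pattern for STR/STRH/STRB  */
  if (use_strd_p)
    v |= v << 32;               /* 64-bit pattern for STRD  */
  return v;
}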
30126 /* Set a block of memory using vectorization instructions for both
30127 aligned and unaligned cases. We fill the first LENGTH bytes of
30128 the memory area starting from DSTBASE with byte constant VALUE.
30129 ALIGN is the alignment requirement of memory. */
30130 static bool
30131 arm_block_set_vect (rtx dstbase,
30132 unsigned HOST_WIDE_INT length,
30133 unsigned HOST_WIDE_INT value,
30134 unsigned HOST_WIDE_INT align)
30136 /* Check whether we need to use unaligned store instruction. */
30137 if (((align & 3) != 0 || (length & 3) != 0)
30138 /* Check whether unaligned store instruction is available. */
30139 && (!unaligned_access || BYTES_BIG_ENDIAN))
30140 return false;
30142 if ((align & 3) == 0)
30143 return arm_block_set_aligned_vect (dstbase, length, value, align);
30144 else
30145 return arm_block_set_unaligned_vect (dstbase, length, value, align);
30148 /* Expand a string store (memset) operation. First we try to do that by
30149 using vectorization instructions, then fall back to ARM unaligned access
30150 and double-word stores if profitable. OPERANDS[0] is the destination,
30151 OPERANDS[1] is the number of bytes, OPERANDS[2] is the value to
30152 initialize the memory with, OPERANDS[3] is the known alignment of the
30153 destination. */
30154 bool
30155 arm_gen_setmem (rtx *operands)
30157 rtx dstbase = operands[0];
30158 unsigned HOST_WIDE_INT length;
30159 unsigned HOST_WIDE_INT value;
30160 unsigned HOST_WIDE_INT align;
30162 if (!CONST_INT_P (operands[2]) || !CONST_INT_P (operands[1]))
30163 return false;
30165 length = UINTVAL (operands[1]);
30166 if (length > 64)
30167 return false;
30169 value = (UINTVAL (operands[2]) & 0xFF);
30170 align = UINTVAL (operands[3]);
30171 if (TARGET_NEON && length >= 8
30172 && current_tune->string_ops_prefer_neon
30173 && arm_block_set_vect (dstbase, length, value, align))
30174 return true;
30176 if (!unaligned_access && (align & 3) != 0)
30177 return arm_block_set_unaligned_non_vect (dstbase, length, value, align);
30179 return arm_block_set_aligned_non_vect (dstbase, length, value, align);
30183 static bool
30184 arm_macro_fusion_p (void)
30186 return current_tune->fusible_ops != tune_params::FUSE_NOTHING;
30189 /* Return true if the two back-to-back sets PREV_SET, CURR_SET are suitable
30190 for MOVW / MOVT macro fusion. */
30192 static bool
30193 arm_sets_movw_movt_fusible_p (rtx prev_set, rtx curr_set)
30195 /* We are trying to fuse
30196 movw imm / movt imm
30197 instructions as a group that gets scheduled together. */
30199 rtx set_dest = SET_DEST (curr_set);
30201 if (GET_MODE (set_dest) != SImode)
30202 return false;
30204 /* We are trying to match:
30205 prev (movw) == (set (reg r0) (const_int imm16))
30206 curr (movt) == (set (zero_extract (reg r0)
30207 (const_int 16)
30208 (const_int 16))
30209 (const_int imm16_1))
30211 prev (movw) == (set (reg r1)
30212 (high (symbol_ref ("SYM"))))
30213 curr (movt) == (set (reg r0)
30214 (lo_sum (reg r1)
30215 (symbol_ref ("SYM")))) */
30217 if (GET_CODE (set_dest) == ZERO_EXTRACT)
30219 if (CONST_INT_P (SET_SRC (curr_set))
30220 && CONST_INT_P (SET_SRC (prev_set))
30221 && REG_P (XEXP (set_dest, 0))
30222 && REG_P (SET_DEST (prev_set))
30223 && REGNO (XEXP (set_dest, 0)) == REGNO (SET_DEST (prev_set)))
30224 return true;
30227 else if (GET_CODE (SET_SRC (curr_set)) == LO_SUM
30228 && REG_P (SET_DEST (curr_set))
30229 && REG_P (SET_DEST (prev_set))
30230 && GET_CODE (SET_SRC (prev_set)) == HIGH
30231 && REGNO (SET_DEST (curr_set)) == REGNO (SET_DEST (prev_set)))
30232 return true;
30234 return false;
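/* Illustrative sketch, not part of the original file: the MOVW/MOVT pair
   recognized above builds a 32-bit constant from two 16-bit halves,
   e.g. 0x12345678 is materialized as MOVW r0, #0x5678 followed by
   MOVT r0, #0x1234; fusing keeps the two insns adjacent in the schedule.  */
static unsigned int
example_movw_movt_model (unsigned int imm32)
{
  unsigned int lo16 = imm32 & 0xffff;
  unsigned int hi16 = imm32 >> 16;
  unsigned int r0 = lo16;                       /* movw r0, #lo16  */
  r0 = (r0 & 0xffff) | (hi16 << 16);            /* movt r0, #hi16  */
  return r0;
}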
30237 static bool
30238 aarch_macro_fusion_pair_p (rtx_insn* prev, rtx_insn* curr)
30240 rtx prev_set = single_set (prev);
30241 rtx curr_set = single_set (curr);
30243 if (!prev_set
30244 || !curr_set)
30245 return false;
30247 if (any_condjump_p (curr))
30248 return false;
30250 if (!arm_macro_fusion_p ())
30251 return false;
30253 if (current_tune->fusible_ops & tune_params::FUSE_AES_AESMC
30254 && aarch_crypto_can_dual_issue (prev, curr))
30255 return true;
30257 if (current_tune->fusible_ops & tune_params::FUSE_MOVW_MOVT
30258 && arm_sets_movw_movt_fusible_p (prev_set, curr_set))
30259 return true;
30261 return false;
30264 /* Return true iff the instruction fusion described by OP is enabled. */
30265 bool
30266 arm_fusion_enabled_p (tune_params::fuse_ops op)
30268 return current_tune->fusible_ops & op;
30271 /* Implement TARGET_SCHED_CAN_SPECULATE_INSN. Return true if INSN can be
30272 scheduled for speculative execution. Reject the long-running division
30273 and square-root instructions. */
30275 static bool
30276 arm_sched_can_speculate_insn (rtx_insn *insn)
30278 switch (get_attr_type (insn))
30280 case TYPE_SDIV:
30281 case TYPE_UDIV:
30282 case TYPE_FDIVS:
30283 case TYPE_FDIVD:
30284 case TYPE_FSQRTS:
30285 case TYPE_FSQRTD:
30286 case TYPE_NEON_FP_SQRT_S:
30287 case TYPE_NEON_FP_SQRT_D:
30288 case TYPE_NEON_FP_SQRT_S_Q:
30289 case TYPE_NEON_FP_SQRT_D_Q:
30290 case TYPE_NEON_FP_DIV_S:
30291 case TYPE_NEON_FP_DIV_D:
30292 case TYPE_NEON_FP_DIV_S_Q:
30293 case TYPE_NEON_FP_DIV_D_Q:
30294 return false;
30295 default:
30296 return true;
30300 /* Implement the TARGET_ASAN_SHADOW_OFFSET hook. */
30302 static unsigned HOST_WIDE_INT
30303 arm_asan_shadow_offset (void)
30305 return HOST_WIDE_INT_1U << 29;
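/* Illustrative sketch, not part of the original file: how the offset
   above enters the AddressSanitizer shadow mapping, assuming the usual
   shadow granularity of 8 bytes (a shift of 3).  */
static unsigned long long
example_asan_shadow_addr (unsigned long long app_addr)
{
  return (app_addr >> 3) + (1ULL << 29);
}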
30309 /* This is a temporary fix for PR60655. Ideally we need
30310 to handle most of these cases in the generic part but
30311 currently we reject minus (..) (sym_ref). We try to
30312 ameliorate the case with minus (sym_ref1) (sym_ref2)
30313 where they are in the same section. */
30315 static bool
30316 arm_const_not_ok_for_debug_p (rtx p)
30318 tree decl_op0 = NULL;
30319 tree decl_op1 = NULL;
30321 if (GET_CODE (p) == MINUS)
30323 if (GET_CODE (XEXP (p, 1)) == SYMBOL_REF)
30325 decl_op1 = SYMBOL_REF_DECL (XEXP (p, 1));
30326 if (decl_op1
30327 && GET_CODE (XEXP (p, 0)) == SYMBOL_REF
30328 && (decl_op0 = SYMBOL_REF_DECL (XEXP (p, 0))))
30330 if ((VAR_P (decl_op1)
30331 || TREE_CODE (decl_op1) == CONST_DECL)
30332 && (VAR_P (decl_op0)
30333 || TREE_CODE (decl_op0) == CONST_DECL))
30334 return (get_variable_section (decl_op1, false)
30335 != get_variable_section (decl_op0, false));
30337 if (TREE_CODE (decl_op1) == LABEL_DECL
30338 && TREE_CODE (decl_op0) == LABEL_DECL)
30339 return (DECL_CONTEXT (decl_op1)
30340 != DECL_CONTEXT (decl_op0));
30343 return true;
30347 return false;
30350 /* Return TRUE if X is a reference to a value in a constant pool. */
30351 extern bool
30352 arm_is_constant_pool_ref (rtx x)
30354 return (MEM_P (x)
30355 && GET_CODE (XEXP (x, 0)) == SYMBOL_REF
30356 && CONSTANT_POOL_ADDRESS_P (XEXP (x, 0)));
30359 /* Remember the last target of arm_set_current_function. */
30360 static GTY(()) tree arm_previous_fndecl;
30362 /* Restore or save the TREE_TARGET_GLOBALS from or to NEW_TREE. */
30364 void
30365 save_restore_target_globals (tree new_tree)
30367 /* If we have a previous state, use it. */
30368 if (TREE_TARGET_GLOBALS (new_tree))
30369 restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
30370 else if (new_tree == target_option_default_node)
30371 restore_target_globals (&default_target_globals);
30372 else
30374 /* Call target_reinit and save the state for TARGET_GLOBALS. */
30375 TREE_TARGET_GLOBALS (new_tree) = save_target_globals_default_opts ();
30378 arm_option_params_internal ();
30381 /* Invalidate arm_previous_fndecl. */
30383 void
30384 arm_reset_previous_fndecl (void)
30386 arm_previous_fndecl = NULL_TREE;
30389 /* Establish appropriate back-end context for processing the function
30390 FNDECL. The argument might be NULL to indicate processing at top
30391 level, outside of any function scope. */
30393 static void
30394 arm_set_current_function (tree fndecl)
30396 if (!fndecl || fndecl == arm_previous_fndecl)
30397 return;
30399 tree old_tree = (arm_previous_fndecl
30400 ? DECL_FUNCTION_SPECIFIC_TARGET (arm_previous_fndecl)
30401 : NULL_TREE);
30403 tree new_tree = DECL_FUNCTION_SPECIFIC_TARGET (fndecl);
30405 /* If current function has no attributes but previous one did,
30406 use the default node. */
30407 if (! new_tree && old_tree)
30408 new_tree = target_option_default_node;
30410 /* If nothing to do return. #pragma GCC reset or #pragma GCC pop to
30411 the default have been handled by save_restore_target_globals from
30412 arm_pragma_target_parse. */
30413 if (old_tree == new_tree)
30414 return;
30416 arm_previous_fndecl = fndecl;
30418 /* First set the target options. */
30419 cl_target_option_restore (&global_options, TREE_TARGET_OPTION (new_tree));
30421 save_restore_target_globals (new_tree);
30424 /* Implement TARGET_OPTION_PRINT. */
30426 static void
30427 arm_option_print (FILE *file, int indent, struct cl_target_option *ptr)
30429 int flags = ptr->x_target_flags;
30430 const char *fpu_name;
30432 fpu_name = (ptr->x_arm_fpu_index == TARGET_FPU_auto
30433 ? "auto" : all_fpus[ptr->x_arm_fpu_index].name);
30435 fprintf (file, "%*sselected isa %s\n", indent, "",
30436 TARGET_THUMB2_P (flags) ? "thumb2" :
30437 TARGET_THUMB_P (flags) ? "thumb1" :
30438 "arm");
30440 if (ptr->x_arm_arch_string)
30441 fprintf (file, "%*sselected architecture %s\n", indent, "",
30442 ptr->x_arm_arch_string);
30444 if (ptr->x_arm_cpu_string)
30445 fprintf (file, "%*sselected CPU %s\n", indent, "",
30446 ptr->x_arm_cpu_string);
30448 if (ptr->x_arm_tune_string)
30449 fprintf (file, "%*sselected tune %s\n", indent, "",
30450 ptr->x_arm_tune_string);
30452 fprintf (file, "%*sselected fpu %s\n", indent, "", fpu_name);
30455 /* Hook to determine if one function can safely inline another. */
30457 static bool
30458 arm_can_inline_p (tree caller, tree callee)
30460 tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
30461 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);
30462 bool can_inline = true;
30464 struct cl_target_option *caller_opts
30465 = TREE_TARGET_OPTION (caller_tree ? caller_tree
30466 : target_option_default_node);
30468 struct cl_target_option *callee_opts
30469 = TREE_TARGET_OPTION (callee_tree ? callee_tree
30470 : target_option_default_node);
30472 if (callee_opts == caller_opts)
30473 return true;
30475 /* Callee's ISA features should be a subset of the caller's. */
30476 struct arm_build_target caller_target;
30477 struct arm_build_target callee_target;
30478 caller_target.isa = sbitmap_alloc (isa_num_bits);
30479 callee_target.isa = sbitmap_alloc (isa_num_bits);
30481 arm_configure_build_target (&caller_target, caller_opts, &global_options_set,
30482 false);
30483 arm_configure_build_target (&callee_target, callee_opts, &global_options_set,
30484 false);
30485 if (!bitmap_subset_p (callee_target.isa, caller_target.isa))
30486 can_inline = false;
30488 sbitmap_free (caller_target.isa);
30489 sbitmap_free (callee_target.isa);
30491 /* OK to inline between different modes.
30492 Functions with mode-specific instructions, e.g. using asm,
30493 must be explicitly protected with noinline. */
30494 return can_inline;
30497 /* Hook to fix function's alignment affected by target attribute. */
30499 static void
30500 arm_relayout_function (tree fndecl)
30502 if (DECL_USER_ALIGN (fndecl))
30503 return;
30505 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (fndecl);
30507 if (!callee_tree)
30508 callee_tree = target_option_default_node;
30510 struct cl_target_option *opts = TREE_TARGET_OPTION (callee_tree);
30511 SET_DECL_ALIGN
30512 (fndecl,
30513 FUNCTION_ALIGNMENT (FUNCTION_BOUNDARY_P (opts->x_target_flags)));
30516 /* Inner function to process attribute((target(...))): take an argument and
30517 set the current options from that argument.  If the argument is a list,
30518 recursively process each entry. */
30520 static bool
30521 arm_valid_target_attribute_rec (tree args, struct gcc_options *opts)
30523 if (TREE_CODE (args) == TREE_LIST)
30525 bool ret = true;
30527 for (; args; args = TREE_CHAIN (args))
30528 if (TREE_VALUE (args)
30529 && !arm_valid_target_attribute_rec (TREE_VALUE (args), opts))
30530 ret = false;
30531 return ret;
30534 else if (TREE_CODE (args) != STRING_CST)
30536 error ("attribute %<target%> argument not a string");
30537 return false;
30540 char *argstr = ASTRDUP (TREE_STRING_POINTER (args));
30541 char *q;
30543 while ((q = strtok (argstr, ",")) != NULL)
30545 while (ISSPACE (*q)) ++q;
30547 argstr = NULL;
30548 if (!strncmp (q, "thumb", 5))
30549 opts->x_target_flags |= MASK_THUMB;
30551 else if (!strncmp (q, "arm", 3))
30552 opts->x_target_flags &= ~MASK_THUMB;
30554 else if (!strncmp (q, "fpu=", 4))
30556 int fpu_index;
30557 if (! opt_enum_arg_to_value (OPT_mfpu_, q+4,
30558 &fpu_index, CL_TARGET))
30560 error ("invalid fpu for attribute(target(\"%s\"))", q);
30561 return false;
30563 if (fpu_index == TARGET_FPU_auto)
30565 /* This doesn't really make sense until we support
30566 general dynamic selection of the architecture and all
30567 sub-features. */
30568 sorry ("auto fpu selection not currently permitted here");
30569 return false;
30571 opts->x_arm_fpu_index = (enum fpu_type) fpu_index;
30573 else
30575 error ("attribute(target(\"%s\")) is unknown", q);
30576 return false;
30580 return true;
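/* A small self-contained sketch of the comma-separated parsing done above;
   only the token classification is shown and the real option machinery is
   elided.  classify_target_string is a made-up name.  Illustrative only,
   hence the "#if 0" guard.  */
#if 0
#include <ctype.h>
#include <stdio.h>
#include <string.h>

static void
classify_target_string (const char *attr)
{
  char buf[128];
  char *argstr = buf, *q;

  snprintf (buf, sizeof buf, "%s", attr);
  while ((q = strtok (argstr, ",")) != NULL)
    {
      argstr = NULL;
      while (isspace ((unsigned char) *q))
        ++q;
      if (!strncmp (q, "thumb", 5))
        printf ("  set MASK_THUMB\n");
      else if (!strncmp (q, "arm", 3))
        printf ("  clear MASK_THUMB\n");
      else if (!strncmp (q, "fpu=", 4))
        printf ("  select FPU '%s'\n", q + 4);
      else
        printf ("  unknown token '%s'\n", q);
    }
}

int
main (void)
{
  classify_target_string ("thumb, fpu=vfpv3-d16");
  return 0;
}
#endif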
30583 /* Return a TARGET_OPTION_NODE tree of the target options listed or NULL. */
30585 tree
30586 arm_valid_target_attribute_tree (tree args, struct gcc_options *opts,
30587 struct gcc_options *opts_set)
30589 struct cl_target_option cl_opts;
30591 if (!arm_valid_target_attribute_rec (args, opts))
30592 return NULL_TREE;
30594 cl_target_option_save (&cl_opts, opts);
30595 arm_configure_build_target (&arm_active_target, &cl_opts, opts_set, false);
30596 arm_option_check_internal (opts);
30597 /* Do any overrides, such as global options arch=xxx. */
30598 arm_option_override_internal (opts, opts_set);
30600 return build_target_option_node (opts);
30603 static void
30604 add_attribute (const char * mode, tree *attributes)
30606 size_t len = strlen (mode);
30607 tree value = build_string (len, mode);
30609 TREE_TYPE (value) = build_array_type (char_type_node,
30610 build_index_type (size_int (len)));
30612 *attributes = tree_cons (get_identifier ("target"),
30613 build_tree_list (NULL_TREE, value),
30614 *attributes);
30617 /* For testing.  Alternately insert thumb or arm modes on functions. */
30619 static void
30620 arm_insert_attributes (tree fndecl, tree * attributes)
30622 const char *mode;
30624 if (! TARGET_FLIP_THUMB)
30625 return;
30627 if (TREE_CODE (fndecl) != FUNCTION_DECL || DECL_EXTERNAL(fndecl)
30628 || DECL_BUILT_IN (fndecl) || DECL_ARTIFICIAL (fndecl))
30629 return;
30631 /* Nested definitions must inherit mode. */
30632 if (current_function_decl)
30634 mode = TARGET_THUMB ? "thumb" : "arm";
30635 add_attribute (mode, attributes);
30636 return;
30639 /* If there is already a setting don't change it. */
30640 if (lookup_attribute ("target", *attributes) != NULL)
30641 return;
30643 mode = thumb_flipper ? "thumb" : "arm";
30644 add_attribute (mode, attributes);
30646 thumb_flipper = !thumb_flipper;
30649 /* Hook to validate attribute((target("string"))). */
30651 static bool
30652 arm_valid_target_attribute_p (tree fndecl, tree ARG_UNUSED (name),
30653 tree args, int ARG_UNUSED (flags))
30655 bool ret = true;
30656 struct gcc_options func_options;
30657 tree cur_tree, new_optimize;
30658 gcc_assert ((fndecl != NULL_TREE) && (args != NULL_TREE));
30660 /* Get the optimization options of the current function. */
30661 tree func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);
30663 /* If the function changed the optimization levels as well as setting target
30664 options, start with the optimizations specified. */
30665 if (!func_optimize)
30666 func_optimize = optimization_default_node;
30668 /* Init func_options. */
30669 memset (&func_options, 0, sizeof (func_options));
30670 init_options_struct (&func_options, NULL);
30671 lang_hooks.init_options_struct (&func_options);
30673 /* Initialize func_options to the defaults. */
30674 cl_optimization_restore (&func_options,
30675 TREE_OPTIMIZATION (func_optimize));
30677 cl_target_option_restore (&func_options,
30678 TREE_TARGET_OPTION (target_option_default_node));
30680 /* Set func_options flags with new target mode. */
30681 cur_tree = arm_valid_target_attribute_tree (args, &func_options,
30682 &global_options_set);
30684 if (cur_tree == NULL_TREE)
30685 ret = false;
30687 new_optimize = build_optimization_node (&func_options);
30689 DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = cur_tree;
30691 DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize;
30693 finalize_options_struct (&func_options);
30695 return ret;
30698 /* Match an ISA feature bitmap to a named FPU. We always use the
30699 first entry that exactly matches the feature set, so that we
30700 effectively canonicalize the FPU name for the assembler. */
30701 static const char*
30702 arm_identify_fpu_from_isa (sbitmap isa)
30704 auto_sbitmap fpubits (isa_num_bits);
30705 auto_sbitmap cand_fpubits (isa_num_bits);
30707 bitmap_and (fpubits, isa, isa_all_fpubits);
30709 /* If there are no ISA feature bits relating to the FPU, we must be
30710 doing soft-float. */
30711 if (bitmap_empty_p (fpubits))
30712 return "softvfp";
30714 for (unsigned int i = 0; i < TARGET_FPU_auto; i++)
30716 arm_initialize_isa (cand_fpubits, all_fpus[i].isa_bits);
30717 if (bitmap_equal_p (fpubits, cand_fpubits))
30718 return all_fpus[i].name;
30720 /* We must find an entry, or things have gone wrong. */
30721 gcc_unreachable ();
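/* A rough standalone sketch of the "first exact match wins" lookup above,
   with unsigned masks standing in for sbitmaps; fake_fpus and its contents
   are made up for illustration.  Guarded with "#if 0" since it is
   illustrative only.  */
#if 0
#include <stdio.h>

struct fake_fpu { const char *name; unsigned isa_bits; };

static const struct fake_fpu fake_fpus[] = {
  { "vfpv3-d16", 0x03 },
  { "vfpv3",     0x07 },
  { "neon",      0x0f },
};

static const char *
identify_fpu (unsigned fpubits)
{
  if (fpubits == 0)
    return "softvfp";          /* No FPU feature bits: soft-float.  */
  for (unsigned int i = 0; i < sizeof fake_fpus / sizeof fake_fpus[0]; i++)
    if (fake_fpus[i].isa_bits == fpubits)   /* Exact match only.  */
      return fake_fpus[i].name;
  return NULL;                 /* The real code treats this as unreachable.  */
}

int
main (void)
{
  printf ("%s\n", identify_fpu (0x07));   /* vfpv3 */
  printf ("%s\n", identify_fpu (0));      /* softvfp */
  return 0;
}
#endif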
30724 void
30725 arm_declare_function_name (FILE *stream, const char *name, tree decl)
30728 fprintf (stream, "\t.syntax unified\n");
30730 if (TARGET_THUMB)
30732 if (is_called_in_ARM_mode (decl)
30733 || (TARGET_THUMB1 && !TARGET_THUMB1_ONLY
30734 && cfun->is_thunk))
30735 fprintf (stream, "\t.code 32\n");
30736 else if (TARGET_THUMB1)
30737 fprintf (stream, "\t.code\t16\n\t.thumb_func\n");
30738 else
30739 fprintf (stream, "\t.thumb\n\t.thumb_func\n");
30741 else
30742 fprintf (stream, "\t.arm\n");
30744 asm_fprintf (asm_out_file, "\t.fpu %s\n",
30745 (TARGET_SOFT_FLOAT
30746 ? "softvfp"
30747 : arm_identify_fpu_from_isa (arm_active_target.isa)));
30749 if (TARGET_POKE_FUNCTION_NAME)
30750 arm_poke_function_name (stream, (const char *) name);
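/* A hedged standalone sketch of the directive selection above; the flags
   are plain booleans here (the real code reads the TARGET_* macros) and
   the Thumb-1 thunk special case is folded into CALLED_IN_ARM_MODE.
   Illustrative only, hence the "#if 0" guard.  */
#if 0
#include <stdbool.h>
#include <stdio.h>

static void
print_function_header (bool thumb, bool thumb1, bool called_in_arm_mode,
                       const char *fpu_name)
{
  printf ("\t.syntax unified\n");
  if (thumb)
    {
      if (called_in_arm_mode)
        printf ("\t.code 32\n");
      else if (thumb1)
        printf ("\t.code\t16\n\t.thumb_func\n");
      else
        printf ("\t.thumb\n\t.thumb_func\n");
    }
  else
    printf ("\t.arm\n");
  printf ("\t.fpu %s\n", fpu_name);
}

int
main (void)
{
  print_function_header (true, false, false, "vfpv3-d16"); /* Thumb-2 */
  print_function_header (false, false, false, "softvfp");  /* ARM, soft-float */
  return 0;
}
#endif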
30753 /* If MEM has an address of the form [base+offset], extract the two
30754 parts of the address and store them in BASE and OFFSET; otherwise
30755 return false after clearing BASE and OFFSET. */
30757 static bool
30758 extract_base_offset_in_addr (rtx mem, rtx *base, rtx *offset)
30760 rtx addr;
30762 gcc_assert (MEM_P (mem));
30764 addr = XEXP (mem, 0);
30766 /* Strip off const from addresses like (const (addr)). */
30767 if (GET_CODE (addr) == CONST)
30768 addr = XEXP (addr, 0);
30770 if (GET_CODE (addr) == REG)
30772 *base = addr;
30773 *offset = const0_rtx;
30774 return true;
30777 if (GET_CODE (addr) == PLUS
30778 && GET_CODE (XEXP (addr, 0)) == REG
30779 && CONST_INT_P (XEXP (addr, 1)))
30781 *base = XEXP (addr, 0);
30782 *offset = XEXP (addr, 1);
30783 return true;
30786 *base = NULL_RTX;
30787 *offset = NULL_RTX;
30789 return false;
30792 /* If INSN is a load or store whose address has the form [base+offset],
30793 extract the two parts and store them in BASE and OFFSET.  Set IS_LOAD
30794 to TRUE if it is a load.  Return TRUE if INSN is such an instruction,
30795 otherwise return FALSE. */
30797 static bool
30798 fusion_load_store (rtx_insn *insn, rtx *base, rtx *offset, bool *is_load)
30800 rtx x, dest, src;
30802 gcc_assert (INSN_P (insn));
30803 x = PATTERN (insn);
30804 if (GET_CODE (x) != SET)
30805 return false;
30807 src = SET_SRC (x);
30808 dest = SET_DEST (x);
30809 if (GET_CODE (src) == REG && GET_CODE (dest) == MEM)
30811 *is_load = false;
30812 extract_base_offset_in_addr (dest, base, offset);
30814 else if (GET_CODE (src) == MEM && GET_CODE (dest) == REG)
30816 *is_load = true;
30817 extract_base_offset_in_addr (src, base, offset);
30819 else
30820 return false;
30822 return (*base != NULL_RTX && *offset != NULL_RTX);
30825 /* Implement the TARGET_SCHED_FUSION_PRIORITY hook.
30827 Currently we only support fusing ldr and str instructions, so FUSION_PRI
30828 and PRI are only calculated for these instructions.  For other instructions,
30829 FUSION_PRI and PRI are simply set to MAX_PRI.  In the future, other kinds of
30830 instruction fusion can be supported by returning different priorities.
30832 It's important that irrelevant instructions get the largest FUSION_PRI. */
30834 static void
30835 arm_sched_fusion_priority (rtx_insn *insn, int max_pri,
30836 int *fusion_pri, int *pri)
30838 int tmp, off_val;
30839 bool is_load;
30840 rtx base, offset;
30842 gcc_assert (INSN_P (insn));
30844 tmp = max_pri - 1;
30845 if (!fusion_load_store (insn, &base, &offset, &is_load))
30847 *pri = tmp;
30848 *fusion_pri = tmp;
30849 return;
30852 /* Load goes first. */
30853 if (is_load)
30854 *fusion_pri = tmp - 1;
30855 else
30856 *fusion_pri = tmp - 2;
30858 tmp /= 2;
30860 /* INSN with smaller base register goes first. */
30861 tmp -= ((REGNO (base) & 0xff) << 20);
30863 /* INSN with smaller offset goes first. */
30864 off_val = (int)(INTVAL (offset));
30865 if (off_val >= 0)
30866 tmp -= (off_val & 0xfffff);
30867 else
30868 tmp += ((- off_val) & 0xfffff);
30870 *pri = tmp;
30871 return;
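/* A rough standalone sketch of the priority arithmetic above for two loads
   from the same base register (an address of the form [base+offset]): the
   smaller offset yields the larger PRI, matching the "smaller offset goes
   first" rule.  sched_pri_for is a made-up name; guarded with "#if 0" as
   it is illustrative only.  */
#if 0
#include <stdio.h>

static int
sched_pri_for (int max_pri, int base_regno, long offset)
{
  /* Mirrors the arithmetic above: a smaller base register number and a
     smaller offset both raise the resulting priority.  */
  int tmp = (max_pri - 1) / 2;
  tmp -= (base_regno & 0xff) << 20;
  if (offset >= 0)
    tmp -= (int) (offset & 0xfffff);
  else
    tmp += (int) ((-offset) & 0xfffff);
  return tmp;
}

int
main (void)
{
  int max_pri = 0x40000000;
  printf ("ldr r0, [r4, #4] -> pri %d\n", sched_pri_for (max_pri, 4, 4));
  printf ("ldr r1, [r4, #8] -> pri %d\n", sched_pri_for (max_pri, 4, 8));
  return 0;
}
#endif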
30875 /* Construct and return a PARALLEL RTX vector with elements numbering the
30876 lanes of either the high (HIGH == TRUE) or low (HIGH == FALSE) half of
30877 the vector - from the perspective of the architecture. This does not
30878 line up with GCC's perspective on lane numbers, so we end up with
30879 different masks depending on the target's endianness.  The diagram
30880 below may help.  We must draw the distinction when building masks
30881 that select one half of the vector.  An instruction selecting
30882 architectural low lanes for a big-endian target must be described using
30883 a mask selecting GCC high lanes.
30885 Big-Endian Little-Endian
30887 GCC 0 1 2 3 3 2 1 0
30888 | x | x | x | x | | x | x | x | x |
30889 Architecture 3 2 1 0 3 2 1 0
30891 Low Mask: { 2, 3 } { 0, 1 }
30892 High Mask: { 0, 1 } { 2, 3 } */
30896 rtx arm_simd_vect_par_cnst_half (machine_mode mode, bool high)
30898 int nunits = GET_MODE_NUNITS (mode);
30899 rtvec v = rtvec_alloc (nunits / 2);
30900 int high_base = nunits / 2;
30901 int low_base = 0;
30902 int base;
30903 rtx t1;
30904 int i;
30906 if (BYTES_BIG_ENDIAN)
30907 base = high ? low_base : high_base;
30908 else
30909 base = high ? high_base : low_base;
30911 for (i = 0; i < nunits / 2; i++)
30912 RTVEC_ELT (v, i) = GEN_INT (base + i);
30914 t1 = gen_rtx_PARALLEL (mode, v);
30915 return t1;
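/* A minimal standalone sketch of the lane-index selection above for a
   4-element vector: the architectural "low" half maps to GCC lanes {2, 3}
   on big-endian and {0, 1} on little-endian, as in the diagram.
   print_half_mask is a made-up name; kept under "#if 0" since it is
   illustrative only.  */
#if 0
#include <stdbool.h>
#include <stdio.h>

static void
print_half_mask (int nunits, bool high, bool bytes_big_endian)
{
  int high_base = nunits / 2, low_base = 0;
  int base = bytes_big_endian
             ? (high ? low_base : high_base)
             : (high ? high_base : low_base);

  printf ("{");
  for (int i = 0; i < nunits / 2; i++)
    printf (" %d", base + i);
  printf (" }\n");
}

int
main (void)
{
  print_half_mask (4, false, true);   /* big-endian low:     { 2 3 } */
  print_half_mask (4, false, false);  /* little-endian low:  { 0 1 } */
  print_half_mask (4, true, true);    /* big-endian high:    { 0 1 } */
  print_half_mask (4, true, false);   /* little-endian high: { 2 3 } */
  return 0;
}
#endif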
30918 /* Check OP for validity as a PARALLEL RTX vector with elements
30919 numbering the lanes of either the high (HIGH == TRUE) or low lanes,
30920 from the perspective of the architecture. See the diagram above
30921 arm_simd_vect_par_cnst_half for more details. */
30923 bool
30924 arm_simd_check_vect_par_cnst_half_p (rtx op, machine_mode mode,
30925 bool high)
30927 rtx ideal = arm_simd_vect_par_cnst_half (mode, high);
30928 HOST_WIDE_INT count_op = XVECLEN (op, 0);
30929 HOST_WIDE_INT count_ideal = XVECLEN (ideal, 0);
30930 int i = 0;
30932 if (!VECTOR_MODE_P (mode))
30933 return false;
30935 if (count_op != count_ideal)
30936 return false;
30938 for (i = 0; i < count_ideal; i++)
30940 rtx elt_op = XVECEXP (op, 0, i);
30941 rtx elt_ideal = XVECEXP (ideal, 0, i);
30943 if (!CONST_INT_P (elt_op)
30944 || INTVAL (elt_ideal) != INTVAL (elt_op))
30945 return false;
30947 return true;
30950 /* Can output mi_thunk for all cases except for non-zero vcall_offset
30951 in Thumb1. */
30952 static bool
30953 arm_can_output_mi_thunk (const_tree, HOST_WIDE_INT, HOST_WIDE_INT vcall_offset,
30954 const_tree)
30956 /* For now, we punt and do not handle this for TARGET_THUMB1. */
30957 if (vcall_offset && TARGET_THUMB1)
30958 return false;
30960 /* Otherwise ok. */
30961 return true;
30964 /* Generate RTL for a conditional branch with rtx comparison CODE in
30965 mode CC_MODE. The destination of the unlikely conditional branch
30966 is LABEL_REF. */
30968 void
30969 arm_gen_unlikely_cbranch (enum rtx_code code, machine_mode cc_mode,
30970 rtx label_ref)
30972 rtx x;
30973 x = gen_rtx_fmt_ee (code, VOIDmode,
30974 gen_rtx_REG (cc_mode, CC_REGNUM),
30975 const0_rtx);
30977 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
30978 gen_rtx_LABEL_REF (VOIDmode, label_ref),
30979 pc_rtx);
30980 emit_unlikely_jump (gen_rtx_SET (pc_rtx, x));
30983 /* Implement the TARGET_ASM_ELF_FLAGS_NUMERIC hook.
30985 For pure-code sections there is no letter code for this attribute, so
30986 output all the section flags numerically when this is needed. */
30988 static bool
30989 arm_asm_elf_flags_numeric (unsigned int flags, unsigned int *num)
30992 if (flags & SECTION_ARM_PURECODE)
30994 *num = 0x20000000;
30996 if (!(flags & SECTION_DEBUG))
30997 *num |= 0x2;
30998 if (flags & SECTION_EXCLUDE)
30999 *num |= 0x80000000;
31000 if (flags & SECTION_WRITE)
31001 *num |= 0x1;
31002 if (flags & SECTION_CODE)
31003 *num |= 0x4;
31004 if (flags & SECTION_MERGE)
31005 *num |= 0x10;
31006 if (flags & SECTION_STRINGS)
31007 *num |= 0x20;
31008 if (flags & SECTION_TLS)
31009 *num |= 0x400;
31010 if (HAVE_COMDAT_GROUP && (flags & SECTION_LINKONCE))
31011 *num |= 0x200;
31013 return true;
31016 return false;
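/* A rough standalone sketch of the numeric encoding above.  The IN_* input
   bits are made-up stand-ins for the SECTION_* flags; the output values are
   the ELF SHF_* numbers used above (SHF_WRITE 0x1, SHF_ALLOC 0x2,
   SHF_EXECINSTR 0x4, plus the ARM-specific SHF_ARM_PURECODE 0x20000000).
   Illustrative only, hence the "#if 0" guard.  */
#if 0
#include <stdio.h>

#define IN_DEBUG (1u << 0)
#define IN_CODE  (1u << 1)
#define IN_WRITE (1u << 2)

static unsigned
purecode_numeric_flags (unsigned in)
{
  unsigned num = 0x20000000;   /* SHF_ARM_PURECODE */
  if (!(in & IN_DEBUG))
    num |= 0x2;                /* SHF_ALLOC */
  if (in & IN_WRITE)
    num |= 0x1;                /* SHF_WRITE */
  if (in & IN_CODE)
    num |= 0x4;                /* SHF_EXECINSTR */
  return num;
}

int
main (void)
{
  /* A typical pure-code text section: allocatable and executable.  */
  printf ("0x%x\n", purecode_numeric_flags (IN_CODE));   /* 0x20000006 */
  return 0;
}
#endif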
31019 /* Implement the TARGET_ASM_FUNCTION_SECTION hook.
31021 If pure-code is passed as an option, make sure all functions are in
31022 sections that have the SHF_ARM_PURECODE attribute. */
31024 static section *
31025 arm_function_section (tree decl, enum node_frequency freq,
31026 bool startup, bool exit)
31028 const char * section_name;
31029 section * sec;
31031 if (!decl || TREE_CODE (decl) != FUNCTION_DECL)
31032 return default_function_section (decl, freq, startup, exit);
31034 if (!target_pure_code)
31035 return default_function_section (decl, freq, startup, exit);
31038 section_name = DECL_SECTION_NAME (decl);
31040 /* If a function is not in a named section then it falls under the 'default'
31041 text section, also known as '.text'. We can preserve previous behavior as
31042 the default text section already has the SHF_ARM_PURECODE section
31043 attribute. */
31044 if (!section_name)
31046 section *default_sec = default_function_section (decl, freq, startup,
31047 exit);
31049 /* If default_sec is not null, then it must be a special section such as
31050 .text.startup.  We set the pure-code attribute and return the
31051 same section to preserve existing behavior. */
31052 if (default_sec)
31053 default_sec->common.flags |= SECTION_ARM_PURECODE;
31054 return default_sec;
31057 /* Otherwise look whether a section has already been created with
31058 'section_name'. */
31059 sec = get_named_section (decl, section_name, 0);
31060 if (!sec)
31061 /* If that is not the case, passing NULL as the section's name to
31062 'get_named_section' will create a section with the declaration's
31063 section name. */
31064 sec = get_named_section (decl, NULL, 0);
31066 /* Set the SHF_ARM_PURECODE attribute. */
31067 sec->common.flags |= SECTION_ARM_PURECODE;
31069 return sec;
31072 /* Implements the TARGET_SECTION_TYPE_FLAGS hook.
31074 If DECL is a function declaration and pure-code is passed as an option
31075 then add the SHF_ARM_PURECODE attribute to the section flags.  NAME is the
31076 section's name and RELOC indicates whether the declaration's initializer may
31077 contain runtime relocations. */
31079 static unsigned int
31080 arm_elf_section_type_flags (tree decl, const char *name, int reloc)
31082 unsigned int flags = default_section_type_flags (decl, name, reloc);
31084 if (decl && TREE_CODE (decl) == FUNCTION_DECL && target_pure_code)
31085 flags |= SECTION_ARM_PURECODE;
31087 return flags;
31090 /* Generate call to __aeabi_[mode]divmod (op0, op1). */
31092 static void
31093 arm_expand_divmod_libfunc (rtx libfunc, machine_mode mode,
31094 rtx op0, rtx op1,
31095 rtx *quot_p, rtx *rem_p)
31097 if (mode == SImode)
31098 gcc_assert (!TARGET_IDIV);
31100 scalar_int_mode libval_mode
31101 = smallest_int_mode_for_size (2 * GET_MODE_BITSIZE (mode));
31103 rtx libval = emit_library_call_value (libfunc, NULL_RTX, LCT_CONST,
31104 libval_mode,
31105 op0, GET_MODE (op0),
31106 op1, GET_MODE (op1));
31108 rtx quotient = simplify_gen_subreg (mode, libval, libval_mode, 0);
31109 rtx remainder = simplify_gen_subreg (mode, libval, libval_mode,
31110 GET_MODE_SIZE (mode));
31112 gcc_assert (quotient);
31113 gcc_assert (remainder);
31115 *quot_p = quotient;
31116 *rem_p = remainder;
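/* A hedged standalone sketch of the "one call returns both results" idea
   above, using the C library's div () in place of __aeabi_idivmod; on AEABI
   targets the latter likewise hands back the quotient/remainder pair (in
   r0/r1), which the code above splits out of the double-width libcall value
   with two subregs.  Kept under "#if 0" since it is illustrative only.  */
#if 0
#include <stdio.h>
#include <stdlib.h>

int
main (void)
{
  div_t qr = div (-7, 3);
  printf ("quot %d rem %d\n", qr.quot, qr.rem);   /* quot -2 rem -1 */
  return 0;
}
#endif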
31119 /* This function checks for the availability of the coprocessor builtin passed
31120 in BUILTIN for the current target. Returns true if it is available and
31121 false otherwise.  If a BUILTIN is passed for which this function has not
31122 been implemented, it will trigger gcc_unreachable (an internal error). */
31124 bool
31125 arm_coproc_builtin_available (enum unspecv builtin)
31127 /* None of these builtins are available in Thumb mode if the target only
31128 supports Thumb-1. */
31129 if (TARGET_THUMB1)
31130 return false;
31132 switch (builtin)
31134 case VUNSPEC_CDP:
31135 case VUNSPEC_LDC:
31136 case VUNSPEC_LDCL:
31137 case VUNSPEC_STC:
31138 case VUNSPEC_STCL:
31139 case VUNSPEC_MCR:
31140 case VUNSPEC_MRC:
31141 if (arm_arch4)
31142 return true;
31143 break;
31144 case VUNSPEC_CDP2:
31145 case VUNSPEC_LDC2:
31146 case VUNSPEC_LDC2L:
31147 case VUNSPEC_STC2:
31148 case VUNSPEC_STC2L:
31149 case VUNSPEC_MCR2:
31150 case VUNSPEC_MRC2:
31151 /* Only present in ARMv5*, ARMv6 (but not ARMv6-M), ARMv7* and
31152 ARMv8-{A,M}. */
31153 if (arm_arch5)
31154 return true;
31155 break;
31156 case VUNSPEC_MCRR:
31157 case VUNSPEC_MRRC:
31158 /* Only present in ARMv5TE, ARMv6 (but not ARMv6-M), ARMv7* and
31159 ARMv8-{A,M}. */
31160 if (arm_arch6 || arm_arch5te)
31161 return true;
31162 break;
31163 case VUNSPEC_MCRR2:
31164 case VUNSPEC_MRRC2:
31165 if (arm_arch6)
31166 return true;
31167 break;
31168 default:
31169 gcc_unreachable ();
31171 return false;
31174 /* This function returns true if OP is a valid memory operand for the ldc and
31175 stc coprocessor instructions and false otherwise. */
31177 bool
31178 arm_coproc_ldc_stc_legitimate_address (rtx op)
31180 HOST_WIDE_INT range;
31181 /* Has to be a memory operand. */
31182 if (!MEM_P (op))
31183 return false;
31185 op = XEXP (op, 0);
31187 /* We accept registers. */
31188 if (REG_P (op))
31189 return true;
31191 switch (GET_CODE (op))
31193 case PLUS:
31195 /* Or registers with an offset. */
31196 if (!REG_P (XEXP (op, 0)))
31197 return false;
31199 op = XEXP (op, 1);
31201 /* The offset must be an immediate though. */
31202 if (!CONST_INT_P (op))
31203 return false;
31205 range = INTVAL (op);
31207 /* Within the range of [-1020,1020]. */
31208 if (!IN_RANGE (range, -1020, 1020))
31209 return false;
31211 /* And a multiple of 4. */
31212 return (range % 4) == 0;
31214 case PRE_INC:
31215 case POST_INC:
31216 case PRE_DEC:
31217 case POST_DEC:
31218 return REG_P (XEXP (op, 0));
31219 default:
31220 gcc_unreachable ();
31222 return false;
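/* A minimal standalone sketch of the immediate-offset rule above: an
   LDC/STC offset must lie in [-1020, 1020] and be a multiple of 4.
   ldc_stc_offset_ok is a made-up name; guarded with "#if 0" as it is
   illustrative only.  */
#if 0
#include <stdbool.h>
#include <stdio.h>

static bool
ldc_stc_offset_ok (long off)
{
  return off >= -1020 && off <= 1020 && (off % 4) == 0;
}

int
main (void)
{
  printf ("%d %d %d %d\n",
          ldc_stc_offset_ok (0),      /* 1 */
          ldc_stc_offset_ok (1020),   /* 1 */
          ldc_stc_offset_ok (1021),   /* 0: out of range */
          ldc_stc_offset_ok (-6));    /* 0: not a multiple of 4 */
  return 0;
}
#endif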
31225 #if CHECKING_P
31226 namespace selftest {
31228 /* Scan the static data tables generated by parsecpu.awk looking for
31229 potential issues with the data. We primarily check for
31230 inconsistencies in the option extensions at present (extensions
31231 that duplicate others but aren't marked as aliases). Furthermore,
31232 for correct canonicalization, later options must never be a subset
31233 of an earlier option. Any extension should also only specify other
31234 feature bits and never an architecture bit. The architecture is inferred
31235 from the declaration of the extension. */
31236 static void
31237 arm_test_cpu_arch_data (void)
31239 const arch_option *arch;
31240 const cpu_option *cpu;
31241 auto_sbitmap target_isa (isa_num_bits);
31242 auto_sbitmap isa1 (isa_num_bits);
31243 auto_sbitmap isa2 (isa_num_bits);
31245 for (arch = all_architectures; arch->common.name != NULL; ++arch)
31247 const cpu_arch_extension *ext1, *ext2;
31249 if (arch->common.extensions == NULL)
31250 continue;
31252 arm_initialize_isa (target_isa, arch->common.isa_bits);
31254 for (ext1 = arch->common.extensions; ext1->name != NULL; ++ext1)
31256 if (ext1->alias)
31257 continue;
31259 arm_initialize_isa (isa1, ext1->isa_bits);
31260 for (ext2 = ext1 + 1; ext2->name != NULL; ++ext2)
31262 if (ext2->alias || ext1->remove != ext2->remove)
31263 continue;
31265 arm_initialize_isa (isa2, ext2->isa_bits);
31266 /* If the option is a subset of the parent option, it doesn't
31267 add anything and so isn't useful. */
31268 ASSERT_TRUE (!bitmap_subset_p (isa2, isa1));
31270 /* If the extension specifies any architectural bits then
31271 disallow it. Extensions should only specify feature bits. */
31272 ASSERT_TRUE (!bitmap_intersect_p (isa2, target_isa));
31277 for (cpu = all_cores; cpu->common.name != NULL; ++cpu)
31279 const cpu_arch_extension *ext1, *ext2;
31281 if (cpu->common.extensions == NULL)
31282 continue;
31284 arm_initialize_isa (target_isa, cpu->common.isa_bits);
31286 for (ext1 = cpu->common.extensions; ext1->name != NULL; ++ext1)
31288 if (ext1->alias)
31289 continue;
31291 arm_initialize_isa (isa1, ext1->isa_bits);
31292 for (ext2 = ext1 + 1; ext2->name != NULL; ++ext2)
31294 if (ext2->alias || ext1->remove != ext2->remove)
31295 continue;
31297 arm_initialize_isa (isa2, ext2->isa_bits);
31298 /* If the option is a subset of the parent option, it doesn't
31299 add anything and so isn't useful. */
31300 ASSERT_TRUE (!bitmap_subset_p (isa2, isa1));
31302 /* If the extension specifies any architectural bits then
31303 disallow it. Extensions should only specify feature bits. */
31304 ASSERT_TRUE (!bitmap_intersect_p (isa2, target_isa));
31310 static void
31311 arm_run_selftests (void)
31313 arm_test_cpu_arch_data ();
31315 } /* Namespace selftest. */
31317 #undef TARGET_RUN_TARGET_SELFTESTS
31318 #define TARGET_RUN_TARGET_SELFTESTS selftest::arm_run_selftests
31319 #endif /* CHECKING_P */
31321 struct gcc_target targetm = TARGET_INITIALIZER;
31323 #include "gt-arm.h"