Turn FUNCTION_ARG_PADDING into a target hook
[official-gcc.git] / gcc / config / arm / arm.c
blob e31ab608dd346d7bfd4e90b6cc8c9d5c03636808
1 /* Output routines for GCC for ARM.
2 Copyright (C) 1991-2017 Free Software Foundation, Inc.
3 Contributed by Pieter `Tiggr' Schoenmakers (rcpieter@win.tue.nl)
4 and Martin Simmons (@harleqn.co.uk).
5 More major hacks by Richard Earnshaw (rearnsha@arm.com).
7 This file is part of GCC.
9 GCC is free software; you can redistribute it and/or modify it
10 under the terms of the GNU General Public License as published
11 by the Free Software Foundation; either version 3, or (at your
12 option) any later version.
14 GCC is distributed in the hope that it will be useful, but WITHOUT
15 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
16 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
17 License for more details.
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3. If not see
21 <http://www.gnu.org/licenses/>. */
23 #include "config.h"
24 #include "system.h"
25 #include "coretypes.h"
26 #include "backend.h"
27 #include "target.h"
28 #include "rtl.h"
29 #include "tree.h"
30 #include "memmodel.h"
31 #include "cfghooks.h"
32 #include "df.h"
33 #include "tm_p.h"
34 #include "stringpool.h"
35 #include "attribs.h"
36 #include "optabs.h"
37 #include "regs.h"
38 #include "emit-rtl.h"
39 #include "recog.h"
40 #include "cgraph.h"
41 #include "diagnostic-core.h"
42 #include "alias.h"
43 #include "fold-const.h"
44 #include "stor-layout.h"
45 #include "calls.h"
46 #include "varasm.h"
47 #include "output.h"
48 #include "insn-attr.h"
49 #include "flags.h"
50 #include "reload.h"
51 #include "explow.h"
52 #include "expr.h"
53 #include "cfgrtl.h"
54 #include "sched-int.h"
55 #include "common/common-target.h"
56 #include "langhooks.h"
57 #include "intl.h"
58 #include "libfuncs.h"
59 #include "params.h"
60 #include "opts.h"
61 #include "dumpfile.h"
62 #include "target-globals.h"
63 #include "builtins.h"
64 #include "tm-constrs.h"
65 #include "rtl-iter.h"
66 #include "optabs-libfuncs.h"
67 #include "gimplify.h"
68 #include "gimple.h"
69 #include "selftest.h"
71 /* This file should be included last. */
72 #include "target-def.h"
74 /* Forward definitions of types. */
75 typedef struct minipool_node Mnode;
76 typedef struct minipool_fixup Mfix;
78 void (*arm_lang_output_object_attributes_hook)(void);
80 struct four_ints
81 {
82 int i[4];
83 };
85 /* Forward function declarations. */
86 static bool arm_const_not_ok_for_debug_p (rtx);
87 static int arm_needs_doubleword_align (machine_mode, const_tree);
88 static int arm_compute_static_chain_stack_bytes (void);
89 static arm_stack_offsets *arm_get_frame_offsets (void);
90 static void arm_compute_frame_layout (void);
91 static void arm_add_gc_roots (void);
92 static int arm_gen_constant (enum rtx_code, machine_mode, rtx,
93 unsigned HOST_WIDE_INT, rtx, rtx, int, int);
94 static unsigned bit_count (unsigned long);
95 static unsigned bitmap_popcount (const sbitmap);
96 static int arm_address_register_rtx_p (rtx, int);
97 static int arm_legitimate_index_p (machine_mode, rtx, RTX_CODE, int);
98 static bool is_called_in_ARM_mode (tree);
99 static int thumb2_legitimate_index_p (machine_mode, rtx, int);
100 static int thumb1_base_register_rtx_p (rtx, machine_mode, int);
101 static rtx arm_legitimize_address (rtx, rtx, machine_mode);
102 static reg_class_t arm_preferred_reload_class (rtx, reg_class_t);
103 static rtx thumb_legitimize_address (rtx, rtx, machine_mode);
104 inline static int thumb1_index_register_rtx_p (rtx, int);
105 static int thumb_far_jump_used_p (void);
106 static bool thumb_force_lr_save (void);
107 static unsigned arm_size_return_regs (void);
108 static bool arm_assemble_integer (rtx, unsigned int, int);
109 static void arm_print_operand (FILE *, rtx, int);
110 static void arm_print_operand_address (FILE *, machine_mode, rtx);
111 static bool arm_print_operand_punct_valid_p (unsigned char code);
112 static const char *fp_const_from_val (REAL_VALUE_TYPE *);
113 static arm_cc get_arm_condition_code (rtx);
114 static bool arm_fixed_condition_code_regs (unsigned int *, unsigned int *);
115 static const char *output_multi_immediate (rtx *, const char *, const char *,
116 int, HOST_WIDE_INT);
117 static const char *shift_op (rtx, HOST_WIDE_INT *);
118 static struct machine_function *arm_init_machine_status (void);
119 static void thumb_exit (FILE *, int);
120 static HOST_WIDE_INT get_jump_table_size (rtx_jump_table_data *);
121 static Mnode *move_minipool_fix_forward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
122 static Mnode *add_minipool_forward_ref (Mfix *);
123 static Mnode *move_minipool_fix_backward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
124 static Mnode *add_minipool_backward_ref (Mfix *);
125 static void assign_minipool_offsets (Mfix *);
126 static void arm_print_value (FILE *, rtx);
127 static void dump_minipool (rtx_insn *);
128 static int arm_barrier_cost (rtx_insn *);
129 static Mfix *create_fix_barrier (Mfix *, HOST_WIDE_INT);
130 static void push_minipool_barrier (rtx_insn *, HOST_WIDE_INT);
131 static void push_minipool_fix (rtx_insn *, HOST_WIDE_INT, rtx *,
132 machine_mode, rtx);
133 static void arm_reorg (void);
134 static void note_invalid_constants (rtx_insn *, HOST_WIDE_INT, int);
135 static unsigned long arm_compute_save_reg0_reg12_mask (void);
136 static unsigned long arm_compute_save_core_reg_mask (void);
137 static unsigned long arm_isr_value (tree);
138 static unsigned long arm_compute_func_type (void);
139 static tree arm_handle_fndecl_attribute (tree *, tree, tree, int, bool *);
140 static tree arm_handle_pcs_attribute (tree *, tree, tree, int, bool *);
141 static tree arm_handle_isr_attribute (tree *, tree, tree, int, bool *);
142 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
143 static tree arm_handle_notshared_attribute (tree *, tree, tree, int, bool *);
144 #endif
145 static tree arm_handle_cmse_nonsecure_entry (tree *, tree, tree, int, bool *);
146 static tree arm_handle_cmse_nonsecure_call (tree *, tree, tree, int, bool *);
147 static void arm_output_function_epilogue (FILE *);
148 static void arm_output_function_prologue (FILE *);
149 static int arm_comp_type_attributes (const_tree, const_tree);
150 static void arm_set_default_type_attributes (tree);
151 static int arm_adjust_cost (rtx_insn *, int, rtx_insn *, int, unsigned int);
152 static int arm_sched_reorder (FILE *, int, rtx_insn **, int *, int);
153 static int optimal_immediate_sequence (enum rtx_code code,
154 unsigned HOST_WIDE_INT val,
155 struct four_ints *return_sequence);
156 static int optimal_immediate_sequence_1 (enum rtx_code code,
157 unsigned HOST_WIDE_INT val,
158 struct four_ints *return_sequence,
159 int i);
160 static int arm_get_strip_length (int);
161 static bool arm_function_ok_for_sibcall (tree, tree);
162 static machine_mode arm_promote_function_mode (const_tree,
163 machine_mode, int *,
164 const_tree, int);
165 static bool arm_return_in_memory (const_tree, const_tree);
166 static rtx arm_function_value (const_tree, const_tree, bool);
167 static rtx arm_libcall_value_1 (machine_mode);
168 static rtx arm_libcall_value (machine_mode, const_rtx);
169 static bool arm_function_value_regno_p (const unsigned int);
170 static void arm_internal_label (FILE *, const char *, unsigned long);
171 static void arm_output_mi_thunk (FILE *, tree, HOST_WIDE_INT, HOST_WIDE_INT,
172 tree);
173 static bool arm_have_conditional_execution (void);
174 static bool arm_cannot_force_const_mem (machine_mode, rtx);
175 static bool arm_legitimate_constant_p (machine_mode, rtx);
176 static bool arm_rtx_costs (rtx, machine_mode, int, int, int *, bool);
177 static int arm_address_cost (rtx, machine_mode, addr_space_t, bool);
178 static int arm_register_move_cost (machine_mode, reg_class_t, reg_class_t);
179 static int arm_memory_move_cost (machine_mode, reg_class_t, bool);
180 static void emit_constant_insn (rtx cond, rtx pattern);
181 static rtx_insn *emit_set_insn (rtx, rtx);
182 static rtx emit_multi_reg_push (unsigned long, unsigned long);
183 static int arm_arg_partial_bytes (cumulative_args_t, machine_mode,
184 tree, bool);
185 static rtx arm_function_arg (cumulative_args_t, machine_mode,
186 const_tree, bool);
187 static void arm_function_arg_advance (cumulative_args_t, machine_mode,
188 const_tree, bool);
189 static pad_direction arm_function_arg_padding (machine_mode, const_tree);
190 static unsigned int arm_function_arg_boundary (machine_mode, const_tree);
191 static rtx aapcs_allocate_return_reg (machine_mode, const_tree,
192 const_tree);
193 static rtx aapcs_libcall_value (machine_mode);
194 static int aapcs_select_return_coproc (const_tree, const_tree);
196 #ifdef OBJECT_FORMAT_ELF
197 static void arm_elf_asm_constructor (rtx, int) ATTRIBUTE_UNUSED;
198 static void arm_elf_asm_destructor (rtx, int) ATTRIBUTE_UNUSED;
199 #endif
200 #ifndef ARM_PE
201 static void arm_encode_section_info (tree, rtx, int);
202 #endif
204 static void arm_file_end (void);
205 static void arm_file_start (void);
206 static void arm_insert_attributes (tree, tree *);
208 static void arm_setup_incoming_varargs (cumulative_args_t, machine_mode,
209 tree, int *, int);
210 static bool arm_pass_by_reference (cumulative_args_t,
211 machine_mode, const_tree, bool);
212 static bool arm_promote_prototypes (const_tree);
213 static bool arm_default_short_enums (void);
214 static bool arm_align_anon_bitfield (void);
215 static bool arm_return_in_msb (const_tree);
216 static bool arm_must_pass_in_stack (machine_mode, const_tree);
217 static bool arm_return_in_memory (const_tree, const_tree);
218 #if ARM_UNWIND_INFO
219 static void arm_unwind_emit (FILE *, rtx_insn *);
220 static bool arm_output_ttype (rtx);
221 static void arm_asm_emit_except_personality (rtx);
222 #endif
223 static void arm_asm_init_sections (void);
224 static rtx arm_dwarf_register_span (rtx);
226 static tree arm_cxx_guard_type (void);
227 static bool arm_cxx_guard_mask_bit (void);
228 static tree arm_get_cookie_size (tree);
229 static bool arm_cookie_has_size (void);
230 static bool arm_cxx_cdtor_returns_this (void);
231 static bool arm_cxx_key_method_may_be_inline (void);
232 static void arm_cxx_determine_class_data_visibility (tree);
233 static bool arm_cxx_class_data_always_comdat (void);
234 static bool arm_cxx_use_aeabi_atexit (void);
235 static void arm_init_libfuncs (void);
236 static tree arm_build_builtin_va_list (void);
237 static void arm_expand_builtin_va_start (tree, rtx);
238 static tree arm_gimplify_va_arg_expr (tree, tree, gimple_seq *, gimple_seq *);
239 static void arm_option_override (void);
240 static void arm_option_save (struct cl_target_option *, struct gcc_options *);
241 static void arm_option_restore (struct gcc_options *,
242 struct cl_target_option *);
243 static void arm_override_options_after_change (void);
244 static void arm_option_print (FILE *, int, struct cl_target_option *);
245 static void arm_set_current_function (tree);
246 static bool arm_can_inline_p (tree, tree);
247 static void arm_relayout_function (tree);
248 static bool arm_valid_target_attribute_p (tree, tree, tree, int);
249 static unsigned HOST_WIDE_INT arm_shift_truncation_mask (machine_mode);
250 static bool arm_sched_can_speculate_insn (rtx_insn *);
251 static bool arm_macro_fusion_p (void);
252 static bool arm_cannot_copy_insn_p (rtx_insn *);
253 static int arm_issue_rate (void);
254 static int arm_first_cycle_multipass_dfa_lookahead (void);
255 static int arm_first_cycle_multipass_dfa_lookahead_guard (rtx_insn *, int);
256 static void arm_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
257 static bool arm_output_addr_const_extra (FILE *, rtx);
258 static bool arm_allocate_stack_slots_for_args (void);
259 static bool arm_warn_func_return (tree);
260 static tree arm_promoted_type (const_tree t);
261 static bool arm_scalar_mode_supported_p (scalar_mode);
262 static bool arm_frame_pointer_required (void);
263 static bool arm_can_eliminate (const int, const int);
264 static void arm_asm_trampoline_template (FILE *);
265 static void arm_trampoline_init (rtx, tree, rtx);
266 static rtx arm_trampoline_adjust_address (rtx);
267 static rtx_insn *arm_pic_static_addr (rtx orig, rtx reg);
268 static bool cortex_a9_sched_adjust_cost (rtx_insn *, int, rtx_insn *, int *);
269 static bool xscale_sched_adjust_cost (rtx_insn *, int, rtx_insn *, int *);
270 static bool fa726te_sched_adjust_cost (rtx_insn *, int, rtx_insn *, int *);
271 static bool arm_array_mode_supported_p (machine_mode,
272 unsigned HOST_WIDE_INT);
273 static machine_mode arm_preferred_simd_mode (scalar_mode);
274 static bool arm_class_likely_spilled_p (reg_class_t);
275 static HOST_WIDE_INT arm_vector_alignment (const_tree type);
276 static bool arm_vector_alignment_reachable (const_tree type, bool is_packed);
277 static bool arm_builtin_support_vector_misalignment (machine_mode mode,
278 const_tree type,
279 int misalignment,
280 bool is_packed);
281 static void arm_conditional_register_usage (void);
282 static enum flt_eval_method arm_excess_precision (enum excess_precision_type);
283 static reg_class_t arm_preferred_rename_class (reg_class_t rclass);
284 static unsigned int arm_autovectorize_vector_sizes (void);
285 static int arm_default_branch_cost (bool, bool);
286 static int arm_cortex_a5_branch_cost (bool, bool);
287 static int arm_cortex_m_branch_cost (bool, bool);
288 static int arm_cortex_m7_branch_cost (bool, bool);
290 static bool arm_vectorize_vec_perm_const_ok (machine_mode vmode,
291 const unsigned char *sel);
293 static bool aarch_macro_fusion_pair_p (rtx_insn*, rtx_insn*);
295 static int arm_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
296 tree vectype,
297 int misalign ATTRIBUTE_UNUSED);
298 static unsigned arm_add_stmt_cost (void *data, int count,
299 enum vect_cost_for_stmt kind,
300 struct _stmt_vec_info *stmt_info,
301 int misalign,
302 enum vect_cost_model_location where);
304 static void arm_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
305 bool op0_preserve_value);
306 static unsigned HOST_WIDE_INT arm_asan_shadow_offset (void);
308 static void arm_sched_fusion_priority (rtx_insn *, int, int *, int*);
309 static bool arm_can_output_mi_thunk (const_tree, HOST_WIDE_INT, HOST_WIDE_INT,
310 const_tree);
311 static section *arm_function_section (tree, enum node_frequency, bool, bool);
312 static bool arm_asm_elf_flags_numeric (unsigned int flags, unsigned int *num);
313 static unsigned int arm_elf_section_type_flags (tree decl, const char *name,
314 int reloc);
315 static void arm_expand_divmod_libfunc (rtx, machine_mode, rtx, rtx, rtx *, rtx *);
316 static opt_scalar_float_mode arm_floatn_mode (int, bool);
317 static bool arm_hard_regno_mode_ok (unsigned int, machine_mode);
318 static bool arm_modes_tieable_p (machine_mode, machine_mode);
320 /* Table of machine attributes. */
321 static const struct attribute_spec arm_attribute_table[] =
322 {
323 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
324 affects_type_identity } */
325 /* Function calls made to this symbol must be done indirectly, because
326 it may lie outside of the 26 bit addressing range of a normal function
327 call. */
328 { "long_call", 0, 0, false, true, true, NULL, false },
329 /* Whereas these functions are always known to reside within the 26 bit
330 addressing range. */
331 { "short_call", 0, 0, false, true, true, NULL, false },
332 /* Specify the procedure call conventions for a function. */
333 { "pcs", 1, 1, false, true, true, arm_handle_pcs_attribute,
334 false },
335 /* Interrupt Service Routines have special prologue and epilogue requirements. */
336 { "isr", 0, 1, false, false, false, arm_handle_isr_attribute,
337 false },
338 { "interrupt", 0, 1, false, false, false, arm_handle_isr_attribute,
339 false },
340 { "naked", 0, 0, true, false, false, arm_handle_fndecl_attribute,
341 false },
342 #ifdef ARM_PE
343 /* ARM/PE has three new attributes:
344 interfacearm - ?
345 dllexport - for exporting a function/variable that will live in a dll
346 dllimport - for importing a function/variable from a dll
348 Microsoft allows multiple declspecs in one __declspec, separating
349 them with spaces. We do NOT support this. Instead, use __declspec
350 multiple times.
351 */
352 { "dllimport", 0, 0, true, false, false, NULL, false },
353 { "dllexport", 0, 0, true, false, false, NULL, false },
354 { "interfacearm", 0, 0, true, false, false, arm_handle_fndecl_attribute,
355 false },
356 #elif TARGET_DLLIMPORT_DECL_ATTRIBUTES
357 { "dllimport", 0, 0, false, false, false, handle_dll_attribute, false },
358 { "dllexport", 0, 0, false, false, false, handle_dll_attribute, false },
359 { "notshared", 0, 0, false, true, false, arm_handle_notshared_attribute,
360 false },
361 #endif
362 /* ARMv8-M Security Extensions support. */
363 { "cmse_nonsecure_entry", 0, 0, true, false, false,
364 arm_handle_cmse_nonsecure_entry, false },
365 { "cmse_nonsecure_call", 0, 0, true, false, false,
366 arm_handle_cmse_nonsecure_call, true },
367 { NULL, 0, 0, false, false, false, NULL, false }
368 };
370 /* Initialize the GCC target structure. */
371 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
372 #undef TARGET_MERGE_DECL_ATTRIBUTES
373 #define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
374 #endif
376 #undef TARGET_LEGITIMIZE_ADDRESS
377 #define TARGET_LEGITIMIZE_ADDRESS arm_legitimize_address
379 #undef TARGET_ATTRIBUTE_TABLE
380 #define TARGET_ATTRIBUTE_TABLE arm_attribute_table
382 #undef TARGET_INSERT_ATTRIBUTES
383 #define TARGET_INSERT_ATTRIBUTES arm_insert_attributes
385 #undef TARGET_ASM_FILE_START
386 #define TARGET_ASM_FILE_START arm_file_start
387 #undef TARGET_ASM_FILE_END
388 #define TARGET_ASM_FILE_END arm_file_end
390 #undef TARGET_ASM_ALIGNED_SI_OP
391 #define TARGET_ASM_ALIGNED_SI_OP NULL
392 #undef TARGET_ASM_INTEGER
393 #define TARGET_ASM_INTEGER arm_assemble_integer
395 #undef TARGET_PRINT_OPERAND
396 #define TARGET_PRINT_OPERAND arm_print_operand
397 #undef TARGET_PRINT_OPERAND_ADDRESS
398 #define TARGET_PRINT_OPERAND_ADDRESS arm_print_operand_address
399 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
400 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P arm_print_operand_punct_valid_p
402 #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
403 #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA arm_output_addr_const_extra
405 #undef TARGET_ASM_FUNCTION_PROLOGUE
406 #define TARGET_ASM_FUNCTION_PROLOGUE arm_output_function_prologue
408 #undef TARGET_ASM_FUNCTION_EPILOGUE
409 #define TARGET_ASM_FUNCTION_EPILOGUE arm_output_function_epilogue
411 #undef TARGET_CAN_INLINE_P
412 #define TARGET_CAN_INLINE_P arm_can_inline_p
414 #undef TARGET_RELAYOUT_FUNCTION
415 #define TARGET_RELAYOUT_FUNCTION arm_relayout_function
417 #undef TARGET_OPTION_OVERRIDE
418 #define TARGET_OPTION_OVERRIDE arm_option_override
420 #undef TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE
421 #define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE arm_override_options_after_change
423 #undef TARGET_OPTION_SAVE
424 #define TARGET_OPTION_SAVE arm_option_save
426 #undef TARGET_OPTION_RESTORE
427 #define TARGET_OPTION_RESTORE arm_option_restore
429 #undef TARGET_OPTION_PRINT
430 #define TARGET_OPTION_PRINT arm_option_print
432 #undef TARGET_COMP_TYPE_ATTRIBUTES
433 #define TARGET_COMP_TYPE_ATTRIBUTES arm_comp_type_attributes
435 #undef TARGET_SCHED_CAN_SPECULATE_INSN
436 #define TARGET_SCHED_CAN_SPECULATE_INSN arm_sched_can_speculate_insn
438 #undef TARGET_SCHED_MACRO_FUSION_P
439 #define TARGET_SCHED_MACRO_FUSION_P arm_macro_fusion_p
441 #undef TARGET_SCHED_MACRO_FUSION_PAIR_P
442 #define TARGET_SCHED_MACRO_FUSION_PAIR_P aarch_macro_fusion_pair_p
444 #undef TARGET_SET_DEFAULT_TYPE_ATTRIBUTES
445 #define TARGET_SET_DEFAULT_TYPE_ATTRIBUTES arm_set_default_type_attributes
447 #undef TARGET_SCHED_ADJUST_COST
448 #define TARGET_SCHED_ADJUST_COST arm_adjust_cost
450 #undef TARGET_SET_CURRENT_FUNCTION
451 #define TARGET_SET_CURRENT_FUNCTION arm_set_current_function
453 #undef TARGET_OPTION_VALID_ATTRIBUTE_P
454 #define TARGET_OPTION_VALID_ATTRIBUTE_P arm_valid_target_attribute_p
456 #undef TARGET_SCHED_REORDER
457 #define TARGET_SCHED_REORDER arm_sched_reorder
459 #undef TARGET_REGISTER_MOVE_COST
460 #define TARGET_REGISTER_MOVE_COST arm_register_move_cost
462 #undef TARGET_MEMORY_MOVE_COST
463 #define TARGET_MEMORY_MOVE_COST arm_memory_move_cost
465 #undef TARGET_ENCODE_SECTION_INFO
466 #ifdef ARM_PE
467 #define TARGET_ENCODE_SECTION_INFO arm_pe_encode_section_info
468 #else
469 #define TARGET_ENCODE_SECTION_INFO arm_encode_section_info
470 #endif
472 #undef TARGET_STRIP_NAME_ENCODING
473 #define TARGET_STRIP_NAME_ENCODING arm_strip_name_encoding
475 #undef TARGET_ASM_INTERNAL_LABEL
476 #define TARGET_ASM_INTERNAL_LABEL arm_internal_label
478 #undef TARGET_FLOATN_MODE
479 #define TARGET_FLOATN_MODE arm_floatn_mode
481 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
482 #define TARGET_FUNCTION_OK_FOR_SIBCALL arm_function_ok_for_sibcall
484 #undef TARGET_FUNCTION_VALUE
485 #define TARGET_FUNCTION_VALUE arm_function_value
487 #undef TARGET_LIBCALL_VALUE
488 #define TARGET_LIBCALL_VALUE arm_libcall_value
490 #undef TARGET_FUNCTION_VALUE_REGNO_P
491 #define TARGET_FUNCTION_VALUE_REGNO_P arm_function_value_regno_p
493 #undef TARGET_ASM_OUTPUT_MI_THUNK
494 #define TARGET_ASM_OUTPUT_MI_THUNK arm_output_mi_thunk
495 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
496 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK arm_can_output_mi_thunk
498 #undef TARGET_RTX_COSTS
499 #define TARGET_RTX_COSTS arm_rtx_costs
500 #undef TARGET_ADDRESS_COST
501 #define TARGET_ADDRESS_COST arm_address_cost
503 #undef TARGET_SHIFT_TRUNCATION_MASK
504 #define TARGET_SHIFT_TRUNCATION_MASK arm_shift_truncation_mask
505 #undef TARGET_VECTOR_MODE_SUPPORTED_P
506 #define TARGET_VECTOR_MODE_SUPPORTED_P arm_vector_mode_supported_p
507 #undef TARGET_ARRAY_MODE_SUPPORTED_P
508 #define TARGET_ARRAY_MODE_SUPPORTED_P arm_array_mode_supported_p
509 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
510 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE arm_preferred_simd_mode
511 #undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
512 #define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
513 arm_autovectorize_vector_sizes
515 #undef TARGET_MACHINE_DEPENDENT_REORG
516 #define TARGET_MACHINE_DEPENDENT_REORG arm_reorg
518 #undef TARGET_INIT_BUILTINS
519 #define TARGET_INIT_BUILTINS arm_init_builtins
520 #undef TARGET_EXPAND_BUILTIN
521 #define TARGET_EXPAND_BUILTIN arm_expand_builtin
522 #undef TARGET_BUILTIN_DECL
523 #define TARGET_BUILTIN_DECL arm_builtin_decl
525 #undef TARGET_INIT_LIBFUNCS
526 #define TARGET_INIT_LIBFUNCS arm_init_libfuncs
528 #undef TARGET_PROMOTE_FUNCTION_MODE
529 #define TARGET_PROMOTE_FUNCTION_MODE arm_promote_function_mode
530 #undef TARGET_PROMOTE_PROTOTYPES
531 #define TARGET_PROMOTE_PROTOTYPES arm_promote_prototypes
532 #undef TARGET_PASS_BY_REFERENCE
533 #define TARGET_PASS_BY_REFERENCE arm_pass_by_reference
534 #undef TARGET_ARG_PARTIAL_BYTES
535 #define TARGET_ARG_PARTIAL_BYTES arm_arg_partial_bytes
536 #undef TARGET_FUNCTION_ARG
537 #define TARGET_FUNCTION_ARG arm_function_arg
538 #undef TARGET_FUNCTION_ARG_ADVANCE
539 #define TARGET_FUNCTION_ARG_ADVANCE arm_function_arg_advance
540 #undef TARGET_FUNCTION_ARG_PADDING
541 #define TARGET_FUNCTION_ARG_PADDING arm_function_arg_padding
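/* Illustration only, not part of the original source: the hook named above
   replaces the old FUNCTION_ARG_PADDING target macro.  A minimal sketch of
   what such an implementation can look like, assuming the AAPCS rule that
   sub-word integral values are padded downward on big-endian targets and
   upward otherwise; the fallback to default_function_arg_padding for
   non-AAPCS ABIs is likewise an assumption here, not a copy of the
   definition that appears later in this file.

     static pad_direction
     arm_function_arg_padding (machine_mode mode, const_tree type)
     {
       if (!TARGET_AAPCS_BASED)
         return default_function_arg_padding (mode, type);

       if (type && BYTES_BIG_ENDIAN && INTEGRAL_TYPE_P (type))
         return PAD_DOWNWARD;

       return PAD_UPWARD;
     }
*/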
542 #undef TARGET_FUNCTION_ARG_BOUNDARY
543 #define TARGET_FUNCTION_ARG_BOUNDARY arm_function_arg_boundary
545 #undef TARGET_SETUP_INCOMING_VARARGS
546 #define TARGET_SETUP_INCOMING_VARARGS arm_setup_incoming_varargs
548 #undef TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS
549 #define TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS arm_allocate_stack_slots_for_args
551 #undef TARGET_ASM_TRAMPOLINE_TEMPLATE
552 #define TARGET_ASM_TRAMPOLINE_TEMPLATE arm_asm_trampoline_template
553 #undef TARGET_TRAMPOLINE_INIT
554 #define TARGET_TRAMPOLINE_INIT arm_trampoline_init
555 #undef TARGET_TRAMPOLINE_ADJUST_ADDRESS
556 #define TARGET_TRAMPOLINE_ADJUST_ADDRESS arm_trampoline_adjust_address
558 #undef TARGET_WARN_FUNC_RETURN
559 #define TARGET_WARN_FUNC_RETURN arm_warn_func_return
561 #undef TARGET_DEFAULT_SHORT_ENUMS
562 #define TARGET_DEFAULT_SHORT_ENUMS arm_default_short_enums
564 #undef TARGET_ALIGN_ANON_BITFIELD
565 #define TARGET_ALIGN_ANON_BITFIELD arm_align_anon_bitfield
567 #undef TARGET_NARROW_VOLATILE_BITFIELD
568 #define TARGET_NARROW_VOLATILE_BITFIELD hook_bool_void_false
570 #undef TARGET_CXX_GUARD_TYPE
571 #define TARGET_CXX_GUARD_TYPE arm_cxx_guard_type
573 #undef TARGET_CXX_GUARD_MASK_BIT
574 #define TARGET_CXX_GUARD_MASK_BIT arm_cxx_guard_mask_bit
576 #undef TARGET_CXX_GET_COOKIE_SIZE
577 #define TARGET_CXX_GET_COOKIE_SIZE arm_get_cookie_size
579 #undef TARGET_CXX_COOKIE_HAS_SIZE
580 #define TARGET_CXX_COOKIE_HAS_SIZE arm_cookie_has_size
582 #undef TARGET_CXX_CDTOR_RETURNS_THIS
583 #define TARGET_CXX_CDTOR_RETURNS_THIS arm_cxx_cdtor_returns_this
585 #undef TARGET_CXX_KEY_METHOD_MAY_BE_INLINE
586 #define TARGET_CXX_KEY_METHOD_MAY_BE_INLINE arm_cxx_key_method_may_be_inline
588 #undef TARGET_CXX_USE_AEABI_ATEXIT
589 #define TARGET_CXX_USE_AEABI_ATEXIT arm_cxx_use_aeabi_atexit
591 #undef TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY
592 #define TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY \
593 arm_cxx_determine_class_data_visibility
595 #undef TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT
596 #define TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT arm_cxx_class_data_always_comdat
598 #undef TARGET_RETURN_IN_MSB
599 #define TARGET_RETURN_IN_MSB arm_return_in_msb
601 #undef TARGET_RETURN_IN_MEMORY
602 #define TARGET_RETURN_IN_MEMORY arm_return_in_memory
604 #undef TARGET_MUST_PASS_IN_STACK
605 #define TARGET_MUST_PASS_IN_STACK arm_must_pass_in_stack
607 #if ARM_UNWIND_INFO
608 #undef TARGET_ASM_UNWIND_EMIT
609 #define TARGET_ASM_UNWIND_EMIT arm_unwind_emit
611 /* EABI unwinding tables use a different format for the typeinfo tables. */
612 #undef TARGET_ASM_TTYPE
613 #define TARGET_ASM_TTYPE arm_output_ttype
615 #undef TARGET_ARM_EABI_UNWINDER
616 #define TARGET_ARM_EABI_UNWINDER true
618 #undef TARGET_ASM_EMIT_EXCEPT_PERSONALITY
619 #define TARGET_ASM_EMIT_EXCEPT_PERSONALITY arm_asm_emit_except_personality
621 #endif /* ARM_UNWIND_INFO */
623 #undef TARGET_ASM_INIT_SECTIONS
624 #define TARGET_ASM_INIT_SECTIONS arm_asm_init_sections
626 #undef TARGET_DWARF_REGISTER_SPAN
627 #define TARGET_DWARF_REGISTER_SPAN arm_dwarf_register_span
629 #undef TARGET_CANNOT_COPY_INSN_P
630 #define TARGET_CANNOT_COPY_INSN_P arm_cannot_copy_insn_p
632 #ifdef HAVE_AS_TLS
633 #undef TARGET_HAVE_TLS
634 #define TARGET_HAVE_TLS true
635 #endif
637 #undef TARGET_HAVE_CONDITIONAL_EXECUTION
638 #define TARGET_HAVE_CONDITIONAL_EXECUTION arm_have_conditional_execution
640 #undef TARGET_LEGITIMATE_CONSTANT_P
641 #define TARGET_LEGITIMATE_CONSTANT_P arm_legitimate_constant_p
643 #undef TARGET_CANNOT_FORCE_CONST_MEM
644 #define TARGET_CANNOT_FORCE_CONST_MEM arm_cannot_force_const_mem
646 #undef TARGET_MAX_ANCHOR_OFFSET
647 #define TARGET_MAX_ANCHOR_OFFSET 4095
649 /* The minimum is set such that the total size of the block
650 for a particular anchor is -4088 + 1 + 4095 bytes, which is
651 divisible by eight, ensuring natural spacing of anchors. */
652 #undef TARGET_MIN_ANCHOR_OFFSET
653 #define TARGET_MIN_ANCHOR_OFFSET -4088
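/* For the arithmetic above: offsets -4088 through +4095 inclusive cover
   4088 + 1 + 4095 = 8184 bytes, and 8184 = 1023 * 8, hence divisible by
   eight as noted.  */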
655 #undef TARGET_SCHED_ISSUE_RATE
656 #define TARGET_SCHED_ISSUE_RATE arm_issue_rate
658 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
659 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
660 arm_first_cycle_multipass_dfa_lookahead
662 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD
663 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD \
664 arm_first_cycle_multipass_dfa_lookahead_guard
666 #undef TARGET_MANGLE_TYPE
667 #define TARGET_MANGLE_TYPE arm_mangle_type
669 #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
670 #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV arm_atomic_assign_expand_fenv
672 #undef TARGET_BUILD_BUILTIN_VA_LIST
673 #define TARGET_BUILD_BUILTIN_VA_LIST arm_build_builtin_va_list
674 #undef TARGET_EXPAND_BUILTIN_VA_START
675 #define TARGET_EXPAND_BUILTIN_VA_START arm_expand_builtin_va_start
676 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
677 #define TARGET_GIMPLIFY_VA_ARG_EXPR arm_gimplify_va_arg_expr
679 #ifdef HAVE_AS_TLS
680 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
681 #define TARGET_ASM_OUTPUT_DWARF_DTPREL arm_output_dwarf_dtprel
682 #endif
684 #undef TARGET_LEGITIMATE_ADDRESS_P
685 #define TARGET_LEGITIMATE_ADDRESS_P arm_legitimate_address_p
687 #undef TARGET_PREFERRED_RELOAD_CLASS
688 #define TARGET_PREFERRED_RELOAD_CLASS arm_preferred_reload_class
690 #undef TARGET_PROMOTED_TYPE
691 #define TARGET_PROMOTED_TYPE arm_promoted_type
693 #undef TARGET_SCALAR_MODE_SUPPORTED_P
694 #define TARGET_SCALAR_MODE_SUPPORTED_P arm_scalar_mode_supported_p
696 #undef TARGET_COMPUTE_FRAME_LAYOUT
697 #define TARGET_COMPUTE_FRAME_LAYOUT arm_compute_frame_layout
699 #undef TARGET_FRAME_POINTER_REQUIRED
700 #define TARGET_FRAME_POINTER_REQUIRED arm_frame_pointer_required
702 #undef TARGET_CAN_ELIMINATE
703 #define TARGET_CAN_ELIMINATE arm_can_eliminate
705 #undef TARGET_CONDITIONAL_REGISTER_USAGE
706 #define TARGET_CONDITIONAL_REGISTER_USAGE arm_conditional_register_usage
708 #undef TARGET_CLASS_LIKELY_SPILLED_P
709 #define TARGET_CLASS_LIKELY_SPILLED_P arm_class_likely_spilled_p
711 #undef TARGET_VECTORIZE_BUILTINS
712 #define TARGET_VECTORIZE_BUILTINS
714 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
715 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
716 arm_builtin_vectorized_function
718 #undef TARGET_VECTOR_ALIGNMENT
719 #define TARGET_VECTOR_ALIGNMENT arm_vector_alignment
721 #undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
722 #define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE \
723 arm_vector_alignment_reachable
725 #undef TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT
726 #define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT \
727 arm_builtin_support_vector_misalignment
729 #undef TARGET_PREFERRED_RENAME_CLASS
730 #define TARGET_PREFERRED_RENAME_CLASS \
731 arm_preferred_rename_class
733 #undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
734 #define TARGET_VECTORIZE_VEC_PERM_CONST_OK \
735 arm_vectorize_vec_perm_const_ok
737 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
738 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
739 arm_builtin_vectorization_cost
740 #undef TARGET_VECTORIZE_ADD_STMT_COST
741 #define TARGET_VECTORIZE_ADD_STMT_COST arm_add_stmt_cost
743 #undef TARGET_CANONICALIZE_COMPARISON
744 #define TARGET_CANONICALIZE_COMPARISON \
745 arm_canonicalize_comparison
747 #undef TARGET_ASAN_SHADOW_OFFSET
748 #define TARGET_ASAN_SHADOW_OFFSET arm_asan_shadow_offset
750 #undef MAX_INSN_PER_IT_BLOCK
751 #define MAX_INSN_PER_IT_BLOCK (arm_restrict_it ? 1 : 4)
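/* That is, with -mrestrict-it (arm_restrict_it set) an IT block may cover
   only a single conditional instruction; otherwise up to four.  */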
753 #undef TARGET_CAN_USE_DOLOOP_P
754 #define TARGET_CAN_USE_DOLOOP_P can_use_doloop_if_innermost
756 #undef TARGET_CONST_NOT_OK_FOR_DEBUG_P
757 #define TARGET_CONST_NOT_OK_FOR_DEBUG_P arm_const_not_ok_for_debug_p
759 #undef TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS
760 #define TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS true
762 #undef TARGET_SCHED_FUSION_PRIORITY
763 #define TARGET_SCHED_FUSION_PRIORITY arm_sched_fusion_priority
765 #undef TARGET_ASM_FUNCTION_SECTION
766 #define TARGET_ASM_FUNCTION_SECTION arm_function_section
768 #undef TARGET_ASM_ELF_FLAGS_NUMERIC
769 #define TARGET_ASM_ELF_FLAGS_NUMERIC arm_asm_elf_flags_numeric
771 #undef TARGET_SECTION_TYPE_FLAGS
772 #define TARGET_SECTION_TYPE_FLAGS arm_elf_section_type_flags
774 #undef TARGET_EXPAND_DIVMOD_LIBFUNC
775 #define TARGET_EXPAND_DIVMOD_LIBFUNC arm_expand_divmod_libfunc
777 #undef TARGET_C_EXCESS_PRECISION
778 #define TARGET_C_EXCESS_PRECISION arm_excess_precision
780 /* Although the architecture reserves bits 0 and 1, only the former is
781 used for ARM/Thumb ISA selection in v7 and earlier versions. */
782 #undef TARGET_CUSTOM_FUNCTION_DESCRIPTORS
783 #define TARGET_CUSTOM_FUNCTION_DESCRIPTORS 2
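/* A value of 2 therefore marks custom function descriptors with bit 1,
   leaving bit 0 free for the ARM/Thumb selection mentioned above.  */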
785 #undef TARGET_FIXED_CONDITION_CODE_REGS
786 #define TARGET_FIXED_CONDITION_CODE_REGS arm_fixed_condition_code_regs
788 #undef TARGET_HARD_REGNO_MODE_OK
789 #define TARGET_HARD_REGNO_MODE_OK arm_hard_regno_mode_ok
791 #undef TARGET_MODES_TIEABLE_P
792 #define TARGET_MODES_TIEABLE_P arm_modes_tieable_p
794 /* Obstack for minipool constant handling. */
795 static struct obstack minipool_obstack;
796 static char * minipool_startobj;
798 /* The maximum number of insns skipped which
799 will be conditionalised if possible. */
800 static int max_insns_skipped = 5;
802 extern FILE * asm_out_file;
804 /* True if we are currently building a constant table. */
805 int making_const_table;
807 /* The processor for which instructions should be scheduled. */
808 enum processor_type arm_tune = TARGET_CPU_arm_none;
810 /* The current tuning set. */
811 const struct tune_params *current_tune;
813 /* Which floating point hardware to schedule for. */
814 int arm_fpu_attr;
816 /* Used for Thumb call_via trampolines. */
817 rtx thumb_call_via_label[14];
818 static int thumb_call_reg_needed;
820 /* The bits in this mask specify which instruction scheduling options should
821 be used. */
822 unsigned int tune_flags = 0;
824 /* The highest ARM architecture version supported by the
825 target. */
826 enum base_architecture arm_base_arch = BASE_ARCH_0;
828 /* Active target architecture and tuning. */
830 struct arm_build_target arm_active_target;
832 /* The following are used in the arm.md file as equivalents to bits
833 in the above two flag variables. */
835 /* Nonzero if this chip supports the ARM Architecture 3M extensions. */
836 int arm_arch3m = 0;
838 /* Nonzero if this chip supports the ARM Architecture 4 extensions. */
839 int arm_arch4 = 0;
841 /* Nonzero if this chip supports the ARM Architecture 4t extensions. */
842 int arm_arch4t = 0;
844 /* Nonzero if this chip supports the ARM Architecture 5 extensions. */
845 int arm_arch5 = 0;
847 /* Nonzero if this chip supports the ARM Architecture 5E extensions. */
848 int arm_arch5e = 0;
850 /* Nonzero if this chip supports the ARM Architecture 5TE extensions. */
851 int arm_arch5te = 0;
853 /* Nonzero if this chip supports the ARM Architecture 6 extensions. */
854 int arm_arch6 = 0;
856 /* Nonzero if this chip supports the ARM 6K extensions. */
857 int arm_arch6k = 0;
859 /* Nonzero if this chip supports the ARM 6KZ extensions. */
860 int arm_arch6kz = 0;
862 /* Nonzero if instructions present in ARMv6-M can be used. */
863 int arm_arch6m = 0;
865 /* Nonzero if this chip supports the ARM 7 extensions. */
866 int arm_arch7 = 0;
868 /* Nonzero if this chip supports the Large Physical Address Extension. */
869 int arm_arch_lpae = 0;
871 /* Nonzero if instructions not present in the 'M' profile can be used. */
872 int arm_arch_notm = 0;
874 /* Nonzero if instructions present in ARMv7E-M can be used. */
875 int arm_arch7em = 0;
877 /* Nonzero if instructions present in ARMv8 can be used. */
878 int arm_arch8 = 0;
880 /* Nonzero if this chip supports the ARMv8.1 extensions. */
881 int arm_arch8_1 = 0;
883 /* Nonzero if this chip supports the ARM Architecture 8.2 extensions. */
884 int arm_arch8_2 = 0;
886 /* Nonzero if this chip supports the FP16 instructions extension of ARM
887 Architecture 8.2. */
888 int arm_fp16_inst = 0;
890 /* Nonzero if this chip can benefit from load scheduling. */
891 int arm_ld_sched = 0;
893 /* Nonzero if this chip is a StrongARM. */
894 int arm_tune_strongarm = 0;
896 /* Nonzero if this chip supports Intel Wireless MMX technology. */
897 int arm_arch_iwmmxt = 0;
899 /* Nonzero if this chip supports Intel Wireless MMX2 technology. */
900 int arm_arch_iwmmxt2 = 0;
902 /* Nonzero if this chip is an XScale. */
903 int arm_arch_xscale = 0;
905 /* Nonzero if tuning for XScale */
906 int arm_tune_xscale = 0;
908 /* Nonzero if we want to tune for stores that access the write-buffer.
909 This typically means an ARM6 or ARM7 with MMU or MPU. */
910 int arm_tune_wbuf = 0;
912 /* Nonzero if tuning for Cortex-A9. */
913 int arm_tune_cortex_a9 = 0;
915 /* Nonzero if we should define __THUMB_INTERWORK__ in the
916 preprocessor.
917 XXX This is a bit of a hack, it's intended to help work around
918 problems in GLD which doesn't understand that armv5t code is
919 interworking clean. */
920 int arm_cpp_interwork = 0;
922 /* Nonzero if chip supports Thumb 1. */
923 int arm_arch_thumb1;
925 /* Nonzero if chip supports Thumb 2. */
926 int arm_arch_thumb2;
928 /* Nonzero if chip supports integer division instruction. */
929 int arm_arch_arm_hwdiv;
930 int arm_arch_thumb_hwdiv;
932 /* Nonzero if chip disallows volatile memory access in IT block. */
933 int arm_arch_no_volatile_ce;
935 /* Nonzero if we should use Neon to handle 64-bit operations rather
936 than core registers. */
937 int prefer_neon_for_64bits = 0;
939 /* Nonzero if we shouldn't use literal pools. */
940 bool arm_disable_literal_pool = false;
942 /* The register number to be used for the PIC offset register. */
943 unsigned arm_pic_register = INVALID_REGNUM;
945 enum arm_pcs arm_pcs_default;
947 /* For an explanation of these variables, see final_prescan_insn below. */
948 int arm_ccfsm_state;
949 /* arm_current_cc is also used for Thumb-2 cond_exec blocks. */
950 enum arm_cond_code arm_current_cc;
952 rtx arm_target_insn;
953 int arm_target_label;
954 /* The number of conditionally executed insns, including the current insn. */
955 int arm_condexec_count = 0;
956 /* A bitmask specifying the patterns for the IT block.
957 Zero means do not output an IT block before this insn. */
958 int arm_condexec_mask = 0;
959 /* The number of bits used in arm_condexec_mask. */
960 int arm_condexec_masklen = 0;
962 /* Nonzero if chip supports the ARMv8 CRC instructions. */
963 int arm_arch_crc = 0;
965 /* Nonzero if chip supports the ARMv8-M security extensions. */
966 int arm_arch_cmse = 0;
968 /* Nonzero if the core has a very small, high-latency, multiply unit. */
969 int arm_m_profile_small_mul = 0;
971 /* The condition codes of the ARM, and the inverse function. */
972 static const char * const arm_condition_codes[] =
973 {
974 "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
975 "hi", "ls", "ge", "lt", "gt", "le", "al", "nv"
978 /* The register numbers in sequence, for passing to arm_gen_load_multiple. */
979 int arm_regs_in_sequence[] =
980 {
981 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
982 };
984 #define ARM_LSL_NAME "lsl"
985 #define streq(string1, string2) (strcmp (string1, string2) == 0)
987 #define THUMB2_WORK_REGS (0xff & ~( (1 << THUMB_HARD_FRAME_POINTER_REGNUM) \
988 | (1 << SP_REGNUM) | (1 << PC_REGNUM) \
989 | (1 << PIC_OFFSET_TABLE_REGNUM)))
991 /* Initialization code. */
993 struct cpu_tune
994 {
995 enum processor_type scheduler;
996 unsigned int tune_flags;
997 const struct tune_params *tune;
998 };
1000 #define ARM_PREFETCH_NOT_BENEFICIAL { 0, -1, -1 }
1001 #define ARM_PREFETCH_BENEFICIAL(num_slots,l1_size,l1_line_size) \
1002 { \
1003 num_slots, \
1004 l1_size, \
1005 l1_line_size \
1006 }
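/* These two macros supply the prefetch-related fields of the tune_params
   structures defined later in this file (e.g. ARM_PREFETCH_NOT_BENEFICIAL
   in arm_slowmul_tune).  */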
1008 /* arm generic vectorizer costs. */
1009 static const
1010 struct cpu_vec_costs arm_default_vec_cost = {
1011 1, /* scalar_stmt_cost. */
1012 1, /* scalar load_cost. */
1013 1, /* scalar_store_cost. */
1014 1, /* vec_stmt_cost. */
1015 1, /* vec_to_scalar_cost. */
1016 1, /* scalar_to_vec_cost. */
1017 1, /* vec_align_load_cost. */
1018 1, /* vec_unalign_load_cost. */
1019 1, /* vec_unalign_store_cost. */
1020 1, /* vec_store_cost. */
1021 3, /* cond_taken_branch_cost. */
1022 1, /* cond_not_taken_branch_cost. */
1023 };
1025 /* Cost tables for AArch32 + AArch64 cores should go in aarch-cost-tables.h */
1026 #include "aarch-cost-tables.h"
1030 const struct cpu_cost_table cortexa9_extra_costs =
1032 /* ALU */
1034 0, /* arith. */
1035 0, /* logical. */
1036 0, /* shift. */
1037 COSTS_N_INSNS (1), /* shift_reg. */
1038 COSTS_N_INSNS (1), /* arith_shift. */
1039 COSTS_N_INSNS (2), /* arith_shift_reg. */
1040 0, /* log_shift. */
1041 COSTS_N_INSNS (1), /* log_shift_reg. */
1042 COSTS_N_INSNS (1), /* extend. */
1043 COSTS_N_INSNS (2), /* extend_arith. */
1044 COSTS_N_INSNS (1), /* bfi. */
1045 COSTS_N_INSNS (1), /* bfx. */
1046 0, /* clz. */
1047 0, /* rev. */
1048 0, /* non_exec. */
1049 true /* non_exec_costs_exec. */
1052 /* MULT SImode */
1054 COSTS_N_INSNS (3), /* simple. */
1055 COSTS_N_INSNS (3), /* flag_setting. */
1056 COSTS_N_INSNS (2), /* extend. */
1057 COSTS_N_INSNS (3), /* add. */
1058 COSTS_N_INSNS (2), /* extend_add. */
1059 COSTS_N_INSNS (30) /* idiv. No HW div on Cortex A9. */
1061 /* MULT DImode */
1063 0, /* simple (N/A). */
1064 0, /* flag_setting (N/A). */
1065 COSTS_N_INSNS (4), /* extend. */
1066 0, /* add (N/A). */
1067 COSTS_N_INSNS (4), /* extend_add. */
1068 0 /* idiv (N/A). */
1071 /* LD/ST */
1073 COSTS_N_INSNS (2), /* load. */
1074 COSTS_N_INSNS (2), /* load_sign_extend. */
1075 COSTS_N_INSNS (2), /* ldrd. */
1076 COSTS_N_INSNS (2), /* ldm_1st. */
1077 1, /* ldm_regs_per_insn_1st. */
1078 2, /* ldm_regs_per_insn_subsequent. */
1079 COSTS_N_INSNS (5), /* loadf. */
1080 COSTS_N_INSNS (5), /* loadd. */
1081 COSTS_N_INSNS (1), /* load_unaligned. */
1082 COSTS_N_INSNS (2), /* store. */
1083 COSTS_N_INSNS (2), /* strd. */
1084 COSTS_N_INSNS (2), /* stm_1st. */
1085 1, /* stm_regs_per_insn_1st. */
1086 2, /* stm_regs_per_insn_subsequent. */
1087 COSTS_N_INSNS (1), /* storef. */
1088 COSTS_N_INSNS (1), /* stored. */
1089 COSTS_N_INSNS (1), /* store_unaligned. */
1090 COSTS_N_INSNS (1), /* loadv. */
1091 COSTS_N_INSNS (1) /* storev. */
1094 /* FP SFmode */
1096 COSTS_N_INSNS (14), /* div. */
1097 COSTS_N_INSNS (4), /* mult. */
1098 COSTS_N_INSNS (7), /* mult_addsub. */
1099 COSTS_N_INSNS (30), /* fma. */
1100 COSTS_N_INSNS (3), /* addsub. */
1101 COSTS_N_INSNS (1), /* fpconst. */
1102 COSTS_N_INSNS (1), /* neg. */
1103 COSTS_N_INSNS (3), /* compare. */
1104 COSTS_N_INSNS (3), /* widen. */
1105 COSTS_N_INSNS (3), /* narrow. */
1106 COSTS_N_INSNS (3), /* toint. */
1107 COSTS_N_INSNS (3), /* fromint. */
1108 COSTS_N_INSNS (3) /* roundint. */
1110 /* FP DFmode */
1112 COSTS_N_INSNS (24), /* div. */
1113 COSTS_N_INSNS (5), /* mult. */
1114 COSTS_N_INSNS (8), /* mult_addsub. */
1115 COSTS_N_INSNS (30), /* fma. */
1116 COSTS_N_INSNS (3), /* addsub. */
1117 COSTS_N_INSNS (1), /* fpconst. */
1118 COSTS_N_INSNS (1), /* neg. */
1119 COSTS_N_INSNS (3), /* compare. */
1120 COSTS_N_INSNS (3), /* widen. */
1121 COSTS_N_INSNS (3), /* narrow. */
1122 COSTS_N_INSNS (3), /* toint. */
1123 COSTS_N_INSNS (3), /* fromint. */
1124 COSTS_N_INSNS (3) /* roundint. */
1127 /* Vector */
1129 COSTS_N_INSNS (1) /* alu. */
1133 const struct cpu_cost_table cortexa8_extra_costs =
1135 /* ALU */
1137 0, /* arith. */
1138 0, /* logical. */
1139 COSTS_N_INSNS (1), /* shift. */
1140 0, /* shift_reg. */
1141 COSTS_N_INSNS (1), /* arith_shift. */
1142 0, /* arith_shift_reg. */
1143 COSTS_N_INSNS (1), /* log_shift. */
1144 0, /* log_shift_reg. */
1145 0, /* extend. */
1146 0, /* extend_arith. */
1147 0, /* bfi. */
1148 0, /* bfx. */
1149 0, /* clz. */
1150 0, /* rev. */
1151 0, /* non_exec. */
1152 true /* non_exec_costs_exec. */
1155 /* MULT SImode */
1157 COSTS_N_INSNS (1), /* simple. */
1158 COSTS_N_INSNS (1), /* flag_setting. */
1159 COSTS_N_INSNS (1), /* extend. */
1160 COSTS_N_INSNS (1), /* add. */
1161 COSTS_N_INSNS (1), /* extend_add. */
1162 COSTS_N_INSNS (30) /* idiv. No HW div on Cortex A8. */
1164 /* MULT DImode */
1166 0, /* simple (N/A). */
1167 0, /* flag_setting (N/A). */
1168 COSTS_N_INSNS (2), /* extend. */
1169 0, /* add (N/A). */
1170 COSTS_N_INSNS (2), /* extend_add. */
1171 0 /* idiv (N/A). */
1174 /* LD/ST */
1176 COSTS_N_INSNS (1), /* load. */
1177 COSTS_N_INSNS (1), /* load_sign_extend. */
1178 COSTS_N_INSNS (1), /* ldrd. */
1179 COSTS_N_INSNS (1), /* ldm_1st. */
1180 1, /* ldm_regs_per_insn_1st. */
1181 2, /* ldm_regs_per_insn_subsequent. */
1182 COSTS_N_INSNS (1), /* loadf. */
1183 COSTS_N_INSNS (1), /* loadd. */
1184 COSTS_N_INSNS (1), /* load_unaligned. */
1185 COSTS_N_INSNS (1), /* store. */
1186 COSTS_N_INSNS (1), /* strd. */
1187 COSTS_N_INSNS (1), /* stm_1st. */
1188 1, /* stm_regs_per_insn_1st. */
1189 2, /* stm_regs_per_insn_subsequent. */
1190 COSTS_N_INSNS (1), /* storef. */
1191 COSTS_N_INSNS (1), /* stored. */
1192 COSTS_N_INSNS (1), /* store_unaligned. */
1193 COSTS_N_INSNS (1), /* loadv. */
1194 COSTS_N_INSNS (1) /* storev. */
1197 /* FP SFmode */
1199 COSTS_N_INSNS (36), /* div. */
1200 COSTS_N_INSNS (11), /* mult. */
1201 COSTS_N_INSNS (20), /* mult_addsub. */
1202 COSTS_N_INSNS (30), /* fma. */
1203 COSTS_N_INSNS (9), /* addsub. */
1204 COSTS_N_INSNS (3), /* fpconst. */
1205 COSTS_N_INSNS (3), /* neg. */
1206 COSTS_N_INSNS (6), /* compare. */
1207 COSTS_N_INSNS (4), /* widen. */
1208 COSTS_N_INSNS (4), /* narrow. */
1209 COSTS_N_INSNS (8), /* toint. */
1210 COSTS_N_INSNS (8), /* fromint. */
1211 COSTS_N_INSNS (8) /* roundint. */
1213 /* FP DFmode */
1215 COSTS_N_INSNS (64), /* div. */
1216 COSTS_N_INSNS (16), /* mult. */
1217 COSTS_N_INSNS (25), /* mult_addsub. */
1218 COSTS_N_INSNS (30), /* fma. */
1219 COSTS_N_INSNS (9), /* addsub. */
1220 COSTS_N_INSNS (3), /* fpconst. */
1221 COSTS_N_INSNS (3), /* neg. */
1222 COSTS_N_INSNS (6), /* compare. */
1223 COSTS_N_INSNS (6), /* widen. */
1224 COSTS_N_INSNS (6), /* narrow. */
1225 COSTS_N_INSNS (8), /* toint. */
1226 COSTS_N_INSNS (8), /* fromint. */
1227 COSTS_N_INSNS (8) /* roundint. */
1230 /* Vector */
1232 COSTS_N_INSNS (1) /* alu. */
1236 const struct cpu_cost_table cortexa5_extra_costs =
1238 /* ALU */
1240 0, /* arith. */
1241 0, /* logical. */
1242 COSTS_N_INSNS (1), /* shift. */
1243 COSTS_N_INSNS (1), /* shift_reg. */
1244 COSTS_N_INSNS (1), /* arith_shift. */
1245 COSTS_N_INSNS (1), /* arith_shift_reg. */
1246 COSTS_N_INSNS (1), /* log_shift. */
1247 COSTS_N_INSNS (1), /* log_shift_reg. */
1248 COSTS_N_INSNS (1), /* extend. */
1249 COSTS_N_INSNS (1), /* extend_arith. */
1250 COSTS_N_INSNS (1), /* bfi. */
1251 COSTS_N_INSNS (1), /* bfx. */
1252 COSTS_N_INSNS (1), /* clz. */
1253 COSTS_N_INSNS (1), /* rev. */
1254 0, /* non_exec. */
1255 true /* non_exec_costs_exec. */
1259 /* MULT SImode */
1261 0, /* simple. */
1262 COSTS_N_INSNS (1), /* flag_setting. */
1263 COSTS_N_INSNS (1), /* extend. */
1264 COSTS_N_INSNS (1), /* add. */
1265 COSTS_N_INSNS (1), /* extend_add. */
1266 COSTS_N_INSNS (7) /* idiv. */
1268 /* MULT DImode */
1270 0, /* simple (N/A). */
1271 0, /* flag_setting (N/A). */
1272 COSTS_N_INSNS (1), /* extend. */
1273 0, /* add. */
1274 COSTS_N_INSNS (2), /* extend_add. */
1275 0 /* idiv (N/A). */
1278 /* LD/ST */
1280 COSTS_N_INSNS (1), /* load. */
1281 COSTS_N_INSNS (1), /* load_sign_extend. */
1282 COSTS_N_INSNS (6), /* ldrd. */
1283 COSTS_N_INSNS (1), /* ldm_1st. */
1284 1, /* ldm_regs_per_insn_1st. */
1285 2, /* ldm_regs_per_insn_subsequent. */
1286 COSTS_N_INSNS (2), /* loadf. */
1287 COSTS_N_INSNS (4), /* loadd. */
1288 COSTS_N_INSNS (1), /* load_unaligned. */
1289 COSTS_N_INSNS (1), /* store. */
1290 COSTS_N_INSNS (3), /* strd. */
1291 COSTS_N_INSNS (1), /* stm_1st. */
1292 1, /* stm_regs_per_insn_1st. */
1293 2, /* stm_regs_per_insn_subsequent. */
1294 COSTS_N_INSNS (2), /* storef. */
1295 COSTS_N_INSNS (2), /* stored. */
1296 COSTS_N_INSNS (1), /* store_unaligned. */
1297 COSTS_N_INSNS (1), /* loadv. */
1298 COSTS_N_INSNS (1) /* storev. */
1301 /* FP SFmode */
1303 COSTS_N_INSNS (15), /* div. */
1304 COSTS_N_INSNS (3), /* mult. */
1305 COSTS_N_INSNS (7), /* mult_addsub. */
1306 COSTS_N_INSNS (7), /* fma. */
1307 COSTS_N_INSNS (3), /* addsub. */
1308 COSTS_N_INSNS (3), /* fpconst. */
1309 COSTS_N_INSNS (3), /* neg. */
1310 COSTS_N_INSNS (3), /* compare. */
1311 COSTS_N_INSNS (3), /* widen. */
1312 COSTS_N_INSNS (3), /* narrow. */
1313 COSTS_N_INSNS (3), /* toint. */
1314 COSTS_N_INSNS (3), /* fromint. */
1315 COSTS_N_INSNS (3) /* roundint. */
1317 /* FP DFmode */
1319 COSTS_N_INSNS (30), /* div. */
1320 COSTS_N_INSNS (6), /* mult. */
1321 COSTS_N_INSNS (10), /* mult_addsub. */
1322 COSTS_N_INSNS (7), /* fma. */
1323 COSTS_N_INSNS (3), /* addsub. */
1324 COSTS_N_INSNS (3), /* fpconst. */
1325 COSTS_N_INSNS (3), /* neg. */
1326 COSTS_N_INSNS (3), /* compare. */
1327 COSTS_N_INSNS (3), /* widen. */
1328 COSTS_N_INSNS (3), /* narrow. */
1329 COSTS_N_INSNS (3), /* toint. */
1330 COSTS_N_INSNS (3), /* fromint. */
1331 COSTS_N_INSNS (3) /* roundint. */
1334 /* Vector */
1336 COSTS_N_INSNS (1) /* alu. */
1341 const struct cpu_cost_table cortexa7_extra_costs =
1343 /* ALU */
1345 0, /* arith. */
1346 0, /* logical. */
1347 COSTS_N_INSNS (1), /* shift. */
1348 COSTS_N_INSNS (1), /* shift_reg. */
1349 COSTS_N_INSNS (1), /* arith_shift. */
1350 COSTS_N_INSNS (1), /* arith_shift_reg. */
1351 COSTS_N_INSNS (1), /* log_shift. */
1352 COSTS_N_INSNS (1), /* log_shift_reg. */
1353 COSTS_N_INSNS (1), /* extend. */
1354 COSTS_N_INSNS (1), /* extend_arith. */
1355 COSTS_N_INSNS (1), /* bfi. */
1356 COSTS_N_INSNS (1), /* bfx. */
1357 COSTS_N_INSNS (1), /* clz. */
1358 COSTS_N_INSNS (1), /* rev. */
1359 0, /* non_exec. */
1360 true /* non_exec_costs_exec. */
1364 /* MULT SImode */
1366 0, /* simple. */
1367 COSTS_N_INSNS (1), /* flag_setting. */
1368 COSTS_N_INSNS (1), /* extend. */
1369 COSTS_N_INSNS (1), /* add. */
1370 COSTS_N_INSNS (1), /* extend_add. */
1371 COSTS_N_INSNS (7) /* idiv. */
1373 /* MULT DImode */
1375 0, /* simple (N/A). */
1376 0, /* flag_setting (N/A). */
1377 COSTS_N_INSNS (1), /* extend. */
1378 0, /* add. */
1379 COSTS_N_INSNS (2), /* extend_add. */
1380 0 /* idiv (N/A). */
1383 /* LD/ST */
1385 COSTS_N_INSNS (1), /* load. */
1386 COSTS_N_INSNS (1), /* load_sign_extend. */
1387 COSTS_N_INSNS (3), /* ldrd. */
1388 COSTS_N_INSNS (1), /* ldm_1st. */
1389 1, /* ldm_regs_per_insn_1st. */
1390 2, /* ldm_regs_per_insn_subsequent. */
1391 COSTS_N_INSNS (2), /* loadf. */
1392 COSTS_N_INSNS (2), /* loadd. */
1393 COSTS_N_INSNS (1), /* load_unaligned. */
1394 COSTS_N_INSNS (1), /* store. */
1395 COSTS_N_INSNS (3), /* strd. */
1396 COSTS_N_INSNS (1), /* stm_1st. */
1397 1, /* stm_regs_per_insn_1st. */
1398 2, /* stm_regs_per_insn_subsequent. */
1399 COSTS_N_INSNS (2), /* storef. */
1400 COSTS_N_INSNS (2), /* stored. */
1401 COSTS_N_INSNS (1), /* store_unaligned. */
1402 COSTS_N_INSNS (1), /* loadv. */
1403 COSTS_N_INSNS (1) /* storev. */
1406 /* FP SFmode */
1408 COSTS_N_INSNS (15), /* div. */
1409 COSTS_N_INSNS (3), /* mult. */
1410 COSTS_N_INSNS (7), /* mult_addsub. */
1411 COSTS_N_INSNS (7), /* fma. */
1412 COSTS_N_INSNS (3), /* addsub. */
1413 COSTS_N_INSNS (3), /* fpconst. */
1414 COSTS_N_INSNS (3), /* neg. */
1415 COSTS_N_INSNS (3), /* compare. */
1416 COSTS_N_INSNS (3), /* widen. */
1417 COSTS_N_INSNS (3), /* narrow. */
1418 COSTS_N_INSNS (3), /* toint. */
1419 COSTS_N_INSNS (3), /* fromint. */
1420 COSTS_N_INSNS (3) /* roundint. */
1422 /* FP DFmode */
1424 COSTS_N_INSNS (30), /* div. */
1425 COSTS_N_INSNS (6), /* mult. */
1426 COSTS_N_INSNS (10), /* mult_addsub. */
1427 COSTS_N_INSNS (7), /* fma. */
1428 COSTS_N_INSNS (3), /* addsub. */
1429 COSTS_N_INSNS (3), /* fpconst. */
1430 COSTS_N_INSNS (3), /* neg. */
1431 COSTS_N_INSNS (3), /* compare. */
1432 COSTS_N_INSNS (3), /* widen. */
1433 COSTS_N_INSNS (3), /* narrow. */
1434 COSTS_N_INSNS (3), /* toint. */
1435 COSTS_N_INSNS (3), /* fromint. */
1436 COSTS_N_INSNS (3) /* roundint. */
1439 /* Vector */
1441 COSTS_N_INSNS (1) /* alu. */
1445 const struct cpu_cost_table cortexa12_extra_costs =
1447 /* ALU */
1449 0, /* arith. */
1450 0, /* logical. */
1451 0, /* shift. */
1452 COSTS_N_INSNS (1), /* shift_reg. */
1453 COSTS_N_INSNS (1), /* arith_shift. */
1454 COSTS_N_INSNS (1), /* arith_shift_reg. */
1455 COSTS_N_INSNS (1), /* log_shift. */
1456 COSTS_N_INSNS (1), /* log_shift_reg. */
1457 0, /* extend. */
1458 COSTS_N_INSNS (1), /* extend_arith. */
1459 0, /* bfi. */
1460 COSTS_N_INSNS (1), /* bfx. */
1461 COSTS_N_INSNS (1), /* clz. */
1462 COSTS_N_INSNS (1), /* rev. */
1463 0, /* non_exec. */
1464 true /* non_exec_costs_exec. */
1466 /* MULT SImode */
1469 COSTS_N_INSNS (2), /* simple. */
1470 COSTS_N_INSNS (3), /* flag_setting. */
1471 COSTS_N_INSNS (2), /* extend. */
1472 COSTS_N_INSNS (3), /* add. */
1473 COSTS_N_INSNS (2), /* extend_add. */
1474 COSTS_N_INSNS (18) /* idiv. */
1476 /* MULT DImode */
1478 0, /* simple (N/A). */
1479 0, /* flag_setting (N/A). */
1480 COSTS_N_INSNS (3), /* extend. */
1481 0, /* add (N/A). */
1482 COSTS_N_INSNS (3), /* extend_add. */
1483 0 /* idiv (N/A). */
1486 /* LD/ST */
1488 COSTS_N_INSNS (3), /* load. */
1489 COSTS_N_INSNS (3), /* load_sign_extend. */
1490 COSTS_N_INSNS (3), /* ldrd. */
1491 COSTS_N_INSNS (3), /* ldm_1st. */
1492 1, /* ldm_regs_per_insn_1st. */
1493 2, /* ldm_regs_per_insn_subsequent. */
1494 COSTS_N_INSNS (3), /* loadf. */
1495 COSTS_N_INSNS (3), /* loadd. */
1496 0, /* load_unaligned. */
1497 0, /* store. */
1498 0, /* strd. */
1499 0, /* stm_1st. */
1500 1, /* stm_regs_per_insn_1st. */
1501 2, /* stm_regs_per_insn_subsequent. */
1502 COSTS_N_INSNS (2), /* storef. */
1503 COSTS_N_INSNS (2), /* stored. */
1504 0, /* store_unaligned. */
1505 COSTS_N_INSNS (1), /* loadv. */
1506 COSTS_N_INSNS (1) /* storev. */
1509 /* FP SFmode */
1511 COSTS_N_INSNS (17), /* div. */
1512 COSTS_N_INSNS (4), /* mult. */
1513 COSTS_N_INSNS (8), /* mult_addsub. */
1514 COSTS_N_INSNS (8), /* fma. */
1515 COSTS_N_INSNS (4), /* addsub. */
1516 COSTS_N_INSNS (2), /* fpconst. */
1517 COSTS_N_INSNS (2), /* neg. */
1518 COSTS_N_INSNS (2), /* compare. */
1519 COSTS_N_INSNS (4), /* widen. */
1520 COSTS_N_INSNS (4), /* narrow. */
1521 COSTS_N_INSNS (4), /* toint. */
1522 COSTS_N_INSNS (4), /* fromint. */
1523 COSTS_N_INSNS (4) /* roundint. */
1525 /* FP DFmode */
1527 COSTS_N_INSNS (31), /* div. */
1528 COSTS_N_INSNS (4), /* mult. */
1529 COSTS_N_INSNS (8), /* mult_addsub. */
1530 COSTS_N_INSNS (8), /* fma. */
1531 COSTS_N_INSNS (4), /* addsub. */
1532 COSTS_N_INSNS (2), /* fpconst. */
1533 COSTS_N_INSNS (2), /* neg. */
1534 COSTS_N_INSNS (2), /* compare. */
1535 COSTS_N_INSNS (4), /* widen. */
1536 COSTS_N_INSNS (4), /* narrow. */
1537 COSTS_N_INSNS (4), /* toint. */
1538 COSTS_N_INSNS (4), /* fromint. */
1539 COSTS_N_INSNS (4) /* roundint. */
1542 /* Vector */
1544 COSTS_N_INSNS (1) /* alu. */
1548 const struct cpu_cost_table cortexa15_extra_costs =
1550 /* ALU */
1552 0, /* arith. */
1553 0, /* logical. */
1554 0, /* shift. */
1555 0, /* shift_reg. */
1556 COSTS_N_INSNS (1), /* arith_shift. */
1557 COSTS_N_INSNS (1), /* arith_shift_reg. */
1558 COSTS_N_INSNS (1), /* log_shift. */
1559 COSTS_N_INSNS (1), /* log_shift_reg. */
1560 0, /* extend. */
1561 COSTS_N_INSNS (1), /* extend_arith. */
1562 COSTS_N_INSNS (1), /* bfi. */
1563 0, /* bfx. */
1564 0, /* clz. */
1565 0, /* rev. */
1566 0, /* non_exec. */
1567 true /* non_exec_costs_exec. */
1569 /* MULT SImode */
1572 COSTS_N_INSNS (2), /* simple. */
1573 COSTS_N_INSNS (3), /* flag_setting. */
1574 COSTS_N_INSNS (2), /* extend. */
1575 COSTS_N_INSNS (2), /* add. */
1576 COSTS_N_INSNS (2), /* extend_add. */
1577 COSTS_N_INSNS (18) /* idiv. */
1579 /* MULT DImode */
1581 0, /* simple (N/A). */
1582 0, /* flag_setting (N/A). */
1583 COSTS_N_INSNS (3), /* extend. */
1584 0, /* add (N/A). */
1585 COSTS_N_INSNS (3), /* extend_add. */
1586 0 /* idiv (N/A). */
1589 /* LD/ST */
1591 COSTS_N_INSNS (3), /* load. */
1592 COSTS_N_INSNS (3), /* load_sign_extend. */
1593 COSTS_N_INSNS (3), /* ldrd. */
1594 COSTS_N_INSNS (4), /* ldm_1st. */
1595 1, /* ldm_regs_per_insn_1st. */
1596 2, /* ldm_regs_per_insn_subsequent. */
1597 COSTS_N_INSNS (4), /* loadf. */
1598 COSTS_N_INSNS (4), /* loadd. */
1599 0, /* load_unaligned. */
1600 0, /* store. */
1601 0, /* strd. */
1602 COSTS_N_INSNS (1), /* stm_1st. */
1603 1, /* stm_regs_per_insn_1st. */
1604 2, /* stm_regs_per_insn_subsequent. */
1605 0, /* storef. */
1606 0, /* stored. */
1607 0, /* store_unaligned. */
1608 COSTS_N_INSNS (1), /* loadv. */
1609 COSTS_N_INSNS (1) /* storev. */
1612 /* FP SFmode */
1614 COSTS_N_INSNS (17), /* div. */
1615 COSTS_N_INSNS (4), /* mult. */
1616 COSTS_N_INSNS (8), /* mult_addsub. */
1617 COSTS_N_INSNS (8), /* fma. */
1618 COSTS_N_INSNS (4), /* addsub. */
1619 COSTS_N_INSNS (2), /* fpconst. */
1620 COSTS_N_INSNS (2), /* neg. */
1621 COSTS_N_INSNS (5), /* compare. */
1622 COSTS_N_INSNS (4), /* widen. */
1623 COSTS_N_INSNS (4), /* narrow. */
1624 COSTS_N_INSNS (4), /* toint. */
1625 COSTS_N_INSNS (4), /* fromint. */
1626 COSTS_N_INSNS (4) /* roundint. */
1628 /* FP DFmode */
1630 COSTS_N_INSNS (31), /* div. */
1631 COSTS_N_INSNS (4), /* mult. */
1632 COSTS_N_INSNS (8), /* mult_addsub. */
1633 COSTS_N_INSNS (8), /* fma. */
1634 COSTS_N_INSNS (4), /* addsub. */
1635 COSTS_N_INSNS (2), /* fpconst. */
1636 COSTS_N_INSNS (2), /* neg. */
1637 COSTS_N_INSNS (2), /* compare. */
1638 COSTS_N_INSNS (4), /* widen. */
1639 COSTS_N_INSNS (4), /* narrow. */
1640 COSTS_N_INSNS (4), /* toint. */
1641 COSTS_N_INSNS (4), /* fromint. */
1642 COSTS_N_INSNS (4) /* roundint. */
1645 /* Vector */
1647 COSTS_N_INSNS (1) /* alu. */
1651 const struct cpu_cost_table v7m_extra_costs =
1653 /* ALU */
1655 0, /* arith. */
1656 0, /* logical. */
1657 0, /* shift. */
1658 0, /* shift_reg. */
1659 0, /* arith_shift. */
1660 COSTS_N_INSNS (1), /* arith_shift_reg. */
1661 0, /* log_shift. */
1662 COSTS_N_INSNS (1), /* log_shift_reg. */
1663 0, /* extend. */
1664 COSTS_N_INSNS (1), /* extend_arith. */
1665 0, /* bfi. */
1666 0, /* bfx. */
1667 0, /* clz. */
1668 0, /* rev. */
1669 COSTS_N_INSNS (1), /* non_exec. */
1670 false /* non_exec_costs_exec. */
1673 /* MULT SImode */
1675 COSTS_N_INSNS (1), /* simple. */
1676 COSTS_N_INSNS (1), /* flag_setting. */
1677 COSTS_N_INSNS (2), /* extend. */
1678 COSTS_N_INSNS (1), /* add. */
1679 COSTS_N_INSNS (3), /* extend_add. */
1680 COSTS_N_INSNS (8) /* idiv. */
1682 /* MULT DImode */
1684 0, /* simple (N/A). */
1685 0, /* flag_setting (N/A). */
1686 COSTS_N_INSNS (2), /* extend. */
1687 0, /* add (N/A). */
1688 COSTS_N_INSNS (3), /* extend_add. */
1689 0 /* idiv (N/A). */
1692 /* LD/ST */
1694 COSTS_N_INSNS (2), /* load. */
1695 0, /* load_sign_extend. */
1696 COSTS_N_INSNS (3), /* ldrd. */
1697 COSTS_N_INSNS (2), /* ldm_1st. */
1698 1, /* ldm_regs_per_insn_1st. */
1699 1, /* ldm_regs_per_insn_subsequent. */
1700 COSTS_N_INSNS (2), /* loadf. */
1701 COSTS_N_INSNS (3), /* loadd. */
1702 COSTS_N_INSNS (1), /* load_unaligned. */
1703 COSTS_N_INSNS (2), /* store. */
1704 COSTS_N_INSNS (3), /* strd. */
1705 COSTS_N_INSNS (2), /* stm_1st. */
1706 1, /* stm_regs_per_insn_1st. */
1707 1, /* stm_regs_per_insn_subsequent. */
1708 COSTS_N_INSNS (2), /* storef. */
1709 COSTS_N_INSNS (3), /* stored. */
1710 COSTS_N_INSNS (1), /* store_unaligned. */
1711 COSTS_N_INSNS (1), /* loadv. */
1712 COSTS_N_INSNS (1) /* storev. */
1715 /* FP SFmode */
1717 COSTS_N_INSNS (7), /* div. */
1718 COSTS_N_INSNS (2), /* mult. */
1719 COSTS_N_INSNS (5), /* mult_addsub. */
1720 COSTS_N_INSNS (3), /* fma. */
1721 COSTS_N_INSNS (1), /* addsub. */
1722 0, /* fpconst. */
1723 0, /* neg. */
1724 0, /* compare. */
1725 0, /* widen. */
1726 0, /* narrow. */
1727 0, /* toint. */
1728 0, /* fromint. */
1729 0 /* roundint. */
1731 /* FP DFmode */
1733 COSTS_N_INSNS (15), /* div. */
1734 COSTS_N_INSNS (5), /* mult. */
1735 COSTS_N_INSNS (7), /* mult_addsub. */
1736 COSTS_N_INSNS (7), /* fma. */
1737 COSTS_N_INSNS (3), /* addsub. */
1738 0, /* fpconst. */
1739 0, /* neg. */
1740 0, /* compare. */
1741 0, /* widen. */
1742 0, /* narrow. */
1743 0, /* toint. */
1744 0, /* fromint. */
1745 0 /* roundint. */
1748 /* Vector */
1750 COSTS_N_INSNS (1) /* alu. */
1754 const struct tune_params arm_slowmul_tune =
1756 &generic_extra_costs, /* Insn extra costs. */
1757 NULL, /* Sched adj cost. */
1758 arm_default_branch_cost,
1759 &arm_default_vec_cost,
1760 3, /* Constant limit. */
1761 5, /* Max cond insns. */
1762 8, /* Memset max inline. */
1763 1, /* Issue rate. */
1764 ARM_PREFETCH_NOT_BENEFICIAL,
1765 tune_params::PREF_CONST_POOL_TRUE,
1766 tune_params::PREF_LDRD_FALSE,
1767 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1768 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1769 tune_params::DISPARAGE_FLAGS_NEITHER,
1770 tune_params::PREF_NEON_64_FALSE,
1771 tune_params::PREF_NEON_STRINGOPS_FALSE,
1772 tune_params::FUSE_NOTHING,
1773 tune_params::SCHED_AUTOPREF_OFF
1776 const struct tune_params arm_fastmul_tune =
1778 &generic_extra_costs, /* Insn extra costs. */
1779 NULL, /* Sched adj cost. */
1780 arm_default_branch_cost,
1781 &arm_default_vec_cost,
1782 1, /* Constant limit. */
1783 5, /* Max cond insns. */
1784 8, /* Memset max inline. */
1785 1, /* Issue rate. */
1786 ARM_PREFETCH_NOT_BENEFICIAL,
1787 tune_params::PREF_CONST_POOL_TRUE,
1788 tune_params::PREF_LDRD_FALSE,
1789 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1790 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1791 tune_params::DISPARAGE_FLAGS_NEITHER,
1792 tune_params::PREF_NEON_64_FALSE,
1793 tune_params::PREF_NEON_STRINGOPS_FALSE,
1794 tune_params::FUSE_NOTHING,
1795 tune_params::SCHED_AUTOPREF_OFF
1798 /* StrongARM has early execution of branches, so a sequence that is worth
1799 skipping is shorter. Set max_insns_skipped to a lower value. */
1801 const struct tune_params arm_strongarm_tune =
1803 &generic_extra_costs, /* Insn extra costs. */
1804 NULL, /* Sched adj cost. */
1805 arm_default_branch_cost,
1806 &arm_default_vec_cost,
1807 1, /* Constant limit. */
1808 3, /* Max cond insns. */
1809 8, /* Memset max inline. */
1810 1, /* Issue rate. */
1811 ARM_PREFETCH_NOT_BENEFICIAL,
1812 tune_params::PREF_CONST_POOL_TRUE,
1813 tune_params::PREF_LDRD_FALSE,
1814 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1815 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1816 tune_params::DISPARAGE_FLAGS_NEITHER,
1817 tune_params::PREF_NEON_64_FALSE,
1818 tune_params::PREF_NEON_STRINGOPS_FALSE,
1819 tune_params::FUSE_NOTHING,
1820 tune_params::SCHED_AUTOPREF_OFF
1823 const struct tune_params arm_xscale_tune =
1825 &generic_extra_costs, /* Insn extra costs. */
1826 xscale_sched_adjust_cost,
1827 arm_default_branch_cost,
1828 &arm_default_vec_cost,
1829 2, /* Constant limit. */
1830 3, /* Max cond insns. */
1831 8, /* Memset max inline. */
1832 1, /* Issue rate. */
1833 ARM_PREFETCH_NOT_BENEFICIAL,
1834 tune_params::PREF_CONST_POOL_TRUE,
1835 tune_params::PREF_LDRD_FALSE,
1836 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1837 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1838 tune_params::DISPARAGE_FLAGS_NEITHER,
1839 tune_params::PREF_NEON_64_FALSE,
1840 tune_params::PREF_NEON_STRINGOPS_FALSE,
1841 tune_params::FUSE_NOTHING,
1842 tune_params::SCHED_AUTOPREF_OFF
1845 const struct tune_params arm_9e_tune =
1847 &generic_extra_costs, /* Insn extra costs. */
1848 NULL, /* Sched adj cost. */
1849 arm_default_branch_cost,
1850 &arm_default_vec_cost,
1851 1, /* Constant limit. */
1852 5, /* Max cond insns. */
1853 8, /* Memset max inline. */
1854 1, /* Issue rate. */
1855 ARM_PREFETCH_NOT_BENEFICIAL,
1856 tune_params::PREF_CONST_POOL_TRUE,
1857 tune_params::PREF_LDRD_FALSE,
1858 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1859 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1860 tune_params::DISPARAGE_FLAGS_NEITHER,
1861 tune_params::PREF_NEON_64_FALSE,
1862 tune_params::PREF_NEON_STRINGOPS_FALSE,
1863 tune_params::FUSE_NOTHING,
1864 tune_params::SCHED_AUTOPREF_OFF
1867 const struct tune_params arm_marvell_pj4_tune =
1869 &generic_extra_costs, /* Insn extra costs. */
1870 NULL, /* Sched adj cost. */
1871 arm_default_branch_cost,
1872 &arm_default_vec_cost,
1873 1, /* Constant limit. */
1874 5, /* Max cond insns. */
1875 8, /* Memset max inline. */
1876 2, /* Issue rate. */
1877 ARM_PREFETCH_NOT_BENEFICIAL,
1878 tune_params::PREF_CONST_POOL_TRUE,
1879 tune_params::PREF_LDRD_FALSE,
1880 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1881 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1882 tune_params::DISPARAGE_FLAGS_NEITHER,
1883 tune_params::PREF_NEON_64_FALSE,
1884 tune_params::PREF_NEON_STRINGOPS_FALSE,
1885 tune_params::FUSE_NOTHING,
1886 tune_params::SCHED_AUTOPREF_OFF
1889 const struct tune_params arm_v6t2_tune =
1891 &generic_extra_costs, /* Insn extra costs. */
1892 NULL, /* Sched adj cost. */
1893 arm_default_branch_cost,
1894 &arm_default_vec_cost,
1895 1, /* Constant limit. */
1896 5, /* Max cond insns. */
1897 8, /* Memset max inline. */
1898 1, /* Issue rate. */
1899 ARM_PREFETCH_NOT_BENEFICIAL,
1900 tune_params::PREF_CONST_POOL_FALSE,
1901 tune_params::PREF_LDRD_FALSE,
1902 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1903 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1904 tune_params::DISPARAGE_FLAGS_NEITHER,
1905 tune_params::PREF_NEON_64_FALSE,
1906 tune_params::PREF_NEON_STRINGOPS_FALSE,
1907 tune_params::FUSE_NOTHING,
1908 tune_params::SCHED_AUTOPREF_OFF
1912 /* Generic Cortex tuning. Use more specific tunings if appropriate. */
1913 const struct tune_params arm_cortex_tune =
1915 &generic_extra_costs,
1916 NULL, /* Sched adj cost. */
1917 arm_default_branch_cost,
1918 &arm_default_vec_cost,
1919 1, /* Constant limit. */
1920 5, /* Max cond insns. */
1921 8, /* Memset max inline. */
1922 2, /* Issue rate. */
1923 ARM_PREFETCH_NOT_BENEFICIAL,
1924 tune_params::PREF_CONST_POOL_FALSE,
1925 tune_params::PREF_LDRD_FALSE,
1926 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1927 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1928 tune_params::DISPARAGE_FLAGS_NEITHER,
1929 tune_params::PREF_NEON_64_FALSE,
1930 tune_params::PREF_NEON_STRINGOPS_FALSE,
1931 tune_params::FUSE_NOTHING,
1932 tune_params::SCHED_AUTOPREF_OFF
1935 const struct tune_params arm_cortex_a8_tune =
1937 &cortexa8_extra_costs,
1938 NULL, /* Sched adj cost. */
1939 arm_default_branch_cost,
1940 &arm_default_vec_cost,
1941 1, /* Constant limit. */
1942 5, /* Max cond insns. */
1943 8, /* Memset max inline. */
1944 2, /* Issue rate. */
1945 ARM_PREFETCH_NOT_BENEFICIAL,
1946 tune_params::PREF_CONST_POOL_FALSE,
1947 tune_params::PREF_LDRD_FALSE,
1948 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1949 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1950 tune_params::DISPARAGE_FLAGS_NEITHER,
1951 tune_params::PREF_NEON_64_FALSE,
1952 tune_params::PREF_NEON_STRINGOPS_TRUE,
1953 tune_params::FUSE_NOTHING,
1954 tune_params::SCHED_AUTOPREF_OFF
1957 const struct tune_params arm_cortex_a7_tune =
1959 &cortexa7_extra_costs,
1960 NULL, /* Sched adj cost. */
1961 arm_default_branch_cost,
1962 &arm_default_vec_cost,
1963 1, /* Constant limit. */
1964 5, /* Max cond insns. */
1965 8, /* Memset max inline. */
1966 2, /* Issue rate. */
1967 ARM_PREFETCH_NOT_BENEFICIAL,
1968 tune_params::PREF_CONST_POOL_FALSE,
1969 tune_params::PREF_LDRD_FALSE,
1970 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1971 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1972 tune_params::DISPARAGE_FLAGS_NEITHER,
1973 tune_params::PREF_NEON_64_FALSE,
1974 tune_params::PREF_NEON_STRINGOPS_TRUE,
1975 tune_params::FUSE_NOTHING,
1976 tune_params::SCHED_AUTOPREF_OFF
1979 const struct tune_params arm_cortex_a15_tune =
1981 &cortexa15_extra_costs,
1982 NULL, /* Sched adj cost. */
1983 arm_default_branch_cost,
1984 &arm_default_vec_cost,
1985 1, /* Constant limit. */
1986 2, /* Max cond insns. */
1987 8, /* Memset max inline. */
1988 3, /* Issue rate. */
1989 ARM_PREFETCH_NOT_BENEFICIAL,
1990 tune_params::PREF_CONST_POOL_FALSE,
1991 tune_params::PREF_LDRD_TRUE,
1992 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1993 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1994 tune_params::DISPARAGE_FLAGS_ALL,
1995 tune_params::PREF_NEON_64_FALSE,
1996 tune_params::PREF_NEON_STRINGOPS_TRUE,
1997 tune_params::FUSE_NOTHING,
1998 tune_params::SCHED_AUTOPREF_FULL
2001 const struct tune_params arm_cortex_a35_tune =
2003 &cortexa53_extra_costs,
2004 NULL, /* Sched adj cost. */
2005 arm_default_branch_cost,
2006 &arm_default_vec_cost,
2007 1, /* Constant limit. */
2008 5, /* Max cond insns. */
2009 8, /* Memset max inline. */
2010 1, /* Issue rate. */
2011 ARM_PREFETCH_NOT_BENEFICIAL,
2012 tune_params::PREF_CONST_POOL_FALSE,
2013 tune_params::PREF_LDRD_FALSE,
2014 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2015 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2016 tune_params::DISPARAGE_FLAGS_NEITHER,
2017 tune_params::PREF_NEON_64_FALSE,
2018 tune_params::PREF_NEON_STRINGOPS_TRUE,
2019 FUSE_OPS (tune_params::FUSE_MOVW_MOVT),
2020 tune_params::SCHED_AUTOPREF_OFF
2023 const struct tune_params arm_cortex_a53_tune =
2025 &cortexa53_extra_costs,
2026 NULL, /* Sched adj cost. */
2027 arm_default_branch_cost,
2028 &arm_default_vec_cost,
2029 1, /* Constant limit. */
2030 5, /* Max cond insns. */
2031 8, /* Memset max inline. */
2032 2, /* Issue rate. */
2033 ARM_PREFETCH_NOT_BENEFICIAL,
2034 tune_params::PREF_CONST_POOL_FALSE,
2035 tune_params::PREF_LDRD_FALSE,
2036 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2037 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2038 tune_params::DISPARAGE_FLAGS_NEITHER,
2039 tune_params::PREF_NEON_64_FALSE,
2040 tune_params::PREF_NEON_STRINGOPS_TRUE,
2041 FUSE_OPS (tune_params::FUSE_MOVW_MOVT | tune_params::FUSE_AES_AESMC),
2042 tune_params::SCHED_AUTOPREF_OFF
2045 const struct tune_params arm_cortex_a57_tune =
2047 &cortexa57_extra_costs,
2048 NULL, /* Sched adj cost. */
2049 arm_default_branch_cost,
2050 &arm_default_vec_cost,
2051 1, /* Constant limit. */
2052 2, /* Max cond insns. */
2053 8, /* Memset max inline. */
2054 3, /* Issue rate. */
2055 ARM_PREFETCH_NOT_BENEFICIAL,
2056 tune_params::PREF_CONST_POOL_FALSE,
2057 tune_params::PREF_LDRD_TRUE,
2058 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2059 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2060 tune_params::DISPARAGE_FLAGS_ALL,
2061 tune_params::PREF_NEON_64_FALSE,
2062 tune_params::PREF_NEON_STRINGOPS_TRUE,
2063 FUSE_OPS (tune_params::FUSE_MOVW_MOVT | tune_params::FUSE_AES_AESMC),
2064 tune_params::SCHED_AUTOPREF_FULL
2067 const struct tune_params arm_exynosm1_tune =
2069 &exynosm1_extra_costs,
2070 NULL, /* Sched adj cost. */
2071 arm_default_branch_cost,
2072 &arm_default_vec_cost,
2073 1, /* Constant limit. */
2074 2, /* Max cond insns. */
2075 8, /* Memset max inline. */
2076 3, /* Issue rate. */
2077 ARM_PREFETCH_NOT_BENEFICIAL,
2078 tune_params::PREF_CONST_POOL_FALSE,
2079 tune_params::PREF_LDRD_TRUE,
2080 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* Thumb. */
2081 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* ARM. */
2082 tune_params::DISPARAGE_FLAGS_ALL,
2083 tune_params::PREF_NEON_64_FALSE,
2084 tune_params::PREF_NEON_STRINGOPS_TRUE,
2085 tune_params::FUSE_NOTHING,
2086 tune_params::SCHED_AUTOPREF_OFF
2089 const struct tune_params arm_xgene1_tune =
2091 &xgene1_extra_costs,
2092 NULL, /* Sched adj cost. */
2093 arm_default_branch_cost,
2094 &arm_default_vec_cost,
2095 1, /* Constant limit. */
2096 2, /* Max cond insns. */
2097 32, /* Memset max inline. */
2098 4, /* Issue rate. */
2099 ARM_PREFETCH_NOT_BENEFICIAL,
2100 tune_params::PREF_CONST_POOL_FALSE,
2101 tune_params::PREF_LDRD_TRUE,
2102 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2103 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2104 tune_params::DISPARAGE_FLAGS_ALL,
2105 tune_params::PREF_NEON_64_FALSE,
2106 tune_params::PREF_NEON_STRINGOPS_FALSE,
2107 tune_params::FUSE_NOTHING,
2108 tune_params::SCHED_AUTOPREF_OFF
2111 /* Branches can be dual-issued on Cortex-A5, so conditional execution is
2112 less appealing. Set max_insns_skipped to a low value. */
2114 const struct tune_params arm_cortex_a5_tune =
2116 &cortexa5_extra_costs,
2117 NULL, /* Sched adj cost. */
2118 arm_cortex_a5_branch_cost,
2119 &arm_default_vec_cost,
2120 1, /* Constant limit. */
2121 1, /* Max cond insns. */
2122 8, /* Memset max inline. */
2123 2, /* Issue rate. */
2124 ARM_PREFETCH_NOT_BENEFICIAL,
2125 tune_params::PREF_CONST_POOL_FALSE,
2126 tune_params::PREF_LDRD_FALSE,
2127 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* Thumb. */
2128 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* ARM. */
2129 tune_params::DISPARAGE_FLAGS_NEITHER,
2130 tune_params::PREF_NEON_64_FALSE,
2131 tune_params::PREF_NEON_STRINGOPS_TRUE,
2132 tune_params::FUSE_NOTHING,
2133 tune_params::SCHED_AUTOPREF_OFF
2136 const struct tune_params arm_cortex_a9_tune =
2138 &cortexa9_extra_costs,
2139 cortex_a9_sched_adjust_cost,
2140 arm_default_branch_cost,
2141 &arm_default_vec_cost,
2142 1, /* Constant limit. */
2143 5, /* Max cond insns. */
2144 8, /* Memset max inline. */
2145 2, /* Issue rate. */
2146 ARM_PREFETCH_BENEFICIAL(4,32,32),
2147 tune_params::PREF_CONST_POOL_FALSE,
2148 tune_params::PREF_LDRD_FALSE,
2149 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2150 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2151 tune_params::DISPARAGE_FLAGS_NEITHER,
2152 tune_params::PREF_NEON_64_FALSE,
2153 tune_params::PREF_NEON_STRINGOPS_FALSE,
2154 tune_params::FUSE_NOTHING,
2155 tune_params::SCHED_AUTOPREF_OFF
2158 const struct tune_params arm_cortex_a12_tune =
2160 &cortexa12_extra_costs,
2161 NULL, /* Sched adj cost. */
2162 arm_default_branch_cost,
2163 &arm_default_vec_cost, /* Vectorizer costs. */
2164 1, /* Constant limit. */
2165 2, /* Max cond insns. */
2166 8, /* Memset max inline. */
2167 2, /* Issue rate. */
2168 ARM_PREFETCH_NOT_BENEFICIAL,
2169 tune_params::PREF_CONST_POOL_FALSE,
2170 tune_params::PREF_LDRD_TRUE,
2171 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2172 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2173 tune_params::DISPARAGE_FLAGS_ALL,
2174 tune_params::PREF_NEON_64_FALSE,
2175 tune_params::PREF_NEON_STRINGOPS_TRUE,
2176 FUSE_OPS (tune_params::FUSE_MOVW_MOVT),
2177 tune_params::SCHED_AUTOPREF_OFF
2180 const struct tune_params arm_cortex_a73_tune =
2182 &cortexa57_extra_costs,
2183 NULL, /* Sched adj cost. */
2184 arm_default_branch_cost,
2185 &arm_default_vec_cost, /* Vectorizer costs. */
2186 1, /* Constant limit. */
2187 2, /* Max cond insns. */
2188 8, /* Memset max inline. */
2189 2, /* Issue rate. */
2190 ARM_PREFETCH_NOT_BENEFICIAL,
2191 tune_params::PREF_CONST_POOL_FALSE,
2192 tune_params::PREF_LDRD_TRUE,
2193 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2194 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2195 tune_params::DISPARAGE_FLAGS_ALL,
2196 tune_params::PREF_NEON_64_FALSE,
2197 tune_params::PREF_NEON_STRINGOPS_TRUE,
2198 FUSE_OPS (tune_params::FUSE_AES_AESMC | tune_params::FUSE_MOVW_MOVT),
2199 tune_params::SCHED_AUTOPREF_FULL
2202 /* armv7m tuning. On Cortex-M4 cores for example, MOVW/MOVT take a single
2203 cycle to execute each. An LDR from the constant pool also takes two cycles
2204 to execute, but mildly increases pipelining opportunity (consecutive
2205 loads/stores can be pipelined together, saving one cycle), and may also
2206 improve icache utilisation. Hence we prefer the constant pool for such
2207 processors. */
2209 const struct tune_params arm_v7m_tune =
2211 &v7m_extra_costs,
2212 NULL, /* Sched adj cost. */
2213 arm_cortex_m_branch_cost,
2214 &arm_default_vec_cost,
2215 1, /* Constant limit. */
2216 2, /* Max cond insns. */
2217 8, /* Memset max inline. */
2218 1, /* Issue rate. */
2219 ARM_PREFETCH_NOT_BENEFICIAL,
2220 tune_params::PREF_CONST_POOL_TRUE,
2221 tune_params::PREF_LDRD_FALSE,
2222 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* Thumb. */
2223 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* ARM. */
2224 tune_params::DISPARAGE_FLAGS_NEITHER,
2225 tune_params::PREF_NEON_64_FALSE,
2226 tune_params::PREF_NEON_STRINGOPS_FALSE,
2227 tune_params::FUSE_NOTHING,
2228 tune_params::SCHED_AUTOPREF_OFF
2231 /* Cortex-M7 tuning. */
2233 const struct tune_params arm_cortex_m7_tune =
2235 &v7m_extra_costs,
2236 NULL, /* Sched adj cost. */
2237 arm_cortex_m7_branch_cost,
2238 &arm_default_vec_cost,
2239 0, /* Constant limit. */
2240 1, /* Max cond insns. */
2241 8, /* Memset max inline. */
2242 2, /* Issue rate. */
2243 ARM_PREFETCH_NOT_BENEFICIAL,
2244 tune_params::PREF_CONST_POOL_TRUE,
2245 tune_params::PREF_LDRD_FALSE,
2246 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2247 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2248 tune_params::DISPARAGE_FLAGS_NEITHER,
2249 tune_params::PREF_NEON_64_FALSE,
2250 tune_params::PREF_NEON_STRINGOPS_FALSE,
2251 tune_params::FUSE_NOTHING,
2252 tune_params::SCHED_AUTOPREF_OFF
2255 /* The arm_v6m_tune is duplicated from arm_cortex_tune, rather than
2256 arm_v6t2_tune. It is used for cortex-m0, cortex-m1, cortex-m0plus and
2257 cortex-m23. */
2258 const struct tune_params arm_v6m_tune =
2260 &generic_extra_costs, /* Insn extra costs. */
2261 NULL, /* Sched adj cost. */
2262 arm_default_branch_cost,
2263 &arm_default_vec_cost, /* Vectorizer costs. */
2264 1, /* Constant limit. */
2265 5, /* Max cond insns. */
2266 8, /* Memset max inline. */
2267 1, /* Issue rate. */
2268 ARM_PREFETCH_NOT_BENEFICIAL,
2269 tune_params::PREF_CONST_POOL_FALSE,
2270 tune_params::PREF_LDRD_FALSE,
2271 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* Thumb. */
2272 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* ARM. */
2273 tune_params::DISPARAGE_FLAGS_NEITHER,
2274 tune_params::PREF_NEON_64_FALSE,
2275 tune_params::PREF_NEON_STRINGOPS_FALSE,
2276 tune_params::FUSE_NOTHING,
2277 tune_params::SCHED_AUTOPREF_OFF
2280 const struct tune_params arm_fa726te_tune =
2282 &generic_extra_costs, /* Insn extra costs. */
2283 fa726te_sched_adjust_cost,
2284 arm_default_branch_cost,
2285 &arm_default_vec_cost,
2286 1, /* Constant limit. */
2287 5, /* Max cond insns. */
2288 8, /* Memset max inline. */
2289 2, /* Issue rate. */
2290 ARM_PREFETCH_NOT_BENEFICIAL,
2291 tune_params::PREF_CONST_POOL_TRUE,
2292 tune_params::PREF_LDRD_FALSE,
2293 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2294 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2295 tune_params::DISPARAGE_FLAGS_NEITHER,
2296 tune_params::PREF_NEON_64_FALSE,
2297 tune_params::PREF_NEON_STRINGOPS_FALSE,
2298 tune_params::FUSE_NOTHING,
2299 tune_params::SCHED_AUTOPREF_OFF
2302 /* Auto-generated CPU, FPU and architecture tables. */
2303 #include "arm-cpu-data.h"
2305 /* The name of the preprocessor macro to define for this architecture. PROFILE
2306 is replaced by the architecture name (e.g. 8A) in arm_option_override () and
2307 is thus chosen to be big enough to hold the longest architecture name. */
2309 char arm_arch_name[] = "__ARM_ARCH_PROFILE__";
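/* Illustrative sketch, not part of the build: arm_option_override () further
   down rewrites this buffer in place with sprintf, so for an architecture
   whose preprocessor name is "8A" it would end up holding "__ARM_ARCH_8A__";
   the placeholder string above is merely sized to fit the longest such
   name.  */
#if 0
  sprintf (arm_arch_name, "__ARM_ARCH_%s__", "8A");	/* "__ARM_ARCH_8A__" */
#endif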
2311 /* Supported TLS relocations. */
2313 enum tls_reloc {
2314 TLS_GD32,
2315 TLS_LDM32,
2316 TLS_LDO32,
2317 TLS_IE32,
2318 TLS_LE32,
2319 TLS_DESCSEQ /* GNU scheme */
2322 /* The maximum number of insns to be used when loading a constant. */
2323 inline static int
2324 arm_constant_limit (bool size_p)
2326 return size_p ? 1 : current_tune->constant_limit;
2329 /* Emit an insn that's a simple single-set. Both the operands must be known
2330 to be valid. */
2331 inline static rtx_insn *
2332 emit_set_insn (rtx x, rtx y)
2334 return emit_insn (gen_rtx_SET (x, y));
2337 /* Return the number of bits set in VALUE. */
2338 static unsigned
2339 bit_count (unsigned long value)
2341 unsigned long count = 0;
2343 while (value)
2345 count++;
2346 value &= value - 1; /* Clear the least-significant set bit. */
2349 return count;
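/* Illustrative sketch, not part of the build: the loop above clears one set
   bit per iteration, so for 0xb (binary 1011) it visits 0xb -> 0xa -> 0x8
   -> 0x0 and the function returns 3.  */
#if 0
  unsigned example_population = bit_count (0xb);	/* == 3 */
#endif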
2352 /* Return the number of bits set in BMAP. */
2353 static unsigned
2354 bitmap_popcount (const sbitmap bmap)
2356 unsigned int count = 0;
2357 unsigned int n = 0;
2358 sbitmap_iterator sbi;
2360 EXECUTE_IF_SET_IN_BITMAP (bmap, 0, n, sbi)
2361 count++;
2362 return count;
2365 typedef struct
2367 machine_mode mode;
2368 const char *name;
2369 } arm_fixed_mode_set;
2371 /* A small helper for setting fixed-point library libfuncs. */
2373 static void
2374 arm_set_fixed_optab_libfunc (optab optable, machine_mode mode,
2375 const char *funcname, const char *modename,
2376 int num_suffix)
2378 char buffer[50];
2380 if (num_suffix == 0)
2381 sprintf (buffer, "__gnu_%s%s", funcname, modename);
2382 else
2383 sprintf (buffer, "__gnu_%s%s%d", funcname, modename, num_suffix);
2385 set_optab_libfunc (optable, mode, buffer);
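/* Illustrative sketch, not part of the build: given the sprintf formats
   above, a call such as the one below (using the "qq" mode name from the
   tables further down) registers the libcall name "__gnu_addqq3" for
   add_optab on QQmode; a num_suffix of 0 would simply omit the digit.  */
#if 0
  arm_set_fixed_optab_libfunc (add_optab, E_QQmode, "add", "qq", 3);
#endif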
2388 static void
2389 arm_set_fixed_conv_libfunc (convert_optab optable, machine_mode to,
2390 machine_mode from, const char *funcname,
2391 const char *toname, const char *fromname)
2393 char buffer[50];
2394 const char *maybe_suffix_2 = "";
2396 /* Follow the logic for selecting a "2" suffix in fixed-bit.h. */
2397 if (ALL_FIXED_POINT_MODE_P (from) && ALL_FIXED_POINT_MODE_P (to)
2398 && UNSIGNED_FIXED_POINT_MODE_P (from) == UNSIGNED_FIXED_POINT_MODE_P (to)
2399 && ALL_FRACT_MODE_P (from) == ALL_FRACT_MODE_P (to))
2400 maybe_suffix_2 = "2";
2402 sprintf (buffer, "__gnu_%s%s%s%s", funcname, fromname, toname,
2403 maybe_suffix_2);
2405 set_conv_libfunc (optable, to, from, buffer);
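/* Illustrative sketch, not part of the build: following the fixed-bit.h rule
   above, a fract conversion between two signed fract modes picks up the "2"
   suffix, so the call below registers "__gnu_fractqqhq2", while a conversion
   to a non-fixed-point mode such as SImode would come out as
   "__gnu_fractqqsi" with no suffix.  */
#if 0
  arm_set_fixed_conv_libfunc (fract_optab, E_HQmode, E_QQmode, "fract",
			      "hq", "qq");
#endif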
2408 /* Set up library functions unique to ARM. */
2410 static void
2411 arm_init_libfuncs (void)
2413 /* For Linux, we have access to kernel support for atomic operations. */
2414 if (arm_abi == ARM_ABI_AAPCS_LINUX)
2415 init_sync_libfuncs (MAX_SYNC_LIBFUNC_SIZE);
2417 /* There are no special library functions unless we are using the
2418 ARM BPABI. */
2419 if (!TARGET_BPABI)
2420 return;
2422 /* The functions below are described in Section 4 of the "Run-Time
2423 ABI for the ARM architecture", Version 1.0. */
2425 /* Double-precision floating-point arithmetic. Table 2. */
2426 set_optab_libfunc (add_optab, DFmode, "__aeabi_dadd");
2427 set_optab_libfunc (sdiv_optab, DFmode, "__aeabi_ddiv");
2428 set_optab_libfunc (smul_optab, DFmode, "__aeabi_dmul");
2429 set_optab_libfunc (neg_optab, DFmode, "__aeabi_dneg");
2430 set_optab_libfunc (sub_optab, DFmode, "__aeabi_dsub");
2432 /* Double-precision comparisons. Table 3. */
2433 set_optab_libfunc (eq_optab, DFmode, "__aeabi_dcmpeq");
2434 set_optab_libfunc (ne_optab, DFmode, NULL);
2435 set_optab_libfunc (lt_optab, DFmode, "__aeabi_dcmplt");
2436 set_optab_libfunc (le_optab, DFmode, "__aeabi_dcmple");
2437 set_optab_libfunc (ge_optab, DFmode, "__aeabi_dcmpge");
2438 set_optab_libfunc (gt_optab, DFmode, "__aeabi_dcmpgt");
2439 set_optab_libfunc (unord_optab, DFmode, "__aeabi_dcmpun");
2441 /* Single-precision floating-point arithmetic. Table 4. */
2442 set_optab_libfunc (add_optab, SFmode, "__aeabi_fadd");
2443 set_optab_libfunc (sdiv_optab, SFmode, "__aeabi_fdiv");
2444 set_optab_libfunc (smul_optab, SFmode, "__aeabi_fmul");
2445 set_optab_libfunc (neg_optab, SFmode, "__aeabi_fneg");
2446 set_optab_libfunc (sub_optab, SFmode, "__aeabi_fsub");
2448 /* Single-precision comparisons. Table 5. */
2449 set_optab_libfunc (eq_optab, SFmode, "__aeabi_fcmpeq");
2450 set_optab_libfunc (ne_optab, SFmode, NULL);
2451 set_optab_libfunc (lt_optab, SFmode, "__aeabi_fcmplt");
2452 set_optab_libfunc (le_optab, SFmode, "__aeabi_fcmple");
2453 set_optab_libfunc (ge_optab, SFmode, "__aeabi_fcmpge");
2454 set_optab_libfunc (gt_optab, SFmode, "__aeabi_fcmpgt");
2455 set_optab_libfunc (unord_optab, SFmode, "__aeabi_fcmpun");
2457 /* Floating-point to integer conversions. Table 6. */
2458 set_conv_libfunc (sfix_optab, SImode, DFmode, "__aeabi_d2iz");
2459 set_conv_libfunc (ufix_optab, SImode, DFmode, "__aeabi_d2uiz");
2460 set_conv_libfunc (sfix_optab, DImode, DFmode, "__aeabi_d2lz");
2461 set_conv_libfunc (ufix_optab, DImode, DFmode, "__aeabi_d2ulz");
2462 set_conv_libfunc (sfix_optab, SImode, SFmode, "__aeabi_f2iz");
2463 set_conv_libfunc (ufix_optab, SImode, SFmode, "__aeabi_f2uiz");
2464 set_conv_libfunc (sfix_optab, DImode, SFmode, "__aeabi_f2lz");
2465 set_conv_libfunc (ufix_optab, DImode, SFmode, "__aeabi_f2ulz");
2467 /* Conversions between floating types. Table 7. */
2468 set_conv_libfunc (trunc_optab, SFmode, DFmode, "__aeabi_d2f");
2469 set_conv_libfunc (sext_optab, DFmode, SFmode, "__aeabi_f2d");
2471 /* Integer to floating-point conversions. Table 8. */
2472 set_conv_libfunc (sfloat_optab, DFmode, SImode, "__aeabi_i2d");
2473 set_conv_libfunc (ufloat_optab, DFmode, SImode, "__aeabi_ui2d");
2474 set_conv_libfunc (sfloat_optab, DFmode, DImode, "__aeabi_l2d");
2475 set_conv_libfunc (ufloat_optab, DFmode, DImode, "__aeabi_ul2d");
2476 set_conv_libfunc (sfloat_optab, SFmode, SImode, "__aeabi_i2f");
2477 set_conv_libfunc (ufloat_optab, SFmode, SImode, "__aeabi_ui2f");
2478 set_conv_libfunc (sfloat_optab, SFmode, DImode, "__aeabi_l2f");
2479 set_conv_libfunc (ufloat_optab, SFmode, DImode, "__aeabi_ul2f");
2481 /* Long long. Table 9. */
2482 set_optab_libfunc (smul_optab, DImode, "__aeabi_lmul");
2483 set_optab_libfunc (sdivmod_optab, DImode, "__aeabi_ldivmod");
2484 set_optab_libfunc (udivmod_optab, DImode, "__aeabi_uldivmod");
2485 set_optab_libfunc (ashl_optab, DImode, "__aeabi_llsl");
2486 set_optab_libfunc (lshr_optab, DImode, "__aeabi_llsr");
2487 set_optab_libfunc (ashr_optab, DImode, "__aeabi_lasr");
2488 set_optab_libfunc (cmp_optab, DImode, "__aeabi_lcmp");
2489 set_optab_libfunc (ucmp_optab, DImode, "__aeabi_ulcmp");
2491 /* Integer (32/32->32) division. \S 4.3.1. */
2492 set_optab_libfunc (sdivmod_optab, SImode, "__aeabi_idivmod");
2493 set_optab_libfunc (udivmod_optab, SImode, "__aeabi_uidivmod");
2495 /* The divmod functions are designed so that they can be used for
2496 plain division, even though they return both the quotient and the
2497 remainder. The quotient is returned in the usual location (i.e.,
2498 r0 for SImode, {r0, r1} for DImode), just as would be expected
2499 for an ordinary division routine. Because the AAPCS calling
2500 conventions specify that all of { r0, r1, r2, r3 } are
2501 call-clobbered registers, there is no need to tell the compiler
2502 explicitly that those registers are clobbered by these
2503 routines. */
2504 set_optab_libfunc (sdiv_optab, DImode, "__aeabi_ldivmod");
2505 set_optab_libfunc (udiv_optab, DImode, "__aeabi_uldivmod");
2507 /* For SImode division the ABI provides div-without-mod routines,
2508 which are faster. */
2509 set_optab_libfunc (sdiv_optab, SImode, "__aeabi_idiv");
2510 set_optab_libfunc (udiv_optab, SImode, "__aeabi_uidiv");
2512 /* We don't have mod libcalls. Fortunately gcc knows how to use the
2513 divmod libcalls instead. */
2514 set_optab_libfunc (smod_optab, DImode, NULL);
2515 set_optab_libfunc (umod_optab, DImode, NULL);
2516 set_optab_libfunc (smod_optab, SImode, NULL);
2517 set_optab_libfunc (umod_optab, SImode, NULL);
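/* Illustrative sketch, not part of the build: with the registrations above,
   plain C division and remainder on cores without a hardware divide lower
   to the AEABI helpers, roughly as follows.  */
#if 0
static int
example_idiv (int a, int b)
{
  return a / b;		/* call to __aeabi_idiv; quotient returned in r0.  */
}

static int
example_imod (int a, int b)
{
  return a % b;		/* call to __aeabi_idivmod; remainder taken from r1.  */
}
#endif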
2519 /* Half-precision float operations. The compiler handles all operations
2520 with NULL libfuncs by converting to SFmode. */
2521 switch (arm_fp16_format)
2523 case ARM_FP16_FORMAT_IEEE:
2524 case ARM_FP16_FORMAT_ALTERNATIVE:
2526 /* Conversions. */
2527 set_conv_libfunc (trunc_optab, HFmode, SFmode,
2528 (arm_fp16_format == ARM_FP16_FORMAT_IEEE
2529 ? "__gnu_f2h_ieee"
2530 : "__gnu_f2h_alternative"));
2531 set_conv_libfunc (sext_optab, SFmode, HFmode,
2532 (arm_fp16_format == ARM_FP16_FORMAT_IEEE
2533 ? "__gnu_h2f_ieee"
2534 : "__gnu_h2f_alternative"));
2536 set_conv_libfunc (trunc_optab, HFmode, DFmode,
2537 (arm_fp16_format == ARM_FP16_FORMAT_IEEE
2538 ? "__gnu_d2h_ieee"
2539 : "__gnu_d2h_alternative"));
2541 /* Arithmetic. */
2542 set_optab_libfunc (add_optab, HFmode, NULL);
2543 set_optab_libfunc (sdiv_optab, HFmode, NULL);
2544 set_optab_libfunc (smul_optab, HFmode, NULL);
2545 set_optab_libfunc (neg_optab, HFmode, NULL);
2546 set_optab_libfunc (sub_optab, HFmode, NULL);
2548 /* Comparisons. */
2549 set_optab_libfunc (eq_optab, HFmode, NULL);
2550 set_optab_libfunc (ne_optab, HFmode, NULL);
2551 set_optab_libfunc (lt_optab, HFmode, NULL);
2552 set_optab_libfunc (le_optab, HFmode, NULL);
2553 set_optab_libfunc (ge_optab, HFmode, NULL);
2554 set_optab_libfunc (gt_optab, HFmode, NULL);
2555 set_optab_libfunc (unord_optab, HFmode, NULL);
2556 break;
2558 default:
2559 break;
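/* Illustrative sketch, not part of the build: because the HFmode arithmetic
   and comparison optabs above are left as NULL, an __fp16 operation is
   performed by widening both operands to SFmode (via the __gnu_h2f_*
   conversion registered above), operating in single precision, and narrowing
   the result back with __gnu_f2h_*.  */
#if 0
static __fp16
example_hf_add (__fp16 a, __fp16 b)
{
  return a + b;		/* h2f (a) + h2f (b), then f2h of the sum.  */
}
#endif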
2562 /* Use names prefixed with __gnu_ for fixed-point helper functions. */
2564 const arm_fixed_mode_set fixed_arith_modes[] =
2566 { E_QQmode, "qq" },
2567 { E_UQQmode, "uqq" },
2568 { E_HQmode, "hq" },
2569 { E_UHQmode, "uhq" },
2570 { E_SQmode, "sq" },
2571 { E_USQmode, "usq" },
2572 { E_DQmode, "dq" },
2573 { E_UDQmode, "udq" },
2574 { E_TQmode, "tq" },
2575 { E_UTQmode, "utq" },
2576 { E_HAmode, "ha" },
2577 { E_UHAmode, "uha" },
2578 { E_SAmode, "sa" },
2579 { E_USAmode, "usa" },
2580 { E_DAmode, "da" },
2581 { E_UDAmode, "uda" },
2582 { E_TAmode, "ta" },
2583 { E_UTAmode, "uta" }
2585 const arm_fixed_mode_set fixed_conv_modes[] =
2587 { E_QQmode, "qq" },
2588 { E_UQQmode, "uqq" },
2589 { E_HQmode, "hq" },
2590 { E_UHQmode, "uhq" },
2591 { E_SQmode, "sq" },
2592 { E_USQmode, "usq" },
2593 { E_DQmode, "dq" },
2594 { E_UDQmode, "udq" },
2595 { E_TQmode, "tq" },
2596 { E_UTQmode, "utq" },
2597 { E_HAmode, "ha" },
2598 { E_UHAmode, "uha" },
2599 { E_SAmode, "sa" },
2600 { E_USAmode, "usa" },
2601 { E_DAmode, "da" },
2602 { E_UDAmode, "uda" },
2603 { E_TAmode, "ta" },
2604 { E_UTAmode, "uta" },
2605 { E_QImode, "qi" },
2606 { E_HImode, "hi" },
2607 { E_SImode, "si" },
2608 { E_DImode, "di" },
2609 { E_TImode, "ti" },
2610 { E_SFmode, "sf" },
2611 { E_DFmode, "df" }
2613 unsigned int i, j;
2615 for (i = 0; i < ARRAY_SIZE (fixed_arith_modes); i++)
2617 arm_set_fixed_optab_libfunc (add_optab, fixed_arith_modes[i].mode,
2618 "add", fixed_arith_modes[i].name, 3);
2619 arm_set_fixed_optab_libfunc (ssadd_optab, fixed_arith_modes[i].mode,
2620 "ssadd", fixed_arith_modes[i].name, 3);
2621 arm_set_fixed_optab_libfunc (usadd_optab, fixed_arith_modes[i].mode,
2622 "usadd", fixed_arith_modes[i].name, 3);
2623 arm_set_fixed_optab_libfunc (sub_optab, fixed_arith_modes[i].mode,
2624 "sub", fixed_arith_modes[i].name, 3);
2625 arm_set_fixed_optab_libfunc (sssub_optab, fixed_arith_modes[i].mode,
2626 "sssub", fixed_arith_modes[i].name, 3);
2627 arm_set_fixed_optab_libfunc (ussub_optab, fixed_arith_modes[i].mode,
2628 "ussub", fixed_arith_modes[i].name, 3);
2629 arm_set_fixed_optab_libfunc (smul_optab, fixed_arith_modes[i].mode,
2630 "mul", fixed_arith_modes[i].name, 3);
2631 arm_set_fixed_optab_libfunc (ssmul_optab, fixed_arith_modes[i].mode,
2632 "ssmul", fixed_arith_modes[i].name, 3);
2633 arm_set_fixed_optab_libfunc (usmul_optab, fixed_arith_modes[i].mode,
2634 "usmul", fixed_arith_modes[i].name, 3);
2635 arm_set_fixed_optab_libfunc (sdiv_optab, fixed_arith_modes[i].mode,
2636 "div", fixed_arith_modes[i].name, 3);
2637 arm_set_fixed_optab_libfunc (udiv_optab, fixed_arith_modes[i].mode,
2638 "udiv", fixed_arith_modes[i].name, 3);
2639 arm_set_fixed_optab_libfunc (ssdiv_optab, fixed_arith_modes[i].mode,
2640 "ssdiv", fixed_arith_modes[i].name, 3);
2641 arm_set_fixed_optab_libfunc (usdiv_optab, fixed_arith_modes[i].mode,
2642 "usdiv", fixed_arith_modes[i].name, 3);
2643 arm_set_fixed_optab_libfunc (neg_optab, fixed_arith_modes[i].mode,
2644 "neg", fixed_arith_modes[i].name, 2);
2645 arm_set_fixed_optab_libfunc (ssneg_optab, fixed_arith_modes[i].mode,
2646 "ssneg", fixed_arith_modes[i].name, 2);
2647 arm_set_fixed_optab_libfunc (usneg_optab, fixed_arith_modes[i].mode,
2648 "usneg", fixed_arith_modes[i].name, 2);
2649 arm_set_fixed_optab_libfunc (ashl_optab, fixed_arith_modes[i].mode,
2650 "ashl", fixed_arith_modes[i].name, 3);
2651 arm_set_fixed_optab_libfunc (ashr_optab, fixed_arith_modes[i].mode,
2652 "ashr", fixed_arith_modes[i].name, 3);
2653 arm_set_fixed_optab_libfunc (lshr_optab, fixed_arith_modes[i].mode,
2654 "lshr", fixed_arith_modes[i].name, 3);
2655 arm_set_fixed_optab_libfunc (ssashl_optab, fixed_arith_modes[i].mode,
2656 "ssashl", fixed_arith_modes[i].name, 3);
2657 arm_set_fixed_optab_libfunc (usashl_optab, fixed_arith_modes[i].mode,
2658 "usashl", fixed_arith_modes[i].name, 3);
2659 arm_set_fixed_optab_libfunc (cmp_optab, fixed_arith_modes[i].mode,
2660 "cmp", fixed_arith_modes[i].name, 2);
2663 for (i = 0; i < ARRAY_SIZE (fixed_conv_modes); i++)
2664 for (j = 0; j < ARRAY_SIZE (fixed_conv_modes); j++)
2666 if (i == j
2667 || (!ALL_FIXED_POINT_MODE_P (fixed_conv_modes[i].mode)
2668 && !ALL_FIXED_POINT_MODE_P (fixed_conv_modes[j].mode)))
2669 continue;
2671 arm_set_fixed_conv_libfunc (fract_optab, fixed_conv_modes[i].mode,
2672 fixed_conv_modes[j].mode, "fract",
2673 fixed_conv_modes[i].name,
2674 fixed_conv_modes[j].name);
2675 arm_set_fixed_conv_libfunc (satfract_optab,
2676 fixed_conv_modes[i].mode,
2677 fixed_conv_modes[j].mode, "satfract",
2678 fixed_conv_modes[i].name,
2679 fixed_conv_modes[j].name);
2680 arm_set_fixed_conv_libfunc (fractuns_optab,
2681 fixed_conv_modes[i].mode,
2682 fixed_conv_modes[j].mode, "fractuns",
2683 fixed_conv_modes[i].name,
2684 fixed_conv_modes[j].name);
2685 arm_set_fixed_conv_libfunc (satfractuns_optab,
2686 fixed_conv_modes[i].mode,
2687 fixed_conv_modes[j].mode, "satfractuns",
2688 fixed_conv_modes[i].name,
2689 fixed_conv_modes[j].name);
2693 if (TARGET_AAPCS_BASED)
2694 synchronize_libfunc = init_one_libfunc ("__sync_synchronize");
2697 /* On AAPCS systems, this is the "struct __va_list". */
2698 static GTY(()) tree va_list_type;
2700 /* Return the type to use as __builtin_va_list. */
2701 static tree
2702 arm_build_builtin_va_list (void)
2704 tree va_list_name;
2705 tree ap_field;
2707 if (!TARGET_AAPCS_BASED)
2708 return std_build_builtin_va_list ();
2710 /* AAPCS \S 7.1.4 requires that va_list be a typedef for a type
2711 defined as:
2713 struct __va_list
2715 void *__ap;
2718 The C Library ABI further reinforces this definition in \S
2719 4.1.
2721 We must follow this definition exactly. The structure tag
2722 name is visible in C++ mangled names, and thus forms a part
2723 of the ABI. The field name may be used by people who
2724 #include <stdarg.h>. */
2725 /* Create the type. */
2726 va_list_type = lang_hooks.types.make_type (RECORD_TYPE);
2727 /* Give it the required name. */
2728 va_list_name = build_decl (BUILTINS_LOCATION,
2729 TYPE_DECL,
2730 get_identifier ("__va_list"),
2731 va_list_type);
2732 DECL_ARTIFICIAL (va_list_name) = 1;
2733 TYPE_NAME (va_list_type) = va_list_name;
2734 TYPE_STUB_DECL (va_list_type) = va_list_name;
2735 /* Create the __ap field. */
2736 ap_field = build_decl (BUILTINS_LOCATION,
2737 FIELD_DECL,
2738 get_identifier ("__ap"),
2739 ptr_type_node);
2740 DECL_ARTIFICIAL (ap_field) = 1;
2741 DECL_FIELD_CONTEXT (ap_field) = va_list_type;
2742 TYPE_FIELDS (va_list_type) = ap_field;
2743 /* Compute its layout. */
2744 layout_type (va_list_type);
2746 return va_list_type;
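/* Illustrative sketch, not part of the build: the tree built above is the
   AAPCS-mandated equivalent of the C declaration below, which is why the
   tag "__va_list" appears in the C++ mangled names of functions taking a
   va_list.  */
#if 0
struct __va_list
{
  void *__ap;
};
#endif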
2749 /* Return an expression of type "void *" pointing to the next
2750 available argument in a variable-argument list. VALIST is the
2751 user-level va_list object, of type __builtin_va_list. */
2752 static tree
2753 arm_extract_valist_ptr (tree valist)
2755 if (TREE_TYPE (valist) == error_mark_node)
2756 return error_mark_node;
2758 /* On an AAPCS target, the pointer is stored within "struct
2759 va_list". */
2760 if (TARGET_AAPCS_BASED)
2762 tree ap_field = TYPE_FIELDS (TREE_TYPE (valist));
2763 valist = build3 (COMPONENT_REF, TREE_TYPE (ap_field),
2764 valist, ap_field, NULL_TREE);
2767 return valist;
2770 /* Implement TARGET_EXPAND_BUILTIN_VA_START. */
2771 static void
2772 arm_expand_builtin_va_start (tree valist, rtx nextarg)
2774 valist = arm_extract_valist_ptr (valist);
2775 std_expand_builtin_va_start (valist, nextarg);
2778 /* Implement TARGET_GIMPLIFY_VA_ARG_EXPR. */
2779 static tree
2780 arm_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
2781 gimple_seq *post_p)
2783 valist = arm_extract_valist_ptr (valist);
2784 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
2787 /* Check any incompatible options that the user has specified. */
2788 static void
2789 arm_option_check_internal (struct gcc_options *opts)
2791 int flags = opts->x_target_flags;
2793 /* iWMMXt and NEON are incompatible. */
2794 if (TARGET_IWMMXT
2795 && bitmap_bit_p (arm_active_target.isa, isa_bit_neon))
2796 error ("iWMMXt and NEON are incompatible");
2798 /* Make sure that the processor choice does not conflict with any of the
2799 other command line choices. */
2800 if (TARGET_ARM_P (flags)
2801 && !bitmap_bit_p (arm_active_target.isa, isa_bit_notm))
2802 error ("target CPU does not support ARM mode");
2804 /* TARGET_BACKTRACE cannot be used here as crtl->is_leaf is not set yet. */
2805 if ((TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME) && TARGET_ARM_P (flags))
2806 warning (0, "enabling backtrace support is only meaningful when compiling for the Thumb");
2808 if (TARGET_ARM_P (flags) && TARGET_CALLEE_INTERWORKING)
2809 warning (0, "enabling callee interworking support is only meaningful when compiling for the Thumb");
2811 /* If this target is normally configured to use APCS frames, warn if they
2812 are turned off and debugging is turned on. */
2813 if (TARGET_ARM_P (flags)
2814 && write_symbols != NO_DEBUG
2815 && !TARGET_APCS_FRAME
2816 && (TARGET_DEFAULT & MASK_APCS_FRAME))
2817 warning (0, "-g with -mno-apcs-frame may not give sensible debugging");
2819 /* iWMMXt unsupported under Thumb mode. */
2820 if (TARGET_THUMB_P (flags) && TARGET_IWMMXT)
2821 error ("iWMMXt unsupported under Thumb mode");
2823 if (TARGET_HARD_TP && TARGET_THUMB1_P (flags))
2824 error ("can not use -mtp=cp15 with 16-bit Thumb");
2826 if (TARGET_THUMB_P (flags) && TARGET_VXWORKS_RTP && flag_pic)
2828 error ("RTP PIC is incompatible with Thumb");
2829 flag_pic = 0;
2832 /* We only support -mpure-code and -mslow-flash-data on M-profile targets
2833 with MOVT. */
2834 if ((target_pure_code || target_slow_flash_data)
2835 && (!TARGET_HAVE_MOVT || arm_arch_notm || flag_pic || TARGET_NEON))
2837 const char *flag = (target_pure_code ? "-mpure-code" :
2838 "-mslow-flash-data");
2839 error ("%s only supports non-pic code on M-profile targets with the "
2840 "MOVT instruction", flag);
2845 /* Recompute the global settings depending on target attribute options. */
2847 static void
2848 arm_option_params_internal (void)
2850 /* If we are not using the default (ARM mode) section anchor offset
2851 ranges, then set the correct ranges now. */
2852 if (TARGET_THUMB1)
2854 /* Thumb-1 LDR instructions cannot have negative offsets.
2855 Permissible positive offset ranges are 5-bit (for byte loads),
2856 6-bit (for halfword loads), or 7-bit (for word loads).
2857 Empirical results suggest a 7-bit anchor range gives the best
2858 overall code size. */
2859 targetm.min_anchor_offset = 0;
2860 targetm.max_anchor_offset = 127;
2862 else if (TARGET_THUMB2)
2864 /* The minimum is set such that the total size of the block
2865 for a particular anchor is 248 + 1 + 4095 bytes, which is
2866 divisible by eight, ensuring natural spacing of anchors. */
2867 targetm.min_anchor_offset = -248;
2868 targetm.max_anchor_offset = 4095;
2870 else
2872 targetm.min_anchor_offset = TARGET_MIN_ANCHOR_OFFSET;
2873 targetm.max_anchor_offset = TARGET_MAX_ANCHOR_OFFSET;
2876 if (optimize_size)
2878 /* If optimizing for size, bump the number of instructions that we
2879 are prepared to conditionally execute (even on a StrongARM). */
2880 max_insns_skipped = 6;
2882 /* For THUMB2, we limit the conditional sequence to one IT block. */
2883 if (TARGET_THUMB2)
2884 max_insns_skipped = arm_restrict_it ? 1 : 4;
2886 else
2887 /* When -mrestrict-it is in use tone down the if-conversion. */
2888 max_insns_skipped = (TARGET_THUMB2 && arm_restrict_it)
2889 ? 1 : current_tune->max_insns_skipped;
2892 /* True if -mflip-thumb should next add an attribute for the default
2893 mode, false if it should next add an attribute for the opposite mode. */
2894 static GTY(()) bool thumb_flipper;
2896 /* Options after initial target override. */
2897 static GTY(()) tree init_optimize;
2899 static void
2900 arm_override_options_after_change_1 (struct gcc_options *opts)
2902 if (opts->x_align_functions <= 0)
2903 opts->x_align_functions = TARGET_THUMB_P (opts->x_target_flags)
2904 && opts->x_optimize_size ? 2 : 4;
2907 /* Implement targetm.override_options_after_change. */
2909 static void
2910 arm_override_options_after_change (void)
2912 arm_configure_build_target (&arm_active_target,
2913 TREE_TARGET_OPTION (target_option_default_node),
2914 &global_options_set, false);
2916 arm_override_options_after_change_1 (&global_options);
2919 /* Implement TARGET_OPTION_SAVE. */
2920 static void
2921 arm_option_save (struct cl_target_option *ptr, struct gcc_options *opts)
2923 ptr->x_arm_arch_string = opts->x_arm_arch_string;
2924 ptr->x_arm_cpu_string = opts->x_arm_cpu_string;
2925 ptr->x_arm_tune_string = opts->x_arm_tune_string;
2928 /* Implement TARGET_OPTION_RESTORE. */
2929 static void
2930 arm_option_restore (struct gcc_options *opts, struct cl_target_option *ptr)
2932 opts->x_arm_arch_string = ptr->x_arm_arch_string;
2933 opts->x_arm_cpu_string = ptr->x_arm_cpu_string;
2934 opts->x_arm_tune_string = ptr->x_arm_tune_string;
2935 arm_configure_build_target (&arm_active_target, ptr, &global_options_set,
2936 false);
2939 /* Reset options between modes that the user has specified. */
2940 static void
2941 arm_option_override_internal (struct gcc_options *opts,
2942 struct gcc_options *opts_set)
2944 arm_override_options_after_change_1 (opts);
2946 if (TARGET_INTERWORK && !bitmap_bit_p (arm_active_target.isa, isa_bit_thumb))
2948 /* The default is to enable interworking, so this warning message would
2949 be confusing to users who have just compiled with, e.g., -march=armv3. */
2950 /* warning (0, "ignoring -minterwork because target CPU does not support THUMB"); */
2951 opts->x_target_flags &= ~MASK_INTERWORK;
2954 if (TARGET_THUMB_P (opts->x_target_flags)
2955 && !bitmap_bit_p (arm_active_target.isa, isa_bit_thumb))
2957 warning (0, "target CPU does not support THUMB instructions");
2958 opts->x_target_flags &= ~MASK_THUMB;
2961 if (TARGET_APCS_FRAME && TARGET_THUMB_P (opts->x_target_flags))
2963 /* warning (0, "ignoring -mapcs-frame because -mthumb was used"); */
2964 opts->x_target_flags &= ~MASK_APCS_FRAME;
2967 /* Callee super interworking implies thumb interworking. Adding
2968 this to the flags here simplifies the logic elsewhere. */
2969 if (TARGET_THUMB_P (opts->x_target_flags) && TARGET_CALLEE_INTERWORKING)
2970 opts->x_target_flags |= MASK_INTERWORK;
2972 /* Need to remember initial values so combinations of options like
2973 -mflip-thumb -mthumb -fno-schedule-insns work for any attribute. */
2974 cl_optimization *to = TREE_OPTIMIZATION (init_optimize);
2976 if (! opts_set->x_arm_restrict_it)
2977 opts->x_arm_restrict_it = arm_arch8;
2979 /* ARM execution state and M profile don't have [restrict] IT. */
2980 if (!TARGET_THUMB2_P (opts->x_target_flags) || !arm_arch_notm)
2981 opts->x_arm_restrict_it = 0;
2983 /* Enable -munaligned-access by default for
2984 - all ARMv6 architecture-based processors when compiling for a 32-bit ISA
2985 i.e. Thumb2 and ARM state only.
2986 - ARMv7-A, ARMv7-R, and ARMv7-M architecture-based processors.
2987 - ARMv8 architecture-based processors.
2989 Disable -munaligned-access by default for
2990 - all pre-ARMv6 architecture-based processors
2991 - ARMv6-M architecture-based processors
2992 - ARMv8-M Baseline processors. */
2994 if (! opts_set->x_unaligned_access)
2996 opts->x_unaligned_access = (TARGET_32BIT_P (opts->x_target_flags)
2997 && arm_arch6 && (arm_arch_notm || arm_arch7));
2999 else if (opts->x_unaligned_access == 1
3000 && !(arm_arch6 && (arm_arch_notm || arm_arch7)))
3002 warning (0, "target CPU does not support unaligned accesses");
3003 opts->x_unaligned_access = 0;
3006 /* Don't warn since it's on by default in -O2. */
3007 if (TARGET_THUMB1_P (opts->x_target_flags))
3008 opts->x_flag_schedule_insns = 0;
3009 else
3010 opts->x_flag_schedule_insns = to->x_flag_schedule_insns;
3012 /* Disable shrink-wrap when optimizing function for size, since it tends to
3013 generate additional returns. */
3014 if (optimize_function_for_size_p (cfun)
3015 && TARGET_THUMB2_P (opts->x_target_flags))
3016 opts->x_flag_shrink_wrap = false;
3017 else
3018 opts->x_flag_shrink_wrap = to->x_flag_shrink_wrap;
3020 /* In Thumb1 mode, we emit the epilogue in RTL, but the last insn
3021 - epilogue_insns - does not accurately model the corresponding insns
3022 emitted in the asm file. In particular, see the comment in thumb_exit
3023 'Find out how many of the (return) argument registers we can corrupt'.
3024 As a consequence, the epilogue may clobber registers without fipa-ra
3025 finding out about it. Therefore, disable fipa-ra in Thumb1 mode.
3026 TODO: Accurately model clobbers for epilogue_insns and reenable
3027 fipa-ra. */
3028 if (TARGET_THUMB1_P (opts->x_target_flags))
3029 opts->x_flag_ipa_ra = 0;
3030 else
3031 opts->x_flag_ipa_ra = to->x_flag_ipa_ra;
3033 /* Thumb2 inline assembly code should always use unified syntax.
3034 This will apply to ARM and Thumb1 eventually. */
3035 opts->x_inline_asm_unified = TARGET_THUMB2_P (opts->x_target_flags);
3037 #ifdef SUBTARGET_OVERRIDE_INTERNAL_OPTIONS
3038 SUBTARGET_OVERRIDE_INTERNAL_OPTIONS;
3039 #endif
3042 static sbitmap isa_all_fpubits;
3043 static sbitmap isa_quirkbits;
3045 /* Configure a build target TARGET from the user-specified options OPTS and
3046 OPTS_SET. If WARN_COMPATIBLE, emit a diagnostic if both the CPU and
3047 architecture have been specified, but the two are not identical. */
3048 void
3049 arm_configure_build_target (struct arm_build_target *target,
3050 struct cl_target_option *opts,
3051 struct gcc_options *opts_set,
3052 bool warn_compatible)
3054 const cpu_option *arm_selected_tune = NULL;
3055 const arch_option *arm_selected_arch = NULL;
3056 const cpu_option *arm_selected_cpu = NULL;
3057 const arm_fpu_desc *arm_selected_fpu = NULL;
3058 const char *tune_opts = NULL;
3059 const char *arch_opts = NULL;
3060 const char *cpu_opts = NULL;
3062 bitmap_clear (target->isa);
3063 target->core_name = NULL;
3064 target->arch_name = NULL;
3066 if (opts_set->x_arm_arch_string)
3068 arm_selected_arch = arm_parse_arch_option_name (all_architectures,
3069 "-march",
3070 opts->x_arm_arch_string);
3071 arch_opts = strchr (opts->x_arm_arch_string, '+');
3074 if (opts_set->x_arm_cpu_string)
3076 arm_selected_cpu = arm_parse_cpu_option_name (all_cores, "-mcpu",
3077 opts->x_arm_cpu_string);
3078 cpu_opts = strchr (opts->x_arm_cpu_string, '+');
3079 arm_selected_tune = arm_selected_cpu;
3080 /* If taking the tuning from -mcpu, we don't need to rescan the
3081 options for tuning. */
3084 if (opts_set->x_arm_tune_string)
3086 arm_selected_tune = arm_parse_cpu_option_name (all_cores, "-mtune",
3087 opts->x_arm_tune_string);
3088 tune_opts = strchr (opts->x_arm_tune_string, '+');
3091 if (arm_selected_arch)
3093 arm_initialize_isa (target->isa, arm_selected_arch->common.isa_bits);
3094 arm_parse_option_features (target->isa, &arm_selected_arch->common,
3095 arch_opts);
3097 if (arm_selected_cpu)
3099 auto_sbitmap cpu_isa (isa_num_bits);
3100 auto_sbitmap isa_delta (isa_num_bits);
3102 arm_initialize_isa (cpu_isa, arm_selected_cpu->common.isa_bits);
3103 arm_parse_option_features (cpu_isa, &arm_selected_cpu->common,
3104 cpu_opts);
3105 bitmap_xor (isa_delta, cpu_isa, target->isa);
3106 /* Ignore any bits that are quirk bits. */
3107 bitmap_and_compl (isa_delta, isa_delta, isa_quirkbits);
3108 /* Ignore (for now) any bits that might be set by -mfpu. */
3109 bitmap_and_compl (isa_delta, isa_delta, isa_all_fpubits);
3111 if (!bitmap_empty_p (isa_delta))
3113 if (warn_compatible)
3114 warning (0, "switch -mcpu=%s conflicts with -march=%s switch",
3115 arm_selected_cpu->common.name,
3116 arm_selected_arch->common.name);
3117 /* -march wins for code generation.
3118 -mcpu wins for default tuning. */
3119 if (!arm_selected_tune)
3120 arm_selected_tune = arm_selected_cpu;
3122 arm_selected_cpu = all_cores + arm_selected_arch->tune_id;
3123 target->arch_name = arm_selected_arch->common.name;
3125 else
3127 /* Architecture and CPU are essentially the same.
3128 Prefer the CPU setting. */
3129 arm_selected_arch = all_architectures + arm_selected_cpu->arch;
3130 target->core_name = arm_selected_cpu->common.name;
3131 /* Copy the CPU's capabilities, so that we inherit the
3132 appropriate extensions and quirks. */
3133 bitmap_copy (target->isa, cpu_isa);
3136 else
3138 /* Pick a CPU based on the architecture. */
3139 arm_selected_cpu = all_cores + arm_selected_arch->tune_id;
3140 target->arch_name = arm_selected_arch->common.name;
3141 /* Note: target->core_name is left unset in this path. */
3144 else if (arm_selected_cpu)
3146 target->core_name = arm_selected_cpu->common.name;
3147 arm_initialize_isa (target->isa, arm_selected_cpu->common.isa_bits);
3148 arm_parse_option_features (target->isa, &arm_selected_cpu->common,
3149 cpu_opts);
3150 arm_selected_arch = all_architectures + arm_selected_cpu->arch;
3152 /* If the user did not specify a processor or architecture, choose
3153 one for them. */
3154 else
3156 const cpu_option *sel;
3157 auto_sbitmap sought_isa (isa_num_bits);
3158 bitmap_clear (sought_isa);
3159 auto_sbitmap default_isa (isa_num_bits);
3161 arm_selected_cpu = arm_parse_cpu_option_name (all_cores, "default CPU",
3162 TARGET_CPU_DEFAULT);
3163 cpu_opts = strchr (TARGET_CPU_DEFAULT, '+');
3164 gcc_assert (arm_selected_cpu->common.name);
3166 /* RWE: All of the selection logic below (to the end of this
3167 'if' clause) looks somewhat suspect. It appears to be mostly
3168 there to support forcing thumb support when the default CPU
3169 does not have thumb (somewhat dubious in terms of what the
3170 user might be expecting). I think it should be removed once
3171 support for the pre-thumb era cores is removed. */
3172 sel = arm_selected_cpu;
3173 arm_initialize_isa (default_isa, sel->common.isa_bits);
3174 arm_parse_option_features (default_isa, &arm_selected_cpu->common,
3175 cpu_opts);
3177 /* Now check to see if the user has specified any command line
3178 switches that require certain abilities from the cpu. */
3180 if (TARGET_INTERWORK || TARGET_THUMB)
3182 bitmap_set_bit (sought_isa, isa_bit_thumb);
3183 bitmap_set_bit (sought_isa, isa_bit_mode32);
3185 /* There are no ARM processors that support both APCS-26 and
3186 interworking. Therefore we forcibly remove MODE26 from
3187 the isa features here (if it was set), so that the
3188 search below will always be able to find a compatible
3189 processor. */
3190 bitmap_clear_bit (default_isa, isa_bit_mode26);
3193 /* If there are such requirements and the default CPU does not
3194 satisfy them, we need to run over the complete list of
3195 cores looking for one that is satisfactory. */
3196 if (!bitmap_empty_p (sought_isa)
3197 && !bitmap_subset_p (sought_isa, default_isa))
3199 auto_sbitmap candidate_isa (isa_num_bits);
3200 /* We're only interested in a CPU with at least the
3201 capabilities of the default CPU and the required
3202 additional features. */
3203 bitmap_ior (default_isa, default_isa, sought_isa);
3205 /* Try to locate a CPU type that supports all of the abilities
3206 of the default CPU, plus the extra abilities requested by
3207 the user. */
3208 for (sel = all_cores; sel->common.name != NULL; sel++)
3210 arm_initialize_isa (candidate_isa, sel->common.isa_bits);
3211 /* An exact match? */
3212 if (bitmap_equal_p (default_isa, candidate_isa))
3213 break;
3216 if (sel->common.name == NULL)
3218 unsigned current_bit_count = isa_num_bits;
3219 const cpu_option *best_fit = NULL;
3221 /* Ideally we would like to issue an error message here
3222 saying that it was not possible to find a CPU compatible
3223 with the default CPU, but which also supports the command
3224 line options specified by the programmer, and so they
3225 ought to use the -mcpu=<name> command line option to
3226 override the default CPU type.
3228 If we cannot find a CPU that has exactly the
3229 characteristics of the default CPU and the given
3230 command line options we scan the array again looking
3231 for a best match. The best match must have at least
3232 the capabilities of the perfect match. */
3233 for (sel = all_cores; sel->common.name != NULL; sel++)
3235 arm_initialize_isa (candidate_isa, sel->common.isa_bits);
3237 if (bitmap_subset_p (default_isa, candidate_isa))
3239 unsigned count;
3241 bitmap_and_compl (candidate_isa, candidate_isa,
3242 default_isa);
3243 count = bitmap_popcount (candidate_isa);
3245 if (count < current_bit_count)
3247 best_fit = sel;
3248 current_bit_count = count;
3252 gcc_assert (best_fit);
3253 sel = best_fit;
3256 arm_selected_cpu = sel;
3259 /* Now we know the CPU, we can finally initialize the target
3260 structure. */
3261 target->core_name = arm_selected_cpu->common.name;
3262 arm_initialize_isa (target->isa, arm_selected_cpu->common.isa_bits);
3263 arm_parse_option_features (target->isa, &arm_selected_cpu->common,
3264 cpu_opts);
3265 arm_selected_arch = all_architectures + arm_selected_cpu->arch;
3268 gcc_assert (arm_selected_cpu);
3269 gcc_assert (arm_selected_arch);
3271 if (opts->x_arm_fpu_index != TARGET_FPU_auto)
3273 arm_selected_fpu = &all_fpus[opts->x_arm_fpu_index];
3274 auto_sbitmap fpu_bits (isa_num_bits);
3276 arm_initialize_isa (fpu_bits, arm_selected_fpu->isa_bits);
3277 bitmap_and_compl (target->isa, target->isa, isa_all_fpubits);
3278 bitmap_ior (target->isa, target->isa, fpu_bits);
3281 if (!arm_selected_tune)
3282 arm_selected_tune = arm_selected_cpu;
3283 else /* Validate the features passed to -mtune. */
3284 arm_parse_option_features (NULL, &arm_selected_tune->common, tune_opts);
3286 const cpu_tune *tune_data = &all_tunes[arm_selected_tune - all_cores];
3288 /* Finish initializing the target structure. */
3289 target->arch_pp_name = arm_selected_arch->arch;
3290 target->base_arch = arm_selected_arch->base_arch;
3291 target->profile = arm_selected_arch->profile;
3293 target->tune_flags = tune_data->tune_flags;
3294 target->tune = tune_data->tune;
3295 target->tune_core = tune_data->scheduler;
3298 /* Fix up any incompatible options that the user has specified. */
3299 static void
3300 arm_option_override (void)
3302 static const enum isa_feature fpu_bitlist[]
3303 = { ISA_ALL_FPU_INTERNAL, isa_nobit };
3304 static const enum isa_feature quirk_bitlist[] = { ISA_ALL_QUIRKS, isa_nobit};
3305 cl_target_option opts;
3307 isa_quirkbits = sbitmap_alloc (isa_num_bits);
3308 arm_initialize_isa (isa_quirkbits, quirk_bitlist);
3310 isa_all_fpubits = sbitmap_alloc (isa_num_bits);
3311 arm_initialize_isa (isa_all_fpubits, fpu_bitlist);
3313 arm_active_target.isa = sbitmap_alloc (isa_num_bits);
3315 if (!global_options_set.x_arm_fpu_index)
3317 bool ok;
3318 int fpu_index;
3320 ok = opt_enum_arg_to_value (OPT_mfpu_, FPUTYPE_AUTO, &fpu_index,
3321 CL_TARGET);
3322 gcc_assert (ok);
3323 arm_fpu_index = (enum fpu_type) fpu_index;
3326 cl_target_option_save (&opts, &global_options);
3327 arm_configure_build_target (&arm_active_target, &opts, &global_options_set,
3328 true);
3330 #ifdef SUBTARGET_OVERRIDE_OPTIONS
3331 SUBTARGET_OVERRIDE_OPTIONS;
3332 #endif
3334 sprintf (arm_arch_name, "__ARM_ARCH_%s__", arm_active_target.arch_pp_name);
3335 arm_base_arch = arm_active_target.base_arch;
3337 arm_tune = arm_active_target.tune_core;
3338 tune_flags = arm_active_target.tune_flags;
3339 current_tune = arm_active_target.tune;
3341 /* TBD: Dwarf info for apcs frame is not handled yet. */
3342 if (TARGET_APCS_FRAME)
3343 flag_shrink_wrap = false;
3345 /* BPABI targets use linker tricks to allow interworking on cores
3346 without thumb support. */
3347 if (TARGET_INTERWORK
3348 && !TARGET_BPABI
3349 && !bitmap_bit_p (arm_active_target.isa, isa_bit_thumb))
3351 warning (0, "target CPU does not support interworking" );
3352 target_flags &= ~MASK_INTERWORK;
3355 if (TARGET_APCS_STACK && !TARGET_APCS_FRAME)
3357 warning (0, "-mapcs-stack-check incompatible with -mno-apcs-frame");
3358 target_flags |= MASK_APCS_FRAME;
3361 if (TARGET_POKE_FUNCTION_NAME)
3362 target_flags |= MASK_APCS_FRAME;
3364 if (TARGET_APCS_REENT && flag_pic)
3365 error ("-fpic and -mapcs-reent are incompatible");
3367 if (TARGET_APCS_REENT)
3368 warning (0, "APCS reentrant code not supported. Ignored");
3370 /* Initialize boolean versions of the architectural flags, for use
3371 in the arm.md file. */
3372 arm_arch3m = bitmap_bit_p (arm_active_target.isa, isa_bit_ARMv3m);
3373 arm_arch4 = bitmap_bit_p (arm_active_target.isa, isa_bit_ARMv4);
3374 arm_arch4t = arm_arch4 && bitmap_bit_p (arm_active_target.isa, isa_bit_thumb);
3375 arm_arch5 = bitmap_bit_p (arm_active_target.isa, isa_bit_ARMv5);
3376 arm_arch5e = bitmap_bit_p (arm_active_target.isa, isa_bit_ARMv5e);
3377 arm_arch5te = arm_arch5e
3378 && bitmap_bit_p (arm_active_target.isa, isa_bit_thumb);
3379 arm_arch6 = bitmap_bit_p (arm_active_target.isa, isa_bit_ARMv6);
3380 arm_arch6k = bitmap_bit_p (arm_active_target.isa, isa_bit_ARMv6k);
3381 arm_arch_notm = bitmap_bit_p (arm_active_target.isa, isa_bit_notm);
3382 arm_arch6m = arm_arch6 && !arm_arch_notm;
3383 arm_arch7 = bitmap_bit_p (arm_active_target.isa, isa_bit_ARMv7);
3384 arm_arch7em = bitmap_bit_p (arm_active_target.isa, isa_bit_ARMv7em);
3385 arm_arch8 = bitmap_bit_p (arm_active_target.isa, isa_bit_ARMv8);
3386 arm_arch8_1 = bitmap_bit_p (arm_active_target.isa, isa_bit_ARMv8_1);
3387 arm_arch8_2 = bitmap_bit_p (arm_active_target.isa, isa_bit_ARMv8_2);
3388 arm_arch_thumb1 = bitmap_bit_p (arm_active_target.isa, isa_bit_thumb);
3389 arm_arch_thumb2 = bitmap_bit_p (arm_active_target.isa, isa_bit_thumb2);
3390 arm_arch_xscale = bitmap_bit_p (arm_active_target.isa, isa_bit_xscale);
3391 arm_arch_iwmmxt = bitmap_bit_p (arm_active_target.isa, isa_bit_iwmmxt);
3392 arm_arch_iwmmxt2 = bitmap_bit_p (arm_active_target.isa, isa_bit_iwmmxt2);
3393 arm_arch_thumb_hwdiv = bitmap_bit_p (arm_active_target.isa, isa_bit_tdiv);
3394 arm_arch_arm_hwdiv = bitmap_bit_p (arm_active_target.isa, isa_bit_adiv);
3395 arm_arch_crc = bitmap_bit_p (arm_active_target.isa, isa_bit_crc32);
3396 arm_arch_cmse = bitmap_bit_p (arm_active_target.isa, isa_bit_cmse);
3397 arm_fp16_inst = bitmap_bit_p (arm_active_target.isa, isa_bit_fp16);
3398 arm_arch_lpae = bitmap_bit_p (arm_active_target.isa, isa_bit_lpae);
3399 if (arm_fp16_inst)
3401 if (arm_fp16_format == ARM_FP16_FORMAT_ALTERNATIVE)
3402 error ("selected fp16 options are incompatible");
3403 arm_fp16_format = ARM_FP16_FORMAT_IEEE;
3407 /* Set up some tuning parameters. */
3408 arm_ld_sched = (tune_flags & TF_LDSCHED) != 0;
3409 arm_tune_strongarm = (tune_flags & TF_STRONG) != 0;
3410 arm_tune_wbuf = (tune_flags & TF_WBUF) != 0;
3411 arm_tune_xscale = (tune_flags & TF_XSCALE) != 0;
3412 arm_tune_cortex_a9 = (arm_tune == TARGET_CPU_cortexa9) != 0;
3413 arm_m_profile_small_mul = (tune_flags & TF_SMALLMUL) != 0;
3415 /* And finally, set up some quirks. */
3416 arm_arch_no_volatile_ce
3417 = bitmap_bit_p (arm_active_target.isa, isa_quirk_no_volatile_ce);
3418 arm_arch6kz
3419 = arm_arch6k && bitmap_bit_p (arm_active_target.isa, isa_quirk_ARMv6kz);
3421 /* V5 code we generate is completely interworking capable, so we turn off
3422 TARGET_INTERWORK here to avoid many tests later on. */
3424 /* XXX However, we must pass the right pre-processor defines to CPP
3425 or GLD can get confused. This is a hack. */
3426 if (TARGET_INTERWORK)
3427 arm_cpp_interwork = 1;
3429 if (arm_arch5)
3430 target_flags &= ~MASK_INTERWORK;
3432 if (TARGET_IWMMXT && !ARM_DOUBLEWORD_ALIGN)
3433 error ("iwmmxt requires an AAPCS compatible ABI for proper operation");
3435 if (TARGET_IWMMXT_ABI && !TARGET_IWMMXT)
3436 error ("iwmmxt abi requires an iwmmxt capable cpu");
3438 /* If soft-float is specified then don't use FPU. */
3439 if (TARGET_SOFT_FLOAT)
3440 arm_fpu_attr = FPU_NONE;
3441 else
3442 arm_fpu_attr = FPU_VFP;
3444 if (TARGET_AAPCS_BASED)
3446 if (TARGET_CALLER_INTERWORKING)
3447 error ("AAPCS does not support -mcaller-super-interworking");
3448 else
3449 if (TARGET_CALLEE_INTERWORKING)
3450 error ("AAPCS does not support -mcallee-super-interworking");
3453 /* __fp16 support currently assumes the core has ldrh. */
3454 if (!arm_arch4 && arm_fp16_format != ARM_FP16_FORMAT_NONE)
3455 sorry ("__fp16 and no ldrh");
3457 if (TARGET_AAPCS_BASED)
3459 if (arm_abi == ARM_ABI_IWMMXT)
3460 arm_pcs_default = ARM_PCS_AAPCS_IWMMXT;
3461 else if (TARGET_HARD_FLOAT_ABI)
3463 arm_pcs_default = ARM_PCS_AAPCS_VFP;
3464 if (!bitmap_bit_p (arm_active_target.isa, isa_bit_VFPv2))
3465 error ("-mfloat-abi=hard: selected processor lacks an FPU");
3467 else
3468 arm_pcs_default = ARM_PCS_AAPCS;
3470 else
3472 if (arm_float_abi == ARM_FLOAT_ABI_HARD)
3473 sorry ("-mfloat-abi=hard and VFP");
3475 if (arm_abi == ARM_ABI_APCS)
3476 arm_pcs_default = ARM_PCS_APCS;
3477 else
3478 arm_pcs_default = ARM_PCS_ATPCS;
3481 /* For arm2/3 there is no need to do any scheduling if we are doing
3482 software floating-point. */
3483 if (TARGET_SOFT_FLOAT && (tune_flags & TF_NO_MODE32))
3484 flag_schedule_insns = flag_schedule_insns_after_reload = 0;
3486 /* Use the cp15 method if it is available. */
3487 if (target_thread_pointer == TP_AUTO)
3489 if (arm_arch6k && !TARGET_THUMB1)
3490 target_thread_pointer = TP_CP15;
3491 else
3492 target_thread_pointer = TP_SOFT;
3495 /* Override the default structure alignment for AAPCS ABI. */
3496 if (!global_options_set.x_arm_structure_size_boundary)
3498 if (TARGET_AAPCS_BASED)
3499 arm_structure_size_boundary = 8;
3501 else
3503 warning (0, "option %<-mstructure-size-boundary%> is deprecated");
3505 if (arm_structure_size_boundary != 8
3506 && arm_structure_size_boundary != 32
3507 && !(ARM_DOUBLEWORD_ALIGN && arm_structure_size_boundary == 64))
3509 if (ARM_DOUBLEWORD_ALIGN)
3510 warning (0,
3511 "structure size boundary can only be set to 8, 32 or 64");
3512 else
3513 warning (0, "structure size boundary can only be set to 8 or 32");
3514 arm_structure_size_boundary
3515 = (TARGET_AAPCS_BASED ? 8 : DEFAULT_STRUCTURE_SIZE_BOUNDARY);
3519 if (TARGET_VXWORKS_RTP)
3521 if (!global_options_set.x_arm_pic_data_is_text_relative)
3522 arm_pic_data_is_text_relative = 0;
3524 else if (flag_pic
3525 && !arm_pic_data_is_text_relative
3526 && !(global_options_set.x_target_flags & MASK_SINGLE_PIC_BASE))
3527 /* When text & data segments don't have a fixed displacement, the
3528 intended use is with a single, read only, pic base register.
3529 Unless the user explicitly requested not to do that, set
3530 it. */
3531 target_flags |= MASK_SINGLE_PIC_BASE;
3533 /* If stack checking is disabled, we can use r10 as the PIC register,
3534 which keeps r9 available. The EABI specifies r9 as the PIC register. */
3535 if (flag_pic && TARGET_SINGLE_PIC_BASE)
3537 if (TARGET_VXWORKS_RTP)
3538 warning (0, "RTP PIC is incompatible with -msingle-pic-base");
3539 arm_pic_register = (TARGET_APCS_STACK || TARGET_AAPCS_BASED) ? 9 : 10;
3542 if (flag_pic && TARGET_VXWORKS_RTP)
3543 arm_pic_register = 9;
3545 if (arm_pic_register_string != NULL)
3547 int pic_register = decode_reg_name (arm_pic_register_string);
3549 if (!flag_pic)
3550 warning (0, "-mpic-register= is useless without -fpic");
3552 /* Prevent the user from choosing an obviously stupid PIC register. */
3553 else if (pic_register < 0 || call_used_regs[pic_register]
3554 || pic_register == HARD_FRAME_POINTER_REGNUM
3555 || pic_register == STACK_POINTER_REGNUM
3556 || pic_register >= PC_REGNUM
3557 || (TARGET_VXWORKS_RTP
3558 && (unsigned int) pic_register != arm_pic_register))
3559 error ("unable to use '%s' for PIC register", arm_pic_register_string);
3560 else
3561 arm_pic_register = pic_register;
3564 /* Enable -mfix-cortex-m3-ldrd by default for Cortex-M3 cores. */
3565 if (fix_cm3_ldrd == 2)
3567 if (bitmap_bit_p (arm_active_target.isa, isa_quirk_cm3_ldrd))
3568 fix_cm3_ldrd = 1;
3569 else
3570 fix_cm3_ldrd = 0;
3573 /* Hot/Cold partitioning is not currently supported, since we can't
3574 handle literal pool placement in that case. */
3575 if (flag_reorder_blocks_and_partition)
3577 inform (input_location,
3578 "-freorder-blocks-and-partition not supported on this architecture");
3579 flag_reorder_blocks_and_partition = 0;
3580 flag_reorder_blocks = 1;
3583 if (flag_pic)
3584 /* Hoisting PIC address calculations more aggressively provides a small,
3585 but measurable, size reduction for PIC code. Therefore, we decrease
3586 the bar for unrestricted expression hoisting to the cost of PIC address
3587 calculation, which is 2 instructions. */
3588 maybe_set_param_value (PARAM_GCSE_UNRESTRICTED_COST, 2,
3589 global_options.x_param_values,
3590 global_options_set.x_param_values);
3592 /* ARM EABI defaults to strict volatile bitfields. */
3593 if (TARGET_AAPCS_BASED && flag_strict_volatile_bitfields < 0
3594 && abi_version_at_least(2))
3595 flag_strict_volatile_bitfields = 1;
3597 /* Enable sw prefetching at -O3 for CPUS that have prefetch, and we
3598 have deemed it beneficial (signified by setting
3599 prefetch.num_slots to 1 or more). */
3600 if (flag_prefetch_loop_arrays < 0
3601 && HAVE_prefetch
3602 && optimize >= 3
3603 && current_tune->prefetch.num_slots > 0)
3604 flag_prefetch_loop_arrays = 1;
3606 /* Set up parameters to be used in prefetching algorithm. Do not
3607 override the defaults unless we are tuning for a core we have
3608 researched values for. */
3609 if (current_tune->prefetch.num_slots > 0)
3610 maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES,
3611 current_tune->prefetch.num_slots,
3612 global_options.x_param_values,
3613 global_options_set.x_param_values);
3614 if (current_tune->prefetch.l1_cache_line_size >= 0)
3615 maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE,
3616 current_tune->prefetch.l1_cache_line_size,
3617 global_options.x_param_values,
3618 global_options_set.x_param_values);
3619 if (current_tune->prefetch.l1_cache_size >= 0)
3620 maybe_set_param_value (PARAM_L1_CACHE_SIZE,
3621 current_tune->prefetch.l1_cache_size,
3622 global_options.x_param_values,
3623 global_options_set.x_param_values);
3625 /* Use Neon to perform 64-bits operations rather than core
3626 registers. */
3627 prefer_neon_for_64bits = current_tune->prefer_neon_for_64bits;
3628 if (use_neon_for_64bits == 1)
3629 prefer_neon_for_64bits = true;
3631 /* Use the alternative scheduling-pressure algorithm by default. */
3632 maybe_set_param_value (PARAM_SCHED_PRESSURE_ALGORITHM, SCHED_PRESSURE_MODEL,
3633 global_options.x_param_values,
3634 global_options_set.x_param_values);
3636 /* Look through ready list and all of queue for instructions
3637 relevant for L2 auto-prefetcher. */
3638 int param_sched_autopref_queue_depth;
3640 switch (current_tune->sched_autopref)
3642 case tune_params::SCHED_AUTOPREF_OFF:
3643 param_sched_autopref_queue_depth = -1;
3644 break;
3646 case tune_params::SCHED_AUTOPREF_RANK:
3647 param_sched_autopref_queue_depth = 0;
3648 break;
3650 case tune_params::SCHED_AUTOPREF_FULL:
3651 param_sched_autopref_queue_depth = max_insn_queue_index + 1;
3652 break;
3654 default:
3655 gcc_unreachable ();
3658 maybe_set_param_value (PARAM_SCHED_AUTOPREF_QUEUE_DEPTH,
3659 param_sched_autopref_queue_depth,
3660 global_options.x_param_values,
3661 global_options_set.x_param_values);
3663 /* Currently, for slow flash data, we just disable literal pools. We also
3664 disable it for pure-code. */
3665 if (target_slow_flash_data || target_pure_code)
3666 arm_disable_literal_pool = true;
3668 if (use_cmse && !arm_arch_cmse)
3669 error ("target CPU does not support ARMv8-M Security Extensions");
3671 /* Disable scheduling fusion by default if it's not armv7 processor
3672 or doesn't prefer ldrd/strd. */
3673 if (flag_schedule_fusion == 2
3674 && (!arm_arch7 || !current_tune->prefer_ldrd_strd))
3675 flag_schedule_fusion = 0;
3677 /* Need to remember initial options before they are overridden. */
3678 init_optimize = build_optimization_node (&global_options);
3680 arm_option_override_internal (&global_options, &global_options_set);
3681 arm_option_check_internal (&global_options);
3682 arm_option_params_internal ();
3684 /* Create the default target_options structure. */
3685 target_option_default_node = target_option_current_node
3686 = build_target_option_node (&global_options);
3688 /* Register global variables with the garbage collector. */
3689 arm_add_gc_roots ();
3691 /* Init initial mode for testing. */
3692 thumb_flipper = TARGET_THUMB;
3695 static void
3696 arm_add_gc_roots (void)
3698 gcc_obstack_init(&minipool_obstack);
3699 minipool_startobj = (char *) obstack_alloc (&minipool_obstack, 0);
3702 /* A table of known ARM exception types.
3703 For use with the interrupt function attribute. */
3705 typedef struct
3707 const char *const arg;
3708 const unsigned long return_value;
3710 isr_attribute_arg;
3712 static const isr_attribute_arg isr_attribute_args [] =
3714 { "IRQ", ARM_FT_ISR },
3715 { "irq", ARM_FT_ISR },
3716 { "FIQ", ARM_FT_FIQ },
3717 { "fiq", ARM_FT_FIQ },
3718 { "ABORT", ARM_FT_ISR },
3719 { "abort", ARM_FT_ISR },
3720 { "ABORT", ARM_FT_ISR },
3721 { "abort", ARM_FT_ISR },
3722 { "UNDEF", ARM_FT_EXCEPTION },
3723 { "undef", ARM_FT_EXCEPTION },
3724 { "SWI", ARM_FT_EXCEPTION },
3725 { "swi", ARM_FT_EXCEPTION },
3726 { NULL, ARM_FT_NORMAL }
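/* Illustrative usage (not part of the original file): the strings in the
   table above are the arguments users write on the interrupt/isr function
   attribute, e.g.

     void uart_handler (void) __attribute__ ((interrupt ("IRQ")));

   "IRQ" maps to ARM_FT_ISR, "FIQ" to ARM_FT_FIQ, "UNDEF" and "SWI" to
   ARM_FT_EXCEPTION, no argument defaults to ARM_FT_ISR, and an unknown
   string yields ARM_FT_UNKNOWN.  */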
3729 /* Returns the (interrupt) function type of the current
3730 function, or ARM_FT_UNKNOWN if the type cannot be determined. */
3732 static unsigned long
3733 arm_isr_value (tree argument)
3735 const isr_attribute_arg * ptr;
3736 const char * arg;
3738 if (!arm_arch_notm)
3739 return ARM_FT_NORMAL | ARM_FT_STACKALIGN;
3741 /* No argument - default to IRQ. */
3742 if (argument == NULL_TREE)
3743 return ARM_FT_ISR;
3745 /* Get the value of the argument. */
3746 if (TREE_VALUE (argument) == NULL_TREE
3747 || TREE_CODE (TREE_VALUE (argument)) != STRING_CST)
3748 return ARM_FT_UNKNOWN;
3750 arg = TREE_STRING_POINTER (TREE_VALUE (argument));
3752 /* Check it against the list of known arguments. */
3753 for (ptr = isr_attribute_args; ptr->arg != NULL; ptr++)
3754 if (streq (arg, ptr->arg))
3755 return ptr->return_value;
3757 /* An unrecognized interrupt type. */
3758 return ARM_FT_UNKNOWN;
3761 /* Computes the type of the current function. */
3763 static unsigned long
3764 arm_compute_func_type (void)
3766 unsigned long type = ARM_FT_UNKNOWN;
3767 tree a;
3768 tree attr;
3770 gcc_assert (TREE_CODE (current_function_decl) == FUNCTION_DECL);
3772 /* Decide if the current function is volatile. Such functions
3773 never return, and many memory cycles can be saved by not storing
3774 register values that will never be needed again. This optimization
3775 was added to speed up context switching in a kernel application. */
3776 if (optimize > 0
3777 && (TREE_NOTHROW (current_function_decl)
3778 || !(flag_unwind_tables
3779 || (flag_exceptions
3780 && arm_except_unwind_info (&global_options) != UI_SJLJ)))
3781 && TREE_THIS_VOLATILE (current_function_decl))
3782 type |= ARM_FT_VOLATILE;
3784 if (cfun->static_chain_decl != NULL)
3785 type |= ARM_FT_NESTED;
3787 attr = DECL_ATTRIBUTES (current_function_decl);
3789 a = lookup_attribute ("naked", attr);
3790 if (a != NULL_TREE)
3791 type |= ARM_FT_NAKED;
3793 a = lookup_attribute ("isr", attr);
3794 if (a == NULL_TREE)
3795 a = lookup_attribute ("interrupt", attr);
3797 if (a == NULL_TREE)
3798 type |= TARGET_INTERWORK ? ARM_FT_INTERWORKED : ARM_FT_NORMAL;
3799 else
3800 type |= arm_isr_value (TREE_VALUE (a));
3802 if (lookup_attribute ("cmse_nonsecure_entry", attr))
3803 type |= ARM_FT_CMSE_ENTRY;
3805 return type;
3808 /* Returns the type of the current function. */
3810 unsigned long
3811 arm_current_func_type (void)
3813 if (ARM_FUNC_TYPE (cfun->machine->func_type) == ARM_FT_UNKNOWN)
3814 cfun->machine->func_type = arm_compute_func_type ();
3816 return cfun->machine->func_type;
3819 bool
3820 arm_allocate_stack_slots_for_args (void)
3822 /* Naked functions should not allocate stack slots for arguments. */
3823 return !IS_NAKED (arm_current_func_type ());
3826 static bool
3827 arm_warn_func_return (tree decl)
3829 /* Naked functions are implemented entirely in assembly, including the
3830 return sequence, so suppress warnings about this. */
3831 return lookup_attribute ("naked", DECL_ATTRIBUTES (decl)) == NULL_TREE;
3835 /* Output assembler code for a block containing the constant parts
3836 of a trampoline, leaving space for the variable parts.
3838 On the ARM, (if r8 is the static chain regnum, and remembering that
3839 referencing pc adds an offset of 8) the trampoline looks like:
3840 ldr r8, [pc, #0]
3841 ldr pc, [pc]
3842 .word static chain value
3843 .word function's address
3844 XXX FIXME: When the trampoline returns, r8 will be clobbered. */
3846 static void
3847 arm_asm_trampoline_template (FILE *f)
3849 fprintf (f, "\t.syntax unified\n");
3851 if (TARGET_ARM)
3853 fprintf (f, "\t.arm\n");
3854 asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", STATIC_CHAIN_REGNUM, PC_REGNUM);
3855 asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", PC_REGNUM, PC_REGNUM);
3857 else if (TARGET_THUMB2)
3859 fprintf (f, "\t.thumb\n");
3860 /* The Thumb-2 trampoline is similar to the arm implementation.
3861 Unlike 16-bit Thumb, we enter the stub in thumb mode. */
3862 asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n",
3863 STATIC_CHAIN_REGNUM, PC_REGNUM);
3864 asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n", PC_REGNUM, PC_REGNUM);
3866 else
3868 ASM_OUTPUT_ALIGN (f, 2);
3869 fprintf (f, "\t.code\t16\n");
3870 fprintf (f, ".Ltrampoline_start:\n");
3871 asm_fprintf (f, "\tpush\t{r0, r1}\n");
3872 asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
3873 asm_fprintf (f, "\tmov\t%r, r0\n", STATIC_CHAIN_REGNUM);
3874 asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
3875 asm_fprintf (f, "\tstr\tr0, [%r, #4]\n", SP_REGNUM);
3876 asm_fprintf (f, "\tpop\t{r0, %r}\n", PC_REGNUM);
3878 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
3879 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
3882 /* Emit RTL insns to initialize the variable parts of a trampoline. */
3884 static void
3885 arm_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
3887 rtx fnaddr, mem, a_tramp;
3889 emit_block_move (m_tramp, assemble_trampoline_template (),
3890 GEN_INT (TRAMPOLINE_SIZE), BLOCK_OP_NORMAL);
3892 mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 8 : 12);
3893 emit_move_insn (mem, chain_value);
3895 mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 12 : 16);
3896 fnaddr = XEXP (DECL_RTL (fndecl), 0);
3897 emit_move_insn (mem, fnaddr);
3899 a_tramp = XEXP (m_tramp, 0);
3900 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__clear_cache"),
3901 LCT_NORMAL, VOIDmode, a_tramp, Pmode,
3902 plus_constant (Pmode, a_tramp, TRAMPOLINE_SIZE), Pmode);
3905 /* Thumb trampolines should be entered in thumb mode, so set
3906 the bottom bit of the address. */
3908 static rtx
3909 arm_trampoline_adjust_address (rtx addr)
3911 if (TARGET_THUMB)
3912 addr = expand_simple_binop (Pmode, IOR, addr, const1_rtx,
3913 NULL, 0, OPTAB_LIB_WIDEN);
3914 return addr;
3917 /* Return 1 if it is possible to return using a single instruction.
3918 If SIBLING is non-null, this is a test for a return before a sibling
3919 call. SIBLING is the call insn, so we can examine its register usage. */
3922 use_return_insn (int iscond, rtx sibling)
3924 int regno;
3925 unsigned int func_type;
3926 unsigned long saved_int_regs;
3927 unsigned HOST_WIDE_INT stack_adjust;
3928 arm_stack_offsets *offsets;
3930 /* Never use a return instruction before reload has run. */
3931 if (!reload_completed)
3932 return 0;
3934 func_type = arm_current_func_type ();
3936 /* Naked, volatile and stack alignment functions need special
3937 consideration. */
3938 if (func_type & (ARM_FT_VOLATILE | ARM_FT_NAKED | ARM_FT_STACKALIGN))
3939 return 0;
3941 /* So do interrupt functions that use the frame pointer and Thumb
3942 interrupt functions. */
3943 if (IS_INTERRUPT (func_type) && (frame_pointer_needed || TARGET_THUMB))
3944 return 0;
3946 if (TARGET_LDRD && current_tune->prefer_ldrd_strd
3947 && !optimize_function_for_size_p (cfun))
3948 return 0;
3950 offsets = arm_get_frame_offsets ();
3951 stack_adjust = offsets->outgoing_args - offsets->saved_regs;
3953 /* As do variadic functions. */
3954 if (crtl->args.pretend_args_size
3955 || cfun->machine->uses_anonymous_args
3956 /* Or if the function calls __builtin_eh_return () */
3957 || crtl->calls_eh_return
3958 /* Or if the function calls alloca */
3959 || cfun->calls_alloca
3960 /* Or if there is a stack adjustment. However, if the stack pointer
3961 is saved on the stack, we can use a pre-incrementing stack load. */
3962 || !(stack_adjust == 0 || (TARGET_APCS_FRAME && frame_pointer_needed
3963 && stack_adjust == 4))
3964 /* Or if the static chain register was saved above the frame, under the
3965 assumption that the stack pointer isn't saved on the stack. */
3966 || (!(TARGET_APCS_FRAME && frame_pointer_needed)
3967 && arm_compute_static_chain_stack_bytes() != 0))
3968 return 0;
3970 saved_int_regs = offsets->saved_regs_mask;
3972 /* Unfortunately, the insn
3974 ldmib sp, {..., sp, ...}
3976 triggers a bug on most SA-110 based devices, such that the stack
3977 pointer won't be correctly restored if the instruction takes a
3978 page fault. We work around this problem by popping r3 along with
3979 the other registers, since that is never slower than executing
3980 another instruction.
3982 We test for !arm_arch5 here, because code for any architecture
3983 less than this could potentially be run on one of the buggy
3984 chips. */
3985 if (stack_adjust == 4 && !arm_arch5 && TARGET_ARM)
3987 /* Validate that r3 is a call-clobbered register (always true in
3988 the default abi) ... */
3989 if (!call_used_regs[3])
3990 return 0;
3992 /* ... that it isn't being used for a return value ... */
3993 if (arm_size_return_regs () >= (4 * UNITS_PER_WORD))
3994 return 0;
3996 /* ... or for a tail-call argument ... */
3997 if (sibling)
3999 gcc_assert (CALL_P (sibling));
4001 if (find_regno_fusage (sibling, USE, 3))
4002 return 0;
4005 /* ... and that there are no call-saved registers in r0-r2
4006 (always true in the default ABI). */
4007 if (saved_int_regs & 0x7)
4008 return 0;
4011 /* Can't be done if interworking with Thumb, and any registers have been
4012 stacked. */
4013 if (TARGET_INTERWORK && saved_int_regs != 0 && !IS_INTERRUPT(func_type))
4014 return 0;
4016 /* On StrongARM, conditional returns are expensive if they aren't
4017 taken and multiple registers have been stacked. */
4018 if (iscond && arm_tune_strongarm)
4020 /* Conditional return when just the LR is stored is a simple
4021 conditional-load instruction, that's not expensive. */
4022 if (saved_int_regs != 0 && saved_int_regs != (1 << LR_REGNUM))
4023 return 0;
4025 if (flag_pic
4026 && arm_pic_register != INVALID_REGNUM
4027 && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM))
4028 return 0;
4031 /* ARMv8-M nonsecure entry functions need to use bxns to return and thus need
4032 several instructions if anything needs to be popped. */
4033 if (saved_int_regs && IS_CMSE_ENTRY (func_type))
4034 return 0;
4036 /* If there are saved registers but the LR isn't saved, then we need
4037 two instructions for the return. */
4038 if (saved_int_regs && !(saved_int_regs & (1 << LR_REGNUM)))
4039 return 0;
4041 /* Can't be done if any of the VFP regs are pushed,
4042 since this also requires an insn. */
4043 if (TARGET_HARD_FLOAT)
4044 for (regno = FIRST_VFP_REGNUM; regno <= LAST_VFP_REGNUM; regno++)
4045 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
4046 return 0;
4048 if (TARGET_REALLY_IWMMXT)
4049 for (regno = FIRST_IWMMXT_REGNUM; regno <= LAST_IWMMXT_REGNUM; regno++)
4050 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
4051 return 0;
4053 return 1;
4056 /* Return TRUE if we should try to use a simple_return insn, i.e. perform
4057 shrink-wrapping if possible. This is the case if we need to emit a
4058 prologue, which we can test by looking at the offsets. */
4059 bool
4060 use_simple_return_p (void)
4062 arm_stack_offsets *offsets;
4064 /* Note this function can be called before or after reload. */
4065 if (!reload_completed)
4066 arm_compute_frame_layout ();
4068 offsets = arm_get_frame_offsets ();
4069 return offsets->outgoing_args != 0;
4072 /* Return TRUE if int I is a valid immediate ARM constant. */
4075 const_ok_for_arm (HOST_WIDE_INT i)
4077 int lowbit;
4079 /* For machines with >32 bit HOST_WIDE_INT, the bits above bit 31 must
4080 be all zero, or all one. */
4081 if ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff) != 0
4082 && ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff)
4083 != ((~(unsigned HOST_WIDE_INT) 0)
4084 & ~(unsigned HOST_WIDE_INT) 0xffffffff)))
4085 return FALSE;
4087 i &= (unsigned HOST_WIDE_INT) 0xffffffff;
4089 /* Fast return for 0 and small values. We must do this for zero, since
4090 the code below can't handle that one case. */
4091 if ((i & ~(unsigned HOST_WIDE_INT) 0xff) == 0)
4092 return TRUE;
4094 /* Get the number of trailing zeros. */
4095 lowbit = ffs((int) i) - 1;
4097 /* Only even shifts are allowed in ARM mode so round down to the
4098 nearest even number. */
4099 if (TARGET_ARM)
4100 lowbit &= ~1;
4102 if ((i & ~(((unsigned HOST_WIDE_INT) 0xff) << lowbit)) == 0)
4103 return TRUE;
4105 if (TARGET_ARM)
4107 /* Allow rotated constants in ARM mode. */
4108 if (lowbit <= 4
4109 && ((i & ~0xc000003f) == 0
4110 || (i & ~0xf000000f) == 0
4111 || (i & ~0xfc000003) == 0))
4112 return TRUE;
4114 else if (TARGET_THUMB2)
4116 HOST_WIDE_INT v;
4118 /* Allow repeated patterns 0x00XY00XY or 0xXYXYXYXY. */
4119 v = i & 0xff;
4120 v |= v << 16;
4121 if (i == v || i == (v | (v << 8)))
4122 return TRUE;
4124 /* Allow repeated pattern 0xXY00XY00. */
4125 v = i & 0xff00;
4126 v |= v << 16;
4127 if (i == v)
4128 return TRUE;
4130 else if (TARGET_HAVE_MOVT)
4132 /* Thumb-1 Targets with MOVT. */
4133 if (i > 0xffff)
4134 return FALSE;
4135 else
4136 return TRUE;
4139 return FALSE;
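/* Illustrative sketch (not part of the original file): in ARM mode a
   data-processing immediate is an 8-bit value rotated right by an even
   amount, so an equivalent brute-force test simply tries every even
   rotation; it assumes a 32-bit unsigned int, and the helper name is
   hypothetical.  (Thumb-2 additionally accepts the replicated byte
   patterns tested above, and the MOVT path accepts any 16-bit value.)  */

static bool
example_arm_mode_immediate_p (unsigned int i)
{
  unsigned int rot;

  for (rot = 0; rot < 32; rot += 2)
    {
      /* Rotate I left by ROT; if the result fits in 8 bits, then I is
	 that 8-bit value rotated right by ROT.  */
      unsigned int v = (i << rot) | (i >> ((32 - rot) & 31));
      if ((v & ~0xffu) == 0)
	return true;
    }
  return false;
}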
4142 /* Return true if I is a valid constant for the operation CODE. */
4144 const_ok_for_op (HOST_WIDE_INT i, enum rtx_code code)
4146 if (const_ok_for_arm (i))
4147 return 1;
4149 switch (code)
4151 case SET:
4152 /* See if we can use movw. */
4153 if (TARGET_HAVE_MOVT && (i & 0xffff0000) == 0)
4154 return 1;
4155 else
4156 /* Otherwise, try mvn. */
4157 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
4159 case PLUS:
4160 /* See if we can use addw or subw. */
4161 if (TARGET_THUMB2
4162 && ((i & 0xfffff000) == 0
4163 || ((-i) & 0xfffff000) == 0))
4164 return 1;
4165 /* Fall through. */
4166 case COMPARE:
4167 case EQ:
4168 case NE:
4169 case GT:
4170 case LE:
4171 case LT:
4172 case GE:
4173 case GEU:
4174 case LTU:
4175 case GTU:
4176 case LEU:
4177 case UNORDERED:
4178 case ORDERED:
4179 case UNEQ:
4180 case UNGE:
4181 case UNLT:
4182 case UNGT:
4183 case UNLE:
4184 return const_ok_for_arm (ARM_SIGN_EXTEND (-i));
4186 case MINUS: /* Should only occur with (MINUS I reg) => rsb */
4187 case XOR:
4188 return 0;
4190 case IOR:
4191 if (TARGET_THUMB2)
4192 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
4193 return 0;
4195 case AND:
4196 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
4198 default:
4199 gcc_unreachable ();
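/* Illustrative example (not part of the original file): 0xffffff00 is not a
   valid immediate on its own, but its complement 0xff is, so a SET of that
   value is accepted above through the mvn path; similarly a PLUS of -260
   (0xfffffefc) is accepted because +260 (0x104, an 8-bit value shifted by
   two) can be subtracted instead.  */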
4203 /* Return true if I is a valid di mode constant for the operation CODE. */
4205 const_ok_for_dimode_op (HOST_WIDE_INT i, enum rtx_code code)
4207 HOST_WIDE_INT hi_val = (i >> 32) & 0xFFFFFFFF;
4208 HOST_WIDE_INT lo_val = i & 0xFFFFFFFF;
4209 rtx hi = GEN_INT (hi_val);
4210 rtx lo = GEN_INT (lo_val);
4212 if (TARGET_THUMB1)
4213 return 0;
4215 switch (code)
4217 case AND:
4218 case IOR:
4219 case XOR:
4220 return (const_ok_for_op (hi_val, code) || hi_val == 0xFFFFFFFF)
4221 && (const_ok_for_op (lo_val, code) || lo_val == 0xFFFFFFFF);
4222 case PLUS:
4223 return arm_not_operand (hi, SImode) && arm_add_operand (lo, SImode);
4225 default:
4226 return 0;
4230 /* Emit a sequence of insns to handle a large constant.
4231 CODE is the code of the operation required, it can be any of SET, PLUS,
4232 IOR, AND, XOR, MINUS;
4233 MODE is the mode in which the operation is being performed;
4234 VAL is the integer to operate on;
4235 SOURCE is the other operand (a register, or a null-pointer for SET);
4236 SUBTARGETS means it is safe to create scratch registers if that will
4237 either produce a simpler sequence, or we will want to cse the values.
4238 Return value is the number of insns emitted. */
4240 /* ??? Tweak this for thumb2. */
4242 arm_split_constant (enum rtx_code code, machine_mode mode, rtx insn,
4243 HOST_WIDE_INT val, rtx target, rtx source, int subtargets)
4245 rtx cond;
4247 if (insn && GET_CODE (PATTERN (insn)) == COND_EXEC)
4248 cond = COND_EXEC_TEST (PATTERN (insn));
4249 else
4250 cond = NULL_RTX;
4252 if (subtargets || code == SET
4253 || (REG_P (target) && REG_P (source)
4254 && REGNO (target) != REGNO (source)))
4256 /* After arm_reorg has been called, we can't fix up expensive
4257 constants by pushing them into memory so we must synthesize
4258 them in-line, regardless of the cost. This is only likely to
4259 be more costly on chips that have load delay slots and we are
4260 compiling without running the scheduler (so no splitting
4261 occurred before the final instruction emission).
4263 Ref: gcc -O1 -mcpu=strongarm gcc.c-torture/compile/980506-2.c
4265 if (!cfun->machine->after_arm_reorg
4266 && !cond
4267 && (arm_gen_constant (code, mode, NULL_RTX, val, target, source,
4268 1, 0)
4269 > (arm_constant_limit (optimize_function_for_size_p (cfun))
4270 + (code != SET))))
4272 if (code == SET)
4274 /* Currently SET is the only monadic value for CODE, all
4275 the rest are dyadic. */
4276 if (TARGET_USE_MOVT)
4277 arm_emit_movpair (target, GEN_INT (val));
4278 else
4279 emit_set_insn (target, GEN_INT (val));
4281 return 1;
4283 else
4285 rtx temp = subtargets ? gen_reg_rtx (mode) : target;
4287 if (TARGET_USE_MOVT)
4288 arm_emit_movpair (temp, GEN_INT (val));
4289 else
4290 emit_set_insn (temp, GEN_INT (val));
4292 /* For MINUS, the value is subtracted from, since we never
4293 have subtraction of a constant. */
4294 if (code == MINUS)
4295 emit_set_insn (target, gen_rtx_MINUS (mode, temp, source));
4296 else
4297 emit_set_insn (target,
4298 gen_rtx_fmt_ee (code, mode, source, temp));
4299 return 2;
4304 return arm_gen_constant (code, mode, cond, val, target, source, subtargets, 1);
4308 /* Return a sequence of integers, in RETURN_SEQUENCE that fit into
4309 ARM/THUMB2 immediates, and add up to VAL.
4310 The function return value gives the number of insns required. */
4311 static int
4312 optimal_immediate_sequence (enum rtx_code code, unsigned HOST_WIDE_INT val,
4313 struct four_ints *return_sequence)
4315 int best_consecutive_zeros = 0;
4316 int i;
4317 int best_start = 0;
4318 int insns1, insns2;
4319 struct four_ints tmp_sequence;
4321 /* If we aren't targeting ARM, the best place to start is always at
4322 the bottom, otherwise look more closely. */
4323 if (TARGET_ARM)
4325 for (i = 0; i < 32; i += 2)
4327 int consecutive_zeros = 0;
4329 if (!(val & (3 << i)))
4331 while ((i < 32) && !(val & (3 << i)))
4333 consecutive_zeros += 2;
4334 i += 2;
4336 if (consecutive_zeros > best_consecutive_zeros)
4338 best_consecutive_zeros = consecutive_zeros;
4339 best_start = i - consecutive_zeros;
4341 i -= 2;
4346 /* So long as it won't require any more insns to do so, it's
4347 desirable to emit a small constant (in bits 0...9) in the last
4348 insn. This way there is more chance that it can be combined with
4349 a later addressing insn to form a pre-indexed load or store
4350 operation. Consider:
4352 *((volatile int *)0xe0000100) = 1;
4353 *((volatile int *)0xe0000110) = 2;
4355 We want this to wind up as:
4357 mov rA, #0xe0000000
4358 mov rB, #1
4359 str rB, [rA, #0x100]
4360 mov rB, #2
4361 str rB, [rA, #0x110]
4363 rather than having to synthesize both large constants from scratch.
4365 Therefore, we calculate how many insns would be required to emit
4366 the constant starting from `best_start', and also starting from
4367 zero (i.e. with bit 31 first to be output). If `best_start' doesn't
4368 yield a shorter sequence, we may as well use zero. */
4369 insns1 = optimal_immediate_sequence_1 (code, val, return_sequence, best_start);
4370 if (best_start != 0
4371 && ((HOST_WIDE_INT_1U << best_start) < val))
4373 insns2 = optimal_immediate_sequence_1 (code, val, &tmp_sequence, 0);
4374 if (insns2 <= insns1)
4376 *return_sequence = tmp_sequence;
4377 insns1 = insns2;
4381 return insns1;
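/* Illustrative sketch (not part of the original file): a greedy version of
   the split performed above, peeling off one byte-wide chunk at an even bit
   position per instruction.  It ignores rotations that wrap around the top
   of the word, so it can use more insns than the real search; it is only
   meant to show why at most four chunks are ever needed.  The helper name
   is hypothetical.  */

static int
example_immediate_split (unsigned int val, unsigned int out[4])
{
  int n = 0;

  while (val != 0)
    {
      /* Lowest set bit, rounded down to an even position.  */
      unsigned int low = __builtin_ctz (val) & ~1u;
      unsigned int chunk = val & (0xffu << low);

      out[n++] = chunk;		/* One mov/orr/add-style immediate.  */
      val &= ~chunk;
    }
  return n;
}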
4384 /* As for optimal_immediate_sequence, but starting at bit-position I. */
4385 static int
4386 optimal_immediate_sequence_1 (enum rtx_code code, unsigned HOST_WIDE_INT val,
4387 struct four_ints *return_sequence, int i)
4389 int remainder = val & 0xffffffff;
4390 int insns = 0;
4392 /* Try and find a way of doing the job in either two or three
4393 instructions.
4395 In ARM mode we can use 8-bit constants, rotated to any 2-bit aligned
4396 location. We start at position I. This may be the MSB, or
4397 optimal_immediate_sequence may have positioned it at the largest block
4398 of zeros that are aligned on a 2-bit boundary. We then fill up the temps,
4399 wrapping around to the top of the word when we drop off the bottom.
4400 In the worst case this code should produce no more than four insns.
4402 In Thumb2 mode, we can use 32/16-bit replicated constants, and 8-bit
4403 constants, shifted to any arbitrary location. We should always start
4404 at the MSB. */
4407 int end;
4408 unsigned int b1, b2, b3, b4;
4409 unsigned HOST_WIDE_INT result;
4410 int loc;
4412 gcc_assert (insns < 4);
4414 if (i <= 0)
4415 i += 32;
4417 /* First, find the next normal 12/8-bit shifted/rotated immediate. */
4418 if (remainder & ((TARGET_ARM ? (3 << (i - 2)) : (1 << (i - 1)))))
4420 loc = i;
4421 if (i <= 12 && TARGET_THUMB2 && code == PLUS)
4422 /* We can use addw/subw for the last 12 bits. */
4423 result = remainder;
4424 else
4426 /* Use an 8-bit shifted/rotated immediate. */
4427 end = i - 8;
4428 if (end < 0)
4429 end += 32;
4430 result = remainder & ((0x0ff << end)
4431 | ((i < end) ? (0xff >> (32 - end))
4432 : 0));
4433 i -= 8;
4436 else
4438 /* Arm allows rotates by a multiple of two. Thumb-2 allows
4439 arbitrary shifts. */
4440 i -= TARGET_ARM ? 2 : 1;
4441 continue;
4444 /* Next, see if we can do a better job with a thumb2 replicated
4445 constant.
4447 We do it this way around to catch the cases like 0x01F001E0 where
4448 two 8-bit immediates would work, but a replicated constant would
4449 make it worse.
4451 TODO: 16-bit constants that don't clear all the bits, but still win.
4452 TODO: Arithmetic splitting for set/add/sub, rather than bitwise. */
4453 if (TARGET_THUMB2)
4455 b1 = (remainder & 0xff000000) >> 24;
4456 b2 = (remainder & 0x00ff0000) >> 16;
4457 b3 = (remainder & 0x0000ff00) >> 8;
4458 b4 = remainder & 0xff;
4460 if (loc > 24)
4462 /* The 8-bit immediate already found clears b1 (and maybe b2),
4463 but must leave b3 and b4 alone. */
4465 /* First try to find a 32-bit replicated constant that clears
4466 almost everything. We can assume that we can't do it in one,
4467 or else we wouldn't be here. */
4468 unsigned int tmp = b1 & b2 & b3 & b4;
4469 unsigned int tmp2 = tmp + (tmp << 8) + (tmp << 16)
4470 + (tmp << 24);
4471 unsigned int matching_bytes = (tmp == b1) + (tmp == b2)
4472 + (tmp == b3) + (tmp == b4);
4473 if (tmp
4474 && (matching_bytes >= 3
4475 || (matching_bytes == 2
4476 && const_ok_for_op (remainder & ~tmp2, code))))
4478 /* At least 3 of the bytes match, and the fourth has at
4479 least as many bits set, or two of the bytes match
4480 and it will only require one more insn to finish. */
4481 result = tmp2;
4482 i = tmp != b1 ? 32
4483 : tmp != b2 ? 24
4484 : tmp != b3 ? 16
4485 : 8;
4488 /* Second, try to find a 16-bit replicated constant that can
4489 leave three of the bytes clear. If b2 or b4 is already
4490 zero, then we can. If the 8-bit from above would not
4491 clear b2 anyway, then we still win. */
4492 else if (b1 == b3 && (!b2 || !b4
4493 || (remainder & 0x00ff0000 & ~result)))
4495 result = remainder & 0xff00ff00;
4496 i = 24;
4499 else if (loc > 16)
4501 /* The 8-bit immediate already found clears b2 (and maybe b3)
4502 and we don't get here unless b1 is already clear, but it will
4503 leave b4 unchanged. */
4505 /* If we can clear b2 and b4 at once, then we win, since the
4506 8-bits couldn't possibly reach that far. */
4507 if (b2 == b4)
4509 result = remainder & 0x00ff00ff;
4510 i = 16;
4515 return_sequence->i[insns++] = result;
4516 remainder &= ~result;
4518 if (code == SET || code == MINUS)
4519 code = PLUS;
4521 while (remainder);
4523 return insns;
4526 /* Emit an instruction with the indicated PATTERN. If COND is
4527 non-NULL, conditionalize the execution of the instruction on COND
4528 being true. */
4530 static void
4531 emit_constant_insn (rtx cond, rtx pattern)
4533 if (cond)
4534 pattern = gen_rtx_COND_EXEC (VOIDmode, copy_rtx (cond), pattern);
4535 emit_insn (pattern);
4538 /* As above, but extra parameter GENERATE which, if clear, suppresses
4539 RTL generation. */
4541 static int
4542 arm_gen_constant (enum rtx_code code, machine_mode mode, rtx cond,
4543 unsigned HOST_WIDE_INT val, rtx target, rtx source,
4544 int subtargets, int generate)
4546 int can_invert = 0;
4547 int can_negate = 0;
4548 int final_invert = 0;
4549 int i;
4550 int set_sign_bit_copies = 0;
4551 int clear_sign_bit_copies = 0;
4552 int clear_zero_bit_copies = 0;
4553 int set_zero_bit_copies = 0;
4554 int insns = 0, neg_insns, inv_insns;
4555 unsigned HOST_WIDE_INT temp1, temp2;
4556 unsigned HOST_WIDE_INT remainder = val & 0xffffffff;
4557 struct four_ints *immediates;
4558 struct four_ints pos_immediates, neg_immediates, inv_immediates;
4560 /* Find out which operations are safe for a given CODE. Also do a quick
4561 check for degenerate cases; these can occur when DImode operations
4562 are split. */
4563 switch (code)
4565 case SET:
4566 can_invert = 1;
4567 break;
4569 case PLUS:
4570 can_negate = 1;
4571 break;
4573 case IOR:
4574 if (remainder == 0xffffffff)
4576 if (generate)
4577 emit_constant_insn (cond,
4578 gen_rtx_SET (target,
4579 GEN_INT (ARM_SIGN_EXTEND (val))));
4580 return 1;
4583 if (remainder == 0)
4585 if (reload_completed && rtx_equal_p (target, source))
4586 return 0;
4588 if (generate)
4589 emit_constant_insn (cond, gen_rtx_SET (target, source));
4590 return 1;
4592 break;
4594 case AND:
4595 if (remainder == 0)
4597 if (generate)
4598 emit_constant_insn (cond, gen_rtx_SET (target, const0_rtx));
4599 return 1;
4601 if (remainder == 0xffffffff)
4603 if (reload_completed && rtx_equal_p (target, source))
4604 return 0;
4605 if (generate)
4606 emit_constant_insn (cond, gen_rtx_SET (target, source));
4607 return 1;
4609 can_invert = 1;
4610 break;
4612 case XOR:
4613 if (remainder == 0)
4615 if (reload_completed && rtx_equal_p (target, source))
4616 return 0;
4617 if (generate)
4618 emit_constant_insn (cond, gen_rtx_SET (target, source));
4619 return 1;
4622 if (remainder == 0xffffffff)
4624 if (generate)
4625 emit_constant_insn (cond,
4626 gen_rtx_SET (target,
4627 gen_rtx_NOT (mode, source)));
4628 return 1;
4630 final_invert = 1;
4631 break;
4633 case MINUS:
4634 /* We treat MINUS as (val - source), since (source - val) is always
4635 passed as (source + (-val)). */
4636 if (remainder == 0)
4638 if (generate)
4639 emit_constant_insn (cond,
4640 gen_rtx_SET (target,
4641 gen_rtx_NEG (mode, source)));
4642 return 1;
4644 if (const_ok_for_arm (val))
4646 if (generate)
4647 emit_constant_insn (cond,
4648 gen_rtx_SET (target,
4649 gen_rtx_MINUS (mode, GEN_INT (val),
4650 source)));
4651 return 1;
4654 break;
4656 default:
4657 gcc_unreachable ();
4660 /* If we can do it in one insn get out quickly. */
4661 if (const_ok_for_op (val, code))
4663 if (generate)
4664 emit_constant_insn (cond,
4665 gen_rtx_SET (target,
4666 (source
4667 ? gen_rtx_fmt_ee (code, mode, source,
4668 GEN_INT (val))
4669 : GEN_INT (val))));
4670 return 1;
4673 /* On targets with UXTH/UBFX, we can deal with AND (2^N)-1 in a single
4674 insn. */
4675 if (code == AND && (i = exact_log2 (remainder + 1)) > 0
4676 && (arm_arch_thumb2 || (i == 16 && arm_arch6 && mode == SImode)))
4678 if (generate)
4680 if (mode == SImode && i == 16)
4681 /* Use UXTH in preference to UBFX, since on Thumb2 it's a
4682 smaller insn. */
4683 emit_constant_insn (cond,
4684 gen_zero_extendhisi2
4685 (target, gen_lowpart (HImode, source)));
4686 else
4687 /* Extz only supports SImode, but we can coerce the operands
4688 into that mode. */
4689 emit_constant_insn (cond,
4690 gen_extzv_t2 (gen_lowpart (SImode, target),
4691 gen_lowpart (SImode, source),
4692 GEN_INT (i), const0_rtx));
4695 return 1;
4698 /* Calculate a few attributes that may be useful for specific
4699 optimizations. */
4700 /* Count number of leading zeros. */
4701 for (i = 31; i >= 0; i--)
4703 if ((remainder & (1 << i)) == 0)
4704 clear_sign_bit_copies++;
4705 else
4706 break;
4709 /* Count number of leading 1's. */
4710 for (i = 31; i >= 0; i--)
4712 if ((remainder & (1 << i)) != 0)
4713 set_sign_bit_copies++;
4714 else
4715 break;
4718 /* Count number of trailing zero's. */
4719 for (i = 0; i <= 31; i++)
4721 if ((remainder & (1 << i)) == 0)
4722 clear_zero_bit_copies++;
4723 else
4724 break;
4727 /* Count number of trailing 1's. */
4728 for (i = 0; i <= 31; i++)
4730 if ((remainder & (1 << i)) != 0)
4731 set_zero_bit_copies++;
4732 else
4733 break;
4736 switch (code)
4738 case SET:
4739 /* See if we can do this by sign_extending a constant that is known
4740 to be negative. This is a good way of doing it, since the shift
4741 may well merge into a subsequent insn. */
4742 if (set_sign_bit_copies > 1)
4744 if (const_ok_for_arm
4745 (temp1 = ARM_SIGN_EXTEND (remainder
4746 << (set_sign_bit_copies - 1))))
4748 if (generate)
4750 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4751 emit_constant_insn (cond,
4752 gen_rtx_SET (new_src, GEN_INT (temp1)));
4753 emit_constant_insn (cond,
4754 gen_ashrsi3 (target, new_src,
4755 GEN_INT (set_sign_bit_copies - 1)));
4757 return 2;
4759 /* For an inverted constant, we will need to set the low bits,
4760 these will be shifted out of harm's way. */
4761 temp1 |= (1 << (set_sign_bit_copies - 1)) - 1;
4762 if (const_ok_for_arm (~temp1))
4764 if (generate)
4766 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4767 emit_constant_insn (cond,
4768 gen_rtx_SET (new_src, GEN_INT (temp1)));
4769 emit_constant_insn (cond,
4770 gen_ashrsi3 (target, new_src,
4771 GEN_INT (set_sign_bit_copies - 1)));
4773 return 2;
4777 /* See if we can calculate the value as the difference between two
4778 valid immediates. */
4779 if (clear_sign_bit_copies + clear_zero_bit_copies <= 16)
4781 int topshift = clear_sign_bit_copies & ~1;
4783 temp1 = ARM_SIGN_EXTEND ((remainder + (0x00800000 >> topshift))
4784 & (0xff000000 >> topshift));
4786 /* If temp1 is zero, then that means the 9 most significant
4787 bits of remainder were 1 and we've caused it to overflow.
4788 When topshift is 0 we don't need to do anything since we
4789 can borrow from 'bit 32'. */
4790 if (temp1 == 0 && topshift != 0)
4791 temp1 = 0x80000000 >> (topshift - 1);
4793 temp2 = ARM_SIGN_EXTEND (temp1 - remainder);
4795 if (const_ok_for_arm (temp2))
4797 if (generate)
4799 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4800 emit_constant_insn (cond,
4801 gen_rtx_SET (new_src, GEN_INT (temp1)));
4802 emit_constant_insn (cond,
4803 gen_addsi3 (target, new_src,
4804 GEN_INT (-temp2)));
4807 return 2;
4811 /* See if we can generate this by setting the bottom (or the top)
4812 16 bits, and then shifting these into the other half of the
4813 word. We only look for the simplest cases, to do more would cost
4814 too much. Be careful, however, not to generate this when the
4815 alternative would take fewer insns. */
4816 if (val & 0xffff0000)
4818 temp1 = remainder & 0xffff0000;
4819 temp2 = remainder & 0x0000ffff;
4821 /* Overlaps outside this range are best done using other methods. */
4822 for (i = 9; i < 24; i++)
4824 if ((((temp2 | (temp2 << i)) & 0xffffffff) == remainder)
4825 && !const_ok_for_arm (temp2))
4827 rtx new_src = (subtargets
4828 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
4829 : target);
4830 insns = arm_gen_constant (code, mode, cond, temp2, new_src,
4831 source, subtargets, generate);
4832 source = new_src;
4833 if (generate)
4834 emit_constant_insn
4835 (cond,
4836 gen_rtx_SET
4837 (target,
4838 gen_rtx_IOR (mode,
4839 gen_rtx_ASHIFT (mode, source,
4840 GEN_INT (i)),
4841 source)));
4842 return insns + 1;
4846 /* Don't duplicate cases already considered. */
4847 for (i = 17; i < 24; i++)
4849 if (((temp1 | (temp1 >> i)) == remainder)
4850 && !const_ok_for_arm (temp1))
4852 rtx new_src = (subtargets
4853 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
4854 : target);
4855 insns = arm_gen_constant (code, mode, cond, temp1, new_src,
4856 source, subtargets, generate);
4857 source = new_src;
4858 if (generate)
4859 emit_constant_insn
4860 (cond,
4861 gen_rtx_SET (target,
4862 gen_rtx_IOR
4863 (mode,
4864 gen_rtx_LSHIFTRT (mode, source,
4865 GEN_INT (i)),
4866 source)));
4867 return insns + 1;
4871 break;
4873 case IOR:
4874 case XOR:
4875 /* If we have IOR or XOR, and the constant can be loaded in a
4876 single instruction, and we can find a temporary to put it in,
4877 then this can be done in two instructions instead of 3-4. */
4878 if (subtargets
4879 /* TARGET can't be NULL if SUBTARGETS is 0 */
4880 || (reload_completed && !reg_mentioned_p (target, source)))
4882 if (const_ok_for_arm (ARM_SIGN_EXTEND (~val)))
4884 if (generate)
4886 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
4888 emit_constant_insn (cond,
4889 gen_rtx_SET (sub, GEN_INT (val)));
4890 emit_constant_insn (cond,
4891 gen_rtx_SET (target,
4892 gen_rtx_fmt_ee (code, mode,
4893 source, sub)));
4895 return 2;
4899 if (code == XOR)
4900 break;
4902 /* Convert.
4903 x = y | constant ( which is composed of set_sign_bit_copies of leading 1s
4904 and the remainder 0s for e.g. 0xfff00000)
4905 x = ~(~(y ashift set_sign_bit_copies) lshiftrt set_sign_bit_copies)
4907 This can be done in 2 instructions by using shifts with mov or mvn.
4908 e.g. for
4909 x = x | 0xfff00000;
4910 we generate.
4911 mvn r0, r0, asl #12
4912 mvn r0, r0, lsr #12 */
4913 if (set_sign_bit_copies > 8
4914 && (val & (HOST_WIDE_INT_M1U << (32 - set_sign_bit_copies))) == val)
4916 if (generate)
4918 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
4919 rtx shift = GEN_INT (set_sign_bit_copies);
4921 emit_constant_insn
4922 (cond,
4923 gen_rtx_SET (sub,
4924 gen_rtx_NOT (mode,
4925 gen_rtx_ASHIFT (mode,
4926 source,
4927 shift))));
4928 emit_constant_insn
4929 (cond,
4930 gen_rtx_SET (target,
4931 gen_rtx_NOT (mode,
4932 gen_rtx_LSHIFTRT (mode, sub,
4933 shift))));
4935 return 2;
4938 /* Convert
4939 x = y | constant (which has set_zero_bit_copies number of trailing ones).
4941 x = ~((~y lshiftrt set_zero_bit_copies) ashift set_zero_bit_copies).
4943 For eg. r0 = r0 | 0xfff
4944 mvn r0, r0, lsr #12
4945 mvn r0, r0, asl #12
4948 if (set_zero_bit_copies > 8
4949 && (remainder & ((1 << set_zero_bit_copies) - 1)) == remainder)
4951 if (generate)
4953 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
4954 rtx shift = GEN_INT (set_zero_bit_copies);
4956 emit_constant_insn
4957 (cond,
4958 gen_rtx_SET (sub,
4959 gen_rtx_NOT (mode,
4960 gen_rtx_LSHIFTRT (mode,
4961 source,
4962 shift))));
4963 emit_constant_insn
4964 (cond,
4965 gen_rtx_SET (target,
4966 gen_rtx_NOT (mode,
4967 gen_rtx_ASHIFT (mode, sub,
4968 shift))));
4970 return 2;
4973 /* This will never be reached for Thumb2 because orn is a valid
4974 instruction. This is for Thumb1 and the ARM 32 bit cases.
4976 x = y | constant (such that ~constant is a valid constant)
4977 Transform this to
4978 x = ~(~y & ~constant).
4980 if (const_ok_for_arm (temp1 = ARM_SIGN_EXTEND (~val)))
4982 if (generate)
4984 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
4985 emit_constant_insn (cond,
4986 gen_rtx_SET (sub,
4987 gen_rtx_NOT (mode, source)));
4988 source = sub;
4989 if (subtargets)
4990 sub = gen_reg_rtx (mode);
4991 emit_constant_insn (cond,
4992 gen_rtx_SET (sub,
4993 gen_rtx_AND (mode, source,
4994 GEN_INT (temp1))));
4995 emit_constant_insn (cond,
4996 gen_rtx_SET (target,
4997 gen_rtx_NOT (mode, sub)));
4999 return 3;
5001 break;
5003 case AND:
5004 /* See if two shifts will do 2 or more insns' worth of work. */
5005 if (clear_sign_bit_copies >= 16 && clear_sign_bit_copies < 24)
5007 HOST_WIDE_INT shift_mask = ((0xffffffff
5008 << (32 - clear_sign_bit_copies))
5009 & 0xffffffff);
5011 if ((remainder | shift_mask) != 0xffffffff)
5013 HOST_WIDE_INT new_val
5014 = ARM_SIGN_EXTEND (remainder | shift_mask);
5016 if (generate)
5018 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
5019 insns = arm_gen_constant (AND, SImode, cond, new_val,
5020 new_src, source, subtargets, 1);
5021 source = new_src;
5023 else
5025 rtx targ = subtargets ? NULL_RTX : target;
5026 insns = arm_gen_constant (AND, mode, cond, new_val,
5027 targ, source, subtargets, 0);
5031 if (generate)
5033 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
5034 rtx shift = GEN_INT (clear_sign_bit_copies);
5036 emit_insn (gen_ashlsi3 (new_src, source, shift));
5037 emit_insn (gen_lshrsi3 (target, new_src, shift));
5040 return insns + 2;
5043 if (clear_zero_bit_copies >= 16 && clear_zero_bit_copies < 24)
5045 HOST_WIDE_INT shift_mask = (1 << clear_zero_bit_copies) - 1;
5047 if ((remainder | shift_mask) != 0xffffffff)
5049 HOST_WIDE_INT new_val
5050 = ARM_SIGN_EXTEND (remainder | shift_mask);
5051 if (generate)
5053 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
5055 insns = arm_gen_constant (AND, mode, cond, new_val,
5056 new_src, source, subtargets, 1);
5057 source = new_src;
5059 else
5061 rtx targ = subtargets ? NULL_RTX : target;
5063 insns = arm_gen_constant (AND, mode, cond, new_val,
5064 targ, source, subtargets, 0);
5068 if (generate)
5070 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
5071 rtx shift = GEN_INT (clear_zero_bit_copies);
5073 emit_insn (gen_lshrsi3 (new_src, source, shift));
5074 emit_insn (gen_ashlsi3 (target, new_src, shift));
5077 return insns + 2;
5080 break;
5082 default:
5083 break;
5086 /* Calculate what the instruction sequences would be if we generated it
5087 normally, negated, or inverted. */
5088 if (code == AND)
5089 /* AND cannot be split into multiple insns, so invert and use BIC. */
5090 insns = 99;
5091 else
5092 insns = optimal_immediate_sequence (code, remainder, &pos_immediates);
5094 if (can_negate)
5095 neg_insns = optimal_immediate_sequence (code, (-remainder) & 0xffffffff,
5096 &neg_immediates);
5097 else
5098 neg_insns = 99;
5100 if (can_invert || final_invert)
5101 inv_insns = optimal_immediate_sequence (code, remainder ^ 0xffffffff,
5102 &inv_immediates);
5103 else
5104 inv_insns = 99;
5106 immediates = &pos_immediates;
5108 /* Is the negated immediate sequence more efficient? */
5109 if (neg_insns < insns && neg_insns <= inv_insns)
5111 insns = neg_insns;
5112 immediates = &neg_immediates;
5114 else
5115 can_negate = 0;
5117 /* Is the inverted immediate sequence more efficient?
5118 We must allow for an extra NOT instruction for XOR operations, although
5119 there is some chance that the final 'mvn' will get optimized later. */
5120 if ((inv_insns + 1) < insns || (!final_invert && inv_insns < insns))
5122 insns = inv_insns;
5123 immediates = &inv_immediates;
5125 else
5127 can_invert = 0;
5128 final_invert = 0;
5131 /* Now output the chosen sequence as instructions. */
5132 if (generate)
5134 for (i = 0; i < insns; i++)
5136 rtx new_src, temp1_rtx;
5138 temp1 = immediates->i[i];
5140 if (code == SET || code == MINUS)
5141 new_src = (subtargets ? gen_reg_rtx (mode) : target);
5142 else if ((final_invert || i < (insns - 1)) && subtargets)
5143 new_src = gen_reg_rtx (mode);
5144 else
5145 new_src = target;
5147 if (can_invert)
5148 temp1 = ~temp1;
5149 else if (can_negate)
5150 temp1 = -temp1;
5152 temp1 = trunc_int_for_mode (temp1, mode);
5153 temp1_rtx = GEN_INT (temp1);
5155 if (code == SET)
5157 else if (code == MINUS)
5158 temp1_rtx = gen_rtx_MINUS (mode, temp1_rtx, source);
5159 else
5160 temp1_rtx = gen_rtx_fmt_ee (code, mode, source, temp1_rtx);
5162 emit_constant_insn (cond, gen_rtx_SET (new_src, temp1_rtx));
5163 source = new_src;
5165 if (code == SET)
5167 can_negate = can_invert;
5168 can_invert = 0;
5169 code = PLUS;
5171 else if (code == MINUS)
5172 code = PLUS;
5176 if (final_invert)
5178 if (generate)
5179 emit_constant_insn (cond, gen_rtx_SET (target,
5180 gen_rtx_NOT (mode, source)));
5181 insns++;
5184 return insns;
5187 /* Canonicalize a comparison so that we are more likely to recognize it.
5188 This can be done for a few constant compares, where we can make the
5189 immediate value easier to load. */
5191 static void
5192 arm_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
5193 bool op0_preserve_value)
5195 machine_mode mode;
5196 unsigned HOST_WIDE_INT i, maxval;
5198 mode = GET_MODE (*op0);
5199 if (mode == VOIDmode)
5200 mode = GET_MODE (*op1);
5202 maxval = (HOST_WIDE_INT_1U << (GET_MODE_BITSIZE (mode) - 1)) - 1;
5204 /* For DImode, we have GE/LT/GEU/LTU comparisons. In ARM mode
5205 we can also use cmp/cmpeq for GTU/LEU. GT/LE must be either
5206 reversed or (for constant OP1) adjusted to GE/LT. Similarly
5207 for GTU/LEU in Thumb mode. */
5208 if (mode == DImode)
5211 if (*code == GT || *code == LE
5212 || (!TARGET_ARM && (*code == GTU || *code == LEU)))
5214 /* Missing comparison. First try to use an available
5215 comparison. */
5216 if (CONST_INT_P (*op1))
5218 i = INTVAL (*op1);
5219 switch (*code)
5221 case GT:
5222 case LE:
5223 if (i != maxval
5224 && arm_const_double_by_immediates (GEN_INT (i + 1)))
5226 *op1 = GEN_INT (i + 1);
5227 *code = *code == GT ? GE : LT;
5228 return;
5230 break;
5231 case GTU:
5232 case LEU:
5233 if (i != ~((unsigned HOST_WIDE_INT) 0)
5234 && arm_const_double_by_immediates (GEN_INT (i + 1)))
5236 *op1 = GEN_INT (i + 1);
5237 *code = *code == GTU ? GEU : LTU;
5238 return;
5240 break;
5241 default:
5242 gcc_unreachable ();
5246 /* If that did not work, reverse the condition. */
5247 if (!op0_preserve_value)
5249 std::swap (*op0, *op1);
5250 *code = (int)swap_condition ((enum rtx_code)*code);
5253 return;
5256 /* If *op0 is (zero_extend:SI (subreg:QI (reg:SI) 0)) and comparing
5257 with const0_rtx, change it to (and:SI (reg:SI) (const_int 255)),
5258 to facilitate possible combining with a cmp into 'ands'. */
5259 if (mode == SImode
5260 && GET_CODE (*op0) == ZERO_EXTEND
5261 && GET_CODE (XEXP (*op0, 0)) == SUBREG
5262 && GET_MODE (XEXP (*op0, 0)) == QImode
5263 && GET_MODE (SUBREG_REG (XEXP (*op0, 0))) == SImode
5264 && subreg_lowpart_p (XEXP (*op0, 0))
5265 && *op1 == const0_rtx)
5266 *op0 = gen_rtx_AND (SImode, SUBREG_REG (XEXP (*op0, 0)),
5267 GEN_INT (255));
5269 /* Comparisons smaller than DImode. Only adjust comparisons against
5270 an out-of-range constant. */
5271 if (!CONST_INT_P (*op1)
5272 || const_ok_for_arm (INTVAL (*op1))
5273 || const_ok_for_arm (- INTVAL (*op1)))
5274 return;
5276 i = INTVAL (*op1);
5278 switch (*code)
5280 case EQ:
5281 case NE:
5282 return;
5284 case GT:
5285 case LE:
5286 if (i != maxval
5287 && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
5289 *op1 = GEN_INT (ARM_SIGN_EXTEND (i + 1));
5290 *code = *code == GT ? GE : LT;
5291 return;
5293 break;
5295 case GE:
5296 case LT:
5297 if (i != ~maxval
5298 && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
5300 *op1 = GEN_INT (i - 1);
5301 *code = *code == GE ? GT : LE;
5302 return;
5304 break;
5306 case GTU:
5307 case LEU:
5308 if (i != ~((unsigned HOST_WIDE_INT) 0)
5309 && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
5311 *op1 = GEN_INT (ARM_SIGN_EXTEND (i + 1));
5312 *code = *code == GTU ? GEU : LTU;
5313 return;
5315 break;
5317 case GEU:
5318 case LTU:
5319 if (i != 0
5320 && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
5322 *op1 = GEN_INT (i - 1);
5323 *code = *code == GEU ? GTU : LEU;
5324 return;
5326 break;
5328 default:
5329 gcc_unreachable ();
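/* Editorial illustration (not part of the original source): assuming a
   32-bit SImode comparison, 1023 (0x3ff) is not a valid ARM immediate
   but 1024 (0x400) is, so the code above rewrites

     (gt (reg:SI r0) (const_int 1023))

   into the equivalent

     (ge (reg:SI r0) (const_int 1024))

   allowing the comparison to be done with a single CMP instruction.  */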
5334 /* Define how to find the value returned by a function. */
5336 static rtx
5337 arm_function_value (const_tree type, const_tree func,
5338 bool outgoing ATTRIBUTE_UNUSED)
5340 machine_mode mode;
5341 int unsignedp ATTRIBUTE_UNUSED;
5342 rtx r ATTRIBUTE_UNUSED;
5344 mode = TYPE_MODE (type);
5346 if (TARGET_AAPCS_BASED)
5347 return aapcs_allocate_return_reg (mode, type, func);
5349 /* Promote integer types. */
5350 if (INTEGRAL_TYPE_P (type))
5351 mode = arm_promote_function_mode (type, mode, &unsignedp, func, 1);
5353 /* Promotes small structs returned in a register to full-word size
5354 for big-endian AAPCS. */
5355 if (arm_return_in_msb (type))
5357 HOST_WIDE_INT size = int_size_in_bytes (type);
5358 if (size % UNITS_PER_WORD != 0)
5360 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
5361 mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
5365 return arm_libcall_value_1 (mode);
5368 /* libcall hashtable helpers. */
5370 struct libcall_hasher : nofree_ptr_hash <const rtx_def>
5372 static inline hashval_t hash (const rtx_def *);
5373 static inline bool equal (const rtx_def *, const rtx_def *);
5374 static inline void remove (rtx_def *);
5377 inline bool
5378 libcall_hasher::equal (const rtx_def *p1, const rtx_def *p2)
5380 return rtx_equal_p (p1, p2);
5383 inline hashval_t
5384 libcall_hasher::hash (const rtx_def *p1)
5386 return hash_rtx (p1, VOIDmode, NULL, NULL, FALSE);
5389 typedef hash_table<libcall_hasher> libcall_table_type;
5391 static void
5392 add_libcall (libcall_table_type *htab, rtx libcall)
5394 *htab->find_slot (libcall, INSERT) = libcall;
5397 static bool
5398 arm_libcall_uses_aapcs_base (const_rtx libcall)
5400 static bool init_done = false;
5401 static libcall_table_type *libcall_htab = NULL;
5403 if (!init_done)
5405 init_done = true;
5407 libcall_htab = new libcall_table_type (31);
5408 add_libcall (libcall_htab,
5409 convert_optab_libfunc (sfloat_optab, SFmode, SImode));
5410 add_libcall (libcall_htab,
5411 convert_optab_libfunc (sfloat_optab, DFmode, SImode));
5412 add_libcall (libcall_htab,
5413 convert_optab_libfunc (sfloat_optab, SFmode, DImode));
5414 add_libcall (libcall_htab,
5415 convert_optab_libfunc (sfloat_optab, DFmode, DImode));
5417 add_libcall (libcall_htab,
5418 convert_optab_libfunc (ufloat_optab, SFmode, SImode));
5419 add_libcall (libcall_htab,
5420 convert_optab_libfunc (ufloat_optab, DFmode, SImode));
5421 add_libcall (libcall_htab,
5422 convert_optab_libfunc (ufloat_optab, SFmode, DImode));
5423 add_libcall (libcall_htab,
5424 convert_optab_libfunc (ufloat_optab, DFmode, DImode));
5426 add_libcall (libcall_htab,
5427 convert_optab_libfunc (sext_optab, SFmode, HFmode));
5428 add_libcall (libcall_htab,
5429 convert_optab_libfunc (trunc_optab, HFmode, SFmode));
5430 add_libcall (libcall_htab,
5431 convert_optab_libfunc (sfix_optab, SImode, DFmode));
5432 add_libcall (libcall_htab,
5433 convert_optab_libfunc (ufix_optab, SImode, DFmode));
5434 add_libcall (libcall_htab,
5435 convert_optab_libfunc (sfix_optab, DImode, DFmode));
5436 add_libcall (libcall_htab,
5437 convert_optab_libfunc (ufix_optab, DImode, DFmode));
5438 add_libcall (libcall_htab,
5439 convert_optab_libfunc (sfix_optab, DImode, SFmode));
5440 add_libcall (libcall_htab,
5441 convert_optab_libfunc (ufix_optab, DImode, SFmode));
5443 /* Values from double-precision helper functions are returned in core
5444 registers if the selected core only supports single-precision
5445 arithmetic, even if we are using the hard-float ABI. The same is
5446 true for single-precision helpers, but we will never be using the
5447 hard-float ABI on a CPU which doesn't support single-precision
5448 operations in hardware. */
5449 add_libcall (libcall_htab, optab_libfunc (add_optab, DFmode));
5450 add_libcall (libcall_htab, optab_libfunc (sdiv_optab, DFmode));
5451 add_libcall (libcall_htab, optab_libfunc (smul_optab, DFmode));
5452 add_libcall (libcall_htab, optab_libfunc (neg_optab, DFmode));
5453 add_libcall (libcall_htab, optab_libfunc (sub_optab, DFmode));
5454 add_libcall (libcall_htab, optab_libfunc (eq_optab, DFmode));
5455 add_libcall (libcall_htab, optab_libfunc (lt_optab, DFmode));
5456 add_libcall (libcall_htab, optab_libfunc (le_optab, DFmode));
5457 add_libcall (libcall_htab, optab_libfunc (ge_optab, DFmode));
5458 add_libcall (libcall_htab, optab_libfunc (gt_optab, DFmode));
5459 add_libcall (libcall_htab, optab_libfunc (unord_optab, DFmode));
5460 add_libcall (libcall_htab, convert_optab_libfunc (sext_optab, DFmode,
5461 SFmode));
5462 add_libcall (libcall_htab, convert_optab_libfunc (trunc_optab, SFmode,
5463 DFmode));
5464 add_libcall (libcall_htab,
5465 convert_optab_libfunc (trunc_optab, HFmode, DFmode));
5468 return libcall && libcall_htab->find (libcall) != NULL;
5471 static rtx
5472 arm_libcall_value_1 (machine_mode mode)
5474 if (TARGET_AAPCS_BASED)
5475 return aapcs_libcall_value (mode);
5476 else if (TARGET_IWMMXT_ABI
5477 && arm_vector_mode_supported_p (mode))
5478 return gen_rtx_REG (mode, FIRST_IWMMXT_REGNUM);
5479 else
5480 return gen_rtx_REG (mode, ARG_REGISTER (1));
5483 /* Define how to find the value returned by a library function
5484 assuming the value has mode MODE. */
5486 static rtx
5487 arm_libcall_value (machine_mode mode, const_rtx libcall)
5489 if (TARGET_AAPCS_BASED && arm_pcs_default != ARM_PCS_AAPCS
5490 && GET_MODE_CLASS (mode) == MODE_FLOAT)
5492 /* The following libcalls return their result in integer registers,
5493 even though they return a floating point value. */
5494 if (arm_libcall_uses_aapcs_base (libcall))
5495 return gen_rtx_REG (mode, ARG_REGISTER (1));
5499 return arm_libcall_value_1 (mode);
5502 /* Implement TARGET_FUNCTION_VALUE_REGNO_P. */
5504 static bool
5505 arm_function_value_regno_p (const unsigned int regno)
5507 if (regno == ARG_REGISTER (1)
5508 || (TARGET_32BIT
5509 && TARGET_AAPCS_BASED
5510 && TARGET_HARD_FLOAT
5511 && regno == FIRST_VFP_REGNUM)
5512 || (TARGET_IWMMXT_ABI
5513 && regno == FIRST_IWMMXT_REGNUM))
5514 return true;
5516 return false;
5519 /* Determine the amount of memory needed to store the possible return
5520 registers of an untyped call. */
5521 int
5522 arm_apply_result_size (void)
5524 int size = 16;
5526 if (TARGET_32BIT)
5528 if (TARGET_HARD_FLOAT_ABI)
5529 size += 32;
5530 if (TARGET_IWMMXT_ABI)
5531 size += 8;
5534 return size;
5537 /* Decide whether TYPE should be returned in memory (true)
5538 or in a register (false). FNTYPE is the type of the function making
5539 the call. */
5540 static bool
5541 arm_return_in_memory (const_tree type, const_tree fntype)
5543 HOST_WIDE_INT size;
5545 size = int_size_in_bytes (type); /* Negative if not fixed size. */
5547 if (TARGET_AAPCS_BASED)
5549 /* Simple, non-aggregate types (i.e. not including vectors and
5550 complex) are always returned in a register (or registers).
5551 We don't care about which register here, so we can short-cut
5552 some of the detail. */
5553 if (!AGGREGATE_TYPE_P (type)
5554 && TREE_CODE (type) != VECTOR_TYPE
5555 && TREE_CODE (type) != COMPLEX_TYPE)
5556 return false;
5558 /* Any return value that is no larger than one word can be
5559 returned in r0. */
5560 if (((unsigned HOST_WIDE_INT) size) <= UNITS_PER_WORD)
5561 return false;
5563 /* Check any available co-processors to see if they accept the
5564 type as a register candidate (VFP, for example, can return
5565 some aggregates in consecutive registers). These aren't
5566 available if the call is variadic. */
5567 if (aapcs_select_return_coproc (type, fntype) >= 0)
5568 return false;
5570 /* Vector values should be returned using ARM registers, not
5571 memory (unless they're over 16 bytes, which will break since
5572 we only have four call-clobbered registers to play with). */
5573 if (TREE_CODE (type) == VECTOR_TYPE)
5574 return (size < 0 || size > (4 * UNITS_PER_WORD));
5576 /* The rest go in memory. */
5577 return true;
5580 if (TREE_CODE (type) == VECTOR_TYPE)
5581 return (size < 0 || size > (4 * UNITS_PER_WORD));
5583 if (!AGGREGATE_TYPE_P (type)
5584 && (TREE_CODE (type) != VECTOR_TYPE))
5585 /* All simple types are returned in registers. */
5586 return false;
5588 if (arm_abi != ARM_ABI_APCS)
5590 /* ATPCS and later return aggregate types in memory only if they are
5591 larger than a word (or are variable size). */
5592 return (size < 0 || size > UNITS_PER_WORD);
5595 /* For the arm-wince targets we choose to be compatible with Microsoft's
5596 ARM and Thumb compilers, which always return aggregates in memory. */
5597 #ifndef ARM_WINCE
5598 /* All structures/unions bigger than one word are returned in memory.
5599 Also catch the case where int_size_in_bytes returns -1. In this case
5600 the aggregate is either huge or of variable size, and in either case
5601 we will want to return it via memory and not in a register. */
5602 if (size < 0 || size > UNITS_PER_WORD)
5603 return true;
5605 if (TREE_CODE (type) == RECORD_TYPE)
5607 tree field;
5609 /* For a struct the APCS says that we only return in a register
5610 if the type is 'integer like' and every addressable element
5611 has an offset of zero. For practical purposes this means
5612 that the structure can have at most one non bit-field element
5613 and that this element must be the first one in the structure. */
5615 /* Find the first field, ignoring non FIELD_DECL things which will
5616 have been created by C++. */
5617 for (field = TYPE_FIELDS (type);
5618 field && TREE_CODE (field) != FIELD_DECL;
5619 field = DECL_CHAIN (field))
5620 continue;
5622 if (field == NULL)
5623 return false; /* An empty structure. Allowed by an extension to ANSI C. */
5625 /* Check that the first field is valid for returning in a register. */
5627 /* ... Floats are not allowed */
5628 if (FLOAT_TYPE_P (TREE_TYPE (field)))
5629 return true;
5631 /* ... Aggregates that are not themselves valid for returning in
5632 a register are not allowed. */
5633 if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
5634 return true;
5636 /* Now check the remaining fields, if any. Only bitfields are allowed,
5637 since they are not addressable. */
5638 for (field = DECL_CHAIN (field);
5639 field;
5640 field = DECL_CHAIN (field))
5642 if (TREE_CODE (field) != FIELD_DECL)
5643 continue;
5645 if (!DECL_BIT_FIELD_TYPE (field))
5646 return true;
5649 return false;
5652 if (TREE_CODE (type) == UNION_TYPE)
5654 tree field;
5656 /* Unions can be returned in registers if every element is
5657 integral, or can be returned in an integer register. */
5658 for (field = TYPE_FIELDS (type);
5659 field;
5660 field = DECL_CHAIN (field))
5662 if (TREE_CODE (field) != FIELD_DECL)
5663 continue;
5665 if (FLOAT_TYPE_P (TREE_TYPE (field)))
5666 return true;
5668 if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
5669 return true;
5672 return false;
5674 #endif /* not ARM_WINCE */
5676 /* Return all other types in memory. */
5677 return true;
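/* Editorial illustration (not part of the original source), assuming the
   AAPCS base variant with 4-byte words:

     struct tiny  { char c; };      -- 1 byte: returned in r0 (false).
     struct pair  { int a, b; };    -- 8 bytes, no co-processor candidate:
                                       returned in memory (true).
     struct point { float x, y; };  -- with a hard-float VFP PCS this is a
                                       co-processor return candidate and is
                                       returned in s0/s1 (false).  */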
5680 const struct pcs_attribute_arg
5682 const char *arg;
5683 enum arm_pcs value;
5684 } pcs_attribute_args[] =
5686 {"aapcs", ARM_PCS_AAPCS},
5687 {"aapcs-vfp", ARM_PCS_AAPCS_VFP},
5688 #if 0
5689 /* We could recognize these, but changes would be needed elsewhere
5690 * to implement them. */
5691 {"aapcs-iwmmxt", ARM_PCS_AAPCS_IWMMXT},
5692 {"atpcs", ARM_PCS_ATPCS},
5693 {"apcs", ARM_PCS_APCS},
5694 #endif
5695 {NULL, ARM_PCS_UNKNOWN}
5698 static enum arm_pcs
5699 arm_pcs_from_attribute (tree attr)
5701 const struct pcs_attribute_arg *ptr;
5702 const char *arg;
5704 /* Get the value of the argument. */
5705 if (TREE_VALUE (attr) == NULL_TREE
5706 || TREE_CODE (TREE_VALUE (attr)) != STRING_CST)
5707 return ARM_PCS_UNKNOWN;
5709 arg = TREE_STRING_POINTER (TREE_VALUE (attr));
5711 /* Check it against the list of known arguments. */
5712 for (ptr = pcs_attribute_args; ptr->arg != NULL; ptr++)
5713 if (streq (arg, ptr->arg))
5714 return ptr->value;
5716 /* An unrecognized PCS name. */
5717 return ARM_PCS_UNKNOWN;
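/* Editorial usage example (the function name is made up): the strings
   recognized above come from declarations such as

     double dot (const float *a, const float *b, int n)
       __attribute__ ((pcs ("aapcs-vfp")));

   which selects the VFP variant of the AAPCS for calls to that function
   (assuming a VFP-capable float ABI is in use).  */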
5720 /* Get the PCS variant to use for this call. TYPE is the function's type
5721 specification, DECL is the specific declaration. DECL may be null if
5722 the call could be indirect or if this is a library call. */
5723 static enum arm_pcs
5724 arm_get_pcs_model (const_tree type, const_tree decl)
5726 bool user_convention = false;
5727 enum arm_pcs user_pcs = arm_pcs_default;
5728 tree attr;
5730 gcc_assert (type);
5732 attr = lookup_attribute ("pcs", TYPE_ATTRIBUTES (type));
5733 if (attr)
5735 user_pcs = arm_pcs_from_attribute (TREE_VALUE (attr));
5736 user_convention = true;
5739 if (TARGET_AAPCS_BASED)
5741 /* Detect varargs functions. These always use the base rules
5742 (no argument is ever a candidate for a co-processor
5743 register). */
5744 bool base_rules = stdarg_p (type);
5746 if (user_convention)
5748 if (user_pcs > ARM_PCS_AAPCS_LOCAL)
5749 sorry ("non-AAPCS derived PCS variant");
5750 else if (base_rules && user_pcs != ARM_PCS_AAPCS)
5751 error ("variadic functions must use the base AAPCS variant");
5754 if (base_rules)
5755 return ARM_PCS_AAPCS;
5756 else if (user_convention)
5757 return user_pcs;
5758 else if (decl && flag_unit_at_a_time)
5760 /* Local functions never leak outside this compilation unit,
5761 so we are free to use whatever conventions are
5762 appropriate. */
5763 /* FIXME: remove CONST_CAST_TREE when cgraph is constified. */
5764 cgraph_local_info *i = cgraph_node::local_info (CONST_CAST_TREE(decl));
5765 if (i && i->local)
5766 return ARM_PCS_AAPCS_LOCAL;
5769 else if (user_convention && user_pcs != arm_pcs_default)
5770 sorry ("PCS variant");
5772 /* For everything else we use the target's default. */
5773 return arm_pcs_default;
5777 static void
5778 aapcs_vfp_cum_init (CUMULATIVE_ARGS *pcum ATTRIBUTE_UNUSED,
5779 const_tree fntype ATTRIBUTE_UNUSED,
5780 rtx libcall ATTRIBUTE_UNUSED,
5781 const_tree fndecl ATTRIBUTE_UNUSED)
5783 /* Record the unallocated VFP registers. */
5784 pcum->aapcs_vfp_regs_free = (1 << NUM_VFP_ARG_REGS) - 1;
5785 pcum->aapcs_vfp_reg_alloc = 0;
5788 /* Walk down the type tree of TYPE counting consecutive base elements.
5789 If *MODEP is VOIDmode, then set it to the first valid floating point
5790 type. If a non-floating point type is found, or if a floating point
5791 type that doesn't match a non-VOIDmode *MODEP is found, then return -1,
5792 otherwise return the count in the sub-tree. */
5793 static int
5794 aapcs_vfp_sub_candidate (const_tree type, machine_mode *modep)
5796 machine_mode mode;
5797 HOST_WIDE_INT size;
5799 switch (TREE_CODE (type))
5801 case REAL_TYPE:
5802 mode = TYPE_MODE (type);
5803 if (mode != DFmode && mode != SFmode && mode != HFmode)
5804 return -1;
5806 if (*modep == VOIDmode)
5807 *modep = mode;
5809 if (*modep == mode)
5810 return 1;
5812 break;
5814 case COMPLEX_TYPE:
5815 mode = TYPE_MODE (TREE_TYPE (type));
5816 if (mode != DFmode && mode != SFmode)
5817 return -1;
5819 if (*modep == VOIDmode)
5820 *modep = mode;
5822 if (*modep == mode)
5823 return 2;
5825 break;
5827 case VECTOR_TYPE:
5828 /* Use V2SImode and V4SImode as representatives of all 64-bit
5829 and 128-bit vector types, whether or not those modes are
5830 supported with the present options. */
5831 size = int_size_in_bytes (type);
5832 switch (size)
5834 case 8:
5835 mode = V2SImode;
5836 break;
5837 case 16:
5838 mode = V4SImode;
5839 break;
5840 default:
5841 return -1;
5844 if (*modep == VOIDmode)
5845 *modep = mode;
5847 /* Vector modes are considered to be opaque: two vectors are
5848 equivalent for the purposes of being homogeneous aggregates
5849 if they are the same size. */
5850 if (*modep == mode)
5851 return 1;
5853 break;
5855 case ARRAY_TYPE:
5857 int count;
5858 tree index = TYPE_DOMAIN (type);
5860 /* Can't handle incomplete types nor sizes that are not
5861 fixed. */
5862 if (!COMPLETE_TYPE_P (type)
5863 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
5864 return -1;
5866 count = aapcs_vfp_sub_candidate (TREE_TYPE (type), modep);
5867 if (count == -1
5868 || !index
5869 || !TYPE_MAX_VALUE (index)
5870 || !tree_fits_uhwi_p (TYPE_MAX_VALUE (index))
5871 || !TYPE_MIN_VALUE (index)
5872 || !tree_fits_uhwi_p (TYPE_MIN_VALUE (index))
5873 || count < 0)
5874 return -1;
5876 count *= (1 + tree_to_uhwi (TYPE_MAX_VALUE (index))
5877 - tree_to_uhwi (TYPE_MIN_VALUE (index)));
5879 /* There must be no padding. */
5880 if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
5881 return -1;
5883 return count;
5886 case RECORD_TYPE:
5888 int count = 0;
5889 int sub_count;
5890 tree field;
5892 /* Can't handle incomplete types nor sizes that are not
5893 fixed. */
5894 if (!COMPLETE_TYPE_P (type)
5895 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
5896 return -1;
5898 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
5900 if (TREE_CODE (field) != FIELD_DECL)
5901 continue;
5903 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
5904 if (sub_count < 0)
5905 return -1;
5906 count += sub_count;
5909 /* There must be no padding. */
5910 if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
5911 return -1;
5913 return count;
5916 case UNION_TYPE:
5917 case QUAL_UNION_TYPE:
5919 /* These aren't very interesting except in a degenerate case. */
5920 int count = 0;
5921 int sub_count;
5922 tree field;
5924 /* Can't handle incomplete types nor sizes that are not
5925 fixed. */
5926 if (!COMPLETE_TYPE_P (type)
5927 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
5928 return -1;
5930 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
5932 if (TREE_CODE (field) != FIELD_DECL)
5933 continue;
5935 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
5936 if (sub_count < 0)
5937 return -1;
5938 count = count > sub_count ? count : sub_count;
5941 /* There must be no padding. */
5942 if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
5943 return -1;
5945 return count;
5948 default:
5949 break;
5952 return -1;
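/* Editorial examples (not part of the original source) of how the walk
   above classifies homogeneous aggregates:

     struct v3  { float x, y, z; };       -- returns 3, *modep == SFmode
     struct m2  { double d[2]; };         -- returns 2, *modep == DFmode
     struct mix { float f; double d; };   -- returns -1 (mixed base types)
     struct bad { float f; int i; };      -- returns -1 (non-FP member)  */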
5955 /* Return true if PCS_VARIANT should use VFP registers. */
5956 static bool
5957 use_vfp_abi (enum arm_pcs pcs_variant, bool is_double)
5959 if (pcs_variant == ARM_PCS_AAPCS_VFP)
5961 static bool seen_thumb1_vfp = false;
5963 if (TARGET_THUMB1 && !seen_thumb1_vfp)
5965 sorry ("Thumb-1 hard-float VFP ABI");
5966 /* sorry() is not immediately fatal, so only display this once. */
5967 seen_thumb1_vfp = true;
5970 return true;
5973 if (pcs_variant != ARM_PCS_AAPCS_LOCAL)
5974 return false;
5976 return (TARGET_32BIT && TARGET_HARD_FLOAT
5977 && (TARGET_VFP_DOUBLE || !is_double));
5980 /* Return true if an argument whose type is TYPE, or mode is MODE, is
5981 suitable for passing or returning in VFP registers for the PCS
5982 variant selected. If it is, then *BASE_MODE is updated to contain
5983 a machine mode describing each element of the argument's type and
5984 *COUNT to hold the number of such elements. */
5985 static bool
5986 aapcs_vfp_is_call_or_return_candidate (enum arm_pcs pcs_variant,
5987 machine_mode mode, const_tree type,
5988 machine_mode *base_mode, int *count)
5990 machine_mode new_mode = VOIDmode;
5992 /* If we have the type information, prefer that to working things
5993 out from the mode. */
5994 if (type)
5996 int ag_count = aapcs_vfp_sub_candidate (type, &new_mode);
5998 if (ag_count > 0 && ag_count <= 4)
5999 *count = ag_count;
6000 else
6001 return false;
6003 else if (GET_MODE_CLASS (mode) == MODE_FLOAT
6004 || GET_MODE_CLASS (mode) == MODE_VECTOR_INT
6005 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
6007 *count = 1;
6008 new_mode = mode;
6010 else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
6012 *count = 2;
6013 new_mode = (mode == DCmode ? DFmode : SFmode);
6015 else
6016 return false;
6019 if (!use_vfp_abi (pcs_variant, ARM_NUM_REGS (new_mode) > 1))
6020 return false;
6022 *base_mode = new_mode;
6023 return true;
6026 static bool
6027 aapcs_vfp_is_return_candidate (enum arm_pcs pcs_variant,
6028 machine_mode mode, const_tree type)
6030 int count ATTRIBUTE_UNUSED;
6031 machine_mode ag_mode ATTRIBUTE_UNUSED;
6033 if (!use_vfp_abi (pcs_variant, false))
6034 return false;
6035 return aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type,
6036 &ag_mode, &count);
6039 static bool
6040 aapcs_vfp_is_call_candidate (CUMULATIVE_ARGS *pcum, machine_mode mode,
6041 const_tree type)
6043 if (!use_vfp_abi (pcum->pcs_variant, false))
6044 return false;
6046 return aapcs_vfp_is_call_or_return_candidate (pcum->pcs_variant, mode, type,
6047 &pcum->aapcs_vfp_rmode,
6048 &pcum->aapcs_vfp_rcount);
6051 /* Implement the allocate field in aapcs_cp_arg_layout. See the comment there
6052 for the behaviour of this function. */
6054 static bool
6055 aapcs_vfp_allocate (CUMULATIVE_ARGS *pcum, machine_mode mode,
6056 const_tree type ATTRIBUTE_UNUSED)
6058 int rmode_size
6059 = MAX (GET_MODE_SIZE (pcum->aapcs_vfp_rmode), GET_MODE_SIZE (SFmode));
6060 int shift = rmode_size / GET_MODE_SIZE (SFmode);
6061 unsigned mask = (1 << (shift * pcum->aapcs_vfp_rcount)) - 1;
6062 int regno;
6064 for (regno = 0; regno < NUM_VFP_ARG_REGS; regno += shift)
6065 if (((pcum->aapcs_vfp_regs_free >> regno) & mask) == mask)
6067 pcum->aapcs_vfp_reg_alloc = mask << regno;
6068 if (mode == BLKmode
6069 || (mode == TImode && ! TARGET_NEON)
6070 || ! arm_hard_regno_mode_ok (FIRST_VFP_REGNUM + regno, mode))
6072 int i;
6073 int rcount = pcum->aapcs_vfp_rcount;
6074 int rshift = shift;
6075 machine_mode rmode = pcum->aapcs_vfp_rmode;
6076 rtx par;
6077 if (!TARGET_NEON)
6079 /* Avoid using unsupported vector modes. */
6080 if (rmode == V2SImode)
6081 rmode = DImode;
6082 else if (rmode == V4SImode)
6084 rmode = DImode;
6085 rcount *= 2;
6086 rshift /= 2;
6089 par = gen_rtx_PARALLEL (mode, rtvec_alloc (rcount));
6090 for (i = 0; i < rcount; i++)
6092 rtx tmp = gen_rtx_REG (rmode,
6093 FIRST_VFP_REGNUM + regno + i * rshift);
6094 tmp = gen_rtx_EXPR_LIST
6095 (VOIDmode, tmp,
6096 GEN_INT (i * GET_MODE_SIZE (rmode)));
6097 XVECEXP (par, 0, i) = tmp;
6100 pcum->aapcs_reg = par;
6102 else
6103 pcum->aapcs_reg = gen_rtx_REG (mode, FIRST_VFP_REGNUM + regno);
6104 return true;
6106 return false;
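/* Editorial worked example (not part of the original source): for a
   homogeneous aggregate of two doubles, aapcs_vfp_rmode is DFmode and
   aapcs_vfp_rcount is 2, so rmode_size is 8, shift is 2 and mask is 0xf.
   The loop above therefore scans for four consecutive free S registers
   starting at an even (D-register) boundary; if s0-s3 are free the
   argument is allocated to d0/d1 and the mask is recorded in
   aapcs_vfp_reg_alloc.  */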
6109 /* Implement the allocate_return_reg field in aapcs_cp_arg_layout. See the
6110 comment there for the behaviour of this function. */
6112 static rtx
6113 aapcs_vfp_allocate_return_reg (enum arm_pcs pcs_variant ATTRIBUTE_UNUSED,
6114 machine_mode mode,
6115 const_tree type ATTRIBUTE_UNUSED)
6117 if (!use_vfp_abi (pcs_variant, false))
6118 return NULL;
6120 if (mode == BLKmode
6121 || (GET_MODE_CLASS (mode) == MODE_INT
6122 && GET_MODE_SIZE (mode) >= GET_MODE_SIZE (TImode)
6123 && !TARGET_NEON))
6125 int count;
6126 machine_mode ag_mode;
6127 int i;
6128 rtx par;
6129 int shift;
6131 aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type,
6132 &ag_mode, &count);
6134 if (!TARGET_NEON)
6136 if (ag_mode == V2SImode)
6137 ag_mode = DImode;
6138 else if (ag_mode == V4SImode)
6140 ag_mode = DImode;
6141 count *= 2;
6144 shift = GET_MODE_SIZE (ag_mode) / GET_MODE_SIZE (SFmode);
6145 par = gen_rtx_PARALLEL (mode, rtvec_alloc (count));
6146 for (i = 0; i < count; i++)
6148 rtx tmp = gen_rtx_REG (ag_mode, FIRST_VFP_REGNUM + i * shift);
6149 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
6150 GEN_INT (i * GET_MODE_SIZE (ag_mode)));
6151 XVECEXP (par, 0, i) = tmp;
6154 return par;
6157 return gen_rtx_REG (mode, FIRST_VFP_REGNUM);
6160 static void
6161 aapcs_vfp_advance (CUMULATIVE_ARGS *pcum ATTRIBUTE_UNUSED,
6162 machine_mode mode ATTRIBUTE_UNUSED,
6163 const_tree type ATTRIBUTE_UNUSED)
6165 pcum->aapcs_vfp_regs_free &= ~pcum->aapcs_vfp_reg_alloc;
6166 pcum->aapcs_vfp_reg_alloc = 0;
6167 return;
6170 #define AAPCS_CP(X) \
6172 aapcs_ ## X ## _cum_init, \
6173 aapcs_ ## X ## _is_call_candidate, \
6174 aapcs_ ## X ## _allocate, \
6175 aapcs_ ## X ## _is_return_candidate, \
6176 aapcs_ ## X ## _allocate_return_reg, \
6177 aapcs_ ## X ## _advance \
6180 /* Table of co-processors that can be used to pass arguments in
6181 registers. Ideally no argument should be a candidate for more than
6182 one co-processor table entry, but the table is processed in order
6183 and stops after the first match. If that entry then fails to put
6184 the argument into a co-processor register, the argument will go on
6185 the stack. */
6186 static struct
6188 /* Initialize co-processor related state in CUMULATIVE_ARGS structure. */
6189 void (*cum_init) (CUMULATIVE_ARGS *, const_tree, rtx, const_tree);
6191 /* Return true if an argument of mode MODE (or type TYPE if MODE is
6192 BLKmode) is a candidate for this co-processor's registers; this
6193 function should ignore any position-dependent state in
6194 CUMULATIVE_ARGS and only use call-type dependent information. */
6195 bool (*is_call_candidate) (CUMULATIVE_ARGS *, machine_mode, const_tree);
6197 /* Return true if the argument does get a co-processor register; it
6198 should set aapcs_reg to an RTX of the register allocated as is
6199 required for a return from FUNCTION_ARG. */
6200 bool (*allocate) (CUMULATIVE_ARGS *, machine_mode, const_tree);
6202 /* Return true if a result of mode MODE (or type TYPE if MODE is BLKmode) can
6203 be returned in this co-processor's registers. */
6204 bool (*is_return_candidate) (enum arm_pcs, machine_mode, const_tree);
6206 /* Allocate and return an RTX element to hold the return type of a call. This
6207 routine must not fail and will only be called if is_return_candidate
6208 returned true with the same parameters. */
6209 rtx (*allocate_return_reg) (enum arm_pcs, machine_mode, const_tree);
6211 /* Finish processing this argument and prepare to start processing
6212 the next one. */
6213 void (*advance) (CUMULATIVE_ARGS *, machine_mode, const_tree);
6214 } aapcs_cp_arg_layout[ARM_NUM_COPROC_SLOTS] =
6216 AAPCS_CP(vfp)
6219 #undef AAPCS_CP
6221 static int
6222 aapcs_select_call_coproc (CUMULATIVE_ARGS *pcum, machine_mode mode,
6223 const_tree type)
6225 int i;
6227 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
6228 if (aapcs_cp_arg_layout[i].is_call_candidate (pcum, mode, type))
6229 return i;
6231 return -1;
6234 static int
6235 aapcs_select_return_coproc (const_tree type, const_tree fntype)
6237 /* We aren't passed a decl, so we can't check that a call is local.
6238 However, it isn't clear that that would be a win anyway, since it
6239 might limit some tail-calling opportunities. */
6240 enum arm_pcs pcs_variant;
6242 if (fntype)
6244 const_tree fndecl = NULL_TREE;
6246 if (TREE_CODE (fntype) == FUNCTION_DECL)
6248 fndecl = fntype;
6249 fntype = TREE_TYPE (fntype);
6252 pcs_variant = arm_get_pcs_model (fntype, fndecl);
6254 else
6255 pcs_variant = arm_pcs_default;
6257 if (pcs_variant != ARM_PCS_AAPCS)
6259 int i;
6261 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
6262 if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant,
6263 TYPE_MODE (type),
6264 type))
6265 return i;
6267 return -1;
6270 static rtx
6271 aapcs_allocate_return_reg (machine_mode mode, const_tree type,
6272 const_tree fntype)
6274 /* We aren't passed a decl, so we can't check that a call is local.
6275 However, it isn't clear that that would be a win anyway, since it
6276 might limit some tail-calling opportunities. */
6277 enum arm_pcs pcs_variant;
6278 int unsignedp ATTRIBUTE_UNUSED;
6280 if (fntype)
6282 const_tree fndecl = NULL_TREE;
6284 if (TREE_CODE (fntype) == FUNCTION_DECL)
6286 fndecl = fntype;
6287 fntype = TREE_TYPE (fntype);
6290 pcs_variant = arm_get_pcs_model (fntype, fndecl);
6292 else
6293 pcs_variant = arm_pcs_default;
6295 /* Promote integer types. */
6296 if (type && INTEGRAL_TYPE_P (type))
6297 mode = arm_promote_function_mode (type, mode, &unsignedp, fntype, 1);
6299 if (pcs_variant != ARM_PCS_AAPCS)
6301 int i;
6303 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
6304 if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant, mode,
6305 type))
6306 return aapcs_cp_arg_layout[i].allocate_return_reg (pcs_variant,
6307 mode, type);
6310 /* Promotes small structs returned in a register to full-word size
6311 for big-endian AAPCS. */
6312 if (type && arm_return_in_msb (type))
6314 HOST_WIDE_INT size = int_size_in_bytes (type);
6315 if (size % UNITS_PER_WORD != 0)
6317 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
6318 mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
6322 return gen_rtx_REG (mode, R0_REGNUM);
6325 static rtx
6326 aapcs_libcall_value (machine_mode mode)
6328 if (BYTES_BIG_ENDIAN && ALL_FIXED_POINT_MODE_P (mode)
6329 && GET_MODE_SIZE (mode) <= 4)
6330 mode = SImode;
6332 return aapcs_allocate_return_reg (mode, NULL_TREE, NULL_TREE);
6335 /* Lay out a function argument using the AAPCS rules. The rule
6336 numbers referred to here are those in the AAPCS. */
6337 static void
6338 aapcs_layout_arg (CUMULATIVE_ARGS *pcum, machine_mode mode,
6339 const_tree type, bool named)
6341 int nregs, nregs2;
6342 int ncrn;
6344 /* We only need to do this once per argument. */
6345 if (pcum->aapcs_arg_processed)
6346 return;
6348 pcum->aapcs_arg_processed = true;
6350 /* Special case: if named is false then we are handling an incoming
6351 anonymous argument which is on the stack. */
6352 if (!named)
6353 return;
6355 /* Is this a potential co-processor register candidate? */
6356 if (pcum->pcs_variant != ARM_PCS_AAPCS)
6358 int slot = aapcs_select_call_coproc (pcum, mode, type);
6359 pcum->aapcs_cprc_slot = slot;
6361 /* We don't have to apply any of the rules from part B of the
6362 preparation phase, these are handled elsewhere in the
6363 compiler. */
6365 if (slot >= 0)
6367 /* A Co-processor register candidate goes either in its own
6368 class of registers or on the stack. */
6369 if (!pcum->aapcs_cprc_failed[slot])
6371 /* C1.cp - Try to allocate the argument to co-processor
6372 registers. */
6373 if (aapcs_cp_arg_layout[slot].allocate (pcum, mode, type))
6374 return;
6376 /* C2.cp - Put the argument on the stack and note that we
6377 can't assign any more candidates in this slot. We also
6378 need to note that we have allocated stack space, so that
6379 we won't later try to split a non-cprc candidate between
6380 core registers and the stack. */
6381 pcum->aapcs_cprc_failed[slot] = true;
6382 pcum->can_split = false;
6385 /* We didn't get a register, so this argument goes on the
6386 stack. */
6387 gcc_assert (pcum->can_split == false);
6388 return;
6392 /* C3 - For double-word aligned arguments, round the NCRN up to the
6393 next even number. */
6394 ncrn = pcum->aapcs_ncrn;
6395 if (ncrn & 1)
6397 int res = arm_needs_doubleword_align (mode, type);
6398 /* Only warn during RTL expansion of call stmts, otherwise we would
6399 warn e.g. during gimplification even on functions that will be
6400 always inlined, and we'd warn multiple times. Don't warn when
6401 called in expand_function_start either, as we warn instead in
6402 arm_function_arg_boundary in that case. */
6403 if (res < 0 && warn_psabi && currently_expanding_gimple_stmt)
6404 inform (input_location, "parameter passing for argument of type "
6405 "%qT changed in GCC 7.1", type);
6406 else if (res > 0)
6407 ncrn++;
6410 nregs = ARM_NUM_REGS2 (mode, type);
6412 /* Sigh, this test should really assert that nregs > 0, but a GCC
6413 extension allows empty structs and then gives them empty size; it
6414 then allows such a structure to be passed by value. For some of
6415 the code below we have to pretend that such an argument has
6416 non-zero size so that we 'locate' it correctly either in
6417 registers or on the stack. */
6418 gcc_assert (nregs >= 0);
6420 nregs2 = nregs ? nregs : 1;
6422 /* C4 - Argument fits entirely in core registers. */
6423 if (ncrn + nregs2 <= NUM_ARG_REGS)
6425 pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
6426 pcum->aapcs_next_ncrn = ncrn + nregs;
6427 return;
6430 /* C5 - Some core registers left and there are no arguments already
6431 on the stack: split this argument between the remaining core
6432 registers and the stack. */
6433 if (ncrn < NUM_ARG_REGS && pcum->can_split)
6435 pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
6436 pcum->aapcs_next_ncrn = NUM_ARG_REGS;
6437 pcum->aapcs_partial = (NUM_ARG_REGS - ncrn) * UNITS_PER_WORD;
6438 return;
6441 /* C6 - NCRN is set to 4. */
6442 pcum->aapcs_next_ncrn = NUM_ARG_REGS;
6444 /* C7,C8 - argument goes on the stack. We have nothing to do here. */
6445 return;
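/* Editorial worked example (not part of the original source) of rule C5
   above, assuming 4-byte words and argument registers r0-r3:

     struct two { int a, b; };
     void g (int x, int y, int z, struct two s);

   x, y and z occupy r0-r2.  s needs two words, only r3 is left and no
   argument has been placed on the stack yet, so s is split: aapcs_reg
   points at r3 and aapcs_partial records the 4 bytes that spill onto
   the stack.  */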
6448 /* Initialize a variable CUM of type CUMULATIVE_ARGS
6449 for a call to a function whose data type is FNTYPE.
6450 For a library call, FNTYPE is NULL. */
6451 void
6452 arm_init_cumulative_args (CUMULATIVE_ARGS *pcum, tree fntype,
6453 rtx libname,
6454 tree fndecl ATTRIBUTE_UNUSED)
6456 /* Determine the PCS variant to use for this call. */
6457 if (fntype)
6458 pcum->pcs_variant = arm_get_pcs_model (fntype, fndecl);
6459 else
6460 pcum->pcs_variant = arm_pcs_default;
6462 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
6464 if (arm_libcall_uses_aapcs_base (libname))
6465 pcum->pcs_variant = ARM_PCS_AAPCS;
6467 pcum->aapcs_ncrn = pcum->aapcs_next_ncrn = 0;
6468 pcum->aapcs_reg = NULL_RTX;
6469 pcum->aapcs_partial = 0;
6470 pcum->aapcs_arg_processed = false;
6471 pcum->aapcs_cprc_slot = -1;
6472 pcum->can_split = true;
6474 if (pcum->pcs_variant != ARM_PCS_AAPCS)
6476 int i;
6478 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
6480 pcum->aapcs_cprc_failed[i] = false;
6481 aapcs_cp_arg_layout[i].cum_init (pcum, fntype, libname, fndecl);
6484 return;
6487 /* Legacy ABIs */
6489 /* On the ARM, the offset starts at 0. */
6490 pcum->nregs = 0;
6491 pcum->iwmmxt_nregs = 0;
6492 pcum->can_split = true;
6494 /* Varargs vectors are treated the same as long long.
6495 named_count avoids having to change the way ARM handles 'named'. */
6496 pcum->named_count = 0;
6497 pcum->nargs = 0;
6499 if (TARGET_REALLY_IWMMXT && fntype)
6501 tree fn_arg;
6503 for (fn_arg = TYPE_ARG_TYPES (fntype);
6504 fn_arg;
6505 fn_arg = TREE_CHAIN (fn_arg))
6506 pcum->named_count += 1;
6508 if (! pcum->named_count)
6509 pcum->named_count = INT_MAX;
6513 /* Return 1 if double word alignment is required for argument passing.
6514 Return -1 if double word alignment used to be required for argument
6515 passing before PR77728 ABI fix, but is not required anymore.
6516 Return 0 if double word alignment is not required and wasn't required
6517 before either. */
6518 static int
6519 arm_needs_doubleword_align (machine_mode mode, const_tree type)
6521 if (!type)
6522 return GET_MODE_ALIGNMENT (mode) > PARM_BOUNDARY;
6524 /* Scalar and vector types: Use natural alignment, i.e. of base type. */
6525 if (!AGGREGATE_TYPE_P (type))
6526 return TYPE_ALIGN (TYPE_MAIN_VARIANT (type)) > PARM_BOUNDARY;
6528 /* Array types: Use member alignment of element type. */
6529 if (TREE_CODE (type) == ARRAY_TYPE)
6530 return TYPE_ALIGN (TREE_TYPE (type)) > PARM_BOUNDARY;
6532 int ret = 0;
6533 /* Record/aggregate types: Use greatest member alignment of any member. */
6534 for (tree field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6535 if (DECL_ALIGN (field) > PARM_BOUNDARY)
6537 if (TREE_CODE (field) == FIELD_DECL)
6538 return 1;
6539 else
6540 /* Before PR77728 fix, we were incorrectly considering also
6541 other aggregate fields, like VAR_DECLs, TYPE_DECLs etc.
6542 Make sure we can warn about that with -Wpsabi. */
6543 ret = -1;
6546 return ret;
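/* Editorial illustration (not part of the original source), with a
   PARM_BOUNDARY of 32 bits:

     long long ll;               -- natural alignment 64: returns 1
     struct s1 { long long x; }; -- field aligned to 64:   returns 1
     struct s2 { int a, b; };    -- nothing over 32:       returns 0

   An aggregate whose only over-aligned member is not a FIELD_DECL
   (e.g. something created by the C++ front end) yields -1, meaning it
   was only considered double-word aligned before the PR77728 fix.  */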
6550 /* Determine where to put an argument to a function.
6551 Value is zero to push the argument on the stack,
6552 or a hard register in which to store the argument.
6554 MODE is the argument's machine mode.
6555 TYPE is the data type of the argument (as a tree).
6556 This is null for libcalls where that information may
6557 not be available.
6558 CUM is a variable of type CUMULATIVE_ARGS which gives info about
6559 the preceding args and about the function being called.
6560 NAMED is nonzero if this argument is a named parameter
6561 (otherwise it is an extra parameter matching an ellipsis).
6563 On the ARM, normally the first 16 bytes are passed in registers r0-r3; all
6564 other arguments are passed on the stack. If (NAMED == 0) (which happens
6565 only in assign_parms, since TARGET_SETUP_INCOMING_VARARGS is
6566 defined), say it is passed in the stack (function_prologue will
6567 indeed make it pass in the stack if necessary). */
6569 static rtx
6570 arm_function_arg (cumulative_args_t pcum_v, machine_mode mode,
6571 const_tree type, bool named)
6573 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
6574 int nregs;
6576 /* Handle the special case quickly. Pick an arbitrary value for op2 of
6577 a call insn (op3 of a call_value insn). */
6578 if (mode == VOIDmode)
6579 return const0_rtx;
6581 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
6583 aapcs_layout_arg (pcum, mode, type, named);
6584 return pcum->aapcs_reg;
6587 /* Varargs vectors are treated the same as long long.
6588 named_count avoids having to change the way ARM handles 'named'. */
6589 if (TARGET_IWMMXT_ABI
6590 && arm_vector_mode_supported_p (mode)
6591 && pcum->named_count > pcum->nargs + 1)
6593 if (pcum->iwmmxt_nregs <= 9)
6594 return gen_rtx_REG (mode, pcum->iwmmxt_nregs + FIRST_IWMMXT_REGNUM);
6595 else
6597 pcum->can_split = false;
6598 return NULL_RTX;
6602 /* Put doubleword aligned quantities in even register pairs. */
6603 if ((pcum->nregs & 1) && ARM_DOUBLEWORD_ALIGN)
6605 int res = arm_needs_doubleword_align (mode, type);
6606 if (res < 0 && warn_psabi)
6607 inform (input_location, "parameter passing for argument of type "
6608 "%qT changed in GCC 7.1", type);
6609 else if (res > 0)
6610 pcum->nregs++;
6613 /* Only allow splitting an arg between regs and memory if all preceding
6614 args were allocated to regs. For args passed by reference we only count
6615 the reference pointer. */
6616 if (pcum->can_split)
6617 nregs = 1;
6618 else
6619 nregs = ARM_NUM_REGS2 (mode, type);
6621 if (!named || pcum->nregs + nregs > NUM_ARG_REGS)
6622 return NULL_RTX;
6624 return gen_rtx_REG (mode, pcum->nregs);
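/* Editorial worked example (not part of the original source) for the
   AAPCS with argument registers r0-r3:

     void f (int a, long long b, int c);

   a goes in r0; b is 8-byte aligned, so r1 is skipped and b occupies
   r2/r3; all core argument registers are then used, so c goes on the
   stack (the skipped r1 is not back-filled for named arguments).  */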
6627 static unsigned int
6628 arm_function_arg_boundary (machine_mode mode, const_tree type)
6630 if (!ARM_DOUBLEWORD_ALIGN)
6631 return PARM_BOUNDARY;
6633 int res = arm_needs_doubleword_align (mode, type);
6634 if (res < 0 && warn_psabi)
6635 inform (input_location, "parameter passing for argument of type %qT "
6636 "changed in GCC 7.1", type);
6638 return res > 0 ? DOUBLEWORD_ALIGNMENT : PARM_BOUNDARY;
6641 static int
6642 arm_arg_partial_bytes (cumulative_args_t pcum_v, machine_mode mode,
6643 tree type, bool named)
6645 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
6646 int nregs = pcum->nregs;
6648 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
6650 aapcs_layout_arg (pcum, mode, type, named);
6651 return pcum->aapcs_partial;
6654 if (TARGET_IWMMXT_ABI && arm_vector_mode_supported_p (mode))
6655 return 0;
6657 if (NUM_ARG_REGS > nregs
6658 && (NUM_ARG_REGS < nregs + ARM_NUM_REGS2 (mode, type))
6659 && pcum->can_split)
6660 return (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
6662 return 0;
6665 /* Update the data in PCUM to advance over an argument
6666 of mode MODE and data type TYPE.
6667 (TYPE is null for libcalls where that information may not be available.) */
6669 static void
6670 arm_function_arg_advance (cumulative_args_t pcum_v, machine_mode mode,
6671 const_tree type, bool named)
6673 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
6675 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
6677 aapcs_layout_arg (pcum, mode, type, named);
6679 if (pcum->aapcs_cprc_slot >= 0)
6681 aapcs_cp_arg_layout[pcum->aapcs_cprc_slot].advance (pcum, mode,
6682 type);
6683 pcum->aapcs_cprc_slot = -1;
6686 /* Generic stuff. */
6687 pcum->aapcs_arg_processed = false;
6688 pcum->aapcs_ncrn = pcum->aapcs_next_ncrn;
6689 pcum->aapcs_reg = NULL_RTX;
6690 pcum->aapcs_partial = 0;
6692 else
6694 pcum->nargs += 1;
6695 if (arm_vector_mode_supported_p (mode)
6696 && pcum->named_count > pcum->nargs
6697 && TARGET_IWMMXT_ABI)
6698 pcum->iwmmxt_nregs += 1;
6699 else
6700 pcum->nregs += ARM_NUM_REGS2 (mode, type);
6704 /* Variable sized types are passed by reference. This is a GCC
6705 extension to the ARM ABI. */
6707 static bool
6708 arm_pass_by_reference (cumulative_args_t cum ATTRIBUTE_UNUSED,
6709 machine_mode mode ATTRIBUTE_UNUSED,
6710 const_tree type, bool named ATTRIBUTE_UNUSED)
6712 return type && TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST;
6715 /* Encode the current state of the #pragma [no_]long_calls. */
6716 typedef enum
6718 OFF, /* No #pragma [no_]long_calls is in effect. */
6719 LONG, /* #pragma long_calls is in effect. */
6720 SHORT /* #pragma no_long_calls is in effect. */
6721 } arm_pragma_enum;
6723 static arm_pragma_enum arm_pragma_long_calls = OFF;
6725 void
6726 arm_pr_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
6728 arm_pragma_long_calls = LONG;
6731 void
6732 arm_pr_no_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
6734 arm_pragma_long_calls = SHORT;
6737 void
6738 arm_pr_long_calls_off (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
6740 arm_pragma_long_calls = OFF;
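/* Editorial usage example (the symbol names are made up): the pragmas
   handled above are written around groups of declarations, e.g.

     #pragma long_calls
     extern void far_away (void);    -- calls use a long-call sequence
     #pragma no_long_calls
     extern void near_by (void);     -- calls may use a plain BL
     #pragma long_calls_off

   restoring the default behaviour afterwards.  */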
6743 /* Handle an attribute requiring a FUNCTION_DECL;
6744 arguments as in struct attribute_spec.handler. */
6745 static tree
6746 arm_handle_fndecl_attribute (tree *node, tree name, tree args ATTRIBUTE_UNUSED,
6747 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
6749 if (TREE_CODE (*node) != FUNCTION_DECL)
6751 warning (OPT_Wattributes, "%qE attribute only applies to functions",
6752 name);
6753 *no_add_attrs = true;
6756 return NULL_TREE;
6759 /* Handle an "interrupt" or "isr" attribute;
6760 arguments as in struct attribute_spec.handler. */
6761 static tree
6762 arm_handle_isr_attribute (tree *node, tree name, tree args, int flags,
6763 bool *no_add_attrs)
6765 if (DECL_P (*node))
6767 if (TREE_CODE (*node) != FUNCTION_DECL)
6769 warning (OPT_Wattributes, "%qE attribute only applies to functions",
6770 name);
6771 *no_add_attrs = true;
6773 /* FIXME: the argument if any is checked for type attributes;
6774 should it be checked for decl ones? */
6776 else
6778 if (TREE_CODE (*node) == FUNCTION_TYPE
6779 || TREE_CODE (*node) == METHOD_TYPE)
6781 if (arm_isr_value (args) == ARM_FT_UNKNOWN)
6783 warning (OPT_Wattributes, "%qE attribute ignored",
6784 name);
6785 *no_add_attrs = true;
6788 else if (TREE_CODE (*node) == POINTER_TYPE
6789 && (TREE_CODE (TREE_TYPE (*node)) == FUNCTION_TYPE
6790 || TREE_CODE (TREE_TYPE (*node)) == METHOD_TYPE)
6791 && arm_isr_value (args) != ARM_FT_UNKNOWN)
6793 *node = build_variant_type_copy (*node);
6794 TREE_TYPE (*node) = build_type_attribute_variant
6795 (TREE_TYPE (*node),
6796 tree_cons (name, args, TYPE_ATTRIBUTES (TREE_TYPE (*node))));
6797 *no_add_attrs = true;
6799 else
6801 /* Possibly pass this attribute on from the type to a decl. */
6802 if (flags & ((int) ATTR_FLAG_DECL_NEXT
6803 | (int) ATTR_FLAG_FUNCTION_NEXT
6804 | (int) ATTR_FLAG_ARRAY_NEXT))
6806 *no_add_attrs = true;
6807 return tree_cons (name, args, NULL_TREE);
6809 else
6811 warning (OPT_Wattributes, "%qE attribute ignored",
6812 name);
6817 return NULL_TREE;
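/* Editorial usage example (the function name is made up): the
   "interrupt"/"isr" attribute accepted above takes an optional string
   naming the exception kind, e.g.

     void uart_handler (void) __attribute__ ((interrupt ("IRQ")));

   which makes the compiler use the ISR prologue/epilogue and return
   sequence for the function instead of the normal one.  */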
6820 /* Handle a "pcs" attribute; arguments as in struct
6821 attribute_spec.handler. */
6822 static tree
6823 arm_handle_pcs_attribute (tree *node ATTRIBUTE_UNUSED, tree name, tree args,
6824 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
6826 if (arm_pcs_from_attribute (args) == ARM_PCS_UNKNOWN)
6828 warning (OPT_Wattributes, "%qE attribute ignored", name);
6829 *no_add_attrs = true;
6831 return NULL_TREE;
6834 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
6835 /* Handle the "notshared" attribute. This attribute is another way of
6836 requesting hidden visibility. ARM's compiler supports
6837 "__declspec(notshared)"; we support the same thing via an
6838 attribute. */
6840 static tree
6841 arm_handle_notshared_attribute (tree *node,
6842 tree name ATTRIBUTE_UNUSED,
6843 tree args ATTRIBUTE_UNUSED,
6844 int flags ATTRIBUTE_UNUSED,
6845 bool *no_add_attrs)
6847 tree decl = TYPE_NAME (*node);
6849 if (decl)
6851 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
6852 DECL_VISIBILITY_SPECIFIED (decl) = 1;
6853 *no_add_attrs = false;
6855 return NULL_TREE;
6857 #endif
6859 /* This function returns true if a function with declaration FNDECL and type
6860 FNTYPE uses the stack to pass arguments or return variables and false
6861 otherwise. This is used for functions with the attributes
6862 'cmse_nonsecure_call' or 'cmse_nonsecure_entry' and this function will issue
6863 diagnostic messages if the stack is used. NAME is the name of the attribute
6864 used. */
6866 static bool
6867 cmse_func_args_or_return_in_stack (tree fndecl, tree name, tree fntype)
6869 function_args_iterator args_iter;
6870 CUMULATIVE_ARGS args_so_far_v;
6871 cumulative_args_t args_so_far;
6872 bool first_param = true;
6873 tree arg_type, prev_arg_type = NULL_TREE, ret_type;
6875 /* Error out if any argument is passed on the stack. */
6876 arm_init_cumulative_args (&args_so_far_v, fntype, NULL_RTX, fndecl);
6877 args_so_far = pack_cumulative_args (&args_so_far_v);
6878 FOREACH_FUNCTION_ARGS (fntype, arg_type, args_iter)
6880 rtx arg_rtx;
6881 machine_mode arg_mode = TYPE_MODE (arg_type);
6883 prev_arg_type = arg_type;
6884 if (VOID_TYPE_P (arg_type))
6885 continue;
6887 if (!first_param)
6888 arm_function_arg_advance (args_so_far, arg_mode, arg_type, true);
6889 arg_rtx = arm_function_arg (args_so_far, arg_mode, arg_type, true);
6890 if (!arg_rtx
6891 || arm_arg_partial_bytes (args_so_far, arg_mode, arg_type, true))
6893 error ("%qE attribute not available to functions with arguments "
6894 "passed on the stack", name);
6895 return true;
6897 first_param = false;
6900 /* Error out for variadic functions since we cannot control how many
6901 arguments will be passed and thus the stack could be used. stdarg_p ()
6902 is not used for this check to avoid walking the argument list twice. */
6903 if (prev_arg_type != NULL_TREE && !VOID_TYPE_P (prev_arg_type))
6905 error ("%qE attribute not available to functions with variable number "
6906 "of arguments", name);
6907 return true;
6910 /* Error out if return value is passed on the stack. */
6911 ret_type = TREE_TYPE (fntype);
6912 if (arm_return_in_memory (ret_type, fntype))
6914 error ("%qE attribute not available to functions that return value on "
6915 "the stack", name);
6916 return true;
6918 return false;
6921 /* Called upon detection of the use of the cmse_nonsecure_entry attribute, this
6922 function will check whether the attribute is allowed here and will add the
6923 attribute to the function declaration tree or otherwise issue a warning. */
6925 static tree
6926 arm_handle_cmse_nonsecure_entry (tree *node, tree name,
6927 tree /* args */,
6928 int /* flags */,
6929 bool *no_add_attrs)
6931 tree fndecl;
6933 if (!use_cmse)
6935 *no_add_attrs = true;
6936 warning (OPT_Wattributes, "%qE attribute ignored without %<-mcmse%> option",
6937 name);
6938 return NULL_TREE;
6941 /* Ignore attribute for function types. */
6942 if (TREE_CODE (*node) != FUNCTION_DECL)
6944 warning (OPT_Wattributes, "%qE attribute only applies to functions",
6945 name);
6946 *no_add_attrs = true;
6947 return NULL_TREE;
6950 fndecl = *node;
6952 /* Warn for static linkage functions. */
6953 if (!TREE_PUBLIC (fndecl))
6955 warning (OPT_Wattributes, "%qE attribute has no effect on functions "
6956 "with static linkage", name);
6957 *no_add_attrs = true;
6958 return NULL_TREE;
6961 *no_add_attrs |= cmse_func_args_or_return_in_stack (fndecl, name,
6962 TREE_TYPE (fndecl));
6963 return NULL_TREE;
6967 /* Called upon detection of the use of the cmse_nonsecure_call attribute, this
6968 function will check whether the attribute is allowed here and will add the
6969 attribute to the function type tree or otherwise issue a diagnostic. The
6970 reason we check this at declaration time is to only allow the use of the
6971 attribute with declarations of function pointers and not function
6972 declarations. This function checks NODE is of the expected type and issues
6973 diagnostics otherwise using NAME. If it is not of the expected type
6974 *NO_ADD_ATTRS will be set to true. */
6976 static tree
6977 arm_handle_cmse_nonsecure_call (tree *node, tree name,
6978 tree /* args */,
6979 int /* flags */,
6980 bool *no_add_attrs)
6982 tree decl = NULL_TREE, fntype = NULL_TREE;
6983 tree type;
6985 if (!use_cmse)
6987 *no_add_attrs = true;
6989 warning (OPT_Wattributes, "%qE attribute ignored without %<-mcmse%> option",
6989 name);
6990 return NULL_TREE;
6993 if (TREE_CODE (*node) == VAR_DECL || TREE_CODE (*node) == TYPE_DECL)
6995 decl = *node;
6996 fntype = TREE_TYPE (decl);
6999 while (fntype != NULL_TREE && TREE_CODE (fntype) == POINTER_TYPE)
7000 fntype = TREE_TYPE (fntype);
7002 if (!decl || TREE_CODE (fntype) != FUNCTION_TYPE)
7004 warning (OPT_Wattributes, "%qE attribute only applies to base type of a "
7005 "function pointer", name);
7006 *no_add_attrs = true;
7007 return NULL_TREE;
7010 *no_add_attrs |= cmse_func_args_or_return_in_stack (NULL, name, fntype);
7012 if (*no_add_attrs)
7013 return NULL_TREE;
7015 /* Prevent trees being shared among function types with and without
7016 cmse_nonsecure_call attribute. */
7017 type = TREE_TYPE (decl);
7019 type = build_distinct_type_copy (type);
7020 TREE_TYPE (decl) = type;
7021 fntype = type;
7023 while (TREE_CODE (fntype) != FUNCTION_TYPE)
7025 type = fntype;
7026 fntype = TREE_TYPE (fntype);
7027 fntype = build_distinct_type_copy (fntype);
7028 TREE_TYPE (type) = fntype;
7031 /* Construct a type attribute and add it to the function type. */
7032 tree attrs = tree_cons (get_identifier ("cmse_nonsecure_call"), NULL_TREE,
7033 TYPE_ATTRIBUTES (fntype));
7034 TYPE_ATTRIBUTES (fntype) = attrs;
7035 return NULL_TREE;
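/* Editorial usage examples (names are made up; both require -mcmse):
   cmse_nonsecure_entry applies to function declarations, while
   cmse_nonsecure_call is only accepted on function pointers, as the
   handlers above enforce:

     int entry_point (int) __attribute__ ((cmse_nonsecure_entry));
     void (*ns_callback) (int) __attribute__ ((cmse_nonsecure_call));  */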
7038 /* Return 0 if the attributes for two types are incompatible, 1 if they
7039 are compatible, and 2 if they are nearly compatible (which causes a
7040 warning to be generated). */
7041 static int
7042 arm_comp_type_attributes (const_tree type1, const_tree type2)
7044 int l1, l2, s1, s2;
7046 /* Check for mismatch of non-default calling convention. */
7047 if (TREE_CODE (type1) != FUNCTION_TYPE)
7048 return 1;
7050 /* Check for mismatched call attributes. */
7051 l1 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type1)) != NULL;
7052 l2 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type2)) != NULL;
7053 s1 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type1)) != NULL;
7054 s2 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type2)) != NULL;
7056 /* Only bother to check if an attribute is defined. */
7057 if (l1 | l2 | s1 | s2)
7059 /* If one type has an attribute, the other must have the same attribute. */
7060 if ((l1 != l2) || (s1 != s2))
7061 return 0;
7063 /* Disallow mixed attributes. */
7064 if ((l1 & s2) || (l2 & s1))
7065 return 0;
7068 /* Check for mismatched ISR attribute. */
7069 l1 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type1)) != NULL;
7070 if (! l1)
7071 l1 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type1)) != NULL;
7072 l2 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type2)) != NULL;
7073 if (! l2)
7074 l2 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type2)) != NULL;
7075 if (l1 != l2)
7076 return 0;
7078 l1 = lookup_attribute ("cmse_nonsecure_call",
7079 TYPE_ATTRIBUTES (type1)) != NULL;
7080 l2 = lookup_attribute ("cmse_nonsecure_call",
7081 TYPE_ATTRIBUTES (type2)) != NULL;
7083 if (l1 != l2)
7084 return 0;
7086 return 1;
7089 /* Assigns default attributes to newly defined type. This is used to
7090 set short_call/long_call attributes for function types of
7091 functions defined inside corresponding #pragma scopes. */
7092 static void
7093 arm_set_default_type_attributes (tree type)
7095 /* Add __attribute__ ((long_call)) to all functions, when
7096 inside #pragma long_calls or __attribute__ ((short_call)),
7097 when inside #pragma no_long_calls. */
7098 if (TREE_CODE (type) == FUNCTION_TYPE || TREE_CODE (type) == METHOD_TYPE)
7100 tree type_attr_list, attr_name;
7101 type_attr_list = TYPE_ATTRIBUTES (type);
7103 if (arm_pragma_long_calls == LONG)
7104 attr_name = get_identifier ("long_call");
7105 else if (arm_pragma_long_calls == SHORT)
7106 attr_name = get_identifier ("short_call");
7107 else
7108 return;
7110 type_attr_list = tree_cons (attr_name, NULL_TREE, type_attr_list);
7111 TYPE_ATTRIBUTES (type) = type_attr_list;
7115 /* Return true if DECL is known to be linked into section SECTION. */
7117 static bool
7118 arm_function_in_section_p (tree decl, section *section)
7120 /* We can only be certain about the prevailing symbol definition. */
7121 if (!decl_binds_to_current_def_p (decl))
7122 return false;
7124 /* If DECL_SECTION_NAME is set, assume it is trustworthy. */
7125 if (!DECL_SECTION_NAME (decl))
7127 /* Make sure that we will not create a unique section for DECL. */
7128 if (flag_function_sections || DECL_COMDAT_GROUP (decl))
7129 return false;
7132 return function_section (decl) == section;
7135 /* Return nonzero if a 32-bit "long_call" should be generated for
7136 a call from the current function to DECL. We generate a long_call
7137 if the function:
7139 a. has an __attribute__((long call))
7140 or b. is within the scope of a #pragma long_calls
7141 or c. the -mlong-calls command line switch has been specified
7143 However we do not generate a long call if the function:
7145 d. has an __attribute__ ((short_call))
7146 or e. is inside the scope of a #pragma no_long_calls
7147 or f. is defined in the same section as the current function. */
7149 bool
7150 arm_is_long_call_p (tree decl)
7152 tree attrs;
7154 if (!decl)
7155 return TARGET_LONG_CALLS;
7157 attrs = TYPE_ATTRIBUTES (TREE_TYPE (decl));
7158 if (lookup_attribute ("short_call", attrs))
7159 return false;
7161 /* For "f", be conservative, and only cater for cases in which the
7162 whole of the current function is placed in the same section. */
7163 if (!flag_reorder_blocks_and_partition
7164 && TREE_CODE (decl) == FUNCTION_DECL
7165 && arm_function_in_section_p (decl, current_function_section ()))
7166 return false;
7168 if (lookup_attribute ("long_call", attrs))
7169 return true;
7171 return TARGET_LONG_CALLS;
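/* Editorial usage example (the symbol name is made up): rule (a) above
   is triggered by declarations such as

     extern void far_handler (void) __attribute__ ((long_call));

   Calls to far_handler are then emitted as a full 32-bit address load
   followed by an indirect call, so the callee may live anywhere in the
   address space; __attribute__ ((short_call)) or #pragma no_long_calls
   has the opposite effect when -mlong-calls is in force.  */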
7174 /* Return nonzero if it is ok to make a tail-call to DECL. */
7175 static bool
7176 arm_function_ok_for_sibcall (tree decl, tree exp)
7178 unsigned long func_type;
7180 if (cfun->machine->sibcall_blocked)
7181 return false;
7183 /* Never tailcall something if we are generating code for Thumb-1. */
7184 if (TARGET_THUMB1)
7185 return false;
7187 /* The PIC register is live on entry to VxWorks PLT entries, so we
7188 must make the call before restoring the PIC register. */
7189 if (TARGET_VXWORKS_RTP && flag_pic && decl && !targetm.binds_local_p (decl))
7190 return false;
7192 /* ??? Cannot tail-call to long calls with APCS frame and VFP, because IP
7193 may be used both as target of the call and base register for restoring
7194 the VFP registers. */
7195 if (TARGET_APCS_FRAME && TARGET_ARM
7196 && TARGET_HARD_FLOAT
7197 && decl && arm_is_long_call_p (decl))
7198 return false;
7200 /* If we are interworking and the function is not declared static
7201 then we can't tail-call it unless we know that it exists in this
7202 compilation unit (since it might be a Thumb routine). */
7203 if (TARGET_INTERWORK && decl && TREE_PUBLIC (decl)
7204 && !TREE_ASM_WRITTEN (decl))
7205 return false;
7207 func_type = arm_current_func_type ();
7208 /* Never tailcall from an ISR routine - it needs a special exit sequence. */
7209 if (IS_INTERRUPT (func_type))
7210 return false;
7212 /* ARMv8-M non-secure entry functions need to return with bxns which is only
7213 generated for entry functions themselves. */
7214 if (IS_CMSE_ENTRY (arm_current_func_type ()))
7215 return false;
7217 /* We do not allow ARMv8-M non-secure calls to be turned into sibling calls,
7218 as this would complicate matters for later code generation. */
7219 if (TREE_CODE (exp) == CALL_EXPR)
7221 tree fntype = TREE_TYPE (TREE_TYPE (CALL_EXPR_FN (exp)));
7222 if (lookup_attribute ("cmse_nonsecure_call", TYPE_ATTRIBUTES (fntype)))
7223 return false;
7226 if (!VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
7228 /* Check that the return value locations are the same. For
7229 example that we aren't returning a value from the sibling in
7230 a VFP register but then need to transfer it to a core
7231 register. */
7232 rtx a, b;
7233 tree decl_or_type = decl;
7235 /* If it is an indirect function pointer, get the function type. */
7236 if (!decl)
7237 decl_or_type = TREE_TYPE (TREE_TYPE (CALL_EXPR_FN (exp)));
7239 a = arm_function_value (TREE_TYPE (exp), decl_or_type, false);
7240 b = arm_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
7241 cfun->decl, false);
7242 if (!rtx_equal_p (a, b))
7243 return false;
7246 /* Never tailcall if function may be called with a misaligned SP. */
7247 if (IS_STACKALIGN (func_type))
7248 return false;
7250 /* The AAPCS says that, on bare-metal, calls to unresolved weak
7251 references should become a NOP. Don't convert such calls into
7252 sibling calls. */
7253 if (TARGET_AAPCS_BASED
7254 && arm_abi == ARM_ABI_AAPCS
7255 && decl
7256 && DECL_WEAK (decl))
7257 return false;
7259 /* We cannot do a tailcall for an indirect call by descriptor if all the
7260 argument registers are used because the only register left to load the
7261 address is IP and it will already contain the static chain. */
7262 if (!decl && CALL_EXPR_BY_DESCRIPTOR (exp) && !flag_trampolines)
7264 tree fntype = TREE_TYPE (TREE_TYPE (CALL_EXPR_FN (exp)));
7265 CUMULATIVE_ARGS cum;
7266 cumulative_args_t cum_v;
7268 arm_init_cumulative_args (&cum, fntype, NULL_RTX, NULL_TREE);
7269 cum_v = pack_cumulative_args (&cum);
7271 for (tree t = TYPE_ARG_TYPES (fntype); t; t = TREE_CHAIN (t))
7273 tree type = TREE_VALUE (t);
7274 if (!VOID_TYPE_P (type))
7275 arm_function_arg_advance (cum_v, TYPE_MODE (type), type, true);
7278 if (!arm_function_arg (cum_v, SImode, integer_type_node, true))
7279 return false;
7282 /* Everything else is ok. */
7283 return true;
7287 /* Addressing mode support functions. */
7289 /* Return nonzero if X is a legitimate immediate operand when compiling
7290 for PIC. We know that X satisfies CONSTANT_P and flag_pic is true. */
7292 legitimate_pic_operand_p (rtx x)
7294 if (GET_CODE (x) == SYMBOL_REF
7295 || (GET_CODE (x) == CONST
7296 && GET_CODE (XEXP (x, 0)) == PLUS
7297 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF))
7298 return 0;
7300 return 1;
7303 /* Record that the current function needs a PIC register. Initialize
7304 cfun->machine->pic_reg if we have not already done so. */
7306 static void
7307 require_pic_register (void)
7309 /* A lot of the logic here is made obscure by the fact that this
7310 routine gets called as part of the rtx cost estimation process.
7311 We don't want those calls to affect any assumptions about the real
7312 function; and further, we can't call entry_of_function() until we
7313 start the real expansion process. */
7314 if (!crtl->uses_pic_offset_table)
7316 gcc_assert (can_create_pseudo_p ());
7317 if (arm_pic_register != INVALID_REGNUM
7318 && !(TARGET_THUMB1 && arm_pic_register > LAST_LO_REGNUM))
7320 if (!cfun->machine->pic_reg)
7321 cfun->machine->pic_reg = gen_rtx_REG (Pmode, arm_pic_register);
7323 /* Play games to avoid marking the function as needing pic
7324 if we are being called as part of the cost-estimation
7325 process. */
7326 if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
7327 crtl->uses_pic_offset_table = 1;
7329 else
7331 rtx_insn *seq, *insn;
7333 if (!cfun->machine->pic_reg)
7334 cfun->machine->pic_reg = gen_reg_rtx (Pmode);
7336 /* Play games to avoid marking the function as needing pic
7337 if we are being called as part of the cost-estimation
7338 process. */
7339 if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
7341 crtl->uses_pic_offset_table = 1;
7342 start_sequence ();
7344 if (TARGET_THUMB1 && arm_pic_register != INVALID_REGNUM
7345 && arm_pic_register > LAST_LO_REGNUM)
7346 emit_move_insn (cfun->machine->pic_reg,
7347 gen_rtx_REG (Pmode, arm_pic_register));
7348 else
7349 arm_load_pic_register (0UL);
7351 seq = get_insns ();
7352 end_sequence ();
7354 for (insn = seq; insn; insn = NEXT_INSN (insn))
7355 if (INSN_P (insn))
7356 INSN_LOCATION (insn) = prologue_location;
7358 /* We can be called during expansion of PHI nodes, where
7359 we can't yet emit instructions directly in the final
7360 insn stream. Queue the insns on the entry edge, they will
7361 be committed after everything else is expanded. */
7362 insert_insn_on_edge (seq,
7363 single_succ_edge (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
7370 legitimize_pic_address (rtx orig, machine_mode mode, rtx reg)
7372 if (GET_CODE (orig) == SYMBOL_REF
7373 || GET_CODE (orig) == LABEL_REF)
7375 if (reg == 0)
7377 gcc_assert (can_create_pseudo_p ());
7378 reg = gen_reg_rtx (Pmode);
7381 /* VxWorks does not impose a fixed gap between segments; the run-time
7382 gap can be different from the object-file gap. We therefore can't
7383 use GOTOFF unless we are absolutely sure that the symbol is in the
7384 same segment as the GOT. Unfortunately, the flexibility of linker
7385 scripts means that we can't be sure of that in general, so assume
7386 that GOTOFF is never valid on VxWorks. */
7387 /* References to weak symbols cannot be resolved locally: they
7388 may be overridden by a non-weak definition at link time. */
7389 rtx_insn *insn;
7390 if ((GET_CODE (orig) == LABEL_REF
7391 || (GET_CODE (orig) == SYMBOL_REF
7392 && SYMBOL_REF_LOCAL_P (orig)
7393 && (SYMBOL_REF_DECL (orig)
7394 ? !DECL_WEAK (SYMBOL_REF_DECL (orig)) : 1)))
7395 && NEED_GOT_RELOC
7396 && arm_pic_data_is_text_relative)
7397 insn = arm_pic_static_addr (orig, reg);
7398 else
7400 rtx pat;
7401 rtx mem;
7403 /* If this function doesn't have a pic register, create one now. */
7404 require_pic_register ();
7406 pat = gen_calculate_pic_address (reg, cfun->machine->pic_reg, orig);
7408 /* Make the MEM as close to a constant as possible. */
7409 mem = SET_SRC (pat);
7410 gcc_assert (MEM_P (mem) && !MEM_VOLATILE_P (mem));
7411 MEM_READONLY_P (mem) = 1;
7412 MEM_NOTRAP_P (mem) = 1;
7414 insn = emit_insn (pat);
7417 /* Put a REG_EQUAL note on this insn, so that it can be optimized
7418 by the loop optimizer. */
7419 set_unique_reg_note (insn, REG_EQUAL, orig);
7421 return reg;
7423 else if (GET_CODE (orig) == CONST)
7425 rtx base, offset;
7427 if (GET_CODE (XEXP (orig, 0)) == PLUS
7428 && XEXP (XEXP (orig, 0), 0) == cfun->machine->pic_reg)
7429 return orig;
7431 /* Handle the case where we have: const (UNSPEC_TLS). */
7432 if (GET_CODE (XEXP (orig, 0)) == UNSPEC
7433 && XINT (XEXP (orig, 0), 1) == UNSPEC_TLS)
7434 return orig;
7436 /* Handle the case where we have:
7437 const (plus (UNSPEC_TLS) (ADDEND)). The ADDEND must be a
7438 CONST_INT. */
7439 if (GET_CODE (XEXP (orig, 0)) == PLUS
7440 && GET_CODE (XEXP (XEXP (orig, 0), 0)) == UNSPEC
7441 && XINT (XEXP (XEXP (orig, 0), 0), 1) == UNSPEC_TLS)
7443 gcc_assert (CONST_INT_P (XEXP (XEXP (orig, 0), 1)));
7444 return orig;
7447 if (reg == 0)
7449 gcc_assert (can_create_pseudo_p ());
7450 reg = gen_reg_rtx (Pmode);
7453 gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);
7455 base = legitimize_pic_address (XEXP (XEXP (orig, 0), 0), Pmode, reg);
7456 offset = legitimize_pic_address (XEXP (XEXP (orig, 0), 1), Pmode,
7457 base == reg ? 0 : reg);
7459 if (CONST_INT_P (offset))
7461 /* The base register doesn't really matter; we only want to
7462 test the index for the appropriate mode. */
7463 if (!arm_legitimate_index_p (mode, offset, SET, 0))
7465 gcc_assert (can_create_pseudo_p ());
7466 offset = force_reg (Pmode, offset);
7469 if (CONST_INT_P (offset))
7470 return plus_constant (Pmode, base, INTVAL (offset));
7473 if (GET_MODE_SIZE (mode) > 4
7474 && (GET_MODE_CLASS (mode) == MODE_INT
7475 || TARGET_SOFT_FLOAT))
7477 emit_insn (gen_addsi3 (reg, base, offset));
7478 return reg;
7481 return gen_rtx_PLUS (Pmode, base, offset);
7484 return orig;
7488 /* Find a spare register to use during the prolog of a function. */
7490 static int
7491 thumb_find_work_register (unsigned long pushed_regs_mask)
7493 int reg;
7495 /* Check the argument registers first as these are call-used. The
7496 register allocation order means that sometimes r3 might be used
7497 but earlier argument registers might not, so check them all. */
7498 for (reg = LAST_ARG_REGNUM; reg >= 0; reg --)
7499 if (!df_regs_ever_live_p (reg))
7500 return reg;
7502 /* Before going on to check the call-saved registers we can try a couple
7503 more ways of deducing that r3 is available. The first is when we are
7504 pushing anonymous arguments onto the stack and we have fewer than 4
7505 registers' worth of fixed arguments (*). In this case r3 will be part of
7506 the variable argument list and so we can be sure that it will be
7507 pushed right at the start of the function. Hence it will be available
7508 for the rest of the prologue.
7509 (*): i.e., crtl->args.pretend_args_size is greater than 0. */
7510 if (cfun->machine->uses_anonymous_args
7511 && crtl->args.pretend_args_size > 0)
7512 return LAST_ARG_REGNUM;
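/* Worked example (editor's sketch, assuming the usual AAPCS argument
   passing): for a function such as

       int log_msg (int level, ...);

   only r0 carries a named argument, so r1-r3 hold the start of the
   anonymous "..." arguments and are pushed as pretend args on entry
   (crtl->args.pretend_args_size == 12).  r3 is therefore already saved
   to the stack and is free for use as a work register here.  */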
7514 /* The other case is when we have fixed arguments but fewer than 4 registers'
7515 worth. In this case r3 might be used in the body of the function, but
7516 it is not being used to convey an argument into the function. In theory
7517 we could just check crtl->args.size to see how many bytes are
7518 being passed in argument registers, but it seems that it is unreliable.
7519 Sometimes it will have the value 0 when in fact arguments are being
7520 passed. (See testcase execute/20021111-1.c for an example.) So we also
7521 check the args_info.nregs field. The problem with this field is
7522 that it makes no allowances for arguments that are passed to the
7523 function but which are not used. Hence we could miss an opportunity
7524 when a function has an unused argument in r3. But it is better to be
7525 safe than sorry. */
7526 if (! cfun->machine->uses_anonymous_args
7527 && crtl->args.size >= 0
7528 && crtl->args.size <= (LAST_ARG_REGNUM * UNITS_PER_WORD)
7529 && (TARGET_AAPCS_BASED
7530 ? crtl->args.info.aapcs_ncrn < 4
7531 : crtl->args.info.nregs < 4))
7532 return LAST_ARG_REGNUM;
7534 /* Otherwise look for a call-saved register that is going to be pushed. */
7535 for (reg = LAST_LO_REGNUM; reg > LAST_ARG_REGNUM; reg --)
7536 if (pushed_regs_mask & (1 << reg))
7537 return reg;
7539 if (TARGET_THUMB2)
7541 /* Thumb-2 can use high regs. */
7542 for (reg = FIRST_HI_REGNUM; reg < 15; reg ++)
7543 if (pushed_regs_mask & (1 << reg))
7544 return reg;
7546 /* Something went wrong - thumb_compute_save_reg_mask()
7547 should have arranged for a suitable register to be pushed. */
7548 gcc_unreachable ();
7551 static GTY(()) int pic_labelno;
7553 /* Generate code to load the PIC register. In thumb mode SCRATCH is a
7554 low register. */
7556 void
7557 arm_load_pic_register (unsigned long saved_regs ATTRIBUTE_UNUSED)
7559 rtx l1, labelno, pic_tmp, pic_rtx, pic_reg;
7561 if (crtl->uses_pic_offset_table == 0 || TARGET_SINGLE_PIC_BASE)
7562 return;
7564 gcc_assert (flag_pic);
7566 pic_reg = cfun->machine->pic_reg;
7567 if (TARGET_VXWORKS_RTP)
7569 pic_rtx = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE);
7570 pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
7571 emit_insn (gen_pic_load_addr_32bit (pic_reg, pic_rtx));
7573 emit_insn (gen_rtx_SET (pic_reg, gen_rtx_MEM (Pmode, pic_reg)));
7575 pic_tmp = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
7576 emit_insn (gen_pic_offset_arm (pic_reg, pic_reg, pic_tmp));
7578 else
7580 /* We use an UNSPEC rather than a LABEL_REF because this label
7581 never appears in the code stream. */
7583 labelno = GEN_INT (pic_labelno++);
7584 l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
7585 l1 = gen_rtx_CONST (VOIDmode, l1);
7587 /* On the ARM the PC register contains 'dot + 8' at the time of the
7588 addition, on the Thumb it is 'dot + 4'. */
7589 pic_rtx = plus_constant (Pmode, l1, TARGET_ARM ? 8 : 4);
7590 pic_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, pic_rtx),
7591 UNSPEC_GOTSYM_OFF);
7592 pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
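/* Editor's note on the arithmetic: if the add emitted below ends up at
   address L, the pipeline makes the PC read L + 8 in ARM state (L + 4 in
   Thumb), so the constant built above is roughly
   _GLOBAL_OFFSET_TABLE_ - (L + 8); adding it to the PC at L then leaves
   the GOT base in pic_reg.  */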
7594 if (TARGET_32BIT)
7596 emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
7598 else /* TARGET_THUMB1 */
7600 if (arm_pic_register != INVALID_REGNUM
7601 && REGNO (pic_reg) > LAST_LO_REGNUM)
7603 /* We will have pushed the pic register, so we should always be
7604 able to find a work register. */
7605 pic_tmp = gen_rtx_REG (SImode,
7606 thumb_find_work_register (saved_regs));
7607 emit_insn (gen_pic_load_addr_thumb1 (pic_tmp, pic_rtx));
7608 emit_insn (gen_movsi (pic_offset_table_rtx, pic_tmp));
7609 emit_insn (gen_pic_add_dot_plus_four (pic_reg, pic_reg, labelno));
7611 else if (arm_pic_register != INVALID_REGNUM
7612 && arm_pic_register > LAST_LO_REGNUM
7613 && REGNO (pic_reg) <= LAST_LO_REGNUM)
7615 emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
7616 emit_move_insn (gen_rtx_REG (Pmode, arm_pic_register), pic_reg);
7617 emit_use (gen_rtx_REG (Pmode, arm_pic_register));
7619 else
7620 emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
7624 /* Need to emit this whether or not we obey regdecls,
7625 since setjmp/longjmp can cause life info to screw up. */
7626 emit_use (pic_reg);
7629 /* Generate code to load the address of a static var when flag_pic is set. */
7630 static rtx_insn *
7631 arm_pic_static_addr (rtx orig, rtx reg)
7633 rtx l1, labelno, offset_rtx;
7635 gcc_assert (flag_pic);
7637 /* We use an UNSPEC rather than a LABEL_REF because this label
7638 never appears in the code stream. */
7639 labelno = GEN_INT (pic_labelno++);
7640 l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
7641 l1 = gen_rtx_CONST (VOIDmode, l1);
7643 /* On the ARM the PC register contains 'dot + 8' at the time of the
7644 addition, on the Thumb it is 'dot + 4'. */
7645 offset_rtx = plus_constant (Pmode, l1, TARGET_ARM ? 8 : 4);
7646 offset_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, orig, offset_rtx),
7647 UNSPEC_SYMBOL_OFFSET);
7648 offset_rtx = gen_rtx_CONST (Pmode, offset_rtx);
7650 return emit_insn (gen_pic_load_addr_unified (reg, offset_rtx, labelno));
7653 /* Return nonzero if X is valid as an ARM state addressing register. */
7654 static int
7655 arm_address_register_rtx_p (rtx x, int strict_p)
7657 int regno;
7659 if (!REG_P (x))
7660 return 0;
7662 regno = REGNO (x);
7664 if (strict_p)
7665 return ARM_REGNO_OK_FOR_BASE_P (regno);
7667 return (regno <= LAST_ARM_REGNUM
7668 || regno >= FIRST_PSEUDO_REGISTER
7669 || regno == FRAME_POINTER_REGNUM
7670 || regno == ARG_POINTER_REGNUM);
7673 /* Return TRUE if this rtx is the difference of a symbol and a label,
7674 and will reduce to a PC-relative relocation in the object file.
7675 Expressions like this can be left alone when generating PIC, rather
7676 than forced through the GOT. */
7677 static int
7678 pcrel_constant_p (rtx x)
7680 if (GET_CODE (x) == MINUS)
7681 return symbol_mentioned_p (XEXP (x, 0)) && label_mentioned_p (XEXP (x, 1));
7683 return FALSE;
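/* For example (editor's note), an expression such as
   (minus (symbol_ref "foo") (label_ref 27))
   satisfies the test above: the symbol minus the label collapses to a
   fixed, PC-relative distance once the object file is laid out.  */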
7686 /* Return true if X will surely end up in an index register after the
7687 next splitting pass. */
7688 static bool
7689 will_be_in_index_register (const_rtx x)
7691 /* arm.md: calculate_pic_address will split this into a register. */
7692 return GET_CODE (x) == UNSPEC && (XINT (x, 1) == UNSPEC_PIC_SYM);
7695 /* Return nonzero if X is a valid ARM state address operand. */
7697 arm_legitimate_address_outer_p (machine_mode mode, rtx x, RTX_CODE outer,
7698 int strict_p)
7700 bool use_ldrd;
7701 enum rtx_code code = GET_CODE (x);
7703 if (arm_address_register_rtx_p (x, strict_p))
7704 return 1;
7706 use_ldrd = (TARGET_LDRD
7707 && (mode == DImode || mode == DFmode));
7709 if (code == POST_INC || code == PRE_DEC
7710 || ((code == PRE_INC || code == POST_DEC)
7711 && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
7712 return arm_address_register_rtx_p (XEXP (x, 0), strict_p);
7714 else if ((code == POST_MODIFY || code == PRE_MODIFY)
7715 && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
7716 && GET_CODE (XEXP (x, 1)) == PLUS
7717 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
7719 rtx addend = XEXP (XEXP (x, 1), 1);
7721 /* Don't allow ldrd post-increment by register, because it's hard
7722 to fix up invalid register choices. */
7723 if (use_ldrd
7724 && GET_CODE (x) == POST_MODIFY
7725 && REG_P (addend))
7726 return 0;
7728 return ((use_ldrd || GET_MODE_SIZE (mode) <= 4)
7729 && arm_legitimate_index_p (mode, addend, outer, strict_p));
7732 /* After reload constants split into minipools will have addresses
7733 from a LABEL_REF. */
7734 else if (reload_completed
7735 && (code == LABEL_REF
7736 || (code == CONST
7737 && GET_CODE (XEXP (x, 0)) == PLUS
7738 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
7739 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
7740 return 1;
7742 else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))
7743 return 0;
7745 else if (code == PLUS)
7747 rtx xop0 = XEXP (x, 0);
7748 rtx xop1 = XEXP (x, 1);
7750 return ((arm_address_register_rtx_p (xop0, strict_p)
7751 && ((CONST_INT_P (xop1)
7752 && arm_legitimate_index_p (mode, xop1, outer, strict_p))
7753 || (!strict_p && will_be_in_index_register (xop1))))
7754 || (arm_address_register_rtx_p (xop1, strict_p)
7755 && arm_legitimate_index_p (mode, xop0, outer, strict_p)));
7758 #if 0
7759 /* Reload currently can't handle MINUS, so disable this for now */
7760 else if (GET_CODE (x) == MINUS)
7762 rtx xop0 = XEXP (x, 0);
7763 rtx xop1 = XEXP (x, 1);
7765 return (arm_address_register_rtx_p (xop0, strict_p)
7766 && arm_legitimate_index_p (mode, xop1, outer, strict_p));
7768 #endif
7770 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
7771 && code == SYMBOL_REF
7772 && CONSTANT_POOL_ADDRESS_P (x)
7773 && ! (flag_pic
7774 && symbol_mentioned_p (get_pool_constant (x))
7775 && ! pcrel_constant_p (get_pool_constant (x))))
7776 return 1;
7778 return 0;
7781 /* Return true if we can avoid creating a constant pool entry for x. */
7782 static bool
7783 can_avoid_literal_pool_for_label_p (rtx x)
7785 /* Normally we can assign constant values to target registers without
7786 the help of the constant pool. But there are cases where we have to use
7787 the constant pool, such as:
7788 1) assigning a label to a register;
7789 2) sign-extending an 8-bit value to 32 bits and then assigning it to a register.
7791 A constant pool access of the form:
7792 (set (reg r0) (mem (symbol_ref (".LC0"))))
7793 will cause the use of the literal pool (later, in function arm_reorg).
7794 So here we mark such a format as invalid; the compiler will then
7795 adjust it into:
7796 (set (reg r0) (symbol_ref (".LC0")))
7797 (set (reg r0) (mem (reg r0))).
7798 No extra register is required, and (mem (reg r0)) won't cause the use
7799 of literal pools. */
7800 if (arm_disable_literal_pool && GET_CODE (x) == SYMBOL_REF
7801 && CONSTANT_POOL_ADDRESS_P (x))
7802 return 1;
7803 return 0;
7807 /* Return nonzero if X is a valid Thumb-2 address operand. */
7808 static int
7809 thumb2_legitimate_address_p (machine_mode mode, rtx x, int strict_p)
7811 bool use_ldrd;
7812 enum rtx_code code = GET_CODE (x);
7814 if (arm_address_register_rtx_p (x, strict_p))
7815 return 1;
7817 use_ldrd = (TARGET_LDRD
7818 && (mode == DImode || mode == DFmode));
7820 if (code == POST_INC || code == PRE_DEC
7821 || ((code == PRE_INC || code == POST_DEC)
7822 && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
7823 return arm_address_register_rtx_p (XEXP (x, 0), strict_p);
7825 else if ((code == POST_MODIFY || code == PRE_MODIFY)
7826 && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
7827 && GET_CODE (XEXP (x, 1)) == PLUS
7828 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
7830 /* Thumb-2 only has autoincrement by constant. */
7831 rtx addend = XEXP (XEXP (x, 1), 1);
7832 HOST_WIDE_INT offset;
7834 if (!CONST_INT_P (addend))
7835 return 0;
7837 offset = INTVAL(addend);
7838 if (GET_MODE_SIZE (mode) <= 4)
7839 return (offset > -256 && offset < 256);
7841 return (use_ldrd && offset > -1024 && offset < 1024
7842 && (offset & 3) == 0);
7845 /* After reload constants split into minipools will have addresses
7846 from a LABEL_REF. */
7847 else if (reload_completed
7848 && (code == LABEL_REF
7849 || (code == CONST
7850 && GET_CODE (XEXP (x, 0)) == PLUS
7851 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
7852 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
7853 return 1;
7855 else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))
7856 return 0;
7858 else if (code == PLUS)
7860 rtx xop0 = XEXP (x, 0);
7861 rtx xop1 = XEXP (x, 1);
7863 return ((arm_address_register_rtx_p (xop0, strict_p)
7864 && (thumb2_legitimate_index_p (mode, xop1, strict_p)
7865 || (!strict_p && will_be_in_index_register (xop1))))
7866 || (arm_address_register_rtx_p (xop1, strict_p)
7867 && thumb2_legitimate_index_p (mode, xop0, strict_p)));
7870 else if (can_avoid_literal_pool_for_label_p (x))
7871 return 0;
7873 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
7874 && code == SYMBOL_REF
7875 && CONSTANT_POOL_ADDRESS_P (x)
7876 && ! (flag_pic
7877 && symbol_mentioned_p (get_pool_constant (x))
7878 && ! pcrel_constant_p (get_pool_constant (x))))
7879 return 1;
7881 return 0;
7884 /* Return nonzero if INDEX is valid for an address index operand in
7885 ARM state. */
7886 static int
7887 arm_legitimate_index_p (machine_mode mode, rtx index, RTX_CODE outer,
7888 int strict_p)
7890 HOST_WIDE_INT range;
7891 enum rtx_code code = GET_CODE (index);
7893 /* Standard coprocessor addressing modes. */
7894 if (TARGET_HARD_FLOAT
7895 && (mode == SFmode || mode == DFmode))
7896 return (code == CONST_INT && INTVAL (index) < 1024
7897 && INTVAL (index) > -1024
7898 && (INTVAL (index) & 3) == 0);
7900 /* For quad modes, we restrict the constant offset to be slightly less
7901 than what the instruction format permits. We do this because for
7902 quad mode moves, we will actually decompose them into two separate
7903 double-mode reads or writes. INDEX must therefore be a valid
7904 (double-mode) offset and so should INDEX+8. */
7905 if (TARGET_NEON && VALID_NEON_QREG_MODE (mode))
7906 return (code == CONST_INT
7907 && INTVAL (index) < 1016
7908 && INTVAL (index) > -1024
7909 && (INTVAL (index) & 3) == 0);
7911 /* We have no such constraint on double mode offsets, so we permit the
7912 full range of the instruction format. */
7913 if (TARGET_NEON && VALID_NEON_DREG_MODE (mode))
7914 return (code == CONST_INT
7915 && INTVAL (index) < 1024
7916 && INTVAL (index) > -1024
7917 && (INTVAL (index) & 3) == 0);
7919 if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
7920 return (code == CONST_INT
7921 && INTVAL (index) < 1024
7922 && INTVAL (index) > -1024
7923 && (INTVAL (index) & 3) == 0);
7925 if (arm_address_register_rtx_p (index, strict_p)
7926 && (GET_MODE_SIZE (mode) <= 4))
7927 return 1;
7929 if (mode == DImode || mode == DFmode)
7931 if (code == CONST_INT)
7933 HOST_WIDE_INT val = INTVAL (index);
7935 if (TARGET_LDRD)
7936 return val > -256 && val < 256;
7937 else
7938 return val > -4096 && val < 4092;
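/* Editor's note: the asymmetric 4092 bound is presumably because, without
   LDRD, a DImode/DFmode access is done as two word loads at offsets VAL
   and VAL + 4, and the second one must still fit the +/-4095 ldr offset
   range, i.e. VAL + 4 <= 4095.  */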
7941 return TARGET_LDRD && arm_address_register_rtx_p (index, strict_p);
7944 if (GET_MODE_SIZE (mode) <= 4
7945 && ! (arm_arch4
7946 && (mode == HImode
7947 || mode == HFmode
7948 || (mode == QImode && outer == SIGN_EXTEND))))
7950 if (code == MULT)
7952 rtx xiop0 = XEXP (index, 0);
7953 rtx xiop1 = XEXP (index, 1);
7955 return ((arm_address_register_rtx_p (xiop0, strict_p)
7956 && power_of_two_operand (xiop1, SImode))
7957 || (arm_address_register_rtx_p (xiop1, strict_p)
7958 && power_of_two_operand (xiop0, SImode)));
7960 else if (code == LSHIFTRT || code == ASHIFTRT
7961 || code == ASHIFT || code == ROTATERT)
7963 rtx op = XEXP (index, 1);
7965 return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
7966 && CONST_INT_P (op)
7967 && INTVAL (op) > 0
7968 && INTVAL (op) <= 31);
7972 /* For ARM v4 we may be doing a sign-extend operation during the
7973 load. */
7974 if (arm_arch4)
7976 if (mode == HImode
7977 || mode == HFmode
7978 || (outer == SIGN_EXTEND && mode == QImode))
7979 range = 256;
7980 else
7981 range = 4096;
7983 else
7984 range = (mode == HImode || mode == HFmode) ? 4095 : 4096;
7986 return (code == CONST_INT
7987 && INTVAL (index) < range
7988 && INTVAL (index) > -range);
7991 /* Return true if OP is a valid index scaling factor for Thumb-2 address
7992 index operand. i.e. 1, 2, 4 or 8. */
7993 static bool
7994 thumb2_index_mul_operand (rtx op)
7996 HOST_WIDE_INT val;
7998 if (!CONST_INT_P (op))
7999 return false;
8001 val = INTVAL(op);
8002 return (val == 1 || val == 2 || val == 4 || val == 8);
8005 /* Return nonzero if INDEX is a valid Thumb-2 address index operand. */
8006 static int
8007 thumb2_legitimate_index_p (machine_mode mode, rtx index, int strict_p)
8009 enum rtx_code code = GET_CODE (index);
8011 /* ??? Combine arm and thumb2 coprocessor addressing modes. */
8012 /* Standard coprocessor addressing modes. */
8013 if (TARGET_HARD_FLOAT
8014 && (mode == SFmode || mode == DFmode))
8015 return (code == CONST_INT && INTVAL (index) < 1024
8016 /* Thumb-2 allows only a > -256 index range for its core register
8017 load/stores. Since we allow SF/DF in core registers, we have
8018 to use the intersection between -256~4096 (core) and -1024~1024
8019 (coprocessor). */
8020 && INTVAL (index) > -256
8021 && (INTVAL (index) & 3) == 0);
8023 if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
8025 /* For DImode assume values will usually live in core regs
8026 and only allow LDRD addressing modes. */
8027 if (!TARGET_LDRD || mode != DImode)
8028 return (code == CONST_INT
8029 && INTVAL (index) < 1024
8030 && INTVAL (index) > -1024
8031 && (INTVAL (index) & 3) == 0);
8034 /* For quad modes, we restrict the constant offset to be slightly less
8035 than what the instruction format permits. We do this because for
8036 quad mode moves, we will actually decompose them into two separate
8037 double-mode reads or writes. INDEX must therefore be a valid
8038 (double-mode) offset and so should INDEX+8. */
8039 if (TARGET_NEON && VALID_NEON_QREG_MODE (mode))
8040 return (code == CONST_INT
8041 && INTVAL (index) < 1016
8042 && INTVAL (index) > -1024
8043 && (INTVAL (index) & 3) == 0);
8045 /* We have no such constraint on double mode offsets, so we permit the
8046 full range of the instruction format. */
8047 if (TARGET_NEON && VALID_NEON_DREG_MODE (mode))
8048 return (code == CONST_INT
8049 && INTVAL (index) < 1024
8050 && INTVAL (index) > -1024
8051 && (INTVAL (index) & 3) == 0);
8053 if (arm_address_register_rtx_p (index, strict_p)
8054 && (GET_MODE_SIZE (mode) <= 4))
8055 return 1;
8057 if (mode == DImode || mode == DFmode)
8059 if (code == CONST_INT)
8061 HOST_WIDE_INT val = INTVAL (index);
8062 /* ??? Can we assume ldrd for thumb2? */
8063 /* Thumb-2 ldrd only has reg+const addressing modes. */
8064 /* ldrd supports offsets of +-1020.
8065 However the ldr fallback does not. */
8066 return val > -256 && val < 256 && (val & 3) == 0;
8068 else
8069 return 0;
8072 if (code == MULT)
8074 rtx xiop0 = XEXP (index, 0);
8075 rtx xiop1 = XEXP (index, 1);
8077 return ((arm_address_register_rtx_p (xiop0, strict_p)
8078 && thumb2_index_mul_operand (xiop1))
8079 || (arm_address_register_rtx_p (xiop1, strict_p)
8080 && thumb2_index_mul_operand (xiop0)));
8082 else if (code == ASHIFT)
8084 rtx op = XEXP (index, 1);
8086 return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
8087 && CONST_INT_P (op)
8088 && INTVAL (op) > 0
8089 && INTVAL (op) <= 3);
8092 return (code == CONST_INT
8093 && INTVAL (index) < 4096
8094 && INTVAL (index) > -256);
8097 /* Return nonzero if X is valid as a 16-bit Thumb state base register. */
8098 static int
8099 thumb1_base_register_rtx_p (rtx x, machine_mode mode, int strict_p)
8101 int regno;
8103 if (!REG_P (x))
8104 return 0;
8106 regno = REGNO (x);
8108 if (strict_p)
8109 return THUMB1_REGNO_MODE_OK_FOR_BASE_P (regno, mode);
8111 return (regno <= LAST_LO_REGNUM
8112 || regno > LAST_VIRTUAL_REGISTER
8113 || regno == FRAME_POINTER_REGNUM
8114 || (GET_MODE_SIZE (mode) >= 4
8115 && (regno == STACK_POINTER_REGNUM
8116 || regno >= FIRST_PSEUDO_REGISTER
8117 || x == hard_frame_pointer_rtx
8118 || x == arg_pointer_rtx)));
8121 /* Return nonzero if x is a legitimate index register. This is the case
8122 for any base register that can access a QImode object. */
8123 inline static int
8124 thumb1_index_register_rtx_p (rtx x, int strict_p)
8126 return thumb1_base_register_rtx_p (x, QImode, strict_p);
8129 /* Return nonzero if x is a legitimate 16-bit Thumb-state address.
8131 The AP may be eliminated to either the SP or the FP, so we use the
8132 least common denominator, e.g. SImode, and offsets from 0 to 64.
8134 ??? Verify whether the above is the right approach.
8136 ??? Also, the FP may be eliminated to the SP, so perhaps that
8137 needs special handling also.
8139 ??? Look at how the mips16 port solves this problem. It probably uses
8140 better ways to solve some of these problems.
8142 Although it is not incorrect, we don't accept QImode and HImode
8143 addresses based on the frame pointer or arg pointer until the
8144 reload pass starts. This is so that eliminating such addresses
8145 into stack based ones won't produce impossible code. */
8147 thumb1_legitimate_address_p (machine_mode mode, rtx x, int strict_p)
8149 if (TARGET_HAVE_MOVT && can_avoid_literal_pool_for_label_p (x))
8150 return 0;
8152 /* ??? Not clear if this is right. Experiment. */
8153 if (GET_MODE_SIZE (mode) < 4
8154 && !(reload_in_progress || reload_completed)
8155 && (reg_mentioned_p (frame_pointer_rtx, x)
8156 || reg_mentioned_p (arg_pointer_rtx, x)
8157 || reg_mentioned_p (virtual_incoming_args_rtx, x)
8158 || reg_mentioned_p (virtual_outgoing_args_rtx, x)
8159 || reg_mentioned_p (virtual_stack_dynamic_rtx, x)
8160 || reg_mentioned_p (virtual_stack_vars_rtx, x)))
8161 return 0;
8163 /* Accept any base register. SP only in SImode or larger. */
8164 else if (thumb1_base_register_rtx_p (x, mode, strict_p))
8165 return 1;
8167 /* This is PC relative data before arm_reorg runs. */
8168 else if (GET_MODE_SIZE (mode) >= 4 && CONSTANT_P (x)
8169 && GET_CODE (x) == SYMBOL_REF
8170 && CONSTANT_POOL_ADDRESS_P (x) && !flag_pic)
8171 return 1;
8173 /* This is PC relative data after arm_reorg runs. */
8174 else if ((GET_MODE_SIZE (mode) >= 4 || mode == HFmode)
8175 && reload_completed
8176 && (GET_CODE (x) == LABEL_REF
8177 || (GET_CODE (x) == CONST
8178 && GET_CODE (XEXP (x, 0)) == PLUS
8179 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
8180 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
8181 return 1;
8183 /* Post-inc indexing only supported for SImode and larger. */
8184 else if (GET_CODE (x) == POST_INC && GET_MODE_SIZE (mode) >= 4
8185 && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p))
8186 return 1;
8188 else if (GET_CODE (x) == PLUS)
8190 /* REG+REG address can be any two index registers. */
8191 /* We disallow FRAME+REG addressing since we know that FRAME
8192 will be replaced with STACK, and SP relative addressing only
8193 permits SP+OFFSET. */
8194 if (GET_MODE_SIZE (mode) <= 4
8195 && XEXP (x, 0) != frame_pointer_rtx
8196 && XEXP (x, 1) != frame_pointer_rtx
8197 && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
8198 && (thumb1_index_register_rtx_p (XEXP (x, 1), strict_p)
8199 || (!strict_p && will_be_in_index_register (XEXP (x, 1)))))
8200 return 1;
8202 /* REG+const has 5-7 bit offset for non-SP registers. */
8203 else if ((thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
8204 || XEXP (x, 0) == arg_pointer_rtx)
8205 && CONST_INT_P (XEXP (x, 1))
8206 && thumb_legitimate_offset_p (mode, INTVAL (XEXP (x, 1))))
8207 return 1;
8209 /* REG+const has a 10-bit offset for SP, but only SImode and
8210 larger are supported. */
8211 /* ??? Should probably check for DI/DFmode overflow here
8212 just like GO_IF_LEGITIMATE_OFFSET does. */
8213 else if (REG_P (XEXP (x, 0))
8214 && REGNO (XEXP (x, 0)) == STACK_POINTER_REGNUM
8215 && GET_MODE_SIZE (mode) >= 4
8216 && CONST_INT_P (XEXP (x, 1))
8217 && INTVAL (XEXP (x, 1)) >= 0
8218 && INTVAL (XEXP (x, 1)) + GET_MODE_SIZE (mode) <= 1024
8219 && (INTVAL (XEXP (x, 1)) & 3) == 0)
8220 return 1;
8222 else if (REG_P (XEXP (x, 0))
8223 && (REGNO (XEXP (x, 0)) == FRAME_POINTER_REGNUM
8224 || REGNO (XEXP (x, 0)) == ARG_POINTER_REGNUM
8225 || (REGNO (XEXP (x, 0)) >= FIRST_VIRTUAL_REGISTER
8226 && REGNO (XEXP (x, 0))
8227 <= LAST_VIRTUAL_POINTER_REGISTER))
8228 && GET_MODE_SIZE (mode) >= 4
8229 && CONST_INT_P (XEXP (x, 1))
8230 && (INTVAL (XEXP (x, 1)) & 3) == 0)
8231 return 1;
8234 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
8235 && GET_MODE_SIZE (mode) == 4
8236 && GET_CODE (x) == SYMBOL_REF
8237 && CONSTANT_POOL_ADDRESS_P (x)
8238 && ! (flag_pic
8239 && symbol_mentioned_p (get_pool_constant (x))
8240 && ! pcrel_constant_p (get_pool_constant (x))))
8241 return 1;
8243 return 0;
8246 /* Return nonzero if VAL can be used as an offset in a Thumb-state address
8247 instruction of mode MODE. */
8249 thumb_legitimate_offset_p (machine_mode mode, HOST_WIDE_INT val)
8251 switch (GET_MODE_SIZE (mode))
8253 case 1:
8254 return val >= 0 && val < 32;
8256 case 2:
8257 return val >= 0 && val < 64 && (val & 1) == 0;
8259 default:
8260 return (val >= 0
8261 && (val + GET_MODE_SIZE (mode)) <= 128
8262 && (val & 3) == 0);
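/* Editor's summary of the ranges above, as a quick reference:
   byte (size 1):     0 .. 31
   halfword (size 2): 0 .. 62, even only
   word and larger:   0 .. 128 - size, multiple of 4
   e.g. for SImode the largest valid offset is 124 (124 + 4 == 128),
   while 126 fails the alignment test and 128 fails the range test.  */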
8266 bool
8267 arm_legitimate_address_p (machine_mode mode, rtx x, bool strict_p)
8269 if (TARGET_ARM)
8270 return arm_legitimate_address_outer_p (mode, x, SET, strict_p);
8271 else if (TARGET_THUMB2)
8272 return thumb2_legitimate_address_p (mode, x, strict_p);
8273 else /* if (TARGET_THUMB1) */
8274 return thumb1_legitimate_address_p (mode, x, strict_p);
8277 /* Worker function for TARGET_PREFERRED_RELOAD_CLASS.
8279 Given an rtx X being reloaded into a reg required to be
8280 in class CLASS, return the class of reg to actually use.
8281 In general this is just CLASS, but for the Thumb core registers and
8282 immediate constants we prefer a LO_REGS class or a subset. */
8284 static reg_class_t
8285 arm_preferred_reload_class (rtx x ATTRIBUTE_UNUSED, reg_class_t rclass)
8287 if (TARGET_32BIT)
8288 return rclass;
8289 else
8291 if (rclass == GENERAL_REGS)
8292 return LO_REGS;
8293 else
8294 return rclass;
8298 /* Build the SYMBOL_REF for __tls_get_addr. */
8300 static GTY(()) rtx tls_get_addr_libfunc;
8302 static rtx
8303 get_tls_get_addr (void)
8305 if (!tls_get_addr_libfunc)
8306 tls_get_addr_libfunc = init_one_libfunc ("__tls_get_addr");
8307 return tls_get_addr_libfunc;
8311 arm_load_tp (rtx target)
8313 if (!target)
8314 target = gen_reg_rtx (SImode);
8316 if (TARGET_HARD_TP)
8318 /* Can return in any reg. */
8319 emit_insn (gen_load_tp_hard (target));
8321 else
8323 /* Always returned in r0. Immediately copy the result into a pseudo,
8324 otherwise other uses of r0 (e.g. setting up function arguments) may
8325 clobber the value. */
8327 rtx tmp;
8329 emit_insn (gen_load_tp_soft ());
8331 tmp = gen_rtx_REG (SImode, R0_REGNUM);
8332 emit_move_insn (target, tmp);
8334 return target;
8337 static rtx
8338 load_tls_operand (rtx x, rtx reg)
8340 rtx tmp;
8342 if (reg == NULL_RTX)
8343 reg = gen_reg_rtx (SImode);
8345 tmp = gen_rtx_CONST (SImode, x);
8347 emit_move_insn (reg, tmp);
8349 return reg;
8352 static rtx_insn *
8353 arm_call_tls_get_addr (rtx x, rtx reg, rtx *valuep, int reloc)
8355 rtx label, labelno, sum;
8357 gcc_assert (reloc != TLS_DESCSEQ);
8358 start_sequence ();
8360 labelno = GEN_INT (pic_labelno++);
8361 label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
8362 label = gen_rtx_CONST (VOIDmode, label);
8364 sum = gen_rtx_UNSPEC (Pmode,
8365 gen_rtvec (4, x, GEN_INT (reloc), label,
8366 GEN_INT (TARGET_ARM ? 8 : 4)),
8367 UNSPEC_TLS);
8368 reg = load_tls_operand (sum, reg);
8370 if (TARGET_ARM)
8371 emit_insn (gen_pic_add_dot_plus_eight (reg, reg, labelno));
8372 else
8373 emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
8375 *valuep = emit_library_call_value (get_tls_get_addr (), NULL_RTX,
8376 LCT_PURE, /* LCT_CONST? */
8377 Pmode, reg, Pmode);
8379 rtx_insn *insns = get_insns ();
8380 end_sequence ();
8382 return insns;
8385 static rtx
8386 arm_tls_descseq_addr (rtx x, rtx reg)
8388 rtx labelno = GEN_INT (pic_labelno++);
8389 rtx label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
8390 rtx sum = gen_rtx_UNSPEC (Pmode,
8391 gen_rtvec (4, x, GEN_INT (TLS_DESCSEQ),
8392 gen_rtx_CONST (VOIDmode, label),
8393 GEN_INT (!TARGET_ARM)),
8394 UNSPEC_TLS);
8395 rtx reg0 = load_tls_operand (sum, gen_rtx_REG (SImode, R0_REGNUM));
8397 emit_insn (gen_tlscall (x, labelno));
8398 if (!reg)
8399 reg = gen_reg_rtx (SImode);
8400 else
8401 gcc_assert (REGNO (reg) != R0_REGNUM);
8403 emit_move_insn (reg, reg0);
8405 return reg;
8409 legitimize_tls_address (rtx x, rtx reg)
8411 rtx dest, tp, label, labelno, sum, ret, eqv, addend;
8412 rtx_insn *insns;
8413 unsigned int model = SYMBOL_REF_TLS_MODEL (x);
8415 switch (model)
8417 case TLS_MODEL_GLOBAL_DYNAMIC:
8418 if (TARGET_GNU2_TLS)
8420 reg = arm_tls_descseq_addr (x, reg);
8422 tp = arm_load_tp (NULL_RTX);
8424 dest = gen_rtx_PLUS (Pmode, tp, reg);
8426 else
8428 /* Original scheme */
8429 insns = arm_call_tls_get_addr (x, reg, &ret, TLS_GD32);
8430 dest = gen_reg_rtx (Pmode);
8431 emit_libcall_block (insns, dest, ret, x);
8433 return dest;
8435 case TLS_MODEL_LOCAL_DYNAMIC:
8436 if (TARGET_GNU2_TLS)
8438 reg = arm_tls_descseq_addr (x, reg);
8440 tp = arm_load_tp (NULL_RTX);
8442 dest = gen_rtx_PLUS (Pmode, tp, reg);
8444 else
8446 insns = arm_call_tls_get_addr (x, reg, &ret, TLS_LDM32);
8448 /* Attach a unique REG_EQUIV, to allow the RTL optimizers to
8449 share the LDM result with other LD model accesses. */
8450 eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const1_rtx),
8451 UNSPEC_TLS);
8452 dest = gen_reg_rtx (Pmode);
8453 emit_libcall_block (insns, dest, ret, eqv);
8455 /* Load the addend. */
8456 addend = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, x,
8457 GEN_INT (TLS_LDO32)),
8458 UNSPEC_TLS);
8459 addend = force_reg (SImode, gen_rtx_CONST (SImode, addend));
8460 dest = gen_rtx_PLUS (Pmode, dest, addend);
8462 return dest;
8464 case TLS_MODEL_INITIAL_EXEC:
8465 labelno = GEN_INT (pic_labelno++);
8466 label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
8467 label = gen_rtx_CONST (VOIDmode, label);
8468 sum = gen_rtx_UNSPEC (Pmode,
8469 gen_rtvec (4, x, GEN_INT (TLS_IE32), label,
8470 GEN_INT (TARGET_ARM ? 8 : 4)),
8471 UNSPEC_TLS);
8472 reg = load_tls_operand (sum, reg);
8474 if (TARGET_ARM)
8475 emit_insn (gen_tls_load_dot_plus_eight (reg, reg, labelno));
8476 else if (TARGET_THUMB2)
8477 emit_insn (gen_tls_load_dot_plus_four (reg, NULL, reg, labelno));
8478 else
8480 emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
8481 emit_move_insn (reg, gen_const_mem (SImode, reg));
8484 tp = arm_load_tp (NULL_RTX);
8486 return gen_rtx_PLUS (Pmode, tp, reg);
8488 case TLS_MODEL_LOCAL_EXEC:
8489 tp = arm_load_tp (NULL_RTX);
8491 reg = gen_rtx_UNSPEC (Pmode,
8492 gen_rtvec (2, x, GEN_INT (TLS_LE32)),
8493 UNSPEC_TLS);
8494 reg = force_reg (SImode, gen_rtx_CONST (SImode, reg));
8496 return gen_rtx_PLUS (Pmode, tp, reg);
8498 default:
8499 abort ();
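/* Editor's illustration (hedged, general GCC TLS background rather than
   anything specific to this file): for a variable declared as

       __thread int counter;

   the middle end selects one of the models handled above via
   SYMBOL_REF_TLS_MODEL: typically global-dynamic when building a shared
   library, initial-exec or local-exec for executables, and local-dynamic
   for static __thread data referenced within one module.  */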
8503 /* Try machine-dependent ways of modifying an illegitimate address
8504 to be legitimate. If we find one, return the new, valid address. */
8506 arm_legitimize_address (rtx x, rtx orig_x, machine_mode mode)
8508 if (arm_tls_referenced_p (x))
8510 rtx addend = NULL;
8512 if (GET_CODE (x) == CONST && GET_CODE (XEXP (x, 0)) == PLUS)
8514 addend = XEXP (XEXP (x, 0), 1);
8515 x = XEXP (XEXP (x, 0), 0);
8518 if (GET_CODE (x) != SYMBOL_REF)
8519 return x;
8521 gcc_assert (SYMBOL_REF_TLS_MODEL (x) != 0);
8523 x = legitimize_tls_address (x, NULL_RTX);
8525 if (addend)
8527 x = gen_rtx_PLUS (SImode, x, addend);
8528 orig_x = x;
8530 else
8531 return x;
8534 if (!TARGET_ARM)
8536 /* TODO: legitimize_address for Thumb2. */
8537 if (TARGET_THUMB2)
8538 return x;
8539 return thumb_legitimize_address (x, orig_x, mode);
8542 if (GET_CODE (x) == PLUS)
8544 rtx xop0 = XEXP (x, 0);
8545 rtx xop1 = XEXP (x, 1);
8547 if (CONSTANT_P (xop0) && !symbol_mentioned_p (xop0))
8548 xop0 = force_reg (SImode, xop0);
8550 if (CONSTANT_P (xop1) && !CONST_INT_P (xop1)
8551 && !symbol_mentioned_p (xop1))
8552 xop1 = force_reg (SImode, xop1);
8554 if (ARM_BASE_REGISTER_RTX_P (xop0)
8555 && CONST_INT_P (xop1))
8557 HOST_WIDE_INT n, low_n;
8558 rtx base_reg, val;
8559 n = INTVAL (xop1);
8561 /* VFP addressing modes actually allow greater offsets, but for
8562 now we just stick with the lowest common denominator. */
8563 if (mode == DImode || mode == DFmode)
8565 low_n = n & 0x0f;
8566 n &= ~0x0f;
8567 if (low_n > 4)
8569 n += 16;
8570 low_n -= 16;
8573 else
8575 low_n = ((mode) == TImode ? 0
8576 : n >= 0 ? (n & 0xfff) : -((-n) & 0xfff));
8577 n -= low_n;
8580 base_reg = gen_reg_rtx (SImode);
8581 val = force_operand (plus_constant (Pmode, xop0, n), NULL_RTX);
8582 emit_move_insn (base_reg, val);
8583 x = plus_constant (Pmode, base_reg, low_n);
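/* Worked example (editor's note): for a DImode access at xop0 + 269,
   low_n starts as 13; since 13 > 4 the code rounds up, giving n = 272
   and low_n = -3, so base_reg = xop0 + 272 and the final address is
   base_reg - 3 == xop0 + 269, with the small residual offset kept in
   the range the doubleword addressing modes can handle.  */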
8585 else if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
8586 x = gen_rtx_PLUS (SImode, xop0, xop1);
8589 /* XXX We don't allow MINUS any more -- see comment in
8590 arm_legitimate_address_outer_p (). */
8591 else if (GET_CODE (x) == MINUS)
8593 rtx xop0 = XEXP (x, 0);
8594 rtx xop1 = XEXP (x, 1);
8596 if (CONSTANT_P (xop0))
8597 xop0 = force_reg (SImode, xop0);
8599 if (CONSTANT_P (xop1) && ! symbol_mentioned_p (xop1))
8600 xop1 = force_reg (SImode, xop1);
8602 if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
8603 x = gen_rtx_MINUS (SImode, xop0, xop1);
8606 /* Make sure to take full advantage of the pre-indexed addressing mode
8607 with absolute addresses, which often allows the base register to
8608 be shared between multiple adjacent memory references, and might
8609 even allow the minipool to be avoided entirely. */
8610 else if (CONST_INT_P (x) && optimize > 0)
8612 unsigned int bits;
8613 HOST_WIDE_INT mask, base, index;
8614 rtx base_reg;
8616 /* ldr and ldrb can use a 12-bit index; ldrsb and the rest can only
8617 use an 8-bit index. So let's use a 12-bit index for SImode only, and
8618 hope that arm_gen_constant will enable ldrb to use more bits. */
8619 bits = (mode == SImode) ? 12 : 8;
8620 mask = (1 << bits) - 1;
8621 base = INTVAL (x) & ~mask;
8622 index = INTVAL (x) & mask;
8623 if (bit_count (base & 0xffffffff) > (32 - bits)/2)
8625 /* It'll most probably be more efficient to generate the base
8626 with more bits set and use a negative index instead. */
8627 base |= mask;
8628 index -= mask;
8630 base_reg = force_reg (SImode, GEN_INT (base));
8631 x = plus_constant (Pmode, base_reg, index);
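/* Worked example (editor's note): loading an SImode value from absolute
   address 0x12345 gives bits = 12, base = 0x12000 and index = 0x345;
   only two bits are set in the base, so no negative-index adjustment is
   made, the base goes into a register (which arm_gen_constant can build
   cheaply), and the ldr then uses its 12-bit immediate for the
   remaining 0x345.  */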
8634 if (flag_pic)
8636 /* We need to find and carefully transform any SYMBOL and LABEL
8637 references; so go back to the original address expression. */
8638 rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX);
8640 if (new_x != orig_x)
8641 x = new_x;
8644 return x;
8648 /* Try machine-dependent ways of modifying an illegitimate Thumb address
8649 to be legitimate. If we find one, return the new, valid address. */
8651 thumb_legitimize_address (rtx x, rtx orig_x, machine_mode mode)
8653 if (GET_CODE (x) == PLUS
8654 && CONST_INT_P (XEXP (x, 1))
8655 && (INTVAL (XEXP (x, 1)) >= 32 * GET_MODE_SIZE (mode)
8656 || INTVAL (XEXP (x, 1)) < 0))
8658 rtx xop0 = XEXP (x, 0);
8659 rtx xop1 = XEXP (x, 1);
8660 HOST_WIDE_INT offset = INTVAL (xop1);
8662 /* Try and fold the offset into a biasing of the base register and
8663 then offsetting that. Don't do this when optimizing for space
8664 since it can cause too many CSEs. */
8665 if (optimize_size && offset >= 0
8666 && offset < 256 + 31 * GET_MODE_SIZE (mode))
8668 HOST_WIDE_INT delta;
8670 if (offset >= 256)
8671 delta = offset - (256 - GET_MODE_SIZE (mode));
8672 else if (offset < 32 * GET_MODE_SIZE (mode) + 8)
8673 delta = 31 * GET_MODE_SIZE (mode);
8674 else
8675 delta = offset & (~31 * GET_MODE_SIZE (mode));
8677 xop0 = force_operand (plus_constant (Pmode, xop0, offset - delta),
8678 NULL_RTX);
8679 x = plus_constant (Pmode, xop0, delta);
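/* Worked example (editor's note): for an SImode access at xop0 + 300,
   delta = 300 - (256 - 4) = 48, so the base is biased by 252 and the
   remaining offset of 48 fits the 0..124 range that a Thumb-1 word
   load/store immediate can encode.  */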
8681 else if (offset < 0 && offset > -256)
8682 /* Small negative offsets are best done with a subtract before the
8683 dereference; forcing these into a register normally takes two
8684 instructions. */
8685 x = force_operand (x, NULL_RTX);
8686 else
8688 /* For the remaining cases, force the constant into a register. */
8689 xop1 = force_reg (SImode, xop1);
8690 x = gen_rtx_PLUS (SImode, xop0, xop1);
8693 else if (GET_CODE (x) == PLUS
8694 && s_register_operand (XEXP (x, 1), SImode)
8695 && !s_register_operand (XEXP (x, 0), SImode))
8697 rtx xop0 = force_operand (XEXP (x, 0), NULL_RTX);
8699 x = gen_rtx_PLUS (SImode, xop0, XEXP (x, 1));
8702 if (flag_pic)
8704 /* We need to find and carefully transform any SYMBOL and LABEL
8705 references; so go back to the original address expression. */
8706 rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX);
8708 if (new_x != orig_x)
8709 x = new_x;
8712 return x;
8715 /* Return TRUE if X contains any TLS symbol references. */
8717 bool
8718 arm_tls_referenced_p (rtx x)
8720 if (! TARGET_HAVE_TLS)
8721 return false;
8723 subrtx_iterator::array_type array;
8724 FOR_EACH_SUBRTX (iter, array, x, ALL)
8726 const_rtx x = *iter;
8727 if (GET_CODE (x) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (x) != 0)
8729 /* ARM currently does not provide relocations to encode TLS variables
8730 into AArch32 instructions, only into data, so there is currently no
8731 way to implement these if a literal pool is disabled. */
8732 if (arm_disable_literal_pool)
8733 sorry ("accessing thread-local storage is not currently supported "
8734 "with -mpure-code or -mslow-flash-data");
8736 return true;
8739 /* Don't recurse into UNSPEC_TLS looking for TLS symbols; these are
8740 TLS offsets, not real symbol references. */
8741 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
8742 iter.skip_subrtxes ();
8744 return false;
8747 /* Implement TARGET_LEGITIMATE_CONSTANT_P.
8749 On the ARM, allow any integer (invalid ones are removed later by insn
8750 patterns), nice doubles and symbol_refs which refer to the function's
8751 constant pool XXX.
8753 When generating pic allow anything. */
8755 static bool
8756 arm_legitimate_constant_p_1 (machine_mode, rtx x)
8758 return flag_pic || !label_mentioned_p (x);
8761 static bool
8762 thumb_legitimate_constant_p (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
8764 /* Splitters for TARGET_USE_MOVT call arm_emit_movpair, which creates HIGH
8765 RTXs. These RTXs must therefore be allowed for Thumb-1 so that, when run
8766 for ARMv8-M Baseline or later, the result is valid. */
8767 if (TARGET_HAVE_MOVT && GET_CODE (x) == HIGH)
8768 x = XEXP (x, 0);
8770 return (CONST_INT_P (x)
8771 || CONST_DOUBLE_P (x)
8772 || CONSTANT_ADDRESS_P (x)
8773 || (TARGET_HAVE_MOVT && GET_CODE (x) == SYMBOL_REF)
8774 || flag_pic);
8777 static bool
8778 arm_legitimate_constant_p (machine_mode mode, rtx x)
8780 return (!arm_cannot_force_const_mem (mode, x)
8781 && (TARGET_32BIT
8782 ? arm_legitimate_constant_p_1 (mode, x)
8783 : thumb_legitimate_constant_p (mode, x)));
8786 /* Implement TARGET_CANNOT_FORCE_CONST_MEM. */
8788 static bool
8789 arm_cannot_force_const_mem (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
8791 rtx base, offset;
8793 if (ARM_OFFSETS_MUST_BE_WITHIN_SECTIONS_P)
8795 split_const (x, &base, &offset);
8796 if (GET_CODE (base) == SYMBOL_REF
8797 && !offset_within_block_p (base, INTVAL (offset)))
8798 return true;
8800 return arm_tls_referenced_p (x);
8803 #define REG_OR_SUBREG_REG(X) \
8804 (REG_P (X) \
8805 || (GET_CODE (X) == SUBREG && REG_P (SUBREG_REG (X))))
8807 #define REG_OR_SUBREG_RTX(X) \
8808 (REG_P (X) ? (X) : SUBREG_REG (X))
8810 static inline int
8811 thumb1_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
8813 machine_mode mode = GET_MODE (x);
8814 int total, words;
8816 switch (code)
8818 case ASHIFT:
8819 case ASHIFTRT:
8820 case LSHIFTRT:
8821 case ROTATERT:
8822 return (mode == SImode) ? COSTS_N_INSNS (1) : COSTS_N_INSNS (2);
8824 case PLUS:
8825 case MINUS:
8826 case COMPARE:
8827 case NEG:
8828 case NOT:
8829 return COSTS_N_INSNS (1);
8831 case MULT:
8832 if (arm_arch6m && arm_m_profile_small_mul)
8833 return COSTS_N_INSNS (32);
8835 if (CONST_INT_P (XEXP (x, 1)))
8837 int cycles = 0;
8838 unsigned HOST_WIDE_INT i = INTVAL (XEXP (x, 1));
8840 while (i)
8842 i >>= 2;
8843 cycles++;
8845 return COSTS_N_INSNS (2) + cycles;
8847 return COSTS_N_INSNS (1) + 16;
8849 case SET:
8850 /* A SET doesn't have a mode, so let's look at the SET_DEST to get
8851 the mode. */
8852 words = ARM_NUM_INTS (GET_MODE_SIZE (GET_MODE (SET_DEST (x))));
8853 return (COSTS_N_INSNS (words)
8854 + 4 * ((MEM_P (SET_SRC (x)))
8855 + MEM_P (SET_DEST (x))));
8857 case CONST_INT:
8858 if (outer == SET)
8860 if (UINTVAL (x) < 256
8861 /* 16-bit constant. */
8862 || (TARGET_HAVE_MOVT && !(INTVAL (x) & 0xffff0000)))
8863 return 0;
8864 if (thumb_shiftable_const (INTVAL (x)))
8865 return COSTS_N_INSNS (2);
8866 return COSTS_N_INSNS (3);
8868 else if ((outer == PLUS || outer == COMPARE)
8869 && INTVAL (x) < 256 && INTVAL (x) > -256)
8870 return 0;
8871 else if ((outer == IOR || outer == XOR || outer == AND)
8872 && INTVAL (x) < 256 && INTVAL (x) >= -256)
8873 return COSTS_N_INSNS (1);
8874 else if (outer == AND)
8876 int i;
8877 /* This duplicates the tests in the andsi3 expander. */
8878 for (i = 9; i <= 31; i++)
8879 if ((HOST_WIDE_INT_1 << i) - 1 == INTVAL (x)
8880 || (HOST_WIDE_INT_1 << i) - 1 == ~INTVAL (x))
8881 return COSTS_N_INSNS (2);
8883 else if (outer == ASHIFT || outer == ASHIFTRT
8884 || outer == LSHIFTRT)
8885 return 0;
8886 return COSTS_N_INSNS (2);
8888 case CONST:
8889 case CONST_DOUBLE:
8890 case LABEL_REF:
8891 case SYMBOL_REF:
8892 return COSTS_N_INSNS (3);
8894 case UDIV:
8895 case UMOD:
8896 case DIV:
8897 case MOD:
8898 return 100;
8900 case TRUNCATE:
8901 return 99;
8903 case AND:
8904 case XOR:
8905 case IOR:
8906 /* XXX guess. */
8907 return 8;
8909 case MEM:
8910 /* XXX another guess. */
8911 /* Memory costs quite a lot for the first word, but subsequent words
8912 load at the equivalent of a single insn each. */
8913 return (10 + 4 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
8914 + ((GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
8915 ? 4 : 0));
8917 case IF_THEN_ELSE:
8918 /* XXX a guess. */
8919 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
8920 return 14;
8921 return 2;
8923 case SIGN_EXTEND:
8924 case ZERO_EXTEND:
8925 total = mode == DImode ? COSTS_N_INSNS (1) : 0;
8926 total += thumb1_rtx_costs (XEXP (x, 0), GET_CODE (XEXP (x, 0)), code);
8928 if (mode == SImode)
8929 return total;
8931 if (arm_arch6)
8932 return total + COSTS_N_INSNS (1);
8934 /* Assume a two-shift sequence. Increase the cost slightly so
8935 we prefer actual shifts over an extend operation. */
8936 return total + 1 + COSTS_N_INSNS (2);
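/* Editor's note: the "two-shift sequence" here is the classic pre-v6
   idiom of extending within a register, e.g. sign-extending a byte with
   something like "lsls rN, rN, #24; asrs rN, rN, #24" when sxtb is not
   available.  */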
8938 default:
8939 return 99;
8943 /* Estimate the size cost of Thumb-1 instructions.
8944 For now most of the code is copied from thumb1_rtx_costs. We will need
8945 more fine-grained tuning when we have more related test cases. */
8946 static inline int
8947 thumb1_size_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
8949 machine_mode mode = GET_MODE (x);
8950 int words, cost;
8952 switch (code)
8954 case ASHIFT:
8955 case ASHIFTRT:
8956 case LSHIFTRT:
8957 case ROTATERT:
8958 return (mode == SImode) ? COSTS_N_INSNS (1) : COSTS_N_INSNS (2);
8960 case PLUS:
8961 case MINUS:
8962 /* Thumb-1 needs two instructions to implement shiftadd/shiftsub0/shiftsub1
8963 as defined by RTL expansion, especially for the expansion of
8964 multiplication. */
8965 if ((GET_CODE (XEXP (x, 0)) == MULT
8966 && power_of_two_operand (XEXP (XEXP (x,0),1), SImode))
8967 || (GET_CODE (XEXP (x, 1)) == MULT
8968 && power_of_two_operand (XEXP (XEXP (x, 1), 1), SImode)))
8969 return COSTS_N_INSNS (2);
8970 /* Fall through. */
8971 case COMPARE:
8972 case NEG:
8973 case NOT:
8974 return COSTS_N_INSNS (1);
8976 case MULT:
8977 if (CONST_INT_P (XEXP (x, 1)))
8979 /* The Thumb-1 mul instruction can't operate on a constant. We must load it
8980 into a register first. */
8981 int const_size = thumb1_size_rtx_costs (XEXP (x, 1), CONST_INT, SET);
8982 /* For targets which have a very small and high-latency multiply
8983 unit, we prefer to synthesize the multiplication with up to 5 instructions,
8984 giving a good balance between size and performance. */
8985 if (arm_arch6m && arm_m_profile_small_mul)
8986 return COSTS_N_INSNS (5);
8987 else
8988 return COSTS_N_INSNS (1) + const_size;
8990 return COSTS_N_INSNS (1);
8992 case SET:
8993 /* A SET doesn't have a mode, so let's look at the SET_DEST to get
8994 the mode. */
8995 words = ARM_NUM_INTS (GET_MODE_SIZE (GET_MODE (SET_DEST (x))));
8996 cost = COSTS_N_INSNS (words);
8997 if (satisfies_constraint_J (SET_SRC (x))
8998 || satisfies_constraint_K (SET_SRC (x))
8999 /* Too big an immediate for a 2-byte mov, using MOVT. */
9000 || (CONST_INT_P (SET_SRC (x))
9001 && UINTVAL (SET_SRC (x)) >= 256
9002 && TARGET_HAVE_MOVT
9003 && satisfies_constraint_j (SET_SRC (x)))
9004 /* thumb1_movdi_insn. */
9005 || ((words > 1) && MEM_P (SET_SRC (x))))
9006 cost += COSTS_N_INSNS (1);
9007 return cost;
9009 case CONST_INT:
9010 if (outer == SET)
9012 if (UINTVAL (x) < 256)
9013 return COSTS_N_INSNS (1);
9014 /* movw is 4 bytes long. */
9015 if (TARGET_HAVE_MOVT && !(INTVAL (x) & 0xffff0000))
9016 return COSTS_N_INSNS (2);
9017 /* See split "TARGET_THUMB1 && satisfies_constraint_J". */
9018 if (INTVAL (x) >= -255 && INTVAL (x) <= -1)
9019 return COSTS_N_INSNS (2);
9020 /* See split "TARGET_THUMB1 && satisfies_constraint_K". */
9021 if (thumb_shiftable_const (INTVAL (x)))
9022 return COSTS_N_INSNS (2);
9023 return COSTS_N_INSNS (3);
9025 else if ((outer == PLUS || outer == COMPARE)
9026 && INTVAL (x) < 256 && INTVAL (x) > -256)
9027 return 0;
9028 else if ((outer == IOR || outer == XOR || outer == AND)
9029 && INTVAL (x) < 256 && INTVAL (x) >= -256)
9030 return COSTS_N_INSNS (1);
9031 else if (outer == AND)
9033 int i;
9034 /* This duplicates the tests in the andsi3 expander. */
9035 for (i = 9; i <= 31; i++)
9036 if ((HOST_WIDE_INT_1 << i) - 1 == INTVAL (x)
9037 || (HOST_WIDE_INT_1 << i) - 1 == ~INTVAL (x))
9038 return COSTS_N_INSNS (2);
9040 else if (outer == ASHIFT || outer == ASHIFTRT
9041 || outer == LSHIFTRT)
9042 return 0;
9043 return COSTS_N_INSNS (2);
9045 case CONST:
9046 case CONST_DOUBLE:
9047 case LABEL_REF:
9048 case SYMBOL_REF:
9049 return COSTS_N_INSNS (3);
9051 case UDIV:
9052 case UMOD:
9053 case DIV:
9054 case MOD:
9055 return 100;
9057 case TRUNCATE:
9058 return 99;
9060 case AND:
9061 case XOR:
9062 case IOR:
9063 return COSTS_N_INSNS (1);
9065 case MEM:
9066 return (COSTS_N_INSNS (1)
9067 + COSTS_N_INSNS (1)
9068 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
9069 + ((GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
9070 ? COSTS_N_INSNS (1) : 0));
9072 case IF_THEN_ELSE:
9073 /* XXX a guess. */
9074 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
9075 return 14;
9076 return 2;
9078 case ZERO_EXTEND:
9079 /* XXX still guessing. */
9080 switch (GET_MODE (XEXP (x, 0)))
9082 case E_QImode:
9083 return (1 + (mode == DImode ? 4 : 0)
9084 + (MEM_P (XEXP (x, 0)) ? 10 : 0));
9086 case E_HImode:
9087 return (4 + (mode == DImode ? 4 : 0)
9088 + (MEM_P (XEXP (x, 0)) ? 10 : 0));
9090 case E_SImode:
9091 return (1 + (MEM_P (XEXP (x, 0)) ? 10 : 0));
9093 default:
9094 return 99;
9097 default:
9098 return 99;
9102 /* Helper function for arm_rtx_costs. If the operand is a valid shift
9103 operand, then return the operand that is being shifted. If the shift
9104 is not by a constant, then set SHIFT_REG to point to the operand.
9105 Return NULL if OP is not a shifter operand. */
9106 static rtx
9107 shifter_op_p (rtx op, rtx *shift_reg)
9109 enum rtx_code code = GET_CODE (op);
9111 if (code == MULT && CONST_INT_P (XEXP (op, 1))
9112 && exact_log2 (INTVAL (XEXP (op, 1))) > 0)
9113 return XEXP (op, 0);
9114 else if (code == ROTATE && CONST_INT_P (XEXP (op, 1)))
9115 return XEXP (op, 0);
9116 else if (code == ROTATERT || code == ASHIFT || code == LSHIFTRT
9117 || code == ASHIFTRT)
9119 if (!CONST_INT_P (XEXP (op, 1)))
9120 *shift_reg = XEXP (op, 1);
9121 return XEXP (op, 0);
9124 return NULL;
9127 static bool
9128 arm_unspec_cost (rtx x, enum rtx_code /* outer_code */, bool speed_p, int *cost)
9130 const struct cpu_cost_table *extra_cost = current_tune->insn_extra_cost;
9131 rtx_code code = GET_CODE (x);
9132 gcc_assert (code == UNSPEC || code == UNSPEC_VOLATILE);
9134 switch (XINT (x, 1))
9136 case UNSPEC_UNALIGNED_LOAD:
9137 /* We can only do unaligned loads into the integer unit, and we can't
9138 use LDM or LDRD. */
9139 *cost = COSTS_N_INSNS (ARM_NUM_REGS (GET_MODE (x)));
9140 if (speed_p)
9141 *cost += (ARM_NUM_REGS (GET_MODE (x)) * extra_cost->ldst.load
9142 + extra_cost->ldst.load_unaligned);
9144 #ifdef NOT_YET
9145 *cost += arm_address_cost (XEXP (XVECEXP (x, 0, 0), 0), GET_MODE (x),
9146 ADDR_SPACE_GENERIC, speed_p);
9147 #endif
9148 return true;
9150 case UNSPEC_UNALIGNED_STORE:
9151 *cost = COSTS_N_INSNS (ARM_NUM_REGS (GET_MODE (x)));
9152 if (speed_p)
9153 *cost += (ARM_NUM_REGS (GET_MODE (x)) * extra_cost->ldst.store
9154 + extra_cost->ldst.store_unaligned);
9156 *cost += rtx_cost (XVECEXP (x, 0, 0), VOIDmode, UNSPEC, 0, speed_p);
9157 #ifdef NOT_YET
9158 *cost += arm_address_cost (XEXP (XVECEXP (x, 0, 0), 0), GET_MODE (x),
9159 ADDR_SPACE_GENERIC, speed_p);
9160 #endif
9161 return true;
9163 case UNSPEC_VRINTZ:
9164 case UNSPEC_VRINTP:
9165 case UNSPEC_VRINTM:
9166 case UNSPEC_VRINTR:
9167 case UNSPEC_VRINTX:
9168 case UNSPEC_VRINTA:
9169 if (speed_p)
9170 *cost += extra_cost->fp[GET_MODE (x) == DFmode].roundint;
9172 return true;
9173 default:
9174 *cost = COSTS_N_INSNS (2);
9175 break;
9177 return true;
9180 /* Cost of a libcall. We assume one insn per argument, an amount for the
9181 call (one insn for -Os) and then one for processing the result. */
9182 #define LIBCALL_COST(N) COSTS_N_INSNS (N + (speed_p ? 18 : 2))
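/* For example, LIBCALL_COST (2) expands to COSTS_N_INSNS (20) when
   optimizing for speed and to COSTS_N_INSNS (4) when optimizing for size.  */

/* Cost a narrow (QImode/HImode) arithmetic operation OP whose operand IDX
   is a left-shift form accepted by arm_rtx_shift_left_p; if it does not
   match, execution falls through to the code after the macro invocation.  */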
9184 #define HANDLE_NARROW_SHIFT_ARITH(OP, IDX) \
9185 do \
9187 shift_op = shifter_op_p (XEXP (x, IDX), &shift_reg); \
9188 if (shift_op != NULL \
9189 && arm_rtx_shift_left_p (XEXP (x, IDX))) \
9191 if (shift_reg) \
9193 if (speed_p) \
9194 *cost += extra_cost->alu.arith_shift_reg; \
9195 *cost += rtx_cost (shift_reg, GET_MODE (shift_reg), \
9196 ASHIFT, 1, speed_p); \
9198 else if (speed_p) \
9199 *cost += extra_cost->alu.arith_shift; \
9201 *cost += (rtx_cost (shift_op, GET_MODE (shift_op), \
9202 ASHIFT, 0, speed_p) \
9203 + rtx_cost (XEXP (x, 1 - IDX), \
9204 GET_MODE (shift_op), \
9205 OP, 1, speed_p)); \
9206 return true; \
9209 while (0);
9211 /* RTX costs. Make an estimate of the cost of executing the operation
9212    X, which is contained within an operation with code OUTER_CODE.
9213 SPEED_P indicates whether the cost desired is the performance cost,
9214 or the size cost. The estimate is stored in COST and the return
9215 value is TRUE if the cost calculation is final, or FALSE if the
9216 caller should recurse through the operands of X to add additional
9217 costs.
9219 We currently make no attempt to model the size savings of Thumb-2
9220 16-bit instructions. At the normal points in compilation where
9221 this code is called we have no measure of whether the condition
9222 flags are live or not, and thus no realistic way to determine what
9223 the size will eventually be. */
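/* For example, the SImode PLUS case below adds extra_cost->alu.arith (when
   costing for speed) and returns FALSE so that the generic rtx_cost code
   recurses over the two operands, whereas cases that cost their operands
   explicitly via rtx_cost return TRUE.  */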
9224 static bool
9225 arm_rtx_costs_internal (rtx x, enum rtx_code code, enum rtx_code outer_code,
9226 const struct cpu_cost_table *extra_cost,
9227 int *cost, bool speed_p)
9229 machine_mode mode = GET_MODE (x);
9231 *cost = COSTS_N_INSNS (1);
9233 if (TARGET_THUMB1)
9235 if (speed_p)
9236 *cost = thumb1_rtx_costs (x, code, outer_code);
9237 else
9238 *cost = thumb1_size_rtx_costs (x, code, outer_code);
9239 return true;
9242 switch (code)
9244 case SET:
9245 *cost = 0;
9246 /* SET RTXs don't have a mode so we get it from the destination. */
9247 mode = GET_MODE (SET_DEST (x));
9249 if (REG_P (SET_SRC (x))
9250 && REG_P (SET_DEST (x)))
9252 /* Assume that most copies can be done with a single insn,
9253 unless we don't have HW FP, in which case everything
9254 larger than word mode will require two insns. */
9255 *cost = COSTS_N_INSNS (((!TARGET_HARD_FLOAT
9256 && GET_MODE_SIZE (mode) > 4)
9257 || mode == DImode)
9258 ? 2 : 1);
9259 /* Conditional register moves can be encoded
9260 in 16 bits in Thumb mode. */
9261 if (!speed_p && TARGET_THUMB && outer_code == COND_EXEC)
9262 *cost >>= 1;
9264 return true;
9267 if (CONST_INT_P (SET_SRC (x)))
9269 /* Handle CONST_INT here, since the value doesn't have a mode
9270 and we would otherwise be unable to work out the true cost. */
9271 *cost = rtx_cost (SET_DEST (x), GET_MODE (SET_DEST (x)), SET,
9272 0, speed_p);
9273 outer_code = SET;
9274 /* Slightly lower the cost of setting a core reg to a constant.
9275 This helps break up chains and allows for better scheduling. */
9276 if (REG_P (SET_DEST (x))
9277 && REGNO (SET_DEST (x)) <= LR_REGNUM)
9278 *cost -= 1;
9279 x = SET_SRC (x);
9280 /* Immediate moves with an immediate in the range [0, 255] can be
9281 encoded in 16 bits in Thumb mode. */
9282 if (!speed_p && TARGET_THUMB && GET_MODE (x) == SImode
9283	      && INTVAL (x) >= 0 && INTVAL (x) <= 255)
9284 *cost >>= 1;
9285 goto const_int_cost;
9288 return false;
9290 case MEM:
9291       /* A memory access costs one insn if the mode is small or the address is
9292	 a single register; otherwise it costs one insn per word.  */
9293 if (REG_P (XEXP (x, 0)))
9294 *cost = COSTS_N_INSNS (1);
9295 else if (flag_pic
9296 && GET_CODE (XEXP (x, 0)) == PLUS
9297 && will_be_in_index_register (XEXP (XEXP (x, 0), 1)))
9298 /* This will be split into two instructions.
9299 See arm.md:calculate_pic_address. */
9300 *cost = COSTS_N_INSNS (2);
9301 else
9302 *cost = COSTS_N_INSNS (ARM_NUM_REGS (mode));
9304 /* For speed optimizations, add the costs of the address and
9305 accessing memory. */
9306 if (speed_p)
9307 #ifdef NOT_YET
9308 *cost += (extra_cost->ldst.load
9309 + arm_address_cost (XEXP (x, 0), mode,
9310 ADDR_SPACE_GENERIC, speed_p));
9311 #else
9312 *cost += extra_cost->ldst.load;
9313 #endif
9314 return true;
9316 case PARALLEL:
9318 /* Calculations of LDM costs are complex. We assume an initial cost
9319	 (ldm_1st) which covers loading up to ldm_regs_per_insn_1st
9320	 registers; each additional group of ldm_regs_per_insn_subsequent
9321	 registers then costs one more insn.  The
9322 formula for N regs is thus:
9324 ldm_1st + COSTS_N_INSNS ((max (N - ldm_regs_per_insn_1st, 0)
9325 + ldm_regs_per_insn_subsequent - 1)
9326 / ldm_regs_per_insn_subsequent).
9328 Additional costs may also be added for addressing. A similar
9329 formula is used for STM. */
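	/* A worked example, with hypothetical tuning numbers: if
	   ldm_regs_per_insn_1st is 2 and ldm_regs_per_insn_subsequent is 1,
	   a 5-register LDM costs
	   ldm_1st + COSTS_N_INSNS ((5 - 2 + 1 - 1) / 1), i.e. three insns
	   on top of ldm_1st.  */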
9331 bool is_ldm = load_multiple_operation (x, SImode);
9332 bool is_stm = store_multiple_operation (x, SImode);
9334 if (is_ldm || is_stm)
9336 if (speed_p)
9338 HOST_WIDE_INT nregs = XVECLEN (x, 0);
9339 HOST_WIDE_INT regs_per_insn_1st = is_ldm
9340 ? extra_cost->ldst.ldm_regs_per_insn_1st
9341 : extra_cost->ldst.stm_regs_per_insn_1st;
9342 HOST_WIDE_INT regs_per_insn_sub = is_ldm
9343 ? extra_cost->ldst.ldm_regs_per_insn_subsequent
9344 : extra_cost->ldst.stm_regs_per_insn_subsequent;
9346 *cost += regs_per_insn_1st
9347 + COSTS_N_INSNS (((MAX (nregs - regs_per_insn_1st, 0))
9348 + regs_per_insn_sub - 1)
9349 / regs_per_insn_sub);
9350 return true;
9354 return false;
9356 case DIV:
9357 case UDIV:
9358 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9359 && (mode == SFmode || !TARGET_VFP_SINGLE))
9360 *cost += COSTS_N_INSNS (speed_p
9361 ? extra_cost->fp[mode != SFmode].div : 0);
9362 else if (mode == SImode && TARGET_IDIV)
9363 *cost += COSTS_N_INSNS (speed_p ? extra_cost->mult[0].idiv : 0);
9364 else
9365 *cost = LIBCALL_COST (2);
9367 /* Make the cost of sdiv more expensive so when both sdiv and udiv are
9368	 possible, udiv is preferred.  */
9369 *cost += (code == DIV ? COSTS_N_INSNS (1) : 0);
9370 return false; /* All arguments must be in registers. */
9372 case MOD:
9373 /* MOD by a power of 2 can be expanded as:
9374 rsbs r1, r0, #0
9375 and r0, r0, #(n - 1)
9376 and r1, r1, #(n - 1)
9377 rsbpl r0, r1, #0. */
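      /* That expansion is four instructions, matching the base cost of one
	 insn plus the COSTS_N_INSNS (3) added below.  */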
9378 if (CONST_INT_P (XEXP (x, 1))
9379 && exact_log2 (INTVAL (XEXP (x, 1))) > 0
9380 && mode == SImode)
9382 *cost += COSTS_N_INSNS (3);
9384 if (speed_p)
9385 *cost += 2 * extra_cost->alu.logical
9386 + extra_cost->alu.arith;
9387 return true;
9390 /* Fall-through. */
9391 case UMOD:
9392 /* Make the cost of sdiv more expensive so when both sdiv and udiv are
9393	 possible, udiv is preferred.  */
9394 *cost = LIBCALL_COST (2) + (code == MOD ? COSTS_N_INSNS (1) : 0);
9395 return false; /* All arguments must be in registers. */
9397 case ROTATE:
9398 if (mode == SImode && REG_P (XEXP (x, 1)))
9400 *cost += (COSTS_N_INSNS (1)
9401 + rtx_cost (XEXP (x, 0), mode, code, 0, speed_p));
9402 if (speed_p)
9403 *cost += extra_cost->alu.shift_reg;
9404 return true;
9406 /* Fall through */
9407 case ROTATERT:
9408 case ASHIFT:
9409 case LSHIFTRT:
9410 case ASHIFTRT:
9411 if (mode == DImode && CONST_INT_P (XEXP (x, 1)))
9413 *cost += (COSTS_N_INSNS (2)
9414 + rtx_cost (XEXP (x, 0), mode, code, 0, speed_p));
9415 if (speed_p)
9416 *cost += 2 * extra_cost->alu.shift;
9417 return true;
9419 else if (mode == SImode)
9421 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
9422 /* Slightly disparage register shifts at -Os, but not by much. */
9423 if (!CONST_INT_P (XEXP (x, 1)))
9424 *cost += (speed_p ? extra_cost->alu.shift_reg : 1
9425 + rtx_cost (XEXP (x, 1), mode, code, 1, speed_p));
9426 return true;
9428 else if (GET_MODE_CLASS (mode) == MODE_INT
9429 && GET_MODE_SIZE (mode) < 4)
9431 if (code == ASHIFT)
9433 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
9434 /* Slightly disparage register shifts at -Os, but not by
9435 much. */
9436 if (!CONST_INT_P (XEXP (x, 1)))
9437 *cost += (speed_p ? extra_cost->alu.shift_reg : 1
9438 + rtx_cost (XEXP (x, 1), mode, code, 1, speed_p));
9440 else if (code == LSHIFTRT || code == ASHIFTRT)
9442 if (arm_arch_thumb2 && CONST_INT_P (XEXP (x, 1)))
9444 /* Can use SBFX/UBFX. */
9445 if (speed_p)
9446 *cost += extra_cost->alu.bfx;
9447 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
9449 else
9451 *cost += COSTS_N_INSNS (1);
9452 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
9453 if (speed_p)
9455 if (CONST_INT_P (XEXP (x, 1)))
9456 *cost += 2 * extra_cost->alu.shift;
9457 else
9458 *cost += (extra_cost->alu.shift
9459 + extra_cost->alu.shift_reg);
9461 else
9462 /* Slightly disparage register shifts. */
9463 *cost += !CONST_INT_P (XEXP (x, 1));
9466 else /* Rotates. */
9468 *cost = COSTS_N_INSNS (2 + !CONST_INT_P (XEXP (x, 1)));
9469 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
9470 if (speed_p)
9472 if (CONST_INT_P (XEXP (x, 1)))
9473 *cost += (2 * extra_cost->alu.shift
9474 + extra_cost->alu.log_shift);
9475 else
9476 *cost += (extra_cost->alu.shift
9477 + extra_cost->alu.shift_reg
9478 + extra_cost->alu.log_shift_reg);
9481 return true;
9484 *cost = LIBCALL_COST (2);
9485 return false;
9487 case BSWAP:
9488 if (arm_arch6)
9490 if (mode == SImode)
9492 if (speed_p)
9493 *cost += extra_cost->alu.rev;
9495 return false;
9498 else
9500 /* No rev instruction available. Look at arm_legacy_rev
9501 and thumb_legacy_rev for the form of RTL used then. */
9502 if (TARGET_THUMB)
9504 *cost += COSTS_N_INSNS (9);
9506 if (speed_p)
9508 *cost += 6 * extra_cost->alu.shift;
9509 *cost += 3 * extra_cost->alu.logical;
9512 else
9514 *cost += COSTS_N_INSNS (4);
9516 if (speed_p)
9518 *cost += 2 * extra_cost->alu.shift;
9519 *cost += extra_cost->alu.arith_shift;
9520 *cost += 2 * extra_cost->alu.logical;
9523 return true;
9525 return false;
9527 case MINUS:
9528 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9529 && (mode == SFmode || !TARGET_VFP_SINGLE))
9531 if (GET_CODE (XEXP (x, 0)) == MULT
9532 || GET_CODE (XEXP (x, 1)) == MULT)
9534 rtx mul_op0, mul_op1, sub_op;
9536 if (speed_p)
9537 *cost += extra_cost->fp[mode != SFmode].mult_addsub;
9539 if (GET_CODE (XEXP (x, 0)) == MULT)
9541 mul_op0 = XEXP (XEXP (x, 0), 0);
9542 mul_op1 = XEXP (XEXP (x, 0), 1);
9543 sub_op = XEXP (x, 1);
9545 else
9547 mul_op0 = XEXP (XEXP (x, 1), 0);
9548 mul_op1 = XEXP (XEXP (x, 1), 1);
9549 sub_op = XEXP (x, 0);
9552 /* The first operand of the multiply may be optionally
9553 negated. */
9554 if (GET_CODE (mul_op0) == NEG)
9555 mul_op0 = XEXP (mul_op0, 0);
9557 *cost += (rtx_cost (mul_op0, mode, code, 0, speed_p)
9558 + rtx_cost (mul_op1, mode, code, 0, speed_p)
9559 + rtx_cost (sub_op, mode, code, 0, speed_p));
9561 return true;
9564 if (speed_p)
9565 *cost += extra_cost->fp[mode != SFmode].addsub;
9566 return false;
9569 if (mode == SImode)
9571 rtx shift_by_reg = NULL;
9572 rtx shift_op;
9573 rtx non_shift_op;
9575 shift_op = shifter_op_p (XEXP (x, 0), &shift_by_reg);
9576 if (shift_op == NULL)
9578 shift_op = shifter_op_p (XEXP (x, 1), &shift_by_reg);
9579 non_shift_op = XEXP (x, 0);
9581 else
9582 non_shift_op = XEXP (x, 1);
9584 if (shift_op != NULL)
9586 if (shift_by_reg != NULL)
9588 if (speed_p)
9589 *cost += extra_cost->alu.arith_shift_reg;
9590 *cost += rtx_cost (shift_by_reg, mode, code, 0, speed_p);
9592 else if (speed_p)
9593 *cost += extra_cost->alu.arith_shift;
9595 *cost += rtx_cost (shift_op, mode, code, 0, speed_p);
9596 *cost += rtx_cost (non_shift_op, mode, code, 0, speed_p);
9597 return true;
9600 if (arm_arch_thumb2
9601 && GET_CODE (XEXP (x, 1)) == MULT)
9603 /* MLS. */
9604 if (speed_p)
9605 *cost += extra_cost->mult[0].add;
9606 *cost += rtx_cost (XEXP (x, 0), mode, MINUS, 0, speed_p);
9607 *cost += rtx_cost (XEXP (XEXP (x, 1), 0), mode, MULT, 0, speed_p);
9608 *cost += rtx_cost (XEXP (XEXP (x, 1), 1), mode, MULT, 1, speed_p);
9609 return true;
9612 if (CONST_INT_P (XEXP (x, 0)))
9614 int insns = arm_gen_constant (MINUS, SImode, NULL_RTX,
9615 INTVAL (XEXP (x, 0)), NULL_RTX,
9616 NULL_RTX, 1, 0);
9617 *cost = COSTS_N_INSNS (insns);
9618 if (speed_p)
9619 *cost += insns * extra_cost->alu.arith;
9620 *cost += rtx_cost (XEXP (x, 1), mode, code, 1, speed_p);
9621 return true;
9623 else if (speed_p)
9624 *cost += extra_cost->alu.arith;
9626 return false;
9629 if (GET_MODE_CLASS (mode) == MODE_INT
9630 && GET_MODE_SIZE (mode) < 4)
9632 rtx shift_op, shift_reg;
9633 shift_reg = NULL;
9635 /* We check both sides of the MINUS for shifter operands since,
9636 unlike PLUS, it's not commutative. */
9638 HANDLE_NARROW_SHIFT_ARITH (MINUS, 0)
9639 HANDLE_NARROW_SHIFT_ARITH (MINUS, 1)
9641 /* Slightly disparage, as we might need to widen the result. */
9642 *cost += 1;
9643 if (speed_p)
9644 *cost += extra_cost->alu.arith;
9646 if (CONST_INT_P (XEXP (x, 0)))
9648 *cost += rtx_cost (XEXP (x, 1), mode, code, 1, speed_p);
9649 return true;
9652 return false;
9655 if (mode == DImode)
9657 *cost += COSTS_N_INSNS (1);
9659 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND)
9661 rtx op1 = XEXP (x, 1);
9663 if (speed_p)
9664 *cost += 2 * extra_cost->alu.arith;
9666 if (GET_CODE (op1) == ZERO_EXTEND)
9667 *cost += rtx_cost (XEXP (op1, 0), VOIDmode, ZERO_EXTEND,
9668 0, speed_p);
9669 else
9670 *cost += rtx_cost (op1, mode, MINUS, 1, speed_p);
9671 *cost += rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode, ZERO_EXTEND,
9672 0, speed_p);
9673 return true;
9675 else if (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
9677 if (speed_p)
9678 *cost += extra_cost->alu.arith + extra_cost->alu.arith_shift;
9679 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode, SIGN_EXTEND,
9680 0, speed_p)
9681 + rtx_cost (XEXP (x, 1), mode, MINUS, 1, speed_p));
9682 return true;
9684 else if (GET_CODE (XEXP (x, 1)) == ZERO_EXTEND
9685 || GET_CODE (XEXP (x, 1)) == SIGN_EXTEND)
9687 if (speed_p)
9688 *cost += (extra_cost->alu.arith
9689 + (GET_CODE (XEXP (x, 1)) == ZERO_EXTEND
9690 ? extra_cost->alu.arith
9691 : extra_cost->alu.arith_shift));
9692 *cost += (rtx_cost (XEXP (x, 0), mode, MINUS, 0, speed_p)
9693 + rtx_cost (XEXP (XEXP (x, 1), 0), VOIDmode,
9694 GET_CODE (XEXP (x, 1)), 0, speed_p));
9695 return true;
9698 if (speed_p)
9699 *cost += 2 * extra_cost->alu.arith;
9700 return false;
9703 /* Vector mode? */
9705 *cost = LIBCALL_COST (2);
9706 return false;
9708 case PLUS:
9709 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9710 && (mode == SFmode || !TARGET_VFP_SINGLE))
9712 if (GET_CODE (XEXP (x, 0)) == MULT)
9714 rtx mul_op0, mul_op1, add_op;
9716 if (speed_p)
9717 *cost += extra_cost->fp[mode != SFmode].mult_addsub;
9719 mul_op0 = XEXP (XEXP (x, 0), 0);
9720 mul_op1 = XEXP (XEXP (x, 0), 1);
9721 add_op = XEXP (x, 1);
9723 *cost += (rtx_cost (mul_op0, mode, code, 0, speed_p)
9724 + rtx_cost (mul_op1, mode, code, 0, speed_p)
9725 + rtx_cost (add_op, mode, code, 0, speed_p));
9727 return true;
9730 if (speed_p)
9731 *cost += extra_cost->fp[mode != SFmode].addsub;
9732 return false;
9734 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
9736 *cost = LIBCALL_COST (2);
9737 return false;
9740 /* Narrow modes can be synthesized in SImode, but the range
9741 of useful sub-operations is limited. Check for shift operations
9742 on one of the operands. Only left shifts can be used in the
9743 narrow modes. */
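      /* For example, (plus:HI (ashift:HI (reg) (const_int 2)) (reg)) can be
	 costed as a single add with a shifted operand once widened to SImode,
	 which is what HANDLE_NARROW_SHIFT_ARITH looks for below.  */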
9744 if (GET_MODE_CLASS (mode) == MODE_INT
9745 && GET_MODE_SIZE (mode) < 4)
9747 rtx shift_op, shift_reg;
9748 shift_reg = NULL;
9750 HANDLE_NARROW_SHIFT_ARITH (PLUS, 0)
9752 if (CONST_INT_P (XEXP (x, 1)))
9754 int insns = arm_gen_constant (PLUS, SImode, NULL_RTX,
9755 INTVAL (XEXP (x, 1)), NULL_RTX,
9756 NULL_RTX, 1, 0);
9757 *cost = COSTS_N_INSNS (insns);
9758 if (speed_p)
9759 *cost += insns * extra_cost->alu.arith;
9760 /* Slightly penalize a narrow operation as the result may
9761 need widening. */
9762 *cost += 1 + rtx_cost (XEXP (x, 0), mode, PLUS, 0, speed_p);
9763 return true;
9766 /* Slightly penalize a narrow operation as the result may
9767 need widening. */
9768 *cost += 1;
9769 if (speed_p)
9770 *cost += extra_cost->alu.arith;
9772 return false;
9775 if (mode == SImode)
9777 rtx shift_op, shift_reg;
9779 if (TARGET_INT_SIMD
9780 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
9781 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
9783 /* UXTA[BH] or SXTA[BH]. */
9784 if (speed_p)
9785 *cost += extra_cost->alu.extend_arith;
9786 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode, ZERO_EXTEND,
9787 0, speed_p)
9788 + rtx_cost (XEXP (x, 1), mode, PLUS, 0, speed_p));
9789 return true;
9792 shift_reg = NULL;
9793 shift_op = shifter_op_p (XEXP (x, 0), &shift_reg);
9794 if (shift_op != NULL)
9796 if (shift_reg)
9798 if (speed_p)
9799 *cost += extra_cost->alu.arith_shift_reg;
9800 *cost += rtx_cost (shift_reg, mode, ASHIFT, 1, speed_p);
9802 else if (speed_p)
9803 *cost += extra_cost->alu.arith_shift;
9805 *cost += (rtx_cost (shift_op, mode, ASHIFT, 0, speed_p)
9806 + rtx_cost (XEXP (x, 1), mode, PLUS, 1, speed_p));
9807 return true;
9809 if (GET_CODE (XEXP (x, 0)) == MULT)
9811 rtx mul_op = XEXP (x, 0);
9813 if (TARGET_DSP_MULTIPLY
9814 && ((GET_CODE (XEXP (mul_op, 0)) == SIGN_EXTEND
9815 && (GET_CODE (XEXP (mul_op, 1)) == SIGN_EXTEND
9816 || (GET_CODE (XEXP (mul_op, 1)) == ASHIFTRT
9817 && CONST_INT_P (XEXP (XEXP (mul_op, 1), 1))
9818 && INTVAL (XEXP (XEXP (mul_op, 1), 1)) == 16)))
9819 || (GET_CODE (XEXP (mul_op, 0)) == ASHIFTRT
9820 && CONST_INT_P (XEXP (XEXP (mul_op, 0), 1))
9821 && INTVAL (XEXP (XEXP (mul_op, 0), 1)) == 16
9822 && (GET_CODE (XEXP (mul_op, 1)) == SIGN_EXTEND
9823 || (GET_CODE (XEXP (mul_op, 1)) == ASHIFTRT
9824 && CONST_INT_P (XEXP (XEXP (mul_op, 1), 1))
9825 && (INTVAL (XEXP (XEXP (mul_op, 1), 1))
9826 == 16))))))
9828 /* SMLA[BT][BT]. */
9829 if (speed_p)
9830 *cost += extra_cost->mult[0].extend_add;
9831 *cost += (rtx_cost (XEXP (XEXP (mul_op, 0), 0), mode,
9832 SIGN_EXTEND, 0, speed_p)
9833 + rtx_cost (XEXP (XEXP (mul_op, 1), 0), mode,
9834 SIGN_EXTEND, 0, speed_p)
9835 + rtx_cost (XEXP (x, 1), mode, PLUS, 1, speed_p));
9836 return true;
9839 if (speed_p)
9840 *cost += extra_cost->mult[0].add;
9841 *cost += (rtx_cost (XEXP (mul_op, 0), mode, MULT, 0, speed_p)
9842 + rtx_cost (XEXP (mul_op, 1), mode, MULT, 1, speed_p)
9843 + rtx_cost (XEXP (x, 1), mode, PLUS, 1, speed_p));
9844 return true;
9846 if (CONST_INT_P (XEXP (x, 1)))
9848 int insns = arm_gen_constant (PLUS, SImode, NULL_RTX,
9849 INTVAL (XEXP (x, 1)), NULL_RTX,
9850 NULL_RTX, 1, 0);
9851 *cost = COSTS_N_INSNS (insns);
9852 if (speed_p)
9853 *cost += insns * extra_cost->alu.arith;
9854 *cost += rtx_cost (XEXP (x, 0), mode, PLUS, 0, speed_p);
9855 return true;
9857 else if (speed_p)
9858 *cost += extra_cost->alu.arith;
9860 return false;
9863 if (mode == DImode)
9865 if (arm_arch3m
9866 && GET_CODE (XEXP (x, 0)) == MULT
9867 && ((GET_CODE (XEXP (XEXP (x, 0), 0)) == ZERO_EXTEND
9868 && GET_CODE (XEXP (XEXP (x, 0), 1)) == ZERO_EXTEND)
9869 || (GET_CODE (XEXP (XEXP (x, 0), 0)) == SIGN_EXTEND
9870 && GET_CODE (XEXP (XEXP (x, 0), 1)) == SIGN_EXTEND)))
9872 if (speed_p)
9873 *cost += extra_cost->mult[1].extend_add;
9874 *cost += (rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0), mode,
9875 ZERO_EXTEND, 0, speed_p)
9876 + rtx_cost (XEXP (XEXP (XEXP (x, 0), 1), 0), mode,
9877 ZERO_EXTEND, 0, speed_p)
9878 + rtx_cost (XEXP (x, 1), mode, PLUS, 1, speed_p));
9879 return true;
9882 *cost += COSTS_N_INSNS (1);
9884 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
9885 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
9887 if (speed_p)
9888 *cost += (extra_cost->alu.arith
9889 + (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
9890 ? extra_cost->alu.arith
9891 : extra_cost->alu.arith_shift));
9893 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode, ZERO_EXTEND,
9894 0, speed_p)
9895 + rtx_cost (XEXP (x, 1), mode, PLUS, 1, speed_p));
9896 return true;
9899 if (speed_p)
9900 *cost += 2 * extra_cost->alu.arith;
9901 return false;
9904 /* Vector mode? */
9905 *cost = LIBCALL_COST (2);
9906 return false;
9907 case IOR:
9908 if (mode == SImode && arm_arch6 && aarch_rev16_p (x))
9910 if (speed_p)
9911 *cost += extra_cost->alu.rev;
9913 return true;
9915 /* Fall through. */
9916 case AND: case XOR:
9917 if (mode == SImode)
9919 enum rtx_code subcode = GET_CODE (XEXP (x, 0));
9920 rtx op0 = XEXP (x, 0);
9921 rtx shift_op, shift_reg;
9923 if (subcode == NOT
9924 && (code == AND
9925 || (code == IOR && TARGET_THUMB2)))
9926 op0 = XEXP (op0, 0);
9928 shift_reg = NULL;
9929 shift_op = shifter_op_p (op0, &shift_reg);
9930 if (shift_op != NULL)
9932 if (shift_reg)
9934 if (speed_p)
9935 *cost += extra_cost->alu.log_shift_reg;
9936 *cost += rtx_cost (shift_reg, mode, ASHIFT, 1, speed_p);
9938 else if (speed_p)
9939 *cost += extra_cost->alu.log_shift;
9941 *cost += (rtx_cost (shift_op, mode, ASHIFT, 0, speed_p)
9942 + rtx_cost (XEXP (x, 1), mode, code, 1, speed_p));
9943 return true;
9946 if (CONST_INT_P (XEXP (x, 1)))
9948 int insns = arm_gen_constant (code, SImode, NULL_RTX,
9949 INTVAL (XEXP (x, 1)), NULL_RTX,
9950 NULL_RTX, 1, 0);
9952 *cost = COSTS_N_INSNS (insns);
9953 if (speed_p)
9954 *cost += insns * extra_cost->alu.logical;
9955 *cost += rtx_cost (op0, mode, code, 0, speed_p);
9956 return true;
9959 if (speed_p)
9960 *cost += extra_cost->alu.logical;
9961 *cost += (rtx_cost (op0, mode, code, 0, speed_p)
9962 + rtx_cost (XEXP (x, 1), mode, code, 1, speed_p));
9963 return true;
9966 if (mode == DImode)
9968 rtx op0 = XEXP (x, 0);
9969 enum rtx_code subcode = GET_CODE (op0);
9971 *cost += COSTS_N_INSNS (1);
9973 if (subcode == NOT
9974 && (code == AND
9975 || (code == IOR && TARGET_THUMB2)))
9976 op0 = XEXP (op0, 0);
9978 if (GET_CODE (op0) == ZERO_EXTEND)
9980 if (speed_p)
9981 *cost += 2 * extra_cost->alu.logical;
9983 *cost += (rtx_cost (XEXP (op0, 0), VOIDmode, ZERO_EXTEND,
9984 0, speed_p)
9985 + rtx_cost (XEXP (x, 1), mode, code, 0, speed_p));
9986 return true;
9988 else if (GET_CODE (op0) == SIGN_EXTEND)
9990 if (speed_p)
9991 *cost += extra_cost->alu.logical + extra_cost->alu.log_shift;
9993 *cost += (rtx_cost (XEXP (op0, 0), VOIDmode, SIGN_EXTEND,
9994 0, speed_p)
9995 + rtx_cost (XEXP (x, 1), mode, code, 0, speed_p));
9996 return true;
9999 if (speed_p)
10000 *cost += 2 * extra_cost->alu.logical;
10002 return true;
10004 /* Vector mode? */
10006 *cost = LIBCALL_COST (2);
10007 return false;
10009 case MULT:
10010 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10011 && (mode == SFmode || !TARGET_VFP_SINGLE))
10013 rtx op0 = XEXP (x, 0);
10015 if (GET_CODE (op0) == NEG && !flag_rounding_math)
10016 op0 = XEXP (op0, 0);
10018 if (speed_p)
10019 *cost += extra_cost->fp[mode != SFmode].mult;
10021 *cost += (rtx_cost (op0, mode, MULT, 0, speed_p)
10022 + rtx_cost (XEXP (x, 1), mode, MULT, 1, speed_p));
10023 return true;
10025 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
10027 *cost = LIBCALL_COST (2);
10028 return false;
10031 if (mode == SImode)
10033 if (TARGET_DSP_MULTIPLY
10034 && ((GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
10035 && (GET_CODE (XEXP (x, 1)) == SIGN_EXTEND
10036 || (GET_CODE (XEXP (x, 1)) == ASHIFTRT
10037 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
10038 && INTVAL (XEXP (XEXP (x, 1), 1)) == 16)))
10039 || (GET_CODE (XEXP (x, 0)) == ASHIFTRT
10040 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
10041 && INTVAL (XEXP (XEXP (x, 0), 1)) == 16
10042 && (GET_CODE (XEXP (x, 1)) == SIGN_EXTEND
10043 || (GET_CODE (XEXP (x, 1)) == ASHIFTRT
10044 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
10045 && (INTVAL (XEXP (XEXP (x, 1), 1))
10046 == 16))))))
10048 /* SMUL[TB][TB]. */
10049 if (speed_p)
10050 *cost += extra_cost->mult[0].extend;
10051 *cost += rtx_cost (XEXP (XEXP (x, 0), 0), mode,
10052 SIGN_EXTEND, 0, speed_p);
10053 *cost += rtx_cost (XEXP (XEXP (x, 1), 0), mode,
10054 SIGN_EXTEND, 1, speed_p);
10055 return true;
10057 if (speed_p)
10058 *cost += extra_cost->mult[0].simple;
10059 return false;
10062 if (mode == DImode)
10064 if (arm_arch3m
10065 && ((GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
10066 && GET_CODE (XEXP (x, 1)) == ZERO_EXTEND)
10067 || (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
10068 && GET_CODE (XEXP (x, 1)) == SIGN_EXTEND)))
10070 if (speed_p)
10071 *cost += extra_cost->mult[1].extend;
10072 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode,
10073 ZERO_EXTEND, 0, speed_p)
10074 + rtx_cost (XEXP (XEXP (x, 1), 0), VOIDmode,
10075 ZERO_EXTEND, 0, speed_p));
10076 return true;
10079 *cost = LIBCALL_COST (2);
10080 return false;
10083 /* Vector mode? */
10084 *cost = LIBCALL_COST (2);
10085 return false;
10087 case NEG:
10088 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10089 && (mode == SFmode || !TARGET_VFP_SINGLE))
10091 if (GET_CODE (XEXP (x, 0)) == MULT)
10093 /* VNMUL. */
10094 *cost = rtx_cost (XEXP (x, 0), mode, NEG, 0, speed_p);
10095 return true;
10098 if (speed_p)
10099 *cost += extra_cost->fp[mode != SFmode].neg;
10101 return false;
10103 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
10105 *cost = LIBCALL_COST (1);
10106 return false;
10109 if (mode == SImode)
10111 if (GET_CODE (XEXP (x, 0)) == ABS)
10113 *cost += COSTS_N_INSNS (1);
10114 /* Assume the non-flag-changing variant. */
10115 if (speed_p)
10116 *cost += (extra_cost->alu.log_shift
10117 + extra_cost->alu.arith_shift);
10118 *cost += rtx_cost (XEXP (XEXP (x, 0), 0), mode, ABS, 0, speed_p);
10119 return true;
10122 if (GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMPARE
10123 || GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMM_COMPARE)
10125 *cost += COSTS_N_INSNS (1);
10126 /* No extra cost for MOV imm and MVN imm. */
10127 /* If the comparison op is using the flags, there's no further
10128 cost, otherwise we need to add the cost of the comparison. */
10129 if (!(REG_P (XEXP (XEXP (x, 0), 0))
10130 && REGNO (XEXP (XEXP (x, 0), 0)) == CC_REGNUM
10131 && XEXP (XEXP (x, 0), 1) == const0_rtx))
10133 mode = GET_MODE (XEXP (XEXP (x, 0), 0));
10134 *cost += (COSTS_N_INSNS (1)
10135 + rtx_cost (XEXP (XEXP (x, 0), 0), mode, COMPARE,
10136 0, speed_p)
10137 + rtx_cost (XEXP (XEXP (x, 0), 1), mode, COMPARE,
10138 1, speed_p));
10139 if (speed_p)
10140 *cost += extra_cost->alu.arith;
10142 return true;
10145 if (speed_p)
10146 *cost += extra_cost->alu.arith;
10147 return false;
10150 if (GET_MODE_CLASS (mode) == MODE_INT
10151 && GET_MODE_SIZE (mode) < 4)
10153 /* Slightly disparage, as we might need an extend operation. */
10154 *cost += 1;
10155 if (speed_p)
10156 *cost += extra_cost->alu.arith;
10157 return false;
10160 if (mode == DImode)
10162 *cost += COSTS_N_INSNS (1);
10163 if (speed_p)
10164 *cost += 2 * extra_cost->alu.arith;
10165 return false;
10168 /* Vector mode? */
10169 *cost = LIBCALL_COST (1);
10170 return false;
10172 case NOT:
10173 if (mode == SImode)
10175 rtx shift_op;
10176 rtx shift_reg = NULL;
10178 shift_op = shifter_op_p (XEXP (x, 0), &shift_reg);
10180 if (shift_op)
10182 if (shift_reg != NULL)
10184 if (speed_p)
10185 *cost += extra_cost->alu.log_shift_reg;
10186 *cost += rtx_cost (shift_reg, mode, ASHIFT, 1, speed_p);
10188 else if (speed_p)
10189 *cost += extra_cost->alu.log_shift;
10190 *cost += rtx_cost (shift_op, mode, ASHIFT, 0, speed_p);
10191 return true;
10194 if (speed_p)
10195 *cost += extra_cost->alu.logical;
10196 return false;
10198 if (mode == DImode)
10200 *cost += COSTS_N_INSNS (1);
10201 return false;
10204 /* Vector mode? */
10206 *cost += LIBCALL_COST (1);
10207 return false;
10209 case IF_THEN_ELSE:
10211 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
10213 *cost += COSTS_N_INSNS (3);
10214 return true;
10216 int op1cost = rtx_cost (XEXP (x, 1), mode, SET, 1, speed_p);
10217 int op2cost = rtx_cost (XEXP (x, 2), mode, SET, 1, speed_p);
10219 *cost = rtx_cost (XEXP (x, 0), mode, IF_THEN_ELSE, 0, speed_p);
10220       /* Assume that if one arm of the if_then_else is a register,
10221	 it will be tied to the result, eliminating the
10222	 conditional insn.  */
10223 if (REG_P (XEXP (x, 1)))
10224 *cost += op2cost;
10225 else if (REG_P (XEXP (x, 2)))
10226 *cost += op1cost;
10227 else
10229 if (speed_p)
10231 if (extra_cost->alu.non_exec_costs_exec)
10232 *cost += op1cost + op2cost + extra_cost->alu.non_exec;
10233 else
10234 *cost += MAX (op1cost, op2cost) + extra_cost->alu.non_exec;
10236 else
10237 *cost += op1cost + op2cost;
10240 return true;
10242 case COMPARE:
10243 if (cc_register (XEXP (x, 0), VOIDmode) && XEXP (x, 1) == const0_rtx)
10244 *cost = 0;
10245 else
10247 machine_mode op0mode;
10248 /* We'll mostly assume that the cost of a compare is the cost of the
10249 LHS. However, there are some notable exceptions. */
10251 /* Floating point compares are never done as side-effects. */
10252 op0mode = GET_MODE (XEXP (x, 0));
10253 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (op0mode) == MODE_FLOAT
10254 && (op0mode == SFmode || !TARGET_VFP_SINGLE))
10256 if (speed_p)
10257 *cost += extra_cost->fp[op0mode != SFmode].compare;
10259 if (XEXP (x, 1) == CONST0_RTX (op0mode))
10261 *cost += rtx_cost (XEXP (x, 0), op0mode, code, 0, speed_p);
10262 return true;
10265 return false;
10267 else if (GET_MODE_CLASS (op0mode) == MODE_FLOAT)
10269 *cost = LIBCALL_COST (2);
10270 return false;
10273 /* DImode compares normally take two insns. */
10274 if (op0mode == DImode)
10276 *cost += COSTS_N_INSNS (1);
10277 if (speed_p)
10278 *cost += 2 * extra_cost->alu.arith;
10279 return false;
10282 if (op0mode == SImode)
10284 rtx shift_op;
10285 rtx shift_reg;
10287 if (XEXP (x, 1) == const0_rtx
10288 && !(REG_P (XEXP (x, 0))
10289 || (GET_CODE (XEXP (x, 0)) == SUBREG
10290 && REG_P (SUBREG_REG (XEXP (x, 0))))))
10292 *cost = rtx_cost (XEXP (x, 0), op0mode, COMPARE, 0, speed_p);
10294 /* Multiply operations that set the flags are often
10295 significantly more expensive. */
10296 if (speed_p
10297 && GET_CODE (XEXP (x, 0)) == MULT
10298 && !power_of_two_operand (XEXP (XEXP (x, 0), 1), mode))
10299 *cost += extra_cost->mult[0].flag_setting;
10301 if (speed_p
10302 && GET_CODE (XEXP (x, 0)) == PLUS
10303 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
10304 && !power_of_two_operand (XEXP (XEXP (XEXP (x, 0),
10305 0), 1), mode))
10306 *cost += extra_cost->mult[0].flag_setting;
10307 return true;
10310 shift_reg = NULL;
10311 shift_op = shifter_op_p (XEXP (x, 0), &shift_reg);
10312 if (shift_op != NULL)
10314 if (shift_reg != NULL)
10316 *cost += rtx_cost (shift_reg, op0mode, ASHIFT,
10317 1, speed_p);
10318 if (speed_p)
10319 *cost += extra_cost->alu.arith_shift_reg;
10321 else if (speed_p)
10322 *cost += extra_cost->alu.arith_shift;
10323 *cost += rtx_cost (shift_op, op0mode, ASHIFT, 0, speed_p);
10324 *cost += rtx_cost (XEXP (x, 1), op0mode, COMPARE, 1, speed_p);
10325 return true;
10328 if (speed_p)
10329 *cost += extra_cost->alu.arith;
10330 if (CONST_INT_P (XEXP (x, 1))
10331 && const_ok_for_op (INTVAL (XEXP (x, 1)), COMPARE))
10333 *cost += rtx_cost (XEXP (x, 0), op0mode, COMPARE, 0, speed_p);
10334 return true;
10336 return false;
10339 /* Vector mode? */
10341 *cost = LIBCALL_COST (2);
10342 return false;
10344 return true;
10346 case EQ:
10347 case NE:
10348 case LT:
10349 case LE:
10350 case GT:
10351 case GE:
10352 case LTU:
10353 case LEU:
10354 case GEU:
10355 case GTU:
10356 case ORDERED:
10357 case UNORDERED:
10358 case UNEQ:
10359 case UNLE:
10360 case UNLT:
10361 case UNGE:
10362 case UNGT:
10363 case LTGT:
10364 if (outer_code == SET)
10366 /* Is it a store-flag operation? */
10367 if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM
10368 && XEXP (x, 1) == const0_rtx)
10370 /* Thumb also needs an IT insn. */
10371 *cost += COSTS_N_INSNS (TARGET_THUMB ? 2 : 1);
10372 return true;
10374 if (XEXP (x, 1) == const0_rtx)
10376 switch (code)
10378 case LT:
10379 /* LSR Rd, Rn, #31. */
10380 if (speed_p)
10381 *cost += extra_cost->alu.shift;
10382 break;
10384 case EQ:
10385 /* RSBS T1, Rn, #0
10386 ADC Rd, Rn, T1. */
10388 case NE:
10389 /* SUBS T1, Rn, #1
10390 SBC Rd, Rn, T1. */
10391 *cost += COSTS_N_INSNS (1);
10392 break;
10394 case LE:
10395 /* RSBS T1, Rn, Rn, LSR #31
10396 ADC Rd, Rn, T1. */
10397 *cost += COSTS_N_INSNS (1);
10398 if (speed_p)
10399 *cost += extra_cost->alu.arith_shift;
10400 break;
10402 case GT:
10403 /* RSB Rd, Rn, Rn, ASR #1
10404 LSR Rd, Rd, #31. */
10405 *cost += COSTS_N_INSNS (1);
10406 if (speed_p)
10407 *cost += (extra_cost->alu.arith_shift
10408 + extra_cost->alu.shift);
10409 break;
10411 case GE:
10412 /* ASR Rd, Rn, #31
10413 ADD Rd, Rn, #1. */
10414 *cost += COSTS_N_INSNS (1);
10415 if (speed_p)
10416 *cost += extra_cost->alu.shift;
10417 break;
10419 default:
10420 /* Remaining cases are either meaningless or would take
10421 three insns anyway. */
10422 *cost = COSTS_N_INSNS (3);
10423 break;
10425 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
10426 return true;
10428 else
10430 *cost += COSTS_N_INSNS (TARGET_THUMB ? 3 : 2);
10431 if (CONST_INT_P (XEXP (x, 1))
10432 && const_ok_for_op (INTVAL (XEXP (x, 1)), COMPARE))
10434 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
10435 return true;
10438 return false;
10441 /* Not directly inside a set. If it involves the condition code
10442 register it must be the condition for a branch, cond_exec or
10443 I_T_E operation. Since the comparison is performed elsewhere
10444 this is just the control part which has no additional
10445 cost. */
10446 else if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM
10447 && XEXP (x, 1) == const0_rtx)
10449 *cost = 0;
10450 return true;
10452 return false;
10454 case ABS:
10455 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10456 && (mode == SFmode || !TARGET_VFP_SINGLE))
10458 if (speed_p)
10459 *cost += extra_cost->fp[mode != SFmode].neg;
10461 return false;
10463 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
10465 *cost = LIBCALL_COST (1);
10466 return false;
10469 if (mode == SImode)
10471 if (speed_p)
10472 *cost += extra_cost->alu.log_shift + extra_cost->alu.arith_shift;
10473 return false;
10475 /* Vector mode? */
10476 *cost = LIBCALL_COST (1);
10477 return false;
10479 case SIGN_EXTEND:
10480 if ((arm_arch4 || GET_MODE (XEXP (x, 0)) == SImode)
10481 && MEM_P (XEXP (x, 0)))
10483 if (mode == DImode)
10484 *cost += COSTS_N_INSNS (1);
10486 if (!speed_p)
10487 return true;
10489 if (GET_MODE (XEXP (x, 0)) == SImode)
10490 *cost += extra_cost->ldst.load;
10491 else
10492 *cost += extra_cost->ldst.load_sign_extend;
10494 if (mode == DImode)
10495 *cost += extra_cost->alu.shift;
10497 return true;
10500 /* Widening from less than 32-bits requires an extend operation. */
10501 if (GET_MODE (XEXP (x, 0)) != SImode && arm_arch6)
10503 /* We have SXTB/SXTH. */
10504 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
10505 if (speed_p)
10506 *cost += extra_cost->alu.extend;
10508 else if (GET_MODE (XEXP (x, 0)) != SImode)
10510 /* Needs two shifts. */
10511 *cost += COSTS_N_INSNS (1);
10512 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
10513 if (speed_p)
10514 *cost += 2 * extra_cost->alu.shift;
10517 /* Widening beyond 32-bits requires one more insn. */
10518 if (mode == DImode)
10520 *cost += COSTS_N_INSNS (1);
10521 if (speed_p)
10522 *cost += extra_cost->alu.shift;
10525 return true;
10527 case ZERO_EXTEND:
10528 if ((arm_arch4
10529 || GET_MODE (XEXP (x, 0)) == SImode
10530 || GET_MODE (XEXP (x, 0)) == QImode)
10531 && MEM_P (XEXP (x, 0)))
10533 *cost = rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
10535 if (mode == DImode)
10536 *cost += COSTS_N_INSNS (1); /* No speed penalty. */
10538 return true;
10541 /* Widening from less than 32-bits requires an extend operation. */
10542 if (GET_MODE (XEXP (x, 0)) == QImode)
10544 /* UXTB can be a shorter instruction in Thumb2, but it might
10545 be slower than the AND Rd, Rn, #255 alternative. When
10546 optimizing for speed it should never be slower to use
10547 AND, and we don't really model 16-bit vs 32-bit insns
10548 here. */
10549 if (speed_p)
10550 *cost += extra_cost->alu.logical;
10552 else if (GET_MODE (XEXP (x, 0)) != SImode && arm_arch6)
10554 /* We have UXTB/UXTH. */
10555 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
10556 if (speed_p)
10557 *cost += extra_cost->alu.extend;
10559 else if (GET_MODE (XEXP (x, 0)) != SImode)
10561 /* Needs two shifts. It's marginally preferable to use
10562 shifts rather than two BIC instructions as the second
10563 shift may merge with a subsequent insn as a shifter
10564 op. */
10565 *cost = COSTS_N_INSNS (2);
10566 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
10567 if (speed_p)
10568 *cost += 2 * extra_cost->alu.shift;
10571 /* Widening beyond 32-bits requires one more insn. */
10572 if (mode == DImode)
10574 *cost += COSTS_N_INSNS (1); /* No speed penalty. */
10577 return true;
10579 case CONST_INT:
10580 *cost = 0;
10581 /* CONST_INT has no mode, so we cannot tell for sure how many
10582 insns are really going to be needed. The best we can do is
10583 look at the value passed. If it fits in SImode, then assume
10584 that's the mode it will be used for. Otherwise assume it
10585 will be used in DImode. */
10586 if (INTVAL (x) == trunc_int_for_mode (INTVAL (x), SImode))
10587 mode = SImode;
10588 else
10589 mode = DImode;
10591 /* Avoid blowing up in arm_gen_constant (). */
10592 if (!(outer_code == PLUS
10593 || outer_code == AND
10594 || outer_code == IOR
10595 || outer_code == XOR
10596 || outer_code == MINUS))
10597 outer_code = SET;
10599 const_int_cost:
10600 if (mode == SImode)
10602 *cost += COSTS_N_INSNS (arm_gen_constant (outer_code, SImode, NULL,
10603 INTVAL (x), NULL, NULL,
10604 0, 0));
10605 /* Extra costs? */
10607 else
10609 *cost += COSTS_N_INSNS (arm_gen_constant
10610 (outer_code, SImode, NULL,
10611 trunc_int_for_mode (INTVAL (x), SImode),
10612 NULL, NULL, 0, 0)
10613 + arm_gen_constant (outer_code, SImode, NULL,
10614 INTVAL (x) >> 32, NULL,
10615 NULL, 0, 0));
10616 /* Extra costs? */
10619 return true;
10621 case CONST:
10622 case LABEL_REF:
10623 case SYMBOL_REF:
10624 if (speed_p)
10626 if (arm_arch_thumb2 && !flag_pic)
10627 *cost += COSTS_N_INSNS (1);
10628 else
10629 *cost += extra_cost->ldst.load;
10631 else
10632 *cost += COSTS_N_INSNS (1);
10634 if (flag_pic)
10636 *cost += COSTS_N_INSNS (1);
10637 if (speed_p)
10638 *cost += extra_cost->alu.arith;
10641 return true;
10643 case CONST_FIXED:
10644 *cost = COSTS_N_INSNS (4);
10645 /* Fixme. */
10646 return true;
10648 case CONST_DOUBLE:
10649 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10650 && (mode == SFmode || !TARGET_VFP_SINGLE))
10652 if (vfp3_const_double_rtx (x))
10654 if (speed_p)
10655 *cost += extra_cost->fp[mode == DFmode].fpconst;
10656 return true;
10659 if (speed_p)
10661 if (mode == DFmode)
10662 *cost += extra_cost->ldst.loadd;
10663 else
10664 *cost += extra_cost->ldst.loadf;
10666 else
10667 *cost += COSTS_N_INSNS (1 + (mode == DFmode));
10669 return true;
10671 *cost = COSTS_N_INSNS (4);
10672 return true;
10674 case CONST_VECTOR:
10675 /* Fixme. */
10676 if (TARGET_NEON
10677 && TARGET_HARD_FLOAT
10678 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
10679 && neon_immediate_valid_for_move (x, mode, NULL, NULL))
10680 *cost = COSTS_N_INSNS (1);
10681 else
10682 *cost = COSTS_N_INSNS (4);
10683 return true;
10685 case HIGH:
10686 case LO_SUM:
10687 /* When optimizing for size, we prefer constant pool entries to
10688 MOVW/MOVT pairs, so bump the cost of these slightly. */
10689 if (!speed_p)
10690 *cost += 1;
10691 return true;
10693 case CLZ:
10694 if (speed_p)
10695 *cost += extra_cost->alu.clz;
10696 return false;
10698 case SMIN:
10699 if (XEXP (x, 1) == const0_rtx)
10701 if (speed_p)
10702 *cost += extra_cost->alu.log_shift;
10703 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
10704 return true;
10706 /* Fall through. */
10707 case SMAX:
10708 case UMIN:
10709 case UMAX:
10710 *cost += COSTS_N_INSNS (1);
10711 return false;
10713 case TRUNCATE:
10714 if (GET_CODE (XEXP (x, 0)) == ASHIFTRT
10715 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
10716 && INTVAL (XEXP (XEXP (x, 0), 1)) == 32
10717 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
10718 && ((GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == SIGN_EXTEND
10719 && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == SIGN_EXTEND)
10720 || (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == ZERO_EXTEND
10721 && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1))
10722 == ZERO_EXTEND))))
10724 if (speed_p)
10725 *cost += extra_cost->mult[1].extend;
10726 *cost += (rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0), VOIDmode,
10727 ZERO_EXTEND, 0, speed_p)
10728 + rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 1), VOIDmode,
10729 ZERO_EXTEND, 0, speed_p));
10730 return true;
10732 *cost = LIBCALL_COST (1);
10733 return false;
10735 case UNSPEC_VOLATILE:
10736 case UNSPEC:
10737 return arm_unspec_cost (x, outer_code, speed_p, cost);
10739 case PC:
10740 /* Reading the PC is like reading any other register. Writing it
10741 is more expensive, but we take that into account elsewhere. */
10742 *cost = 0;
10743 return true;
10745 case ZERO_EXTRACT:
10746 /* TODO: Simple zero_extract of bottom bits using AND. */
10747 /* Fall through. */
10748 case SIGN_EXTRACT:
10749 if (arm_arch6
10750 && mode == SImode
10751 && CONST_INT_P (XEXP (x, 1))
10752 && CONST_INT_P (XEXP (x, 2)))
10754 if (speed_p)
10755 *cost += extra_cost->alu.bfx;
10756 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
10757 return true;
10759 /* Without UBFX/SBFX, need to resort to shift operations. */
10760 *cost += COSTS_N_INSNS (1);
10761 if (speed_p)
10762 *cost += 2 * extra_cost->alu.shift;
10763 *cost += rtx_cost (XEXP (x, 0), mode, ASHIFT, 0, speed_p);
10764 return true;
10766 case FLOAT_EXTEND:
10767 if (TARGET_HARD_FLOAT)
10769 if (speed_p)
10770 *cost += extra_cost->fp[mode == DFmode].widen;
10771 if (!TARGET_VFP5
10772 && GET_MODE (XEXP (x, 0)) == HFmode)
10774 /* Pre v8, widening HF->DF is a two-step process, first
10775 widening to SFmode. */
10776 *cost += COSTS_N_INSNS (1);
10777 if (speed_p)
10778 *cost += extra_cost->fp[0].widen;
10780 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
10781 return true;
10784 *cost = LIBCALL_COST (1);
10785 return false;
10787 case FLOAT_TRUNCATE:
10788 if (TARGET_HARD_FLOAT)
10790 if (speed_p)
10791 *cost += extra_cost->fp[mode == DFmode].narrow;
10792 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
10793 return true;
10794 /* Vector modes? */
10796 *cost = LIBCALL_COST (1);
10797 return false;
10799 case FMA:
10800 if (TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_FMA)
10802 rtx op0 = XEXP (x, 0);
10803 rtx op1 = XEXP (x, 1);
10804 rtx op2 = XEXP (x, 2);
10807 /* vfms or vfnma. */
10808 if (GET_CODE (op0) == NEG)
10809 op0 = XEXP (op0, 0);
10811 /* vfnms or vfnma. */
10812 if (GET_CODE (op2) == NEG)
10813 op2 = XEXP (op2, 0);
10815 *cost += rtx_cost (op0, mode, FMA, 0, speed_p);
10816 *cost += rtx_cost (op1, mode, FMA, 1, speed_p);
10817 *cost += rtx_cost (op2, mode, FMA, 2, speed_p);
10819 if (speed_p)
10820	    *cost += extra_cost->fp[mode == DFmode].fma;
10822 return true;
10825 *cost = LIBCALL_COST (3);
10826 return false;
10828 case FIX:
10829 case UNSIGNED_FIX:
10830 if (TARGET_HARD_FLOAT)
10832 /* The *combine_vcvtf2i reduces a vmul+vcvt into
10833 a vcvt fixed-point conversion. */
10834 if (code == FIX && mode == SImode
10835 && GET_CODE (XEXP (x, 0)) == FIX
10836 && GET_MODE (XEXP (x, 0)) == SFmode
10837 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
10838 && vfp3_const_double_for_bits (XEXP (XEXP (XEXP (x, 0), 0), 1))
10839 > 0)
10841 if (speed_p)
10842 *cost += extra_cost->fp[0].toint;
10844 *cost += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0), mode,
10845 code, 0, speed_p);
10846 return true;
10849 if (GET_MODE_CLASS (mode) == MODE_INT)
10851 mode = GET_MODE (XEXP (x, 0));
10852 if (speed_p)
10853 *cost += extra_cost->fp[mode == DFmode].toint;
10854	    /* Strip off the 'cost' of rounding towards zero.  */
10855 if (GET_CODE (XEXP (x, 0)) == FIX)
10856 *cost += rtx_cost (XEXP (XEXP (x, 0), 0), mode, code,
10857 0, speed_p);
10858 else
10859 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
10860 /* ??? Increase the cost to deal with transferring from
10861 FP -> CORE registers? */
10862 return true;
10864 else if (GET_MODE_CLASS (mode) == MODE_FLOAT
10865 && TARGET_VFP5)
10867 if (speed_p)
10868 *cost += extra_cost->fp[mode == DFmode].roundint;
10869 return false;
10871 /* Vector costs? */
10873 *cost = LIBCALL_COST (1);
10874 return false;
10876 case FLOAT:
10877 case UNSIGNED_FLOAT:
10878 if (TARGET_HARD_FLOAT)
10880 /* ??? Increase the cost to deal with transferring from CORE
10881 -> FP registers? */
10882 if (speed_p)
10883 *cost += extra_cost->fp[mode == DFmode].fromint;
10884 return false;
10886 *cost = LIBCALL_COST (1);
10887 return false;
10889 case CALL:
10890 return true;
10892 case ASM_OPERANDS:
10894	/* Just a guess: the number of instructions in the asm template
10895	   plus one insn per input, but always a minimum of COSTS_N_INSNS (1)
10896	   (see PR60663).  */
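	/* For example, an asm template containing two instructions and
	   having three inputs is costed as COSTS_N_INSNS (5).  */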
10897 int asm_length = MAX (1, asm_str_count (ASM_OPERANDS_TEMPLATE (x)));
10898 int num_operands = ASM_OPERANDS_INPUT_LENGTH (x);
10900 *cost = COSTS_N_INSNS (asm_length + num_operands);
10901 return true;
10903 default:
10904 if (mode != VOIDmode)
10905 *cost = COSTS_N_INSNS (ARM_NUM_REGS (mode));
10906 else
10907 *cost = COSTS_N_INSNS (4); /* Who knows? */
10908 return false;
10912 #undef HANDLE_NARROW_SHIFT_ARITH
10914 /* RTX costs entry point. */
10916 static bool
10917 arm_rtx_costs (rtx x, machine_mode mode ATTRIBUTE_UNUSED, int outer_code,
10918 int opno ATTRIBUTE_UNUSED, int *total, bool speed)
10920 bool result;
10921 int code = GET_CODE (x);
10922 gcc_assert (current_tune->insn_extra_cost);
10924 result = arm_rtx_costs_internal (x, (enum rtx_code) code,
10925 (enum rtx_code) outer_code,
10926 current_tune->insn_extra_cost,
10927 total, speed);
10929 if (dump_file && (dump_flags & TDF_DETAILS))
10931 print_rtl_single (dump_file, x);
10932 fprintf (dump_file, "\n%s cost: %d (%s)\n", speed ? "Hot" : "Cold",
10933 *total, result ? "final" : "partial");
10935 return result;
10938 /* All address computations that can be done are free, but rtx cost returns
10939 the same for practically all of them. So we weight the different types
10940    of address here in the order (most preferred first):
10941 PRE/POST_INC/DEC, SHIFT or NON-INT sum, INT sum, REG, MEM or LABEL. */
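/* For example, a POST_INC address scores 0, (plus (reg) (const_int 4))
   scores 2, (plus (reg) (mult (reg) (const_int 4))) scores 3, a plain REG
   scores 6 and a SYMBOL_REF scores 10.  */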
10942 static inline int
10943 arm_arm_address_cost (rtx x)
10945 enum rtx_code c = GET_CODE (x);
10947 if (c == PRE_INC || c == PRE_DEC || c == POST_INC || c == POST_DEC)
10948 return 0;
10949 if (c == MEM || c == LABEL_REF || c == SYMBOL_REF)
10950 return 10;
10952 if (c == PLUS)
10954 if (CONST_INT_P (XEXP (x, 1)))
10955 return 2;
10957 if (ARITHMETIC_P (XEXP (x, 0)) || ARITHMETIC_P (XEXP (x, 1)))
10958 return 3;
10960 return 4;
10963 return 6;
10966 static inline int
10967 arm_thumb_address_cost (rtx x)
10969 enum rtx_code c = GET_CODE (x);
10971 if (c == REG)
10972 return 1;
10973 if (c == PLUS
10974 && REG_P (XEXP (x, 0))
10975 && CONST_INT_P (XEXP (x, 1)))
10976 return 1;
10978 return 2;
10981 static int
10982 arm_address_cost (rtx x, machine_mode mode ATTRIBUTE_UNUSED,
10983 addr_space_t as ATTRIBUTE_UNUSED, bool speed ATTRIBUTE_UNUSED)
10985 return TARGET_32BIT ? arm_arm_address_cost (x) : arm_thumb_address_cost (x);
10988 /* Adjust cost hook for XScale. */
10989 static bool
10990 xscale_sched_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep,
10991 int * cost)
10993 /* Some true dependencies can have a higher cost depending
10994 on precisely how certain input operands are used. */
10995 if (dep_type == 0
10996 && recog_memoized (insn) >= 0
10997 && recog_memoized (dep) >= 0)
10999 int shift_opnum = get_attr_shift (insn);
11000 enum attr_type attr_type = get_attr_type (dep);
11002 /* If nonzero, SHIFT_OPNUM contains the operand number of a shifted
11003 operand for INSN. If we have a shifted input operand and the
11004 instruction we depend on is another ALU instruction, then we may
11005 have to account for an additional stall. */
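      /* For example, if DEP sets a register that INSN then uses as the
	 shifted operand of an ALU instruction (say the r2 in
	 "add r0, r1, r2, lsl #3"), the code below accounts for the extra
	 stall by using a higher dependency cost.  */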
11006 if (shift_opnum != 0
11007 && (attr_type == TYPE_ALU_SHIFT_IMM
11008 || attr_type == TYPE_ALUS_SHIFT_IMM
11009 || attr_type == TYPE_LOGIC_SHIFT_IMM
11010 || attr_type == TYPE_LOGICS_SHIFT_IMM
11011 || attr_type == TYPE_ALU_SHIFT_REG
11012 || attr_type == TYPE_ALUS_SHIFT_REG
11013 || attr_type == TYPE_LOGIC_SHIFT_REG
11014 || attr_type == TYPE_LOGICS_SHIFT_REG
11015 || attr_type == TYPE_MOV_SHIFT
11016 || attr_type == TYPE_MVN_SHIFT
11017 || attr_type == TYPE_MOV_SHIFT_REG
11018 || attr_type == TYPE_MVN_SHIFT_REG))
11020 rtx shifted_operand;
11021 int opno;
11023 /* Get the shifted operand. */
11024 extract_insn (insn);
11025 shifted_operand = recog_data.operand[shift_opnum];
11027 /* Iterate over all the operands in DEP. If we write an operand
11028	     that overlaps with SHIFTED_OPERAND, then we have to increase the
11029 cost of this dependency. */
11030 extract_insn (dep);
11031 preprocess_constraints (dep);
11032 for (opno = 0; opno < recog_data.n_operands; opno++)
11034 /* We can ignore strict inputs. */
11035 if (recog_data.operand_type[opno] == OP_IN)
11036 continue;
11038 if (reg_overlap_mentioned_p (recog_data.operand[opno],
11039 shifted_operand))
11041 *cost = 2;
11042 return false;
11047 return true;
11050 /* Adjust cost hook for Cortex A9. */
11051 static bool
11052 cortex_a9_sched_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep,
11053 int * cost)
11055 switch (dep_type)
11057 case REG_DEP_ANTI:
11058 *cost = 0;
11059 return false;
11061 case REG_DEP_TRUE:
11062 case REG_DEP_OUTPUT:
11063 if (recog_memoized (insn) >= 0
11064 && recog_memoized (dep) >= 0)
11066 if (GET_CODE (PATTERN (insn)) == SET)
11068 if (GET_MODE_CLASS
11069 (GET_MODE (SET_DEST (PATTERN (insn)))) == MODE_FLOAT
11070 || GET_MODE_CLASS
11071 (GET_MODE (SET_SRC (PATTERN (insn)))) == MODE_FLOAT)
11073 enum attr_type attr_type_insn = get_attr_type (insn);
11074 enum attr_type attr_type_dep = get_attr_type (dep);
11076 /* By default all dependencies of the form
11077 s0 = s0 <op> s1
11078 s0 = s0 <op> s2
11079 have an extra latency of 1 cycle because
11080 of the input and output dependency in this
11081		     case.  However, this gets modeled as a true
11082 dependency and hence all these checks. */
11083 if (REG_P (SET_DEST (PATTERN (insn)))
11084 && reg_set_p (SET_DEST (PATTERN (insn)), dep))
11086 /* FMACS is a special case where the dependent
11087 instruction can be issued 3 cycles before
11088 the normal latency in case of an output
11089 dependency. */
11090 if ((attr_type_insn == TYPE_FMACS
11091 || attr_type_insn == TYPE_FMACD)
11092 && (attr_type_dep == TYPE_FMACS
11093 || attr_type_dep == TYPE_FMACD))
11095 if (dep_type == REG_DEP_OUTPUT)
11096 *cost = insn_default_latency (dep) - 3;
11097 else
11098 *cost = insn_default_latency (dep);
11099 return false;
11101 else
11103 if (dep_type == REG_DEP_OUTPUT)
11104 *cost = insn_default_latency (dep) + 1;
11105 else
11106 *cost = insn_default_latency (dep);
11108 return false;
11113 break;
11115 default:
11116 gcc_unreachable ();
11119 return true;
11122 /* Adjust cost hook for FA726TE. */
11123 static bool
11124 fa726te_sched_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep,
11125 int * cost)
11127 /* For FA726TE, true dependency on CPSR (i.e. set cond followed by predicated)
11128    has a penalty of 3.  */
11129 if (dep_type == REG_DEP_TRUE
11130 && recog_memoized (insn) >= 0
11131 && recog_memoized (dep) >= 0
11132 && get_attr_conds (dep) == CONDS_SET)
11134 /* Use of carry (e.g. 64-bit arithmetic) in ALU: 3-cycle latency. */
11135 if (get_attr_conds (insn) == CONDS_USE
11136 && get_attr_type (insn) != TYPE_BRANCH)
11138 *cost = 3;
11139 return false;
11142 if (GET_CODE (PATTERN (insn)) == COND_EXEC
11143 || get_attr_conds (insn) == CONDS_USE)
11145 *cost = 0;
11146 return false;
11150 return true;
11153 /* Implement TARGET_REGISTER_MOVE_COST.
11155 Moves between VFP_REGS and GENERAL_REGS are a single insn, but
11156    such a move is typically more expensive than a single memory access.  We set
11157 the cost to less than two memory accesses so that floating
11158 point to integer conversion does not go through memory. */
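/* For example, with the TARGET_32BIT memory move cost of 10 defined below,
   the VFP<->core transfer cost of 15 keeps such moves cheaper than a
   store-plus-load round trip costing 20.  */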
11161 arm_register_move_cost (machine_mode mode ATTRIBUTE_UNUSED,
11162 reg_class_t from, reg_class_t to)
11164 if (TARGET_32BIT)
11166 if ((IS_VFP_CLASS (from) && !IS_VFP_CLASS (to))
11167 || (!IS_VFP_CLASS (from) && IS_VFP_CLASS (to)))
11168 return 15;
11169 else if ((from == IWMMXT_REGS && to != IWMMXT_REGS)
11170 || (from != IWMMXT_REGS && to == IWMMXT_REGS))
11171 return 4;
11172 else if (from == IWMMXT_GR_REGS || to == IWMMXT_GR_REGS)
11173 return 20;
11174 else
11175 return 2;
11177 else
11179 if (from == HI_REGS || to == HI_REGS)
11180 return 4;
11181 else
11182 return 2;
11186 /* Implement TARGET_MEMORY_MOVE_COST. */
11189 arm_memory_move_cost (machine_mode mode, reg_class_t rclass,
11190 bool in ATTRIBUTE_UNUSED)
11192 if (TARGET_32BIT)
11193 return 10;
11194 else
11196 if (GET_MODE_SIZE (mode) < 4)
11197 return 8;
11198 else
11199 return ((2 * GET_MODE_SIZE (mode)) * (rclass == LO_REGS ? 1 : 2));
11203 /* Vectorizer cost model implementation. */
11205 /* Implement targetm.vectorize.builtin_vectorization_cost. */
11206 static int
11207 arm_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
11208 tree vectype,
11209 int misalign ATTRIBUTE_UNUSED)
11211 unsigned elements;
11213 switch (type_of_cost)
11215 case scalar_stmt:
11216 return current_tune->vec_costs->scalar_stmt_cost;
11218 case scalar_load:
11219 return current_tune->vec_costs->scalar_load_cost;
11221 case scalar_store:
11222 return current_tune->vec_costs->scalar_store_cost;
11224 case vector_stmt:
11225 return current_tune->vec_costs->vec_stmt_cost;
11227 case vector_load:
11228 return current_tune->vec_costs->vec_align_load_cost;
11230 case vector_store:
11231 return current_tune->vec_costs->vec_store_cost;
11233 case vec_to_scalar:
11234 return current_tune->vec_costs->vec_to_scalar_cost;
11236 case scalar_to_vec:
11237 return current_tune->vec_costs->scalar_to_vec_cost;
11239 case unaligned_load:
11240 return current_tune->vec_costs->vec_unalign_load_cost;
11242 case unaligned_store:
11243 return current_tune->vec_costs->vec_unalign_store_cost;
11245 case cond_branch_taken:
11246 return current_tune->vec_costs->cond_taken_branch_cost;
11248 case cond_branch_not_taken:
11249 return current_tune->vec_costs->cond_not_taken_branch_cost;
11251 case vec_perm:
11252 case vec_promote_demote:
11253 return current_tune->vec_costs->vec_stmt_cost;
11255 case vec_construct:
11256 elements = TYPE_VECTOR_SUBPARTS (vectype);
11257 return elements / 2 + 1;
11259 default:
11260 gcc_unreachable ();
11264 /* Implement targetm.vectorize.add_stmt_cost. */
11266 static unsigned
11267 arm_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
11268 struct _stmt_vec_info *stmt_info, int misalign,
11269 enum vect_cost_model_location where)
11271 unsigned *cost = (unsigned *) data;
11272 unsigned retval = 0;
11274 if (flag_vect_cost_model)
11276 tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
11277 int stmt_cost = arm_builtin_vectorization_cost (kind, vectype, misalign);
11279 /* Statements in an inner loop relative to the loop being
11280 vectorized are weighted more heavily. The value here is
11281 arbitrary and could potentially be improved with analysis. */
11282 if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info))
11283 count *= 50; /* FIXME. */
11285 retval = (unsigned) (count * stmt_cost);
11286 cost[where] += retval;
11289 return retval;
11292 /* Return true if and only if this insn can dual-issue only as older. */
11293 static bool
11294 cortexa7_older_only (rtx_insn *insn)
11296 if (recog_memoized (insn) < 0)
11297 return false;
11299 switch (get_attr_type (insn))
11301 case TYPE_ALU_DSP_REG:
11302 case TYPE_ALU_SREG:
11303 case TYPE_ALUS_SREG:
11304 case TYPE_LOGIC_REG:
11305 case TYPE_LOGICS_REG:
11306 case TYPE_ADC_REG:
11307 case TYPE_ADCS_REG:
11308 case TYPE_ADR:
11309 case TYPE_BFM:
11310 case TYPE_REV:
11311 case TYPE_MVN_REG:
11312 case TYPE_SHIFT_IMM:
11313 case TYPE_SHIFT_REG:
11314 case TYPE_LOAD_BYTE:
11315 case TYPE_LOAD1:
11316 case TYPE_STORE1:
11317 case TYPE_FFARITHS:
11318 case TYPE_FADDS:
11319 case TYPE_FFARITHD:
11320 case TYPE_FADDD:
11321 case TYPE_FMOV:
11322 case TYPE_F_CVT:
11323 case TYPE_FCMPS:
11324 case TYPE_FCMPD:
11325 case TYPE_FCONSTS:
11326 case TYPE_FCONSTD:
11327 case TYPE_FMULS:
11328 case TYPE_FMACS:
11329 case TYPE_FMULD:
11330 case TYPE_FMACD:
11331 case TYPE_FDIVS:
11332 case TYPE_FDIVD:
11333 case TYPE_F_MRC:
11334 case TYPE_F_MRRC:
11335 case TYPE_F_FLAG:
11336 case TYPE_F_LOADS:
11337 case TYPE_F_STORES:
11338 return true;
11339 default:
11340 return false;
11344 /* Return true if and only if this insn can dual-issue as younger. */
11345 static bool
11346 cortexa7_younger (FILE *file, int verbose, rtx_insn *insn)
11348 if (recog_memoized (insn) < 0)
11350 if (verbose > 5)
11351 fprintf (file, ";; not cortexa7_younger %d\n", INSN_UID (insn));
11352 return false;
11355 switch (get_attr_type (insn))
11357 case TYPE_ALU_IMM:
11358 case TYPE_ALUS_IMM:
11359 case TYPE_LOGIC_IMM:
11360 case TYPE_LOGICS_IMM:
11361 case TYPE_EXTEND:
11362 case TYPE_MVN_IMM:
11363 case TYPE_MOV_IMM:
11364 case TYPE_MOV_REG:
11365 case TYPE_MOV_SHIFT:
11366 case TYPE_MOV_SHIFT_REG:
11367 case TYPE_BRANCH:
11368 case TYPE_CALL:
11369 return true;
11370 default:
11371 return false;
11376 /* Look for an instruction that can dual issue only as an older
11377 instruction, and move it in front of any instructions that can
11378 dual-issue as younger, while preserving the relative order of all
11379 other instructions in the ready list. This is a heuristic to help
11380 dual-issue in later cycles, by postponing issue of more flexible
11381 instructions. This heuristic may affect dual-issue opportunities
11382 in the current cycle. */
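/* Illustrative sketch (added for clarity, not from the original sources):
   with the convention that the highest-indexed ready-list entry is issued
   first, a ready list of
     ready[2] = mov r0, #1          (TYPE_MOV_IMM, younger-capable)
     ready[1] = vadd.f32 s0, s1, s2 (TYPE_FADDS, older-only)
     ready[0] = add r2, r3, #2      (TYPE_ALU_IMM, younger-capable)
   is reordered so that the vadd.f32 is issued ahead of the mov, keeping the
   flexible ALU-immediate instructions available to fill the "younger" slot
   of a later dual-issue cycle.  */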
11383 static void
11384 cortexa7_sched_reorder (FILE *file, int verbose, rtx_insn **ready,
11385 int *n_readyp, int clock)
11387 int i;
11388 int first_older_only = -1, first_younger = -1;
11390 if (verbose > 5)
11391 fprintf (file,
11392 ";; sched_reorder for cycle %d with %d insns in ready list\n",
11393 clock,
11394 *n_readyp);
11396 /* Traverse the ready list from the head (the instruction to issue
11397 first), looking for the first instruction that can issue as
11398 younger and the first instruction that can dual-issue only as
11399 older. */
11400 for (i = *n_readyp - 1; i >= 0; i--)
11402 rtx_insn *insn = ready[i];
11403 if (cortexa7_older_only (insn))
11405 first_older_only = i;
11406 if (verbose > 5)
11407 fprintf (file, ";; reorder older found %d\n", INSN_UID (insn));
11408 break;
11410 else if (cortexa7_younger (file, verbose, insn) && first_younger == -1)
11411 first_younger = i;
11414 /* Nothing to reorder because either no younger insn found or insn
11415 that can dual-issue only as older appears before any insn that
11416 can dual-issue as younger. */
11417 if (first_younger == -1)
11419 if (verbose > 5)
11420 fprintf (file, ";; sched_reorder nothing to reorder as no younger\n");
11421 return;
11424 /* Nothing to reorder because no older-only insn in the ready list. */
11425 if (first_older_only == -1)
11427 if (verbose > 5)
11428 fprintf (file, ";; sched_reorder nothing to reorder as no older_only\n");
11429 return;
11432 /* Move first_older_only insn before first_younger. */
11433 if (verbose > 5)
11434 fprintf (file, ";; cortexa7_sched_reorder insn %d before %d\n",
11435 INSN_UID(ready [first_older_only]),
11436 INSN_UID(ready [first_younger]));
11437 rtx_insn *first_older_only_insn = ready [first_older_only];
11438 for (i = first_older_only; i < first_younger; i++)
11440 ready[i] = ready[i+1];
11443 ready[i] = first_older_only_insn;
11444 return;
11447 /* Implement TARGET_SCHED_REORDER. */
11448 static int
11449 arm_sched_reorder (FILE *file, int verbose, rtx_insn **ready, int *n_readyp,
11450 int clock)
11452 switch (arm_tune)
11454 case TARGET_CPU_cortexa7:
11455 cortexa7_sched_reorder (file, verbose, ready, n_readyp, clock);
11456 break;
11457 default:
11458 /* Do nothing for other cores. */
11459 break;
11462 return arm_issue_rate ();
11465 /* This function implements the target macro TARGET_SCHED_ADJUST_COST.
11466 It corrects the value of COST based on the relationship between
11467 INSN and DEP and the type of that dependence. It returns the new
11468 value. There is a per-core adjust_cost hook to adjust scheduler costs
11469 and the per-core hook can choose to completely override the generic
11470 adjust_cost function. Only put bits of code into arm_adjust_cost that
11471 are common across all cores. */
11472 static int
11473 arm_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep, int cost,
11474 unsigned int)
11476 rtx i_pat, d_pat;
11478 /* When generating Thumb-1 code, we want to place flag-setting operations
11479 close to a conditional branch which depends on them, so that we can
11480 omit the comparison. */
11481 if (TARGET_THUMB1
11482 && dep_type == 0
11483 && recog_memoized (insn) == CODE_FOR_cbranchsi4_insn
11484 && recog_memoized (dep) >= 0
11485 && get_attr_conds (dep) == CONDS_SET)
11486 return 0;
11488 if (current_tune->sched_adjust_cost != NULL)
11490 if (!current_tune->sched_adjust_cost (insn, dep_type, dep, &cost))
11491 return cost;
11494 /* XXX Is this strictly true? */
11495 if (dep_type == REG_DEP_ANTI
11496 || dep_type == REG_DEP_OUTPUT)
11497 return 0;
11499 /* Call insns don't incur a stall, even if they follow a load. */
11500 if (dep_type == 0
11501 && CALL_P (insn))
11502 return 1;
11504 if ((i_pat = single_set (insn)) != NULL
11505 && MEM_P (SET_SRC (i_pat))
11506 && (d_pat = single_set (dep)) != NULL
11507 && MEM_P (SET_DEST (d_pat)))
11509 rtx src_mem = XEXP (SET_SRC (i_pat), 0);
11510 /* This is a load after a store, there is no conflict if the load reads
11511 from a cached area. Assume that loads from the stack, and from the
11512 constant pool are cached, and that others will miss. This is a
11513 hack. */
11515 if ((GET_CODE (src_mem) == SYMBOL_REF
11516 && CONSTANT_POOL_ADDRESS_P (src_mem))
11517 || reg_mentioned_p (stack_pointer_rtx, src_mem)
11518 || reg_mentioned_p (frame_pointer_rtx, src_mem)
11519 || reg_mentioned_p (hard_frame_pointer_rtx, src_mem))
11520 return 1;
11523 return cost;
11527 arm_max_conditional_execute (void)
11529 return max_insns_skipped;
11532 static int
11533 arm_default_branch_cost (bool speed_p, bool predictable_p ATTRIBUTE_UNUSED)
11535 if (TARGET_32BIT)
11536 return (TARGET_THUMB2 && !speed_p) ? 1 : 4;
11537 else
11538 return (optimize > 0) ? 2 : 0;
11541 static int
11542 arm_cortex_a5_branch_cost (bool speed_p, bool predictable_p)
11544 return speed_p ? 0 : arm_default_branch_cost (speed_p, predictable_p);
11547 /* Thumb-2 branches are relatively cheap on Cortex-M processors ("1 + P cycles"
11548 on Cortex-M4, where P varies from 1 to 3 according to some criteria), since
11549 sequences of non-executed instructions in IT blocks probably take the same
11550 amount of time as executed instructions (and the IT instruction itself takes
11551 space in icache). This function was experimentally determined to give good
11552 results on a popular embedded benchmark. */
11554 static int
11555 arm_cortex_m_branch_cost (bool speed_p, bool predictable_p)
11557 return (TARGET_32BIT && speed_p) ? 1
11558 : arm_default_branch_cost (speed_p, predictable_p);
11561 static int
11562 arm_cortex_m7_branch_cost (bool speed_p, bool predictable_p)
11564 return speed_p ? 0 : arm_default_branch_cost (speed_p, predictable_p);
11567 static bool fp_consts_inited = false;
11569 static REAL_VALUE_TYPE value_fp0;
11571 static void
11572 init_fp_table (void)
11574 REAL_VALUE_TYPE r;
11576 r = REAL_VALUE_ATOF ("0", DFmode);
11577 value_fp0 = r;
11578 fp_consts_inited = true;
11581 /* Return TRUE if rtx X is a valid immediate FP constant. */
11583 arm_const_double_rtx (rtx x)
11585 const REAL_VALUE_TYPE *r;
11587 if (!fp_consts_inited)
11588 init_fp_table ();
11590 r = CONST_DOUBLE_REAL_VALUE (x);
11591 if (REAL_VALUE_MINUS_ZERO (*r))
11592 return 0;
11594 if (real_equal (r, &value_fp0))
11595 return 1;
11597 return 0;
11600 /* VFPv3 has a fairly wide range of representable immediates, formed from
11601 "quarter-precision" floating-point values. These can be evaluated using this
11602 formula (with ^ for exponentiation):
11604 -1^s * n * 2^-r
11606 Where 's' is a sign bit (0/1), 'n' and 'r' are integers such that
11607 16 <= n <= 31 and 0 <= r <= 7.
11609 These values are mapped onto an 8-bit integer ABCDEFGH s.t.
11611 - A (most-significant) is the sign bit.
11612 - BCD are the exponent (encoded as r XOR 3).
11613 - EFGH are the mantissa (encoded as n - 16).
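/* Worked example (added for clarity, not part of the original sources):
   1.0 = +1 * 16 * 2^-4, i.e. s = 0, n = 16, r = 4, so BCD = 4 XOR 3 = 0b111
   and EFGH = 16 - 16 = 0b0000, giving the 8-bit encoding 0b01110000 (0x70).
   Likewise 0.5 = 16 * 2^-5 encodes as 0x60.  */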
11616 /* Return an integer index for a VFPv3 immediate operand X suitable for the
11617 fconst[sd] instruction, or -1 if X isn't suitable. */
11618 static int
11619 vfp3_const_double_index (rtx x)
11621 REAL_VALUE_TYPE r, m;
11622 int sign, exponent;
11623 unsigned HOST_WIDE_INT mantissa, mant_hi;
11624 unsigned HOST_WIDE_INT mask;
11625 int point_pos = 2 * HOST_BITS_PER_WIDE_INT - 1;
11626 bool fail;
11628 if (!TARGET_VFP3 || !CONST_DOUBLE_P (x))
11629 return -1;
11631 r = *CONST_DOUBLE_REAL_VALUE (x);
11633 /* We can't represent these things, so detect them first. */
11634 if (REAL_VALUE_ISINF (r) || REAL_VALUE_ISNAN (r) || REAL_VALUE_MINUS_ZERO (r))
11635 return -1;
11637 /* Extract sign, exponent and mantissa. */
11638 sign = REAL_VALUE_NEGATIVE (r) ? 1 : 0;
11639 r = real_value_abs (&r);
11640 exponent = REAL_EXP (&r);
11641 /* For the mantissa, we expand into two HOST_WIDE_INTS, apart from the
11642 highest (sign) bit, with a fixed binary point at bit point_pos.
11643 WARNING: If there's ever a VFP version which uses more than 2 * H_W_I - 1
11644 bits for the mantissa, this may fail (low bits would be lost). */
11645 real_ldexp (&m, &r, point_pos - exponent);
11646 wide_int w = real_to_integer (&m, &fail, HOST_BITS_PER_WIDE_INT * 2);
11647 mantissa = w.elt (0);
11648 mant_hi = w.elt (1);
11650 /* If there are bits set in the low part of the mantissa, we can't
11651 represent this value. */
11652 if (mantissa != 0)
11653 return -1;
11655 /* Now make it so that mantissa contains the most-significant bits, and move
11656 the point_pos to indicate that the least-significant bits have been
11657 discarded. */
11658 point_pos -= HOST_BITS_PER_WIDE_INT;
11659 mantissa = mant_hi;
11661 /* We can permit four significant bits of mantissa only, plus a high bit
11662 which is always 1. */
11663 mask = (HOST_WIDE_INT_1U << (point_pos - 5)) - 1;
11664 if ((mantissa & mask) != 0)
11665 return -1;
11667 /* Now we know the mantissa is in range, chop off the unneeded bits. */
11668 mantissa >>= point_pos - 5;
11670 /* The mantissa may be zero. Disallow that case. (It's possible to load the
11671 floating-point immediate zero with Neon using an integer-zero load, but
11672 that case is handled elsewhere.) */
11673 if (mantissa == 0)
11674 return -1;
11676 gcc_assert (mantissa >= 16 && mantissa <= 31);
11678 /* The value of 5 here would be 4 if GCC used IEEE754-like encoding (where
11679 normalized significands are in the range [1, 2). (Our mantissa is shifted
11680 left 4 places at this point relative to normalized IEEE754 values). GCC
11681 internally uses [0.5, 1) (see real.c), so the exponent returned from
11682 REAL_EXP must be altered. */
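  /* For instance (illustrative): 1.0 is represented internally as
     0.5 * 2^1, so REAL_EXP yields 1 and the adjustment below gives
     5 - 1 = 4, i.e. r = 4 as required by the quarter-precision formula.  */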
11683 exponent = 5 - exponent;
11685 if (exponent < 0 || exponent > 7)
11686 return -1;
11688 /* Sign, mantissa and exponent are now in the correct form to plug into the
11689 formula described in the comment above. */
11690 return (sign << 7) | ((exponent ^ 3) << 4) | (mantissa - 16);
11693 /* Return TRUE if rtx X is a valid immediate VFPv3 constant. */
11695 vfp3_const_double_rtx (rtx x)
11697 if (!TARGET_VFP3)
11698 return 0;
11700 return vfp3_const_double_index (x) != -1;
11703 /* Recognize immediates which can be used in various Neon instructions. Legal
11704 immediates are described by the following table (for VMVN variants, the
11705 bitwise inverse of the constant shown is recognized. In either case, VMOV
11706 is output and the correct instruction to use for a given constant is chosen
11707 by the assembler). The constant shown is replicated across all elements of
11708 the destination vector.
11710 insn elems variant constant (binary)
11711 ---- ----- ------- -----------------
11712 vmov i32 0 00000000 00000000 00000000 abcdefgh
11713 vmov i32 1 00000000 00000000 abcdefgh 00000000
11714 vmov i32 2 00000000 abcdefgh 00000000 00000000
11715 vmov i32 3 abcdefgh 00000000 00000000 00000000
11716 vmov i16 4 00000000 abcdefgh
11717 vmov i16 5 abcdefgh 00000000
11718 vmvn i32 6 00000000 00000000 00000000 abcdefgh
11719 vmvn i32 7 00000000 00000000 abcdefgh 00000000
11720 vmvn i32 8 00000000 abcdefgh 00000000 00000000
11721 vmvn i32 9 abcdefgh 00000000 00000000 00000000
11722 vmvn i16 10 00000000 abcdefgh
11723 vmvn i16 11 abcdefgh 00000000
11724 vmov i32 12 00000000 00000000 abcdefgh 11111111
11725 vmvn i32 13 00000000 00000000 abcdefgh 11111111
11726 vmov i32 14 00000000 abcdefgh 11111111 11111111
11727 vmvn i32 15 00000000 abcdefgh 11111111 11111111
11728 vmov i8 16 abcdefgh
11729 vmov i64 17 aaaaaaaa bbbbbbbb cccccccc dddddddd
11730 eeeeeeee ffffffff gggggggg hhhhhhhh
11731 vmov f32 18 aBbbbbbc defgh000 00000000 00000000
11732 vmov f32 19 00000000 00000000 00000000 00000000
11734 For case 18, B = !b. Representable values are exactly those accepted by
11735 vfp3_const_double_index, but are output as floating-point numbers rather
11736 than indices.
11738 For case 19, we will change it to vmov.i32 when assembling.
11740 Variants 0-5 (inclusive) may also be used as immediates for the second
11741 operand of VORR/VBIC instructions.
11743 The INVERSE argument causes the bitwise inverse of the given operand to be
11744 recognized instead (used for recognizing legal immediates for the VAND/VORN
11745 pseudo-instructions). If INVERSE is true, the value placed in *MODCONST is
11746 *not* inverted (i.e. the pseudo-instruction forms vand/vorn should still be
11747 output, rather than the real insns vbic/vorr).
11749 INVERSE makes no difference to the recognition of float vectors.
11751 The return value is the variant of immediate as shown in the above table, or
11752 -1 if the given value doesn't match any of the listed patterns.
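/* Illustrative example (added for clarity): a V4SI constant whose every
   element is 0x0000ab00 matches variant 1 above (the abcdefgh byte sits in
   bits 8-15), so the function returns 1 with *ELEMENTWIDTH set to 32 and
   *MODCONST set to 0x0000ab00.  A V4SI constant of all 0xffffff00 matches
   the VMVN variant 6, since its bitwise inverse is 0x000000ff.  */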
11754 static int
11755 neon_valid_immediate (rtx op, machine_mode mode, int inverse,
11756 rtx *modconst, int *elementwidth)
11758 #define CHECK(STRIDE, ELSIZE, CLASS, TEST) \
11759 matches = 1; \
11760 for (i = 0; i < idx; i += (STRIDE)) \
11761 if (!(TEST)) \
11762 matches = 0; \
11763 if (matches) \
11765 immtype = (CLASS); \
11766 elsize = (ELSIZE); \
11767 break; \
11770 unsigned int i, elsize = 0, idx = 0, n_elts;
11771 unsigned int innersize;
11772 unsigned char bytes[16];
11773 int immtype = -1, matches;
11774 unsigned int invmask = inverse ? 0xff : 0;
11775 bool vector = GET_CODE (op) == CONST_VECTOR;
11777 if (vector)
11778 n_elts = CONST_VECTOR_NUNITS (op);
11779 else
11781 n_elts = 1;
11782 if (mode == VOIDmode)
11783 mode = DImode;
11786 innersize = GET_MODE_UNIT_SIZE (mode);
11788 /* Vectors of float constants. */
11789 if (GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
11791 rtx el0 = CONST_VECTOR_ELT (op, 0);
11793 if (!vfp3_const_double_rtx (el0) && el0 != CONST0_RTX (GET_MODE (el0)))
11794 return -1;
11796 /* FP16 vectors cannot be represented. */
11797 if (GET_MODE_INNER (mode) == HFmode)
11798 return -1;
11800 /* All elements in the vector must be the same. Note that 0.0 and -0.0
11801 are distinct in this context. */
11802 if (!const_vec_duplicate_p (op))
11803 return -1;
11805 if (modconst)
11806 *modconst = CONST_VECTOR_ELT (op, 0);
11808 if (elementwidth)
11809 *elementwidth = 0;
11811 if (el0 == CONST0_RTX (GET_MODE (el0)))
11812 return 19;
11813 else
11814 return 18;
11817 /* The tricks done in the code below apply for little-endian vector layout.
11818 For big-endian vectors only allow vectors of the form { a, a, a..., a }.
11819 FIXME: Implement logic for big-endian vectors. */
11820 if (BYTES_BIG_ENDIAN && vector && !const_vec_duplicate_p (op))
11821 return -1;
11823 /* Splat vector constant out into a byte vector. */
11824 for (i = 0; i < n_elts; i++)
11826 rtx el = vector ? CONST_VECTOR_ELT (op, i) : op;
11827 unsigned HOST_WIDE_INT elpart;
11829 gcc_assert (CONST_INT_P (el));
11830 elpart = INTVAL (el);
11832 for (unsigned int byte = 0; byte < innersize; byte++)
11834 bytes[idx++] = (elpart & 0xff) ^ invmask;
11835 elpart >>= BITS_PER_UNIT;
11839 /* Sanity check. */
11840 gcc_assert (idx == GET_MODE_SIZE (mode));
11844 CHECK (4, 32, 0, bytes[i] == bytes[0] && bytes[i + 1] == 0
11845 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
11847 CHECK (4, 32, 1, bytes[i] == 0 && bytes[i + 1] == bytes[1]
11848 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
11850 CHECK (4, 32, 2, bytes[i] == 0 && bytes[i + 1] == 0
11851 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);
11853 CHECK (4, 32, 3, bytes[i] == 0 && bytes[i + 1] == 0
11854 && bytes[i + 2] == 0 && bytes[i + 3] == bytes[3]);
11856 CHECK (2, 16, 4, bytes[i] == bytes[0] && bytes[i + 1] == 0);
11858 CHECK (2, 16, 5, bytes[i] == 0 && bytes[i + 1] == bytes[1]);
11860 CHECK (4, 32, 6, bytes[i] == bytes[0] && bytes[i + 1] == 0xff
11861 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
11863 CHECK (4, 32, 7, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
11864 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
11866 CHECK (4, 32, 8, bytes[i] == 0xff && bytes[i + 1] == 0xff
11867 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);
11869 CHECK (4, 32, 9, bytes[i] == 0xff && bytes[i + 1] == 0xff
11870 && bytes[i + 2] == 0xff && bytes[i + 3] == bytes[3]);
11872 CHECK (2, 16, 10, bytes[i] == bytes[0] && bytes[i + 1] == 0xff);
11874 CHECK (2, 16, 11, bytes[i] == 0xff && bytes[i + 1] == bytes[1]);
11876 CHECK (4, 32, 12, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
11877 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
11879 CHECK (4, 32, 13, bytes[i] == 0 && bytes[i + 1] == bytes[1]
11880 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
11882 CHECK (4, 32, 14, bytes[i] == 0xff && bytes[i + 1] == 0xff
11883 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);
11885 CHECK (4, 32, 15, bytes[i] == 0 && bytes[i + 1] == 0
11886 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);
11888 CHECK (1, 8, 16, bytes[i] == bytes[0]);
11890 CHECK (1, 64, 17, (bytes[i] == 0 || bytes[i] == 0xff)
11891 && bytes[i] == bytes[(i + 8) % idx]);
11893 while (0);
11895 if (immtype == -1)
11896 return -1;
11898 if (elementwidth)
11899 *elementwidth = elsize;
11901 if (modconst)
11903 unsigned HOST_WIDE_INT imm = 0;
11905 /* Un-invert bytes of recognized vector, if necessary. */
11906 if (invmask != 0)
11907 for (i = 0; i < idx; i++)
11908 bytes[i] ^= invmask;
11910 if (immtype == 17)
11912 /* FIXME: Broken on 32-bit H_W_I hosts. */
11913 gcc_assert (sizeof (HOST_WIDE_INT) == 8);
11915 for (i = 0; i < 8; i++)
11916 imm |= (unsigned HOST_WIDE_INT) (bytes[i] ? 0xff : 0)
11917 << (i * BITS_PER_UNIT);
11919 *modconst = GEN_INT (imm);
11921 else
11923 unsigned HOST_WIDE_INT imm = 0;
11925 for (i = 0; i < elsize / BITS_PER_UNIT; i++)
11926 imm |= (unsigned HOST_WIDE_INT) bytes[i] << (i * BITS_PER_UNIT);
11928 *modconst = GEN_INT (imm);
11932 return immtype;
11933 #undef CHECK
11936 /* Return TRUE if rtx X is legal for use as either a Neon VMOV (or, implicitly,
11937 VMVN) immediate. Write back width per element to *ELEMENTWIDTH (or zero for
11938 float elements), and a modified constant (whatever should be output for a
11939 VMOV) in *MODCONST. */
11942 neon_immediate_valid_for_move (rtx op, machine_mode mode,
11943 rtx *modconst, int *elementwidth)
11945 rtx tmpconst;
11946 int tmpwidth;
11947 int retval = neon_valid_immediate (op, mode, 0, &tmpconst, &tmpwidth);
11949 if (retval == -1)
11950 return 0;
11952 if (modconst)
11953 *modconst = tmpconst;
11955 if (elementwidth)
11956 *elementwidth = tmpwidth;
11958 return 1;
11961 /* Return TRUE if rtx X is legal for use in a VORR or VBIC instruction. If
11962 the immediate is valid, write a constant suitable for using as an operand
11963 to VORR/VBIC/VAND/VORN to *MODCONST and the corresponding element width to
11964 *ELEMENTWIDTH. See neon_valid_immediate for description of INVERSE. */
11967 neon_immediate_valid_for_logic (rtx op, machine_mode mode, int inverse,
11968 rtx *modconst, int *elementwidth)
11970 rtx tmpconst;
11971 int tmpwidth;
11972 int retval = neon_valid_immediate (op, mode, inverse, &tmpconst, &tmpwidth);
11974 if (retval < 0 || retval > 5)
11975 return 0;
11977 if (modconst)
11978 *modconst = tmpconst;
11980 if (elementwidth)
11981 *elementwidth = tmpwidth;
11983 return 1;
11986 /* Return TRUE if rtx OP is legal for use in a VSHR or VSHL instruction. If
11987 the immediate is valid, write a constant suitable for using as an operand
11988 to VSHR/VSHL to *MODCONST and the corresponding element width to
11989 *ELEMENTWIDTH. ISLEFTSHIFT selects between left and right shifts,
11990 which have different limitations. */
11993 neon_immediate_valid_for_shift (rtx op, machine_mode mode,
11994 rtx *modconst, int *elementwidth,
11995 bool isleftshift)
11997 unsigned int innersize = GET_MODE_UNIT_SIZE (mode);
11998 unsigned int n_elts = CONST_VECTOR_NUNITS (op), i;
11999 unsigned HOST_WIDE_INT last_elt = 0;
12000 unsigned HOST_WIDE_INT maxshift;
12002 /* Check that all elements of the vector constant are identical. */
12003 for (i = 0; i < n_elts; i++)
12005 rtx el = CONST_VECTOR_ELT (op, i);
12006 unsigned HOST_WIDE_INT elpart;
12008 if (CONST_INT_P (el))
12009 elpart = INTVAL (el);
12010 else if (CONST_DOUBLE_P (el))
12011 return 0;
12012 else
12013 gcc_unreachable ();
12015 if (i != 0 && elpart != last_elt)
12016 return 0;
12018 last_elt = elpart;
12021 /* Shift less than element size. */
12022 maxshift = innersize * 8;
12024 if (isleftshift)
12026 /* Left shift immediate value can be from 0 to <size>-1. */
12027 if (last_elt >= maxshift)
12028 return 0;
12030 else
12032 /* Right shift immediate value can be from 1 to <size>. */
12033 if (last_elt == 0 || last_elt > maxshift)
12034 return 0;
12037 if (elementwidth)
12038 *elementwidth = innersize * 8;
12040 if (modconst)
12041 *modconst = CONST_VECTOR_ELT (op, 0);
12043 return 1;
12046 /* Return a string suitable for output of Neon immediate logic operation
12047 MNEM. */
12049 char *
12050 neon_output_logic_immediate (const char *mnem, rtx *op2, machine_mode mode,
12051 int inverse, int quad)
12053 int width, is_valid;
12054 static char templ[40];
12056 is_valid = neon_immediate_valid_for_logic (*op2, mode, inverse, op2, &width);
12058 gcc_assert (is_valid != 0);
12060 if (quad)
12061 sprintf (templ, "%s.i%d\t%%q0, %%2", mnem, width);
12062 else
12063 sprintf (templ, "%s.i%d\t%%P0, %%2", mnem, width);
12065 return templ;
12068 /* Return a string suitable for output of Neon immediate shift operation
12069 (VSHR or VSHL) MNEM. */
12071 char *
12072 neon_output_shift_immediate (const char *mnem, char sign, rtx *op2,
12073 machine_mode mode, int quad,
12074 bool isleftshift)
12076 int width, is_valid;
12077 static char templ[40];
12079 is_valid = neon_immediate_valid_for_shift (*op2, mode, op2, &width, isleftshift);
12080 gcc_assert (is_valid != 0);
12082 if (quad)
12083 sprintf (templ, "%s.%c%d\t%%q0, %%q1, %%2", mnem, sign, width);
12084 else
12085 sprintf (templ, "%s.%c%d\t%%P0, %%P1, %%2", mnem, sign, width);
12087 return templ;
12090 /* Output a sequence of pairwise operations to implement a reduction.
12091 NOTE: We do "too much work" here, because pairwise operations work on two
12092 registers-worth of operands in one go. Unfortunately I don't think we can
12093 exploit those extra calculations to do the full operation in fewer steps.
12094 Although all vector elements of the result but the first are ignored, we
12095 actually calculate the same result in each of the elements. An alternative
12096 such as initially loading a vector with zero to use as each of the second
12097 operands would use up an additional register and take an extra instruction,
12098 for no particular gain. */
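/* Illustrative example (added for clarity): reducing a V4HI value
   { a, b, c, d } with a pairwise-add REDUC takes two steps:
     step 1: { a+b, c+d, a+b, c+d }
     step 2: { a+b+c+d, a+b+c+d, a+b+c+d, a+b+c+d }
   after which only element 0 of OP0 is of interest to the caller.  */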
12100 void
12101 neon_pairwise_reduce (rtx op0, rtx op1, machine_mode mode,
12102 rtx (*reduc) (rtx, rtx, rtx))
12104 unsigned int i, parts = GET_MODE_SIZE (mode) / GET_MODE_UNIT_SIZE (mode);
12105 rtx tmpsum = op1;
12107 for (i = parts / 2; i >= 1; i /= 2)
12109 rtx dest = (i == 1) ? op0 : gen_reg_rtx (mode);
12110 emit_insn (reduc (dest, tmpsum, tmpsum));
12111 tmpsum = dest;
12115 /* If VALS is a vector constant that can be loaded into a register
12116 using VDUP, generate instructions to do so and return an RTX to
12117 assign to the register. Otherwise return NULL_RTX. */
12119 static rtx
12120 neon_vdup_constant (rtx vals)
12122 machine_mode mode = GET_MODE (vals);
12123 machine_mode inner_mode = GET_MODE_INNER (mode);
12124 rtx x;
12126 if (GET_CODE (vals) != CONST_VECTOR || GET_MODE_SIZE (inner_mode) > 4)
12127 return NULL_RTX;
12129 if (!const_vec_duplicate_p (vals, &x))
12130 /* The elements are not all the same. We could handle repeating
12131 patterns of a mode larger than INNER_MODE here (e.g. int8x8_t
12132 {0, C, 0, C, 0, C, 0, C} which can be loaded using
12133 vdup.i16). */
12134 return NULL_RTX;
12136 /* We can load this constant by using VDUP and a constant in a
12137 single ARM register. This will be cheaper than a vector
12138 load. */
12140 x = copy_to_mode_reg (inner_mode, x);
12141 return gen_rtx_VEC_DUPLICATE (mode, x);
12144 /* Generate code to load VALS, which is a PARALLEL containing only
12145 constants (for vec_init) or CONST_VECTOR, efficiently into a
12146 register. Returns an RTX to copy into the register, or NULL_RTX
12147 for a PARALLEL that can not be converted into a CONST_VECTOR. */
12150 neon_make_constant (rtx vals)
12152 machine_mode mode = GET_MODE (vals);
12153 rtx target;
12154 rtx const_vec = NULL_RTX;
12155 int n_elts = GET_MODE_NUNITS (mode);
12156 int n_const = 0;
12157 int i;
12159 if (GET_CODE (vals) == CONST_VECTOR)
12160 const_vec = vals;
12161 else if (GET_CODE (vals) == PARALLEL)
12163 /* A CONST_VECTOR must contain only CONST_INTs and
12164 CONST_DOUBLEs, but CONSTANT_P allows more (e.g. SYMBOL_REF).
12165 Only store valid constants in a CONST_VECTOR. */
12166 for (i = 0; i < n_elts; ++i)
12168 rtx x = XVECEXP (vals, 0, i);
12169 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
12170 n_const++;
12172 if (n_const == n_elts)
12173 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
12175 else
12176 gcc_unreachable ();
12178 if (const_vec != NULL
12179 && neon_immediate_valid_for_move (const_vec, mode, NULL, NULL))
12180 /* Load using VMOV. On Cortex-A8 this takes one cycle. */
12181 return const_vec;
12182 else if ((target = neon_vdup_constant (vals)) != NULL_RTX)
12183 /* Loaded using VDUP. On Cortex-A8 the VDUP takes one NEON
12184 pipeline cycle; creating the constant takes one or two ARM
12185 pipeline cycles. */
12186 return target;
12187 else if (const_vec != NULL_RTX)
12188 /* Load from constant pool. On Cortex-A8 this takes two cycles
12189 (for either double or quad vectors). We can not take advantage
12190 of single-cycle VLD1 because we need a PC-relative addressing
12191 mode. */
12192 return const_vec;
12193 else
12194 /* A PARALLEL containing something not valid inside CONST_VECTOR.
12195 We can not construct an initializer. */
12196 return NULL_RTX;
12199 /* Initialize vector TARGET to VALS. */
12201 void
12202 neon_expand_vector_init (rtx target, rtx vals)
12204 machine_mode mode = GET_MODE (target);
12205 machine_mode inner_mode = GET_MODE_INNER (mode);
12206 int n_elts = GET_MODE_NUNITS (mode);
12207 int n_var = 0, one_var = -1;
12208 bool all_same = true;
12209 rtx x, mem;
12210 int i;
12212 for (i = 0; i < n_elts; ++i)
12214 x = XVECEXP (vals, 0, i);
12215 if (!CONSTANT_P (x))
12216 ++n_var, one_var = i;
12218 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
12219 all_same = false;
12222 if (n_var == 0)
12224 rtx constant = neon_make_constant (vals);
12225 if (constant != NULL_RTX)
12227 emit_move_insn (target, constant);
12228 return;
12232 /* Splat a single non-constant element if we can. */
12233 if (all_same && GET_MODE_SIZE (inner_mode) <= 4)
12235 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
12236 emit_insn (gen_rtx_SET (target, gen_rtx_VEC_DUPLICATE (mode, x)));
12237 return;
12240 /* One field is non-constant. Load constant then overwrite varying
12241 field. This is more efficient than using the stack. */
12242 if (n_var == 1)
12244 rtx copy = copy_rtx (vals);
12245 rtx index = GEN_INT (one_var);
12247 /* Load constant part of vector, substitute neighboring value for
12248 varying element. */
12249 XVECEXP (copy, 0, one_var) = XVECEXP (vals, 0, (one_var + 1) % n_elts);
12250 neon_expand_vector_init (target, copy);
12252 /* Insert variable. */
12253 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, one_var));
12254 switch (mode)
12256 case E_V8QImode:
12257 emit_insn (gen_neon_vset_lanev8qi (target, x, target, index));
12258 break;
12259 case E_V16QImode:
12260 emit_insn (gen_neon_vset_lanev16qi (target, x, target, index));
12261 break;
12262 case E_V4HImode:
12263 emit_insn (gen_neon_vset_lanev4hi (target, x, target, index));
12264 break;
12265 case E_V8HImode:
12266 emit_insn (gen_neon_vset_lanev8hi (target, x, target, index));
12267 break;
12268 case E_V2SImode:
12269 emit_insn (gen_neon_vset_lanev2si (target, x, target, index));
12270 break;
12271 case E_V4SImode:
12272 emit_insn (gen_neon_vset_lanev4si (target, x, target, index));
12273 break;
12274 case E_V2SFmode:
12275 emit_insn (gen_neon_vset_lanev2sf (target, x, target, index));
12276 break;
12277 case E_V4SFmode:
12278 emit_insn (gen_neon_vset_lanev4sf (target, x, target, index));
12279 break;
12280 case E_V2DImode:
12281 emit_insn (gen_neon_vset_lanev2di (target, x, target, index));
12282 break;
12283 default:
12284 gcc_unreachable ();
12286 return;
12289 /* Construct the vector in memory one field at a time
12290 and load the whole vector. */
12291 mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
12292 for (i = 0; i < n_elts; i++)
12293 emit_move_insn (adjust_address_nv (mem, inner_mode,
12294 i * GET_MODE_SIZE (inner_mode)),
12295 XVECEXP (vals, 0, i));
12296 emit_move_insn (target, mem);
12299 /* Ensure OPERAND lies between LOW (inclusive) and HIGH (exclusive). Raise
12300 ERR if it doesn't. EXP indicates the source location, which includes the
12301 inlining history for intrinsics. */
12303 static void
12304 bounds_check (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high,
12305 const_tree exp, const char *desc)
12307 HOST_WIDE_INT lane;
12309 gcc_assert (CONST_INT_P (operand));
12311 lane = INTVAL (operand);
12313 if (lane < low || lane >= high)
12315 if (exp)
12316 error ("%K%s %wd out of range %wd - %wd",
12317 exp, desc, lane, low, high - 1);
12318 else
12319 error ("%s %wd out of range %wd - %wd", desc, lane, low, high - 1);
12323 /* Bounds-check lanes. */
12325 void
12326 neon_lane_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high,
12327 const_tree exp)
12329 bounds_check (operand, low, high, exp, "lane");
12332 /* Bounds-check constants. */
12334 void
12335 arm_const_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
12337 bounds_check (operand, low, high, NULL_TREE, "constant");
12340 HOST_WIDE_INT
12341 neon_element_bits (machine_mode mode)
12343 return GET_MODE_UNIT_BITSIZE (mode);
12347 /* Predicates for `match_operand' and `match_operator'. */
12349 /* Return TRUE if OP is a valid coprocessor memory address pattern.
12350 WB is true if full writeback address modes are allowed and is false
12351 if limited writeback address modes (POST_INC and PRE_DEC) are
12352 allowed. */
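/* For illustration (derived from the checks below, not exhaustive): with WB
   false this accepts addresses such as (reg), (post_inc (reg)),
   (pre_dec (reg)) and (plus (reg) (const_int 8)); PRE_INC, POST_DEC and the
   {PRE,POST}_MODIFY forms additionally require WB to be true, and the
   reg+offset form must use a word-aligned offset of magnitude below 1024.  */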
12355 arm_coproc_mem_operand (rtx op, bool wb)
12357 rtx ind;
12359 /* Reject eliminable registers. */
12360 if (! (reload_in_progress || reload_completed || lra_in_progress)
12361 && ( reg_mentioned_p (frame_pointer_rtx, op)
12362 || reg_mentioned_p (arg_pointer_rtx, op)
12363 || reg_mentioned_p (virtual_incoming_args_rtx, op)
12364 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
12365 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
12366 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
12367 return FALSE;
12369 /* Constants are converted into offsets from labels. */
12370 if (!MEM_P (op))
12371 return FALSE;
12373 ind = XEXP (op, 0);
12375 if (reload_completed
12376 && (GET_CODE (ind) == LABEL_REF
12377 || (GET_CODE (ind) == CONST
12378 && GET_CODE (XEXP (ind, 0)) == PLUS
12379 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
12380 && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
12381 return TRUE;
12383 /* Match: (mem (reg)). */
12384 if (REG_P (ind))
12385 return arm_address_register_rtx_p (ind, 0);
12387 /* Autoincrement addressing modes. POST_INC and PRE_DEC are
12388 acceptable in any case (subject to verification by
12389 arm_address_register_rtx_p). We need WB to be true to accept
12390 PRE_INC and POST_DEC. */
12391 if (GET_CODE (ind) == POST_INC
12392 || GET_CODE (ind) == PRE_DEC
12393 || (wb
12394 && (GET_CODE (ind) == PRE_INC
12395 || GET_CODE (ind) == POST_DEC)))
12396 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
12398 if (wb
12399 && (GET_CODE (ind) == POST_MODIFY || GET_CODE (ind) == PRE_MODIFY)
12400 && arm_address_register_rtx_p (XEXP (ind, 0), 0)
12401 && GET_CODE (XEXP (ind, 1)) == PLUS
12402 && rtx_equal_p (XEXP (XEXP (ind, 1), 0), XEXP (ind, 0)))
12403 ind = XEXP (ind, 1);
12405 /* Match:
12406 (plus (reg)
12407 (const)). */
12408 if (GET_CODE (ind) == PLUS
12409 && REG_P (XEXP (ind, 0))
12410 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
12411 && CONST_INT_P (XEXP (ind, 1))
12412 && INTVAL (XEXP (ind, 1)) > -1024
12413 && INTVAL (XEXP (ind, 1)) < 1024
12414 && (INTVAL (XEXP (ind, 1)) & 3) == 0)
12415 return TRUE;
12417 return FALSE;
12420 /* Return TRUE if OP is a memory operand which we can load or store a vector
12421 to/from. TYPE is one of the following values:
12422 0 - Vector load/store (vldr)
12423 1 - Core registers (ldm)
12424 2 - Element/structure loads (vld1)
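/* For illustration (derived from the checks below): all three TYPE values
   accept a plain (reg) address; TYPE 0 additionally accepts POST_INC,
   PRE_DEC and word-aligned reg+offset forms; TYPE 2 accepts POST_INC and a
   POST_MODIFY whose increment is a register (the vld1 post-index-by-register
   form); TYPE 1 is essentially limited to a plain register address.  */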
12427 neon_vector_mem_operand (rtx op, int type, bool strict)
12429 rtx ind;
12431 /* Reject eliminable registers. */
12432 if (strict && ! (reload_in_progress || reload_completed)
12433 && (reg_mentioned_p (frame_pointer_rtx, op)
12434 || reg_mentioned_p (arg_pointer_rtx, op)
12435 || reg_mentioned_p (virtual_incoming_args_rtx, op)
12436 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
12437 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
12438 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
12439 return FALSE;
12441 /* Constants are converted into offsets from labels. */
12442 if (!MEM_P (op))
12443 return FALSE;
12445 ind = XEXP (op, 0);
12447 if (reload_completed
12448 && (GET_CODE (ind) == LABEL_REF
12449 || (GET_CODE (ind) == CONST
12450 && GET_CODE (XEXP (ind, 0)) == PLUS
12451 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
12452 && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
12453 return TRUE;
12455 /* Match: (mem (reg)). */
12456 if (REG_P (ind))
12457 return arm_address_register_rtx_p (ind, 0);
12459 /* Allow post-increment with Neon registers. */
12460 if ((type != 1 && GET_CODE (ind) == POST_INC)
12461 || (type == 0 && GET_CODE (ind) == PRE_DEC))
12462 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
12464 /* Allow post-increment by register for VLDn */
12465 if (type == 2 && GET_CODE (ind) == POST_MODIFY
12466 && GET_CODE (XEXP (ind, 1)) == PLUS
12467 && REG_P (XEXP (XEXP (ind, 1), 1)))
12468 return true;
12470 /* Match:
12471 (plus (reg)
12472 (const)). */
12473 if (type == 0
12474 && GET_CODE (ind) == PLUS
12475 && REG_P (XEXP (ind, 0))
12476 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
12477 && CONST_INT_P (XEXP (ind, 1))
12478 && INTVAL (XEXP (ind, 1)) > -1024
12479 /* For quad modes, we restrict the constant offset to be slightly less
12480 than what the instruction format permits. We have no such constraint
12481 on double mode offsets. (This must match arm_legitimate_index_p.) */
12482 && (INTVAL (XEXP (ind, 1))
12483 < (VALID_NEON_QREG_MODE (GET_MODE (op))? 1016 : 1024))
12484 && (INTVAL (XEXP (ind, 1)) & 3) == 0)
12485 return TRUE;
12487 return FALSE;
12490 /* Return TRUE if OP is a mem suitable for loading/storing a Neon struct
12491 type. */
12493 neon_struct_mem_operand (rtx op)
12495 rtx ind;
12497 /* Reject eliminable registers. */
12498 if (! (reload_in_progress || reload_completed)
12499 && ( reg_mentioned_p (frame_pointer_rtx, op)
12500 || reg_mentioned_p (arg_pointer_rtx, op)
12501 || reg_mentioned_p (virtual_incoming_args_rtx, op)
12502 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
12503 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
12504 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
12505 return FALSE;
12507 /* Constants are converted into offsets from labels. */
12508 if (!MEM_P (op))
12509 return FALSE;
12511 ind = XEXP (op, 0);
12513 if (reload_completed
12514 && (GET_CODE (ind) == LABEL_REF
12515 || (GET_CODE (ind) == CONST
12516 && GET_CODE (XEXP (ind, 0)) == PLUS
12517 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
12518 && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
12519 return TRUE;
12521 /* Match: (mem (reg)). */
12522 if (REG_P (ind))
12523 return arm_address_register_rtx_p (ind, 0);
12525 /* vldm/vstm allows POST_INC (ia) and PRE_DEC (db). */
12526 if (GET_CODE (ind) == POST_INC
12527 || GET_CODE (ind) == PRE_DEC)
12528 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
12530 return FALSE;
12533 /* Return true if X is a register that will be eliminated later on. */
12535 arm_eliminable_register (rtx x)
12537 return REG_P (x) && (REGNO (x) == FRAME_POINTER_REGNUM
12538 || REGNO (x) == ARG_POINTER_REGNUM
12539 || (REGNO (x) >= FIRST_VIRTUAL_REGISTER
12540 && REGNO (x) <= LAST_VIRTUAL_REGISTER));
12543 /* Return GENERAL_REGS if a scratch register required to reload x to/from
12544 coprocessor registers. Otherwise return NO_REGS. */
12546 enum reg_class
12547 coproc_secondary_reload_class (machine_mode mode, rtx x, bool wb)
12549 if (mode == HFmode)
12551 if (!TARGET_NEON_FP16 && !TARGET_VFP_FP16INST)
12552 return GENERAL_REGS;
12553 if (s_register_operand (x, mode) || neon_vector_mem_operand (x, 2, true))
12554 return NO_REGS;
12555 return GENERAL_REGS;
12558 /* The neon move patterns handle all legitimate vector and struct
12559 addresses. */
12560 if (TARGET_NEON
12561 && (MEM_P (x) || GET_CODE (x) == CONST_VECTOR)
12562 && (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
12563 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT
12564 || VALID_NEON_STRUCT_MODE (mode)))
12565 return NO_REGS;
12567 if (arm_coproc_mem_operand (x, wb) || s_register_operand (x, mode))
12568 return NO_REGS;
12570 return GENERAL_REGS;
12573 /* Values which must be returned in the most-significant end of the return
12574 register. */
12576 static bool
12577 arm_return_in_msb (const_tree valtype)
12579 return (TARGET_AAPCS_BASED
12580 && BYTES_BIG_ENDIAN
12581 && (AGGREGATE_TYPE_P (valtype)
12582 || TREE_CODE (valtype) == COMPLEX_TYPE
12583 || FIXED_POINT_TYPE_P (valtype)));
12586 /* Return TRUE if X references a SYMBOL_REF. */
12588 symbol_mentioned_p (rtx x)
12590 const char * fmt;
12591 int i;
12593 if (GET_CODE (x) == SYMBOL_REF)
12594 return 1;
12596 /* UNSPEC_TLS entries for a symbol include the SYMBOL_REF, but they
12597 are constant offsets, not symbols. */
12598 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
12599 return 0;
12601 fmt = GET_RTX_FORMAT (GET_CODE (x));
12603 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
12605 if (fmt[i] == 'E')
12607 int j;
12609 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
12610 if (symbol_mentioned_p (XVECEXP (x, i, j)))
12611 return 1;
12613 else if (fmt[i] == 'e' && symbol_mentioned_p (XEXP (x, i)))
12614 return 1;
12617 return 0;
12620 /* Return TRUE if X references a LABEL_REF. */
12622 label_mentioned_p (rtx x)
12624 const char * fmt;
12625 int i;
12627 if (GET_CODE (x) == LABEL_REF)
12628 return 1;
12630 /* UNSPEC_TLS entries for a symbol include a LABEL_REF for the referencing
12631 instruction, but they are constant offsets, not symbols. */
12632 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
12633 return 0;
12635 fmt = GET_RTX_FORMAT (GET_CODE (x));
12636 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
12638 if (fmt[i] == 'E')
12640 int j;
12642 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
12643 if (label_mentioned_p (XVECEXP (x, i, j)))
12644 return 1;
12646 else if (fmt[i] == 'e' && label_mentioned_p (XEXP (x, i)))
12647 return 1;
12650 return 0;
12654 tls_mentioned_p (rtx x)
12656 switch (GET_CODE (x))
12658 case CONST:
12659 return tls_mentioned_p (XEXP (x, 0));
12661 case UNSPEC:
12662 if (XINT (x, 1) == UNSPEC_TLS)
12663 return 1;
12665 /* Fall through. */
12666 default:
12667 return 0;
12671 /* Must not copy any rtx that uses a pc-relative address.
12672 Also, disallow copying of load-exclusive instructions that
12673 may appear after splitting of compare-and-swap-style operations
12674 so as to prevent those loops from being transformed away from their
12675 canonical forms (see PR 69904). */
12677 static bool
12678 arm_cannot_copy_insn_p (rtx_insn *insn)
12680 /* The tls call insn cannot be copied, as it is paired with a data
12681 word. */
12682 if (recog_memoized (insn) == CODE_FOR_tlscall)
12683 return true;
12685 subrtx_iterator::array_type array;
12686 FOR_EACH_SUBRTX (iter, array, PATTERN (insn), ALL)
12688 const_rtx x = *iter;
12689 if (GET_CODE (x) == UNSPEC
12690 && (XINT (x, 1) == UNSPEC_PIC_BASE
12691 || XINT (x, 1) == UNSPEC_PIC_UNIFIED))
12692 return true;
12695 rtx set = single_set (insn);
12696 if (set)
12698 rtx src = SET_SRC (set);
12699 if (GET_CODE (src) == ZERO_EXTEND)
12700 src = XEXP (src, 0);
12702 /* Catch the load-exclusive and load-acquire operations. */
12703 if (GET_CODE (src) == UNSPEC_VOLATILE
12704 && (XINT (src, 1) == VUNSPEC_LL
12705 || XINT (src, 1) == VUNSPEC_LAX))
12706 return true;
12708 return false;
12711 enum rtx_code
12712 minmax_code (rtx x)
12714 enum rtx_code code = GET_CODE (x);
12716 switch (code)
12718 case SMAX:
12719 return GE;
12720 case SMIN:
12721 return LE;
12722 case UMIN:
12723 return LEU;
12724 case UMAX:
12725 return GEU;
12726 default:
12727 gcc_unreachable ();
12731 /* Match pair of min/max operators that can be implemented via usat/ssat. */
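/* Worked example (added for clarity): a clamp to [0, 255] gives
   exact_log2 (255 + 1) = 8, so *MASK = 8 and *SIGNED_SAT = false (usat #8);
   a clamp to [-128, 127] gives log = 7 with LO_BOUND == -HI_BOUND - 1, so
   *MASK = 8 and *SIGNED_SAT = true (ssat #8).  */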
12733 bool
12734 arm_sat_operator_match (rtx lo_bound, rtx hi_bound,
12735 int *mask, bool *signed_sat)
12737 /* The high bound must be a power of two minus one. */
12738 int log = exact_log2 (INTVAL (hi_bound) + 1);
12739 if (log == -1)
12740 return false;
12742 /* The low bound is either zero (for usat) or one less than the
12743 negation of the high bound (for ssat). */
12744 if (INTVAL (lo_bound) == 0)
12746 if (mask)
12747 *mask = log;
12748 if (signed_sat)
12749 *signed_sat = false;
12751 return true;
12754 if (INTVAL (lo_bound) == -INTVAL (hi_bound) - 1)
12756 if (mask)
12757 *mask = log + 1;
12758 if (signed_sat)
12759 *signed_sat = true;
12761 return true;
12764 return false;
12767 /* Return 1 if memory locations are adjacent. */
12769 adjacent_mem_locations (rtx a, rtx b)
12771 /* We don't guarantee to preserve the order of these memory refs. */
12772 if (volatile_refs_p (a) || volatile_refs_p (b))
12773 return 0;
12775 if ((REG_P (XEXP (a, 0))
12776 || (GET_CODE (XEXP (a, 0)) == PLUS
12777 && CONST_INT_P (XEXP (XEXP (a, 0), 1))))
12778 && (REG_P (XEXP (b, 0))
12779 || (GET_CODE (XEXP (b, 0)) == PLUS
12780 && CONST_INT_P (XEXP (XEXP (b, 0), 1)))))
12782 HOST_WIDE_INT val0 = 0, val1 = 0;
12783 rtx reg0, reg1;
12784 int val_diff;
12786 if (GET_CODE (XEXP (a, 0)) == PLUS)
12788 reg0 = XEXP (XEXP (a, 0), 0);
12789 val0 = INTVAL (XEXP (XEXP (a, 0), 1));
12791 else
12792 reg0 = XEXP (a, 0);
12794 if (GET_CODE (XEXP (b, 0)) == PLUS)
12796 reg1 = XEXP (XEXP (b, 0), 0);
12797 val1 = INTVAL (XEXP (XEXP (b, 0), 1));
12799 else
12800 reg1 = XEXP (b, 0);
12802 /* Don't accept any offset that will require multiple
12803 instructions to handle, since this would cause the
12804 arith_adjacentmem pattern to output an overlong sequence. */
12805 if (!const_ok_for_op (val0, PLUS) || !const_ok_for_op (val1, PLUS))
12806 return 0;
12808 /* Don't allow an eliminable register: register elimination can make
12809 the offset too large. */
12810 if (arm_eliminable_register (reg0))
12811 return 0;
12813 val_diff = val1 - val0;
12815 if (arm_ld_sched)
12817 /* If the target has load delay slots, then there's no benefit
12818 to using an ldm instruction unless the offset is zero and
12819 we are optimizing for size. */
12820 return (optimize_size && (REGNO (reg0) == REGNO (reg1))
12821 && (val0 == 0 || val1 == 0 || val0 == 4 || val1 == 4)
12822 && (val_diff == 4 || val_diff == -4));
12825 return ((REGNO (reg0) == REGNO (reg1))
12826 && (val_diff == 4 || val_diff == -4));
12829 return 0;
12832 /* Return true if OP is a valid load or store multiple operation. LOAD is true
12833 for load operations, false for store operations. CONSECUTIVE is true
12834 if the register numbers in the operation must be consecutive in the register
12835 bank. RETURN_PC is true if value is to be loaded in PC.
12836 The pattern we are trying to match for load is:
12837 [(SET (R_d0) (MEM (PLUS (addr) (offset))))
12838 (SET (R_d1) (MEM (PLUS (addr) (offset + <reg_increment>))))
12841 (SET (R_dn) (MEM (PLUS (addr) (offset + n * <reg_increment>))))
12843 where
12844 1. If offset is 0, first insn should be (SET (R_d0) (MEM (src_addr))).
12845 2. REGNO (R_d0) < REGNO (R_d1) < ... < REGNO (R_dn).
12846 3. If consecutive is TRUE, then for kth register being loaded,
12847 REGNO (R_dk) = REGNO (R_d0) + k.
12848 The pattern for store is similar. */
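/* Illustrative example (added for clarity): a two-register SImode load
   multiple would be presented as something like
     (parallel [(set (reg:SI r4) (mem:SI (reg:SI r0)))
                (set (reg:SI r5) (mem:SI (plus:SI (reg:SI r0)
                                                  (const_int 4))))])
   which satisfies rules 1 and 2 above (zero offset for the first element
   and ascending register numbers).  */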
12849 bool
12850 ldm_stm_operation_p (rtx op, bool load, machine_mode mode,
12851 bool consecutive, bool return_pc)
12853 HOST_WIDE_INT count = XVECLEN (op, 0);
12854 rtx reg, mem, addr;
12855 unsigned regno;
12856 unsigned first_regno;
12857 HOST_WIDE_INT i = 1, base = 0, offset = 0;
12858 rtx elt;
12859 bool addr_reg_in_reglist = false;
12860 bool update = false;
12861 int reg_increment;
12862 int offset_adj;
12863 int regs_per_val;
12865 /* If not in SImode, then registers must be consecutive
12866 (e.g., VLDM instructions for DFmode). */
12867 gcc_assert ((mode == SImode) || consecutive);
12868 /* Setting return_pc for stores is illegal. */
12869 gcc_assert (!return_pc || load);
12871 /* Set up the increments and the regs per val based on the mode. */
12872 reg_increment = GET_MODE_SIZE (mode);
12873 regs_per_val = reg_increment / 4;
12874 offset_adj = return_pc ? 1 : 0;
12876 if (count <= 1
12877 || GET_CODE (XVECEXP (op, 0, offset_adj)) != SET
12878 || (load && !REG_P (SET_DEST (XVECEXP (op, 0, offset_adj)))))
12879 return false;
12881 /* Check if this is a write-back. */
12882 elt = XVECEXP (op, 0, offset_adj);
12883 if (GET_CODE (SET_SRC (elt)) == PLUS)
12885 i++;
12886 base = 1;
12887 update = true;
12889 /* The offset adjustment must be the number of registers being
12890 popped times the size of a single register. */
12891 if (!REG_P (SET_DEST (elt))
12892 || !REG_P (XEXP (SET_SRC (elt), 0))
12893 || (REGNO (SET_DEST (elt)) != REGNO (XEXP (SET_SRC (elt), 0)))
12894 || !CONST_INT_P (XEXP (SET_SRC (elt), 1))
12895 || INTVAL (XEXP (SET_SRC (elt), 1)) !=
12896 ((count - 1 - offset_adj) * reg_increment))
12897 return false;
12900 i = i + offset_adj;
12901 base = base + offset_adj;
12902 /* Perform a quick check so we don't blow up below. If only one reg is loaded,
12903 success depends on the type: VLDM can do just one reg,
12904 LDM must do at least two. */
12905 if ((count <= i) && (mode == SImode))
12906 return false;
12908 elt = XVECEXP (op, 0, i - 1);
12909 if (GET_CODE (elt) != SET)
12910 return false;
12912 if (load)
12914 reg = SET_DEST (elt);
12915 mem = SET_SRC (elt);
12917 else
12919 reg = SET_SRC (elt);
12920 mem = SET_DEST (elt);
12923 if (!REG_P (reg) || !MEM_P (mem))
12924 return false;
12926 regno = REGNO (reg);
12927 first_regno = regno;
12928 addr = XEXP (mem, 0);
12929 if (GET_CODE (addr) == PLUS)
12931 if (!CONST_INT_P (XEXP (addr, 1)))
12932 return false;
12934 offset = INTVAL (XEXP (addr, 1));
12935 addr = XEXP (addr, 0);
12938 if (!REG_P (addr))
12939 return false;
12941 /* Don't allow SP to be loaded unless it is also the base register. It
12942 guarantees that SP is reset correctly when an LDM instruction
12943 is interrupted. Otherwise, we might end up with a corrupt stack. */
12944 if (load && (REGNO (reg) == SP_REGNUM) && (REGNO (addr) != SP_REGNUM))
12945 return false;
12947 for (; i < count; i++)
12949 elt = XVECEXP (op, 0, i);
12950 if (GET_CODE (elt) != SET)
12951 return false;
12953 if (load)
12955 reg = SET_DEST (elt);
12956 mem = SET_SRC (elt);
12958 else
12960 reg = SET_SRC (elt);
12961 mem = SET_DEST (elt);
12964 if (!REG_P (reg)
12965 || GET_MODE (reg) != mode
12966 || REGNO (reg) <= regno
12967 || (consecutive
12968 && (REGNO (reg) !=
12969 (unsigned int) (first_regno + regs_per_val * (i - base))))
12970 /* Don't allow SP to be loaded unless it is also the base register. It
12971 guarantees that SP is reset correctly when an LDM instruction
12972 is interrupted. Otherwise, we might end up with a corrupt stack. */
12973 || (load && (REGNO (reg) == SP_REGNUM) && (REGNO (addr) != SP_REGNUM))
12974 || !MEM_P (mem)
12975 || GET_MODE (mem) != mode
12976 || ((GET_CODE (XEXP (mem, 0)) != PLUS
12977 || !rtx_equal_p (XEXP (XEXP (mem, 0), 0), addr)
12978 || !CONST_INT_P (XEXP (XEXP (mem, 0), 1))
12979 || (INTVAL (XEXP (XEXP (mem, 0), 1)) !=
12980 offset + (i - base) * reg_increment))
12981 && (!REG_P (XEXP (mem, 0))
12982 || offset + (i - base) * reg_increment != 0)))
12983 return false;
12985 regno = REGNO (reg);
12986 if (regno == REGNO (addr))
12987 addr_reg_in_reglist = true;
12990 if (load)
12992 if (update && addr_reg_in_reglist)
12993 return false;
12995 /* For Thumb-1, the address register is always modified - either by write-back
12996 or by explicit load. If the pattern does not describe an update,
12997 then the address register must be in the list of loaded registers. */
12998 if (TARGET_THUMB1)
12999 return update || addr_reg_in_reglist;
13002 return true;
13005 /* Return true iff it would be profitable to turn a sequence of NOPS loads
13006 or stores (depending on IS_STORE) into a load-multiple or store-multiple
13007 instruction. ADD_OFFSET is nonzero if the base address register needs
13008 to be modified with an add instruction before we can use it. */
13010 static bool
13011 multiple_operation_profitable_p (bool is_store ATTRIBUTE_UNUSED,
13012 int nops, HOST_WIDE_INT add_offset)
13014 /* For ARM8,9 & StrongARM, 2 ldr instructions are faster than an ldm
13015 if the offset isn't small enough. The reason 2 ldrs are faster
13016 is because these ARMs are able to do more than one cache access
13017 in a single cycle. The ARM9 and StrongARM have Harvard caches,
13018 whilst the ARM8 has a double bandwidth cache. This means that
13019 these cores can do both an instruction fetch and a data fetch in
13020 a single cycle, so the trick of calculating the address into a
13021 scratch register (one of the result regs) and then doing a load
13022 multiple actually becomes slower (and no smaller in code size).
13023 That is the transformation
13025 ldr rd1, [rbase + offset]
13026 ldr rd2, [rbase + offset + 4]
13028 to
13030 add rd1, rbase, offset
13031 ldmia rd1, {rd1, rd2}
13033 produces worse code -- '3 cycles + any stalls on rd2' instead of
13034 '2 cycles + any stalls on rd2'. On ARMs with only one cache
13035 access per cycle, the first sequence could never complete in less
13036 than 6 cycles, whereas the ldm sequence would only take 5 and
13037 would make better use of sequential accesses if not hitting the
13038 cache.
13040 We cheat here and test 'arm_ld_sched' which we currently know to
13041 only be true for the ARM8, ARM9 and StrongARM. If this ever
13042 changes, then the test below needs to be reworked. */
13043 if (nops == 2 && arm_ld_sched && add_offset != 0)
13044 return false;
13046 /* XScale has load-store double instructions, but they have stricter
13047 alignment requirements than load-store multiple, so we cannot
13048 use them.
13050 For XScale ldm requires 2 + NREGS cycles to complete and blocks
13051 the pipeline until completion.
13053 NREGS CYCLES
13054 1 3
13055 2 4
13056 3 5
13057 4 6
13059 An ldr instruction takes 1-3 cycles, but does not block the
13060 pipeline.
13062 NREGS CYCLES
13063 1 1-3
13064 2 2-6
13065 3 3-9
13066 4 4-12
13068 Best case ldr will always win. However, the more ldr instructions
13069 we issue, the less likely we are to be able to schedule them well.
13070 Using ldr instructions also increases code size.
13072 As a compromise, we use ldr for counts of 1 or 2 regs, and ldm
13073 for counts of 3 or 4 regs. */
13074 if (nops <= 2 && arm_tune_xscale && !optimize_size)
13075 return false;
13076 return true;
13079 /* Subroutine of load_multiple_sequence and store_multiple_sequence.
13080 Given an array of UNSORTED_OFFSETS, of which there are NOPS, compute
13081 an array ORDER which describes the sequence in which to access the
13082 offsets so that they are visited in ascending order. In this sequence,
13083 each offset must be larger by exactly 4 than the previous one. ORDER[0]
13084 must have been filled in by the caller with the index of the lowest offset.
13085 If UNSORTED_REGS is nonnull, it is an array of register numbers that
13086 we use to verify that ORDER produces an ascending order of registers.
13087 Return true if it was possible to construct such an order, false if
13088 not. */
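/* For example (added for clarity): with NOPS == 4 and
   UNSORTED_OFFSETS == { 8, 0, 4, 12 }, the caller sets ORDER[0] = 1 (the
   index of offset 0) and this function fills in ORDER = { 1, 2, 0, 3 },
   since the offsets 0, 4, 8 and 12 are each exactly 4 apart.  */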
13090 static bool
13091 compute_offset_order (int nops, HOST_WIDE_INT *unsorted_offsets, int *order,
13092 int *unsorted_regs)
13094 int i;
13095 for (i = 1; i < nops; i++)
13097 int j;
13099 order[i] = order[i - 1];
13100 for (j = 0; j < nops; j++)
13101 if (unsorted_offsets[j] == unsorted_offsets[order[i - 1]] + 4)
13103 /* We must find exactly one offset that is higher than the
13104 previous one by 4. */
13105 if (order[i] != order[i - 1])
13106 return false;
13107 order[i] = j;
13109 if (order[i] == order[i - 1])
13110 return false;
13111 /* The register numbers must be ascending. */
13112 if (unsorted_regs != NULL
13113 && unsorted_regs[order[i]] <= unsorted_regs[order[i - 1]])
13114 return false;
13116 return true;
13119 /* Used to determine in a peephole whether a sequence of load
13120 instructions can be changed into a load-multiple instruction.
13121 NOPS is the number of separate load instructions we are examining. The
13122 first NOPS entries in OPERANDS are the destination registers, the
13123 next NOPS entries are memory operands. If this function is
13124 successful, *BASE is set to the common base register of the memory
13125 accesses; *LOAD_OFFSET is set to the first memory location's offset
13126 from that base register.
13127 REGS is an array filled in with the destination register numbers.
13128 SAVED_ORDER (if nonnull), is an array filled in with an order that maps
13129 insn numbers to an ascending order of stores. If CHECK_REGS is true,
13130 the sequence of registers in REGS matches the loads from ascending memory
13131 locations, and the function verifies that the register numbers are
13132 themselves ascending. If CHECK_REGS is false, the register numbers
13133 are stored in the order they are found in the operands. */
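/* Illustrative example (added for clarity): for the four loads
     ldr r4, [r0]        ldr r5, [r0, #4]
     ldr r6, [r0, #8]    ldr r7, [r0, #12]
   the peephole passes OPERANDS = { r4, r5, r6, r7 } followed by the four
   MEMs; this function then returns 1 (the ldmia case), sets *BASE to the
   register number of r0, *LOAD_OFFSET to 0 and fills REGS with
   { 4, 5, 6, 7 }.  */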
13134 static int
13135 load_multiple_sequence (rtx *operands, int nops, int *regs, int *saved_order,
13136 int *base, HOST_WIDE_INT *load_offset, bool check_regs)
13138 int unsorted_regs[MAX_LDM_STM_OPS];
13139 HOST_WIDE_INT unsorted_offsets[MAX_LDM_STM_OPS];
13140 int order[MAX_LDM_STM_OPS];
13141 rtx base_reg_rtx = NULL;
13142 int base_reg = -1;
13143 int i, ldm_case;
13145 /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
13146 easily extended if required. */
13147 gcc_assert (nops >= 2 && nops <= MAX_LDM_STM_OPS);
13149 memset (order, 0, MAX_LDM_STM_OPS * sizeof (int));
13151 /* Loop over the operands and check that the memory references are
13152 suitable (i.e. immediate offsets from the same base register). At
13153 the same time, extract the target register, and the memory
13154 offsets. */
13155 for (i = 0; i < nops; i++)
13157 rtx reg;
13158 rtx offset;
13160 /* Convert a subreg of a mem into the mem itself. */
13161 if (GET_CODE (operands[nops + i]) == SUBREG)
13162 operands[nops + i] = alter_subreg (operands + (nops + i), true);
13164 gcc_assert (MEM_P (operands[nops + i]));
13166 /* Don't reorder volatile memory references; it doesn't seem worth
13167 looking for the case where the order is ok anyway. */
13168 if (MEM_VOLATILE_P (operands[nops + i]))
13169 return 0;
13171 offset = const0_rtx;
13173 if ((REG_P (reg = XEXP (operands[nops + i], 0))
13174 || (GET_CODE (reg) == SUBREG
13175 && REG_P (reg = SUBREG_REG (reg))))
13176 || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
13177 && ((REG_P (reg = XEXP (XEXP (operands[nops + i], 0), 0)))
13178 || (GET_CODE (reg) == SUBREG
13179 && REG_P (reg = SUBREG_REG (reg))))
13180 && (CONST_INT_P (offset
13181 = XEXP (XEXP (operands[nops + i], 0), 1)))))
13183 if (i == 0)
13185 base_reg = REGNO (reg);
13186 base_reg_rtx = reg;
13187 if (TARGET_THUMB1 && base_reg > LAST_LO_REGNUM)
13188 return 0;
13190 else if (base_reg != (int) REGNO (reg))
13191 /* Not addressed from the same base register. */
13192 return 0;
13194 unsorted_regs[i] = (REG_P (operands[i])
13195 ? REGNO (operands[i])
13196 : REGNO (SUBREG_REG (operands[i])));
13198 /* If it isn't an integer register, or if it overwrites the
13199 base register but isn't the last insn in the list, then
13200 we can't do this. */
13201 if (unsorted_regs[i] < 0
13202 || (TARGET_THUMB1 && unsorted_regs[i] > LAST_LO_REGNUM)
13203 || unsorted_regs[i] > 14
13204 || (i != nops - 1 && unsorted_regs[i] == base_reg))
13205 return 0;
13207 /* Don't allow SP to be loaded unless it is also the base
13208 register. It guarantees that SP is reset correctly when
13209 an LDM instruction is interrupted. Otherwise, we might
13210 end up with a corrupt stack. */
13211 if (unsorted_regs[i] == SP_REGNUM && base_reg != SP_REGNUM)
13212 return 0;
13214 unsorted_offsets[i] = INTVAL (offset);
13215 if (i == 0 || unsorted_offsets[i] < unsorted_offsets[order[0]])
13216 order[0] = i;
13218 else
13219 /* Not a suitable memory address. */
13220 return 0;
13223 /* All the useful information has now been extracted from the
13224 operands into unsorted_regs and unsorted_offsets; additionally,
13225 order[0] has been set to the lowest offset in the list. Sort
13226 the offsets into order, verifying that they are adjacent, and
13227 check that the register numbers are ascending. */
13228 if (!compute_offset_order (nops, unsorted_offsets, order,
13229 check_regs ? unsorted_regs : NULL))
13230 return 0;
13232 if (saved_order)
13233 memcpy (saved_order, order, sizeof order);
13235 if (base)
13237 *base = base_reg;
13239 for (i = 0; i < nops; i++)
13240 regs[i] = unsorted_regs[check_regs ? order[i] : i];
13242 *load_offset = unsorted_offsets[order[0]];
13245 if (TARGET_THUMB1
13246 && !peep2_reg_dead_p (nops, base_reg_rtx))
13247 return 0;
13249 if (unsorted_offsets[order[0]] == 0)
13250 ldm_case = 1; /* ldmia */
13251 else if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
13252 ldm_case = 2; /* ldmib */
13253 else if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
13254 ldm_case = 3; /* ldmda */
13255 else if (TARGET_32BIT && unsorted_offsets[order[nops - 1]] == -4)
13256 ldm_case = 4; /* ldmdb */
13257 else if (const_ok_for_arm (unsorted_offsets[order[0]])
13258 || const_ok_for_arm (-unsorted_offsets[order[0]]))
13259 ldm_case = 5;
13260 else
13261 return 0;
13263 if (!multiple_operation_profitable_p (false, nops,
13264 ldm_case == 5
13265 ? unsorted_offsets[order[0]] : 0))
13266 return 0;
13268 return ldm_case;
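/* As an illustration, a peephole over
     ldr r4, [r1]
     ldr r5, [r1, #4]
     ldr r6, [r1, #8]
     ldr r7, [r1, #12]
   finds base_reg == r1 and offsets {0, 4, 8, 12}, so the function returns
   ldm_case 1 and the sequence can be replaced by a single
     ldmia r1, {r4, r5, r6, r7}
   Cases 2-4 correspond to the ldmib/ldmda/ldmdb addressing variants, and
   case 5 means the base must first be adjusted with an add before an ldmia
   can be used. */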
13271 /* Used to determine in a peephole whether a sequence of store instructions can
13272 be changed into a store-multiple instruction.
13273 NOPS is the number of separate store instructions we are examining.
13274 NOPS_TOTAL is the total number of instructions recognized by the peephole
13275 pattern.
13276 The first NOPS entries in OPERANDS are the source registers, the next
13277 NOPS entries are memory operands. If this function is successful, *BASE is
13278 set to the common base register of the memory accesses; *LOAD_OFFSET is set
13279 to the first memory location's offset from that base register. REGS is an
13280 array filled in with the source register numbers, REG_RTXS (if nonnull) is
13281 likewise filled with the corresponding rtx's.
13282 SAVED_ORDER (if nonnull) is an array filled in with an order that maps insn
13283 numbers to an ascending order of stores.
13284 If CHECK_REGS is true, the sequence of registers in *REGS matches the stores
13285 from ascending memory locations, and the function verifies that the register
13286 numbers are themselves ascending. If CHECK_REGS is false, the register
13287 numbers are stored in the order they are found in the operands. */
13288 static int
13289 store_multiple_sequence (rtx *operands, int nops, int nops_total,
13290 int *regs, rtx *reg_rtxs, int *saved_order, int *base,
13291 HOST_WIDE_INT *load_offset, bool check_regs)
13293 int unsorted_regs[MAX_LDM_STM_OPS];
13294 rtx unsorted_reg_rtxs[MAX_LDM_STM_OPS];
13295 HOST_WIDE_INT unsorted_offsets[MAX_LDM_STM_OPS];
13296 int order[MAX_LDM_STM_OPS];
13297 int base_reg = -1;
13298 rtx base_reg_rtx = NULL;
13299 int i, stm_case;
13301 /* Write back of base register is currently only supported for Thumb 1. */
13302 int base_writeback = TARGET_THUMB1;
13304 /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
13305 easily extended if required. */
13306 gcc_assert (nops >= 2 && nops <= MAX_LDM_STM_OPS);
13308 memset (order, 0, MAX_LDM_STM_OPS * sizeof (int));
13310 /* Loop over the operands and check that the memory references are
13311 suitable (i.e. immediate offsets from the same base register). At
13312 the same time, extract the target register, and the memory
13313 offsets. */
13314 for (i = 0; i < nops; i++)
13316 rtx reg;
13317 rtx offset;
13319 /* Convert a subreg of a mem into the mem itself. */
13320 if (GET_CODE (operands[nops + i]) == SUBREG)
13321 operands[nops + i] = alter_subreg (operands + (nops + i), true);
13323 gcc_assert (MEM_P (operands[nops + i]));
13325 /* Don't reorder volatile memory references; it doesn't seem worth
13326 looking for the case where the order is ok anyway. */
13327 if (MEM_VOLATILE_P (operands[nops + i]))
13328 return 0;
13330 offset = const0_rtx;
13332 if ((REG_P (reg = XEXP (operands[nops + i], 0))
13333 || (GET_CODE (reg) == SUBREG
13334 && REG_P (reg = SUBREG_REG (reg))))
13335 || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
13336 && ((REG_P (reg = XEXP (XEXP (operands[nops + i], 0), 0)))
13337 || (GET_CODE (reg) == SUBREG
13338 && REG_P (reg = SUBREG_REG (reg))))
13339 && (CONST_INT_P (offset
13340 = XEXP (XEXP (operands[nops + i], 0), 1)))))
13342 unsorted_reg_rtxs[i] = (REG_P (operands[i])
13343 ? operands[i] : SUBREG_REG (operands[i]));
13344 unsorted_regs[i] = REGNO (unsorted_reg_rtxs[i]);
13346 if (i == 0)
13348 base_reg = REGNO (reg);
13349 base_reg_rtx = reg;
13350 if (TARGET_THUMB1 && base_reg > LAST_LO_REGNUM)
13351 return 0;
13353 else if (base_reg != (int) REGNO (reg))
13354 /* Not addressed from the same base register. */
13355 return 0;
13357 /* If it isn't an integer register, then we can't do this. */
13358 if (unsorted_regs[i] < 0
13359 || (TARGET_THUMB1 && unsorted_regs[i] > LAST_LO_REGNUM)
13360 /* The effects are unpredictable if the base register is
13361 both updated and stored. */
13362 || (base_writeback && unsorted_regs[i] == base_reg)
13363 || (TARGET_THUMB2 && unsorted_regs[i] == SP_REGNUM)
13364 || unsorted_regs[i] > 14)
13365 return 0;
13367 unsorted_offsets[i] = INTVAL (offset);
13368 if (i == 0 || unsorted_offsets[i] < unsorted_offsets[order[0]])
13369 order[0] = i;
13371 else
13372 /* Not a suitable memory address. */
13373 return 0;
13376 /* All the useful information has now been extracted from the
13377 operands into unsorted_regs and unsorted_offsets; additionally,
13378 order[0] has been set to the lowest offset in the list. Sort
13379 the offsets into order, verifying that they are adjacent, and
13380 check that the register numbers are ascending. */
13381 if (!compute_offset_order (nops, unsorted_offsets, order,
13382 check_regs ? unsorted_regs : NULL))
13383 return 0;
13385 if (saved_order)
13386 memcpy (saved_order, order, sizeof order);
13388 if (base)
13390 *base = base_reg;
13392 for (i = 0; i < nops; i++)
13394 regs[i] = unsorted_regs[check_regs ? order[i] : i];
13395 if (reg_rtxs)
13396 reg_rtxs[i] = unsorted_reg_rtxs[check_regs ? order[i] : i];
13399 *load_offset = unsorted_offsets[order[0]];
13402 if (TARGET_THUMB1
13403 && !peep2_reg_dead_p (nops_total, base_reg_rtx))
13404 return 0;
13406 if (unsorted_offsets[order[0]] == 0)
13407 stm_case = 1; /* stmia */
13408 else if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
13409 stm_case = 2; /* stmib */
13410 else if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
13411 stm_case = 3; /* stmda */
13412 else if (TARGET_32BIT && unsorted_offsets[order[nops - 1]] == -4)
13413 stm_case = 4; /* stmdb */
13414 else
13415 return 0;
13417 if (!multiple_operation_profitable_p (false, nops, 0))
13418 return 0;
13420 return stm_case;
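/* For example, the store sequence
     str r4, [r1]
     str r5, [r1, #4]
     str r6, [r1, #8]
   yields stm_case 1 (the lowest access has offset 0), allowing the peephole
   to emit a single
     stmia r1, {r4, r5, r6}
   in its place. */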
13423 /* Routines for use in generating RTL. */
13425 /* Generate a load-multiple instruction. COUNT is the number of loads in
13426 the instruction; REGS and MEMS are arrays containing the operands.
13427 BASEREG is the base register to be used in addressing the memory operands.
13428 WBACK_OFFSET, if nonzero, is the amount by which the instruction should
13429 update the base register. */
13431 static rtx
13432 arm_gen_load_multiple_1 (int count, int *regs, rtx *mems, rtx basereg,
13433 HOST_WIDE_INT wback_offset)
13435 int i = 0, j;
13436 rtx result;
13438 if (!multiple_operation_profitable_p (false, count, 0))
13440 rtx seq;
13442 start_sequence ();
13444 for (i = 0; i < count; i++)
13445 emit_move_insn (gen_rtx_REG (SImode, regs[i]), mems[i]);
13447 if (wback_offset != 0)
13448 emit_move_insn (basereg, plus_constant (Pmode, basereg, wback_offset));
13450 seq = get_insns ();
13451 end_sequence ();
13453 return seq;
13456 result = gen_rtx_PARALLEL (VOIDmode,
13457 rtvec_alloc (count + (wback_offset != 0 ? 1 : 0)));
13458 if (wback_offset != 0)
13460 XVECEXP (result, 0, 0)
13461 = gen_rtx_SET (basereg, plus_constant (Pmode, basereg, wback_offset));
13462 i = 1;
13463 count++;
13466 for (j = 0; i < count; i++, j++)
13467 XVECEXP (result, 0, i)
13468 = gen_rtx_SET (gen_rtx_REG (SImode, regs[j]), mems[j]);
13470 return result;
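/* The PARALLEL built above has the general shape (for count == 2,
   regs == {4, 5}, basereg == r0 and wback_offset == 8):
     (parallel [(set (reg:SI 0) (plus:SI (reg:SI 0) (const_int 8)))
                (set (reg:SI 4) (mem:SI ...))
                (set (reg:SI 5) (mem:SI ...))])
   where the MEMs are whatever the caller put into the MEMS array; this is
   roughly the form the load-multiple patterns in arm.md expect to match. */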
13473 /* Generate a store-multiple instruction. COUNT is the number of stores in
13474 the instruction; REGS and MEMS are arrays containing the operands.
13475 BASEREG is the base register to be used in addressing the memory operands.
13476 WBACK_OFFSET, if nonzero, is the amount by which the instruction should
13477 update the base register. */
13479 static rtx
13480 arm_gen_store_multiple_1 (int count, int *regs, rtx *mems, rtx basereg,
13481 HOST_WIDE_INT wback_offset)
13483 int i = 0, j;
13484 rtx result;
13486 if (GET_CODE (basereg) == PLUS)
13487 basereg = XEXP (basereg, 0);
13489 if (!multiple_operation_profitable_p (false, count, 0))
13491 rtx seq;
13493 start_sequence ();
13495 for (i = 0; i < count; i++)
13496 emit_move_insn (mems[i], gen_rtx_REG (SImode, regs[i]));
13498 if (wback_offset != 0)
13499 emit_move_insn (basereg, plus_constant (Pmode, basereg, wback_offset));
13501 seq = get_insns ();
13502 end_sequence ();
13504 return seq;
13507 result = gen_rtx_PARALLEL (VOIDmode,
13508 rtvec_alloc (count + (wback_offset != 0 ? 1 : 0)));
13509 if (wback_offset != 0)
13511 XVECEXP (result, 0, 0)
13512 = gen_rtx_SET (basereg, plus_constant (Pmode, basereg, wback_offset));
13513 i = 1;
13514 count++;
13517 for (j = 0; i < count; i++, j++)
13518 XVECEXP (result, 0, i)
13519 = gen_rtx_SET (mems[j], gen_rtx_REG (SImode, regs[j]));
13521 return result;
13524 /* Generate either a load-multiple or a store-multiple instruction. This
13525 function can be used in situations where we can start with a single MEM
13526 rtx and adjust its address upwards.
13527 COUNT is the number of operations in the instruction, not counting a
13528 possible update of the base register. REGS is an array containing the
13529 register operands.
13530 BASEREG is the base register to be used in addressing the memory operands,
13531 which are constructed from BASEMEM.
13532 WRITE_BACK specifies whether the generated instruction should include an
13533 update of the base register.
13534 OFFSETP is used to pass an offset to and from this function; this offset
13535 is not used when constructing the address (instead BASEMEM should have an
13536 appropriate offset in its address), it is used only for setting
13537 MEM_OFFSET. It is updated only if WRITE_BACK is true. */
13539 static rtx
13540 arm_gen_multiple_op (bool is_load, int *regs, int count, rtx basereg,
13541 bool write_back, rtx basemem, HOST_WIDE_INT *offsetp)
13543 rtx mems[MAX_LDM_STM_OPS];
13544 HOST_WIDE_INT offset = *offsetp;
13545 int i;
13547 gcc_assert (count <= MAX_LDM_STM_OPS);
13549 if (GET_CODE (basereg) == PLUS)
13550 basereg = XEXP (basereg, 0);
13552 for (i = 0; i < count; i++)
13554 rtx addr = plus_constant (Pmode, basereg, i * 4);
13555 mems[i] = adjust_automodify_address_nv (basemem, SImode, addr, offset);
13556 offset += 4;
13559 if (write_back)
13560 *offsetp = offset;
13562 if (is_load)
13563 return arm_gen_load_multiple_1 (count, regs, mems, basereg,
13564 write_back ? 4 * count : 0);
13565 else
13566 return arm_gen_store_multiple_1 (count, regs, mems, basereg,
13567 write_back ? 4 * count : 0);
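/* A typical use is in arm_gen_movmemqi below:
     emit_insn (arm_gen_load_multiple (arm_regs_in_sequence, 4, src,
                                       TRUE, srcbase, &srcoffset));
   builds four SImode MEMs at src, src+4, src+8 and src+12, emits the
   load-multiple with a 16-byte base-register update, and advances
   srcoffset by 16 so that later MEM_OFFSETs stay accurate. */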
13570 rtx
13571 arm_gen_load_multiple (int *regs, int count, rtx basereg, int write_back,
13572 rtx basemem, HOST_WIDE_INT *offsetp)
13574 return arm_gen_multiple_op (TRUE, regs, count, basereg, write_back, basemem,
13575 offsetp);
13578 rtx
13579 arm_gen_store_multiple (int *regs, int count, rtx basereg, int write_back,
13580 rtx basemem, HOST_WIDE_INT *offsetp)
13582 return arm_gen_multiple_op (FALSE, regs, count, basereg, write_back, basemem,
13583 offsetp);
13586 /* Called from a peephole2 expander to turn a sequence of loads into an
13587 LDM instruction. OPERANDS are the operands found by the peephole matcher;
13588 NOPS indicates how many separate loads we are trying to combine. SORT_REGS
13589 is true if we can reorder the registers because they are used commutatively
13590 subsequently.
13591 Returns true iff we could generate a new instruction. */
13593 bool
13594 gen_ldm_seq (rtx *operands, int nops, bool sort_regs)
13596 int regs[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
13597 rtx mems[MAX_LDM_STM_OPS];
13598 int i, j, base_reg;
13599 rtx base_reg_rtx;
13600 HOST_WIDE_INT offset;
13601 int write_back = FALSE;
13602 int ldm_case;
13603 rtx addr;
13605 ldm_case = load_multiple_sequence (operands, nops, regs, mem_order,
13606 &base_reg, &offset, !sort_regs);
13608 if (ldm_case == 0)
13609 return false;
13611 if (sort_regs)
13612 for (i = 0; i < nops - 1; i++)
13613 for (j = i + 1; j < nops; j++)
13614 if (regs[i] > regs[j])
13616 int t = regs[i];
13617 regs[i] = regs[j];
13618 regs[j] = t;
13620 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
13622 if (TARGET_THUMB1)
13624 gcc_assert (peep2_reg_dead_p (nops, base_reg_rtx));
13625 gcc_assert (ldm_case == 1 || ldm_case == 5);
13626 write_back = TRUE;
13629 if (ldm_case == 5)
13631 rtx newbase = TARGET_THUMB1 ? base_reg_rtx : gen_rtx_REG (SImode, regs[0]);
13632 emit_insn (gen_addsi3 (newbase, base_reg_rtx, GEN_INT (offset)));
13633 offset = 0;
13634 if (!TARGET_THUMB1)
13635 base_reg_rtx = newbase;
13638 for (i = 0; i < nops; i++)
13640 addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
13641 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
13642 SImode, addr, 0);
13644 emit_insn (arm_gen_load_multiple_1 (nops, regs, mems, base_reg_rtx,
13645 write_back ? offset + i * 4 : 0));
13646 return true;
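/* For instance, when the matched insns are
     ldr r3, [r1]
     ldr r2, [r1, #4]
   and SORT_REGS is true (the values are only used commutatively later,
   e.g. summed), the registers can be renumbered against the memory order
   and the peephole emits
     ldmia r1, {r2, r3}
   On Thumb-1 the base register must additionally be dead afterwards and
   base-register writeback is used. */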
13649 /* Called from a peephole2 expander to turn a sequence of stores into an
13650 STM instruction. OPERANDS are the operands found by the peephole matcher;
13651 NOPS indicates how many separate stores we are trying to combine.
13652 Returns true iff we could generate a new instruction. */
13654 bool
13655 gen_stm_seq (rtx *operands, int nops)
13657 int i;
13658 int regs[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
13659 rtx mems[MAX_LDM_STM_OPS];
13660 int base_reg;
13661 rtx base_reg_rtx;
13662 HOST_WIDE_INT offset;
13663 int write_back = FALSE;
13664 int stm_case;
13665 rtx addr;
13666 bool base_reg_dies;
13668 stm_case = store_multiple_sequence (operands, nops, nops, regs, NULL,
13669 mem_order, &base_reg, &offset, true);
13671 if (stm_case == 0)
13672 return false;
13674 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
13676 base_reg_dies = peep2_reg_dead_p (nops, base_reg_rtx);
13677 if (TARGET_THUMB1)
13679 gcc_assert (base_reg_dies);
13680 write_back = TRUE;
13683 if (stm_case == 5)
13685 gcc_assert (base_reg_dies);
13686 emit_insn (gen_addsi3 (base_reg_rtx, base_reg_rtx, GEN_INT (offset)));
13687 offset = 0;
13690 addr = plus_constant (Pmode, base_reg_rtx, offset);
13692 for (i = 0; i < nops; i++)
13694 addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
13695 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
13696 SImode, addr, 0);
13698 emit_insn (arm_gen_store_multiple_1 (nops, regs, mems, base_reg_rtx,
13699 write_back ? offset + i * 4 : 0));
13700 return true;
13703 /* Called from a peephole2 expander to turn a sequence of stores that are
13704 preceded by constant loads into an STM instruction. OPERANDS are the
13705 operands found by the peephole matcher; NOPS indicates how many
13706 separate stores we are trying to combine; there are 2 * NOPS
13707 instructions in the peephole.
13708 Returns true iff we could generate a new instruction. */
13710 bool
13711 gen_const_stm_seq (rtx *operands, int nops)
13713 int regs[MAX_LDM_STM_OPS], sorted_regs[MAX_LDM_STM_OPS];
13714 int reg_order[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
13715 rtx reg_rtxs[MAX_LDM_STM_OPS], orig_reg_rtxs[MAX_LDM_STM_OPS];
13716 rtx mems[MAX_LDM_STM_OPS];
13717 int base_reg;
13718 rtx base_reg_rtx;
13719 HOST_WIDE_INT offset;
13720 int write_back = FALSE;
13721 int stm_case;
13722 rtx addr;
13723 bool base_reg_dies;
13724 int i, j;
13725 HARD_REG_SET allocated;
13727 stm_case = store_multiple_sequence (operands, nops, 2 * nops, regs, reg_rtxs,
13728 mem_order, &base_reg, &offset, false);
13730 if (stm_case == 0)
13731 return false;
13733 memcpy (orig_reg_rtxs, reg_rtxs, sizeof orig_reg_rtxs);
13735 /* If the same register is used more than once, try to find a free
13736 register. */
13737 CLEAR_HARD_REG_SET (allocated);
13738 for (i = 0; i < nops; i++)
13740 for (j = i + 1; j < nops; j++)
13741 if (regs[i] == regs[j])
13743 rtx t = peep2_find_free_register (0, nops * 2,
13744 TARGET_THUMB1 ? "l" : "r",
13745 SImode, &allocated);
13746 if (t == NULL_RTX)
13747 return false;
13748 reg_rtxs[i] = t;
13749 regs[i] = REGNO (t);
13753 /* Compute an ordering that maps the register numbers to an ascending
13754 sequence. */
13755 reg_order[0] = 0;
13756 for (i = 0; i < nops; i++)
13757 if (regs[i] < regs[reg_order[0]])
13758 reg_order[0] = i;
13760 for (i = 1; i < nops; i++)
13762 int this_order = reg_order[i - 1];
13763 for (j = 0; j < nops; j++)
13764 if (regs[j] > regs[reg_order[i - 1]]
13765 && (this_order == reg_order[i - 1]
13766 || regs[j] < regs[this_order]))
13767 this_order = j;
13768 reg_order[i] = this_order;
13771 /* Ensure that registers that must be live after the instruction end
13772 up with the correct value. */
13773 for (i = 0; i < nops; i++)
13775 int this_order = reg_order[i];
13776 if ((this_order != mem_order[i]
13777 || orig_reg_rtxs[this_order] != reg_rtxs[this_order])
13778 && !peep2_reg_dead_p (nops * 2, orig_reg_rtxs[this_order]))
13779 return false;
13782 /* Load the constants. */
13783 for (i = 0; i < nops; i++)
13785 rtx op = operands[2 * nops + mem_order[i]];
13786 sorted_regs[i] = regs[reg_order[i]];
13787 emit_move_insn (reg_rtxs[reg_order[i]], op);
13790 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
13792 base_reg_dies = peep2_reg_dead_p (nops * 2, base_reg_rtx);
13793 if (TARGET_THUMB1)
13795 gcc_assert (base_reg_dies);
13796 write_back = TRUE;
13799 if (stm_case == 5)
13801 gcc_assert (base_reg_dies);
13802 emit_insn (gen_addsi3 (base_reg_rtx, base_reg_rtx, GEN_INT (offset)));
13803 offset = 0;
13806 addr = plus_constant (Pmode, base_reg_rtx, offset);
13808 for (i = 0; i < nops; i++)
13810 addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
13811 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
13812 SImode, addr, 0);
13814 emit_insn (arm_gen_store_multiple_1 (nops, sorted_regs, mems, base_reg_rtx,
13815 write_back ? offset + i * 4 : 0));
13816 return true;
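/* As an example, a peephole over
     mov r2, #10
     str r2, [r0]
     mov r2, #20
     str r2, [r0, #4]
   notices that r2 is reused, grabs a free register (say r3) for one of the
   constants, and emits
     mov r2, #10
     mov r3, #20
     stmia r0, {r2, r3}
   provided the originally loaded registers are dead after the sequence
   (checked via peep2_reg_dead_p above). */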
13819 /* Copy a block of memory using plain ldr/str/ldrh/strh instructions, to permit
13820 unaligned copies on processors which support unaligned semantics for those
13821 instructions. INTERLEAVE_FACTOR can be used to attempt to hide load latency
13822 (using more registers) by doing e.g. load/load/store/store for a factor of 2.
13823 An interleave factor of 1 (the minimum) will perform no interleaving.
13824 Load/store multiple are used for aligned addresses where possible. */
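/* With INTERLEAVE_FACTOR == 2 and an unaligned source, one iteration of the
   main copy loop looks roughly like
     ldr r0, [src]
     ldr r1, [src, #4]
     str r0, [dst]
     str r1, [dst, #4]
   so the second load can issue while the first is still in flight, whereas a
   factor of 1 emits a strictly alternating ldr/str/ldr/str sequence. */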
13826 static void
13827 arm_block_move_unaligned_straight (rtx dstbase, rtx srcbase,
13828 HOST_WIDE_INT length,
13829 unsigned int interleave_factor)
13831 rtx *regs = XALLOCAVEC (rtx, interleave_factor);
13832 int *regnos = XALLOCAVEC (int, interleave_factor);
13833 HOST_WIDE_INT block_size_bytes = interleave_factor * UNITS_PER_WORD;
13834 HOST_WIDE_INT i, j;
13835 HOST_WIDE_INT remaining = length, words;
13836 rtx halfword_tmp = NULL, byte_tmp = NULL;
13837 rtx dst, src;
13838 bool src_aligned = MEM_ALIGN (srcbase) >= BITS_PER_WORD;
13839 bool dst_aligned = MEM_ALIGN (dstbase) >= BITS_PER_WORD;
13840 HOST_WIDE_INT srcoffset, dstoffset;
13841 HOST_WIDE_INT src_autoinc, dst_autoinc;
13842 rtx mem, addr;
13844 gcc_assert (1 <= interleave_factor && interleave_factor <= 4);
13846 /* Use hard registers if we have aligned source or destination so we can use
13847 load/store multiple with contiguous registers. */
13848 if (dst_aligned || src_aligned)
13849 for (i = 0; i < interleave_factor; i++)
13850 regs[i] = gen_rtx_REG (SImode, i);
13851 else
13852 for (i = 0; i < interleave_factor; i++)
13853 regs[i] = gen_reg_rtx (SImode);
13855 dst = copy_addr_to_reg (XEXP (dstbase, 0));
13856 src = copy_addr_to_reg (XEXP (srcbase, 0));
13858 srcoffset = dstoffset = 0;
13860 /* Calls to arm_gen_load_multiple and arm_gen_store_multiple update SRC/DST.
13861 For copying the last bytes we want to subtract this offset again. */
13862 src_autoinc = dst_autoinc = 0;
13864 for (i = 0; i < interleave_factor; i++)
13865 regnos[i] = i;
13867 /* Copy BLOCK_SIZE_BYTES chunks. */
13869 for (i = 0; i + block_size_bytes <= length; i += block_size_bytes)
13871 /* Load words. */
13872 if (src_aligned && interleave_factor > 1)
13874 emit_insn (arm_gen_load_multiple (regnos, interleave_factor, src,
13875 TRUE, srcbase, &srcoffset));
13876 src_autoinc += UNITS_PER_WORD * interleave_factor;
13878 else
13880 for (j = 0; j < interleave_factor; j++)
13882 addr = plus_constant (Pmode, src, (srcoffset + j * UNITS_PER_WORD
13883 - src_autoinc));
13884 mem = adjust_automodify_address (srcbase, SImode, addr,
13885 srcoffset + j * UNITS_PER_WORD);
13886 emit_insn (gen_unaligned_loadsi (regs[j], mem));
13888 srcoffset += block_size_bytes;
13891 /* Store words. */
13892 if (dst_aligned && interleave_factor > 1)
13894 emit_insn (arm_gen_store_multiple (regnos, interleave_factor, dst,
13895 TRUE, dstbase, &dstoffset));
13896 dst_autoinc += UNITS_PER_WORD * interleave_factor;
13898 else
13900 for (j = 0; j < interleave_factor; j++)
13902 addr = plus_constant (Pmode, dst, (dstoffset + j * UNITS_PER_WORD
13903 - dst_autoinc));
13904 mem = adjust_automodify_address (dstbase, SImode, addr,
13905 dstoffset + j * UNITS_PER_WORD);
13906 emit_insn (gen_unaligned_storesi (mem, regs[j]));
13908 dstoffset += block_size_bytes;
13911 remaining -= block_size_bytes;
13914 /* Copy any whole words left (note these aren't interleaved with any
13915 subsequent halfword/byte load/stores in the interests of simplicity). */
13917 words = remaining / UNITS_PER_WORD;
13919 gcc_assert (words < interleave_factor);
13921 if (src_aligned && words > 1)
13923 emit_insn (arm_gen_load_multiple (regnos, words, src, TRUE, srcbase,
13924 &srcoffset));
13925 src_autoinc += UNITS_PER_WORD * words;
13927 else
13929 for (j = 0; j < words; j++)
13931 addr = plus_constant (Pmode, src,
13932 srcoffset + j * UNITS_PER_WORD - src_autoinc);
13933 mem = adjust_automodify_address (srcbase, SImode, addr,
13934 srcoffset + j * UNITS_PER_WORD);
13935 if (src_aligned)
13936 emit_move_insn (regs[j], mem);
13937 else
13938 emit_insn (gen_unaligned_loadsi (regs[j], mem));
13940 srcoffset += words * UNITS_PER_WORD;
13943 if (dst_aligned && words > 1)
13945 emit_insn (arm_gen_store_multiple (regnos, words, dst, TRUE, dstbase,
13946 &dstoffset));
13947 dst_autoinc += words * UNITS_PER_WORD;
13949 else
13951 for (j = 0; j < words; j++)
13953 addr = plus_constant (Pmode, dst,
13954 dstoffset + j * UNITS_PER_WORD - dst_autoinc);
13955 mem = adjust_automodify_address (dstbase, SImode, addr,
13956 dstoffset + j * UNITS_PER_WORD);
13957 if (dst_aligned)
13958 emit_move_insn (mem, regs[j]);
13959 else
13960 emit_insn (gen_unaligned_storesi (mem, regs[j]));
13962 dstoffset += words * UNITS_PER_WORD;
13965 remaining -= words * UNITS_PER_WORD;
13967 gcc_assert (remaining < 4);
13969 /* Copy a halfword if necessary. */
13971 if (remaining >= 2)
13973 halfword_tmp = gen_reg_rtx (SImode);
13975 addr = plus_constant (Pmode, src, srcoffset - src_autoinc);
13976 mem = adjust_automodify_address (srcbase, HImode, addr, srcoffset);
13977 emit_insn (gen_unaligned_loadhiu (halfword_tmp, mem));
13979 /* Either write out immediately, or delay until we've loaded the last
13980 byte, depending on interleave factor. */
13981 if (interleave_factor == 1)
13983 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
13984 mem = adjust_automodify_address (dstbase, HImode, addr, dstoffset);
13985 emit_insn (gen_unaligned_storehi (mem,
13986 gen_lowpart (HImode, halfword_tmp)));
13987 halfword_tmp = NULL;
13988 dstoffset += 2;
13991 remaining -= 2;
13992 srcoffset += 2;
13995 gcc_assert (remaining < 2);
13997 /* Copy last byte. */
13999 if ((remaining & 1) != 0)
14001 byte_tmp = gen_reg_rtx (SImode);
14003 addr = plus_constant (Pmode, src, srcoffset - src_autoinc);
14004 mem = adjust_automodify_address (srcbase, QImode, addr, srcoffset);
14005 emit_move_insn (gen_lowpart (QImode, byte_tmp), mem);
14007 if (interleave_factor == 1)
14009 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
14010 mem = adjust_automodify_address (dstbase, QImode, addr, dstoffset);
14011 emit_move_insn (mem, gen_lowpart (QImode, byte_tmp));
14012 byte_tmp = NULL;
14013 dstoffset++;
14016 remaining--;
14017 srcoffset++;
14020 /* Store last halfword if we haven't done so already. */
14022 if (halfword_tmp)
14024 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
14025 mem = adjust_automodify_address (dstbase, HImode, addr, dstoffset);
14026 emit_insn (gen_unaligned_storehi (mem,
14027 gen_lowpart (HImode, halfword_tmp)));
14028 dstoffset += 2;
14031 /* Likewise for last byte. */
14033 if (byte_tmp)
14035 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
14036 mem = adjust_automodify_address (dstbase, QImode, addr, dstoffset);
14037 emit_move_insn (mem, gen_lowpart (QImode, byte_tmp));
14038 dstoffset++;
14041 gcc_assert (remaining == 0 && srcoffset == dstoffset);
14044 /* From mips_adjust_block_mem:
14046 Helper function for doing a loop-based block operation on memory
14047 reference MEM. Each iteration of the loop will operate on LENGTH
14048 bytes of MEM.
14050 Create a new base register for use within the loop and point it to
14051 the start of MEM. Create a new memory reference that uses this
14052 register. Store them in *LOOP_REG and *LOOP_MEM respectively. */
14054 static void
14055 arm_adjust_block_mem (rtx mem, HOST_WIDE_INT length, rtx *loop_reg,
14056 rtx *loop_mem)
14058 *loop_reg = copy_addr_to_reg (XEXP (mem, 0));
14060 /* Although the new mem does not refer to a known location,
14061 it does keep up to LENGTH bytes of alignment. */
14062 *loop_mem = change_address (mem, BLKmode, *loop_reg);
14063 set_mem_align (*loop_mem, MIN (MEM_ALIGN (mem), length * BITS_PER_UNIT));
14066 /* From mips_block_move_loop:
14068 Move LENGTH bytes from SRC to DEST using a loop that moves BYTES_PER_ITER
14069 bytes at a time. LENGTH must be at least BYTES_PER_ITER. Assume that
14070 the memory regions do not overlap. */
14072 static void
14073 arm_block_move_unaligned_loop (rtx dest, rtx src, HOST_WIDE_INT length,
14074 unsigned int interleave_factor,
14075 HOST_WIDE_INT bytes_per_iter)
14077 rtx src_reg, dest_reg, final_src, test;
14078 HOST_WIDE_INT leftover;
14080 leftover = length % bytes_per_iter;
14081 length -= leftover;
14083 /* Create registers and memory references for use within the loop. */
14084 arm_adjust_block_mem (src, bytes_per_iter, &src_reg, &src);
14085 arm_adjust_block_mem (dest, bytes_per_iter, &dest_reg, &dest);
14087 /* Calculate the value that SRC_REG should have after the last iteration of
14088 the loop. */
14089 final_src = expand_simple_binop (Pmode, PLUS, src_reg, GEN_INT (length),
14090 0, 0, OPTAB_WIDEN);
14092 /* Emit the start of the loop. */
14093 rtx_code_label *label = gen_label_rtx ();
14094 emit_label (label);
14096 /* Emit the loop body. */
14097 arm_block_move_unaligned_straight (dest, src, bytes_per_iter,
14098 interleave_factor);
14100 /* Move on to the next block. */
14101 emit_move_insn (src_reg, plus_constant (Pmode, src_reg, bytes_per_iter));
14102 emit_move_insn (dest_reg, plus_constant (Pmode, dest_reg, bytes_per_iter));
14104 /* Emit the loop condition. */
14105 test = gen_rtx_NE (VOIDmode, src_reg, final_src);
14106 emit_jump_insn (gen_cbranchsi4 (test, src_reg, final_src, label));
14108 /* Mop up any left-over bytes. */
14109 if (leftover)
14110 arm_block_move_unaligned_straight (dest, src, leftover, interleave_factor);
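/* For example, copying LENGTH == 100 bytes with BYTES_PER_ITER == 16 first
   strips the leftover (100 % 16 == 4), runs the emitted loop six times to
   move 96 bytes, and then calls arm_block_move_unaligned_straight once more
   for the remaining 4 bytes. */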
14113 /* Emit a block move when either the source or destination is unaligned (not
14114 aligned to a four-byte boundary). This may need further tuning depending on
14115 core type, optimize_size setting, etc. */
14117 static int
14118 arm_movmemqi_unaligned (rtx *operands)
14120 HOST_WIDE_INT length = INTVAL (operands[2]);
14122 if (optimize_size)
14124 bool src_aligned = MEM_ALIGN (operands[1]) >= BITS_PER_WORD;
14125 bool dst_aligned = MEM_ALIGN (operands[0]) >= BITS_PER_WORD;
14126 /* Inlined memcpy using ldr/str/ldrh/strh can be quite big: try to limit
14127 size of code if optimizing for size. We'll use ldm/stm if src_aligned
14128 or dst_aligned though: allow more interleaving in those cases since the
14129 resulting code can be smaller. */
14130 unsigned int interleave_factor = (src_aligned || dst_aligned) ? 2 : 1;
14131 HOST_WIDE_INT bytes_per_iter = (src_aligned || dst_aligned) ? 8 : 4;
14133 if (length > 12)
14134 arm_block_move_unaligned_loop (operands[0], operands[1], length,
14135 interleave_factor, bytes_per_iter);
14136 else
14137 arm_block_move_unaligned_straight (operands[0], operands[1], length,
14138 interleave_factor);
14140 else
14142 /* Note that the loop created by arm_block_move_unaligned_loop may be
14143 subject to loop unrolling, which makes tuning this condition a little
14144 redundant. */
14145 if (length > 32)
14146 arm_block_move_unaligned_loop (operands[0], operands[1], length, 4, 16);
14147 else
14148 arm_block_move_unaligned_straight (operands[0], operands[1], length, 4);
14151 return 1;
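/* As an illustration of the thresholds above: when optimizing for speed, a
   40-byte unaligned copy becomes a 16-bytes-per-iteration loop (two
   iterations) followed by a straight copy of the final 8 bytes, while when
   optimizing for size with neither side word-aligned the same copy uses a
   tighter 4-bytes-per-iteration loop to keep the expansion small. */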
14154 int
14155 arm_gen_movmemqi (rtx *operands)
14157 HOST_WIDE_INT in_words_to_go, out_words_to_go, last_bytes;
14158 HOST_WIDE_INT srcoffset, dstoffset;
14159 rtx src, dst, srcbase, dstbase;
14160 rtx part_bytes_reg = NULL;
14161 rtx mem;
14163 if (!CONST_INT_P (operands[2])
14164 || !CONST_INT_P (operands[3])
14165 || INTVAL (operands[2]) > 64)
14166 return 0;
14168 if (unaligned_access && (INTVAL (operands[3]) & 3) != 0)
14169 return arm_movmemqi_unaligned (operands);
14171 if (INTVAL (operands[3]) & 3)
14172 return 0;
14174 dstbase = operands[0];
14175 srcbase = operands[1];
14177 dst = copy_to_mode_reg (SImode, XEXP (dstbase, 0));
14178 src = copy_to_mode_reg (SImode, XEXP (srcbase, 0));
14180 in_words_to_go = ARM_NUM_INTS (INTVAL (operands[2]));
14181 out_words_to_go = INTVAL (operands[2]) / 4;
14182 last_bytes = INTVAL (operands[2]) & 3;
14183 dstoffset = srcoffset = 0;
14185 if (out_words_to_go != in_words_to_go && ((in_words_to_go - 1) & 3) != 0)
14186 part_bytes_reg = gen_rtx_REG (SImode, (in_words_to_go - 1) & 3);
14188 while (in_words_to_go >= 2)
14190 if (in_words_to_go > 4)
14191 emit_insn (arm_gen_load_multiple (arm_regs_in_sequence, 4, src,
14192 TRUE, srcbase, &srcoffset));
14193 else
14194 emit_insn (arm_gen_load_multiple (arm_regs_in_sequence, in_words_to_go,
14195 src, FALSE, srcbase,
14196 &srcoffset));
14198 if (out_words_to_go)
14200 if (out_words_to_go > 4)
14201 emit_insn (arm_gen_store_multiple (arm_regs_in_sequence, 4, dst,
14202 TRUE, dstbase, &dstoffset));
14203 else if (out_words_to_go != 1)
14204 emit_insn (arm_gen_store_multiple (arm_regs_in_sequence,
14205 out_words_to_go, dst,
14206 (last_bytes == 0
14207 ? FALSE : TRUE),
14208 dstbase, &dstoffset));
14209 else
14211 mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
14212 emit_move_insn (mem, gen_rtx_REG (SImode, R0_REGNUM));
14213 if (last_bytes != 0)
14215 emit_insn (gen_addsi3 (dst, dst, GEN_INT (4)));
14216 dstoffset += 4;
14221 in_words_to_go -= in_words_to_go < 4 ? in_words_to_go : 4;
14222 out_words_to_go -= out_words_to_go < 4 ? out_words_to_go : 4;
14225 /* OUT_WORDS_TO_GO will be zero here if there are byte stores to do. */
14226 if (out_words_to_go)
14228 rtx sreg;
14230 mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
14231 sreg = copy_to_reg (mem);
14233 mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
14234 emit_move_insn (mem, sreg);
14235 in_words_to_go--;
14237 gcc_assert (!in_words_to_go); /* Sanity check */
14240 if (in_words_to_go)
14242 gcc_assert (in_words_to_go > 0);
14244 mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
14245 part_bytes_reg = copy_to_mode_reg (SImode, mem);
14248 gcc_assert (!last_bytes || part_bytes_reg);
14250 if (BYTES_BIG_ENDIAN && last_bytes)
14252 rtx tmp = gen_reg_rtx (SImode);
14254 /* The bytes we want are in the top end of the word. */
14255 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg,
14256 GEN_INT (8 * (4 - last_bytes))));
14257 part_bytes_reg = tmp;
14259 while (last_bytes)
14261 mem = adjust_automodify_address (dstbase, QImode,
14262 plus_constant (Pmode, dst,
14263 last_bytes - 1),
14264 dstoffset + last_bytes - 1);
14265 emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
14267 if (--last_bytes)
14269 tmp = gen_reg_rtx (SImode);
14270 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (8)));
14271 part_bytes_reg = tmp;
14276 else
14278 if (last_bytes > 1)
14280 mem = adjust_automodify_address (dstbase, HImode, dst, dstoffset);
14281 emit_move_insn (mem, gen_lowpart (HImode, part_bytes_reg));
14282 last_bytes -= 2;
14283 if (last_bytes)
14285 rtx tmp = gen_reg_rtx (SImode);
14286 emit_insn (gen_addsi3 (dst, dst, const2_rtx));
14287 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (16)));
14288 part_bytes_reg = tmp;
14289 dstoffset += 2;
14293 if (last_bytes)
14295 mem = adjust_automodify_address (dstbase, QImode, dst, dstoffset);
14296 emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
14300 return 1;
14303 /* Helper for gen_movmem_ldrd_strd. Increase the address of memory rtx
14304 by mode size. */
14305 inline static rtx
14306 next_consecutive_mem (rtx mem)
14308 machine_mode mode = GET_MODE (mem);
14309 HOST_WIDE_INT offset = GET_MODE_SIZE (mode);
14310 rtx addr = plus_constant (Pmode, XEXP (mem, 0), offset);
14312 return adjust_automodify_address (mem, mode, addr, offset);
14315 /* Copy using LDRD/STRD instructions whenever possible.
14316 Returns true upon success. */
14317 bool
14318 gen_movmem_ldrd_strd (rtx *operands)
14320 unsigned HOST_WIDE_INT len;
14321 HOST_WIDE_INT align;
14322 rtx src, dst, base;
14323 rtx reg0;
14324 bool src_aligned, dst_aligned;
14325 bool src_volatile, dst_volatile;
14327 gcc_assert (CONST_INT_P (operands[2]));
14328 gcc_assert (CONST_INT_P (operands[3]));
14330 len = UINTVAL (operands[2]);
14331 if (len > 64)
14332 return false;
14334 /* Maximum alignment we can assume for both src and dst buffers. */
14335 align = INTVAL (operands[3]);
14337 if ((!unaligned_access) && (len >= 4) && ((align & 3) != 0))
14338 return false;
14340 /* Place src and dst addresses in registers
14341 and update the corresponding mem rtx. */
14342 dst = operands[0];
14343 dst_volatile = MEM_VOLATILE_P (dst);
14344 dst_aligned = MEM_ALIGN (dst) >= BITS_PER_WORD;
14345 base = copy_to_mode_reg (SImode, XEXP (dst, 0));
14346 dst = adjust_automodify_address (dst, VOIDmode, base, 0);
14348 src = operands[1];
14349 src_volatile = MEM_VOLATILE_P (src);
14350 src_aligned = MEM_ALIGN (src) >= BITS_PER_WORD;
14351 base = copy_to_mode_reg (SImode, XEXP (src, 0));
14352 src = adjust_automodify_address (src, VOIDmode, base, 0);
14354 if (!unaligned_access && !(src_aligned && dst_aligned))
14355 return false;
14357 if (src_volatile || dst_volatile)
14358 return false;
14360 /* If we cannot generate any LDRD/STRD, try to generate LDM/STM. */
14361 if (!(dst_aligned || src_aligned))
14362 return arm_gen_movmemqi (operands);
14364 /* If either src or dst is unaligned we'll be accessing it as pairs
14365 of unaligned SImode accesses. Otherwise we can generate DImode
14366 ldrd/strd instructions. */
14367 src = adjust_address (src, src_aligned ? DImode : SImode, 0);
14368 dst = adjust_address (dst, dst_aligned ? DImode : SImode, 0);
14370 while (len >= 8)
14372 len -= 8;
14373 reg0 = gen_reg_rtx (DImode);
14374 rtx low_reg = NULL_RTX;
14375 rtx hi_reg = NULL_RTX;
14377 if (!src_aligned || !dst_aligned)
14379 low_reg = gen_lowpart (SImode, reg0);
14380 hi_reg = gen_highpart_mode (SImode, DImode, reg0);
14382 if (src_aligned)
14383 emit_move_insn (reg0, src);
14384 else
14386 emit_insn (gen_unaligned_loadsi (low_reg, src));
14387 src = next_consecutive_mem (src);
14388 emit_insn (gen_unaligned_loadsi (hi_reg, src));
14391 if (dst_aligned)
14392 emit_move_insn (dst, reg0);
14393 else
14395 emit_insn (gen_unaligned_storesi (dst, low_reg));
14396 dst = next_consecutive_mem (dst);
14397 emit_insn (gen_unaligned_storesi (dst, hi_reg));
14400 src = next_consecutive_mem (src);
14401 dst = next_consecutive_mem (dst);
14404 gcc_assert (len < 8);
14405 if (len >= 4)
14407 /* More than a word but less than a double-word to copy. Copy a word. */
14408 reg0 = gen_reg_rtx (SImode);
14409 src = adjust_address (src, SImode, 0);
14410 dst = adjust_address (dst, SImode, 0);
14411 if (src_aligned)
14412 emit_move_insn (reg0, src);
14413 else
14414 emit_insn (gen_unaligned_loadsi (reg0, src));
14416 if (dst_aligned)
14417 emit_move_insn (dst, reg0);
14418 else
14419 emit_insn (gen_unaligned_storesi (dst, reg0));
14421 src = next_consecutive_mem (src);
14422 dst = next_consecutive_mem (dst);
14423 len -= 4;
14426 if (len == 0)
14427 return true;
14429 /* Copy the remaining bytes. */
14430 if (len >= 2)
14432 dst = adjust_address (dst, HImode, 0);
14433 src = adjust_address (src, HImode, 0);
14434 reg0 = gen_reg_rtx (SImode);
14435 if (src_aligned)
14436 emit_insn (gen_zero_extendhisi2 (reg0, src));
14437 else
14438 emit_insn (gen_unaligned_loadhiu (reg0, src));
14440 if (dst_aligned)
14441 emit_insn (gen_movhi (dst, gen_lowpart(HImode, reg0)));
14442 else
14443 emit_insn (gen_unaligned_storehi (dst, gen_lowpart (HImode, reg0)));
14445 src = next_consecutive_mem (src);
14446 dst = next_consecutive_mem (dst);
14447 if (len == 2)
14448 return true;
14451 dst = adjust_address (dst, QImode, 0);
14452 src = adjust_address (src, QImode, 0);
14453 reg0 = gen_reg_rtx (QImode);
14454 emit_move_insn (reg0, src);
14455 emit_move_insn (dst, reg0);
14456 return true;
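/* For instance, a 20-byte copy with both sides word-aligned is expanded as
   two DImode moves (which the ldrd/strd patterns can pick up) followed by
   one SImode move, roughly
     ldrd r4, r5, [src]
     strd r4, r5, [dst]
     ldrd r4, r5, [src, #8]
     strd r4, r5, [dst, #8]
     ldr  r4, [src, #16]
     str  r4, [dst, #16]
   If only one side is word-aligned, the DImode values are instead moved as
   pairs of unaligned SImode accesses on the unaligned side. */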
14459 /* Select a dominance comparison mode if possible for a test of the general
14460 form (OP (COND_OR (X) (Y)) (const_int 0)). We support three forms.
14461 COND_OR == DOM_CC_X_AND_Y => (X && Y)
14462 COND_OR == DOM_CC_NX_OR_Y => ((! X) || Y)
14463 COND_OR == DOM_CC_X_OR_Y => (X || Y)
14464 In all cases OP will be either EQ or NE, but we don't need to know which
14465 here. If we are unable to support a dominance comparison we return
14466 CCmode. This will then fail to match for the RTL expressions that
14467 generate this call. */
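/* For example, for (a == 0 && b == 0) the two EQ comparisons are combined
   with DOM_CC_X_AND_Y and this function returns CC_DEQmode, so the compare
   can be emitted as the classic conditional-compare pair
     cmp   a, #0
     cmpeq b, #0
   with a single EQ test of the resulting flags covering both conditions. */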
14468 machine_mode
14469 arm_select_dominance_cc_mode (rtx x, rtx y, HOST_WIDE_INT cond_or)
14471 enum rtx_code cond1, cond2;
14472 int swapped = 0;
14474 /* Currently we will probably get the wrong result if the individual
14475 comparisons are not simple. This also ensures that it is safe to
14476 reverse a comparison if necessary. */
14477 if ((arm_select_cc_mode (cond1 = GET_CODE (x), XEXP (x, 0), XEXP (x, 1))
14478 != CCmode)
14479 || (arm_select_cc_mode (cond2 = GET_CODE (y), XEXP (y, 0), XEXP (y, 1))
14480 != CCmode))
14481 return CCmode;
14483 /* The if_then_else variant of this tests the second condition if the
14484 first passes, but is true if the first fails. Reverse the first
14485 condition to get a true "inclusive-or" expression. */
14486 if (cond_or == DOM_CC_NX_OR_Y)
14487 cond1 = reverse_condition (cond1);
14489 /* If the comparisons are not equal, and one doesn't dominate the other,
14490 then we can't do this. */
14491 if (cond1 != cond2
14492 && !comparison_dominates_p (cond1, cond2)
14493 && (swapped = 1, !comparison_dominates_p (cond2, cond1)))
14494 return CCmode;
14496 if (swapped)
14497 std::swap (cond1, cond2);
14499 switch (cond1)
14501 case EQ:
14502 if (cond_or == DOM_CC_X_AND_Y)
14503 return CC_DEQmode;
14505 switch (cond2)
14507 case EQ: return CC_DEQmode;
14508 case LE: return CC_DLEmode;
14509 case LEU: return CC_DLEUmode;
14510 case GE: return CC_DGEmode;
14511 case GEU: return CC_DGEUmode;
14512 default: gcc_unreachable ();
14515 case LT:
14516 if (cond_or == DOM_CC_X_AND_Y)
14517 return CC_DLTmode;
14519 switch (cond2)
14521 case LT:
14522 return CC_DLTmode;
14523 case LE:
14524 return CC_DLEmode;
14525 case NE:
14526 return CC_DNEmode;
14527 default:
14528 gcc_unreachable ();
14531 case GT:
14532 if (cond_or == DOM_CC_X_AND_Y)
14533 return CC_DGTmode;
14535 switch (cond2)
14537 case GT:
14538 return CC_DGTmode;
14539 case GE:
14540 return CC_DGEmode;
14541 case NE:
14542 return CC_DNEmode;
14543 default:
14544 gcc_unreachable ();
14547 case LTU:
14548 if (cond_or == DOM_CC_X_AND_Y)
14549 return CC_DLTUmode;
14551 switch (cond2)
14553 case LTU:
14554 return CC_DLTUmode;
14555 case LEU:
14556 return CC_DLEUmode;
14557 case NE:
14558 return CC_DNEmode;
14559 default:
14560 gcc_unreachable ();
14563 case GTU:
14564 if (cond_or == DOM_CC_X_AND_Y)
14565 return CC_DGTUmode;
14567 switch (cond2)
14569 case GTU:
14570 return CC_DGTUmode;
14571 case GEU:
14572 return CC_DGEUmode;
14573 case NE:
14574 return CC_DNEmode;
14575 default:
14576 gcc_unreachable ();
14579 /* The remaining cases only occur when both comparisons are the
14580 same. */
14581 case NE:
14582 gcc_assert (cond1 == cond2);
14583 return CC_DNEmode;
14585 case LE:
14586 gcc_assert (cond1 == cond2);
14587 return CC_DLEmode;
14589 case GE:
14590 gcc_assert (cond1 == cond2);
14591 return CC_DGEmode;
14593 case LEU:
14594 gcc_assert (cond1 == cond2);
14595 return CC_DLEUmode;
14597 case GEU:
14598 gcc_assert (cond1 == cond2);
14599 return CC_DGEUmode;
14601 default:
14602 gcc_unreachable ();
14606 machine_mode
14607 arm_select_cc_mode (enum rtx_code op, rtx x, rtx y)
14609 /* All floating point compares return CCFP if it is an equality
14610 comparison, and CCFPE otherwise. */
14611 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
14613 switch (op)
14615 case EQ:
14616 case NE:
14617 case UNORDERED:
14618 case ORDERED:
14619 case UNLT:
14620 case UNLE:
14621 case UNGT:
14622 case UNGE:
14623 case UNEQ:
14624 case LTGT:
14625 return CCFPmode;
14627 case LT:
14628 case LE:
14629 case GT:
14630 case GE:
14631 return CCFPEmode;
14633 default:
14634 gcc_unreachable ();
14638 /* A compare with a shifted operand. Because of canonicalization, the
14639 comparison will have to be swapped when we emit the assembler. */
14640 if (GET_MODE (y) == SImode
14641 && (REG_P (y) || (GET_CODE (y) == SUBREG))
14642 && (GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
14643 || GET_CODE (x) == LSHIFTRT || GET_CODE (x) == ROTATE
14644 || GET_CODE (x) == ROTATERT))
14645 return CC_SWPmode;
14647 /* This operation is performed swapped, but since we only rely on the Z
14648 flag we don't need an additional mode. */
14649 if (GET_MODE (y) == SImode
14650 && (REG_P (y) || (GET_CODE (y) == SUBREG))
14651 && GET_CODE (x) == NEG
14652 && (op == EQ || op == NE))
14653 return CC_Zmode;
14655 /* This is a special case that is used by combine to allow a
14656 comparison of a shifted byte load to be split into a zero-extend
14657 followed by a comparison of the shifted integer (only valid for
14658 equalities and unsigned inequalities). */
14659 if (GET_MODE (x) == SImode
14660 && GET_CODE (x) == ASHIFT
14661 && CONST_INT_P (XEXP (x, 1)) && INTVAL (XEXP (x, 1)) == 24
14662 && GET_CODE (XEXP (x, 0)) == SUBREG
14663 && MEM_P (SUBREG_REG (XEXP (x, 0)))
14664 && GET_MODE (SUBREG_REG (XEXP (x, 0))) == QImode
14665 && (op == EQ || op == NE
14666 || op == GEU || op == GTU || op == LTU || op == LEU)
14667 && CONST_INT_P (y))
14668 return CC_Zmode;
14670 /* A construct for a conditional compare: if the false arm contains
14671 0, then both conditions must be true; otherwise either condition
14672 must be true. Not all conditions are possible, so CCmode is
14673 returned if it can't be done. */
14674 if (GET_CODE (x) == IF_THEN_ELSE
14675 && (XEXP (x, 2) == const0_rtx
14676 || XEXP (x, 2) == const1_rtx)
14677 && COMPARISON_P (XEXP (x, 0))
14678 && COMPARISON_P (XEXP (x, 1)))
14679 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
14680 INTVAL (XEXP (x, 2)));
14682 /* Alternate canonicalizations of the above. These are somewhat cleaner. */
14683 if (GET_CODE (x) == AND
14684 && (op == EQ || op == NE)
14685 && COMPARISON_P (XEXP (x, 0))
14686 && COMPARISON_P (XEXP (x, 1)))
14687 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
14688 DOM_CC_X_AND_Y);
14690 if (GET_CODE (x) == IOR
14691 && (op == EQ || op == NE)
14692 && COMPARISON_P (XEXP (x, 0))
14693 && COMPARISON_P (XEXP (x, 1)))
14694 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
14695 DOM_CC_X_OR_Y);
14697 /* An operation (on Thumb) where we want to test for a single bit.
14698 This is done by shifting that bit up into the top bit of a
14699 scratch register; we can then branch on the sign bit. */
14700 if (TARGET_THUMB1
14701 && GET_MODE (x) == SImode
14702 && (op == EQ || op == NE)
14703 && GET_CODE (x) == ZERO_EXTRACT
14704 && XEXP (x, 1) == const1_rtx)
14705 return CC_Nmode;
14707 /* An operation that sets the condition codes as a side-effect; the
14708 V flag is not set correctly, so we can only use comparisons where
14709 this doesn't matter. (For LT and GE we can use "mi" and "pl"
14710 instead.) */
14711 /* ??? Does the ZERO_EXTRACT case really apply to thumb2? */
14712 if (GET_MODE (x) == SImode
14713 && y == const0_rtx
14714 && (op == EQ || op == NE || op == LT || op == GE)
14715 && (GET_CODE (x) == PLUS || GET_CODE (x) == MINUS
14716 || GET_CODE (x) == AND || GET_CODE (x) == IOR
14717 || GET_CODE (x) == XOR || GET_CODE (x) == MULT
14718 || GET_CODE (x) == NOT || GET_CODE (x) == NEG
14719 || GET_CODE (x) == LSHIFTRT
14720 || GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
14721 || GET_CODE (x) == ROTATERT
14722 || (TARGET_32BIT && GET_CODE (x) == ZERO_EXTRACT)))
14723 return CC_NOOVmode;
14725 if (GET_MODE (x) == QImode && (op == EQ || op == NE))
14726 return CC_Zmode;
14728 if (GET_MODE (x) == SImode && (op == LTU || op == GEU)
14729 && GET_CODE (x) == PLUS
14730 && (rtx_equal_p (XEXP (x, 0), y) || rtx_equal_p (XEXP (x, 1), y)))
14731 return CC_Cmode;
14733 if (GET_MODE (x) == DImode || GET_MODE (y) == DImode)
14735 switch (op)
14737 case EQ:
14738 case NE:
14739 /* A DImode comparison against zero can be implemented by
14740 or'ing the two halves together. */
14741 if (y == const0_rtx)
14742 return CC_Zmode;
14744 /* We can do an equality test in three Thumb instructions. */
14745 if (!TARGET_32BIT)
14746 return CC_Zmode;
14748 /* FALLTHROUGH */
14750 case LTU:
14751 case LEU:
14752 case GTU:
14753 case GEU:
14754 /* DImode unsigned comparisons can be implemented by cmp +
14755 cmpeq without a scratch register. Not worth doing in
14756 Thumb-2. */
14757 if (TARGET_32BIT)
14758 return CC_CZmode;
14760 /* FALLTHROUGH */
14762 case LT:
14763 case LE:
14764 case GT:
14765 case GE:
14766 /* DImode signed and unsigned comparisons can be implemented
14767 by cmp + sbcs with a scratch register, but that does not
14768 set the Z flag - we must reverse GT/LE/GTU/LEU. */
14769 gcc_assert (op != EQ && op != NE);
14770 return CC_NCVmode;
14772 default:
14773 gcc_unreachable ();
14777 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_CC)
14778 return GET_MODE (x);
14780 return CCmode;
14783 /* X and Y are two things to compare using CODE. Emit the compare insn and
14784 return the rtx for the CC register in the proper mode. SCRATCH, when
14785 nonnull, provides a scratch register for DImode comparisons after reload. */
14786 rtx
14787 arm_gen_compare_reg (enum rtx_code code, rtx x, rtx y, rtx scratch)
14789 machine_mode mode;
14790 rtx cc_reg;
14791 int dimode_comparison = GET_MODE (x) == DImode || GET_MODE (y) == DImode;
14793 /* We might have X as a constant, Y as a register because of the predicates
14794 used for cmpdi. If so, force X to a register here. */
14795 if (dimode_comparison && !REG_P (x))
14796 x = force_reg (DImode, x);
14798 mode = SELECT_CC_MODE (code, x, y);
14799 cc_reg = gen_rtx_REG (mode, CC_REGNUM);
14801 if (dimode_comparison
14802 && mode != CC_CZmode)
14804 rtx clobber, set;
14806 /* To compare two non-zero values for equality, XOR them and
14807 then compare against zero. Not used for ARM mode; there
14808 CC_CZmode is cheaper. */
14809 if (mode == CC_Zmode && y != const0_rtx)
14811 gcc_assert (!reload_completed);
14812 x = expand_binop (DImode, xor_optab, x, y, NULL_RTX, 0, OPTAB_WIDEN);
14813 y = const0_rtx;
14816 /* A scratch register is required. */
14817 if (reload_completed)
14818 gcc_assert (scratch != NULL && GET_MODE (scratch) == SImode);
14819 else
14820 scratch = gen_rtx_SCRATCH (SImode);
14822 clobber = gen_rtx_CLOBBER (VOIDmode, scratch);
14823 set = gen_rtx_SET (cc_reg, gen_rtx_COMPARE (mode, x, y));
14824 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, set, clobber)));
14826 else
14827 emit_set_insn (cc_reg, gen_rtx_COMPARE (mode, x, y));
14829 return cc_reg;
14832 /* Generate a sequence of insns that will generate the correct return
14833 address mask depending on the physical architecture that the program
14834 is running on. */
14835 rtx
14836 arm_gen_return_addr_mask (void)
14838 rtx reg = gen_reg_rtx (Pmode);
14840 emit_insn (gen_return_addr_mask (reg));
14841 return reg;
14844 void
14845 arm_reload_in_hi (rtx *operands)
14847 rtx ref = operands[1];
14848 rtx base, scratch;
14849 HOST_WIDE_INT offset = 0;
14851 if (GET_CODE (ref) == SUBREG)
14853 offset = SUBREG_BYTE (ref);
14854 ref = SUBREG_REG (ref);
14857 if (REG_P (ref))
14859 /* We have a pseudo which has been spilt onto the stack; there
14860 are two cases here: the first where there is a simple
14861 stack-slot replacement and a second where the stack-slot is
14862 out of range, or is used as a subreg. */
14863 if (reg_equiv_mem (REGNO (ref)))
14865 ref = reg_equiv_mem (REGNO (ref));
14866 base = find_replacement (&XEXP (ref, 0));
14868 else
14869 /* The slot is out of range, or was dressed up in a SUBREG. */
14870 base = reg_equiv_address (REGNO (ref));
14872 /* PR 62554: If there is no equivalent memory location then just move
14873 the value as an SImode register move. This happens when the target
14874 architecture variant does not have an HImode register move. */
14875 if (base == NULL)
14877 gcc_assert (REG_P (operands[0]));
14878 emit_insn (gen_movsi (gen_rtx_SUBREG (SImode, operands[0], 0),
14879 gen_rtx_SUBREG (SImode, ref, 0)));
14880 return;
14883 else
14884 base = find_replacement (&XEXP (ref, 0));
14886 /* Handle the case where the address is too complex to be offset by 1. */
14887 if (GET_CODE (base) == MINUS
14888 || (GET_CODE (base) == PLUS && !CONST_INT_P (XEXP (base, 1))))
14890 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
14892 emit_set_insn (base_plus, base);
14893 base = base_plus;
14895 else if (GET_CODE (base) == PLUS)
14897 /* The addend must be CONST_INT, or we would have dealt with it above. */
14898 HOST_WIDE_INT hi, lo;
14900 offset += INTVAL (XEXP (base, 1));
14901 base = XEXP (base, 0);
14903 /* Rework the address into a legal sequence of insns. */
14904 /* Valid range for lo is -4095 -> 4095 */
14905 lo = (offset >= 0
14906 ? (offset & 0xfff)
14907 : -((-offset) & 0xfff));
14909 /* Corner case: if lo is the max offset then we would be out of range
14910 once we have added the additional 1 below, so bump the msb into the
14911 pre-loading insn(s). */
14912 if (lo == 4095)
14913 lo &= 0x7ff;
14915 hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
14916 ^ (HOST_WIDE_INT) 0x80000000)
14917 - (HOST_WIDE_INT) 0x80000000);
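/* For example, an original offset of 4095 is split as lo == 2047 and
   hi == 2048: without the adjustment above, lo + 1 == 4096 would not fit
   in the +/-4095 range of the byte loads emitted below. */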
14919 gcc_assert (hi + lo == offset);
14921 if (hi != 0)
14923 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
14925 /* Get the base address; addsi3 knows how to handle constants
14926 that require more than one insn. */
14927 emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
14928 base = base_plus;
14929 offset = lo;
14933 /* Operands[2] may overlap operands[0] (though it won't overlap
14934 operands[1]); that's why we asked for a DImode reg -- so we can
14935 use the half that does not overlap. */
14936 if (REGNO (operands[2]) == REGNO (operands[0]))
14937 scratch = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
14938 else
14939 scratch = gen_rtx_REG (SImode, REGNO (operands[2]));
14941 emit_insn (gen_zero_extendqisi2 (scratch,
14942 gen_rtx_MEM (QImode,
14943 plus_constant (Pmode, base,
14944 offset))));
14945 emit_insn (gen_zero_extendqisi2 (gen_rtx_SUBREG (SImode, operands[0], 0),
14946 gen_rtx_MEM (QImode,
14947 plus_constant (Pmode, base,
14948 offset + 1))));
14949 if (!BYTES_BIG_ENDIAN)
14950 emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
14951 gen_rtx_IOR (SImode,
14952 gen_rtx_ASHIFT
14953 (SImode,
14954 gen_rtx_SUBREG (SImode, operands[0], 0),
14955 GEN_INT (8)),
14956 scratch));
14957 else
14958 emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
14959 gen_rtx_IOR (SImode,
14960 gen_rtx_ASHIFT (SImode, scratch,
14961 GEN_INT (8)),
14962 gen_rtx_SUBREG (SImode, operands[0], 0)));
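/* On a little-endian target the sequence emitted above amounts to
     ldrb scratch, [base, #offset]
     ldrb out, [base, #offset + 1]
     orr  out, scratch, out, lsl #8
   i.e. the halfword is reassembled from two zero-extended byte loads; the
   big-endian case swaps which byte ends up in the high half. */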
14965 /* Handle storing a half-word to memory during reload by synthesizing as two
14966 byte stores. Take care not to clobber the input values until after we
14967 have moved them somewhere safe. This code assumes that if the DImode
14968 scratch in operands[2] overlaps either the input value or output address
14969 in some way, then that value must die in this insn (we absolutely need
14970 two scratch registers for some corner cases). */
14971 void
14972 arm_reload_out_hi (rtx *operands)
14974 rtx ref = operands[0];
14975 rtx outval = operands[1];
14976 rtx base, scratch;
14977 HOST_WIDE_INT offset = 0;
14979 if (GET_CODE (ref) == SUBREG)
14981 offset = SUBREG_BYTE (ref);
14982 ref = SUBREG_REG (ref);
14985 if (REG_P (ref))
14987 /* We have a pseudo which has been spilt onto the stack; there
14988 are two cases here: the first where there is a simple
14989 stack-slot replacement and a second where the stack-slot is
14990 out of range, or is used as a subreg. */
14991 if (reg_equiv_mem (REGNO (ref)))
14993 ref = reg_equiv_mem (REGNO (ref));
14994 base = find_replacement (&XEXP (ref, 0));
14996 else
14997 /* The slot is out of range, or was dressed up in a SUBREG. */
14998 base = reg_equiv_address (REGNO (ref));
15000 /* PR 62254: If there is no equivalent memory location then just move
15001 the value as an SImode register move. This happens when the target
15002 architecture variant does not have an HImode register move. */
15003 if (base == NULL)
15005 gcc_assert (REG_P (outval) || SUBREG_P (outval));
15007 if (REG_P (outval))
15009 emit_insn (gen_movsi (gen_rtx_SUBREG (SImode, ref, 0),
15010 gen_rtx_SUBREG (SImode, outval, 0)));
15012 else /* SUBREG_P (outval) */
15014 if (GET_MODE (SUBREG_REG (outval)) == SImode)
15015 emit_insn (gen_movsi (gen_rtx_SUBREG (SImode, ref, 0),
15016 SUBREG_REG (outval)));
15017 else
15018 /* FIXME: Handle other cases ? */
15019 gcc_unreachable ();
15021 return;
15024 else
15025 base = find_replacement (&XEXP (ref, 0));
15027 scratch = gen_rtx_REG (SImode, REGNO (operands[2]));
15029 /* Handle the case where the address is too complex to be offset by 1. */
15030 if (GET_CODE (base) == MINUS
15031 || (GET_CODE (base) == PLUS && !CONST_INT_P (XEXP (base, 1))))
15033 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
15035 /* Be careful not to destroy OUTVAL. */
15036 if (reg_overlap_mentioned_p (base_plus, outval))
15038 /* Updating base_plus might destroy outval, see if we can
15039 swap the scratch and base_plus. */
15040 if (!reg_overlap_mentioned_p (scratch, outval))
15041 std::swap (scratch, base_plus);
15042 else
15044 rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));
15046 /* Be conservative and copy OUTVAL into the scratch now,
15047 this should only be necessary if outval is a subreg
15048 of something larger than a word. */
15049 /* XXX Might this clobber base? I can't see how it can,
15050 since scratch is known to overlap with OUTVAL, and
15051 must be wider than a word. */
15052 emit_insn (gen_movhi (scratch_hi, outval));
15053 outval = scratch_hi;
15057 emit_set_insn (base_plus, base);
15058 base = base_plus;
15060 else if (GET_CODE (base) == PLUS)
15062 /* The addend must be CONST_INT, or we would have dealt with it above. */
15063 HOST_WIDE_INT hi, lo;
15065 offset += INTVAL (XEXP (base, 1));
15066 base = XEXP (base, 0);
15068 /* Rework the address into a legal sequence of insns. */
15069 /* Valid range for lo is -4095 -> 4095 */
15070 lo = (offset >= 0
15071 ? (offset & 0xfff)
15072 : -((-offset) & 0xfff));
15074 /* Corner case, if lo is the max offset then we would be out of range
15075 once we have added the additional 1 below, so bump the msb into the
15076 pre-loading insn(s). */
15077 if (lo == 4095)
15078 lo &= 0x7ff;
15080 hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
15081 ^ (HOST_WIDE_INT) 0x80000000)
15082 - (HOST_WIDE_INT) 0x80000000);
15084 gcc_assert (hi + lo == offset);
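/* Illustrative values for the split above: offset = 4100 gives lo = 4,
   hi = 4096; offset = 4095 hits the corner case and gives lo = 2047,
   hi = 2048, so that both lo and lo + 1 remain addressable; offset =
   -5000 gives lo = -904 and hi = -4096 after the sign-extension step.  */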
15086 if (hi != 0)
15088 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
15090 /* Be careful not to destroy OUTVAL. */
15091 if (reg_overlap_mentioned_p (base_plus, outval))
15093 /* Updating base_plus might destroy outval, see if we
15094 can swap the scratch and base_plus. */
15095 if (!reg_overlap_mentioned_p (scratch, outval))
15096 std::swap (scratch, base_plus);
15097 else
15099 rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));
15101 /* Be conservative and copy outval into scratch now,
15102 this should only be necessary if outval is a
15103 subreg of something larger than a word. */
15104 /* XXX Might this clobber base? I can't see how it
15105 can, since scratch is known to overlap with
15106 outval. */
15107 emit_insn (gen_movhi (scratch_hi, outval));
15108 outval = scratch_hi;
15112 /* Get the base address; addsi3 knows how to handle constants
15113 that require more than one insn. */
15114 emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
15115 base = base_plus;
15116 offset = lo;
15120 if (BYTES_BIG_ENDIAN)
15122 emit_insn (gen_movqi (gen_rtx_MEM (QImode,
15123 plus_constant (Pmode, base,
15124 offset + 1)),
15125 gen_lowpart (QImode, outval)));
15126 emit_insn (gen_lshrsi3 (scratch,
15127 gen_rtx_SUBREG (SImode, outval, 0),
15128 GEN_INT (8)));
15129 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, base,
15130 offset)),
15131 gen_lowpart (QImode, scratch)));
15133 else
15135 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, base,
15136 offset)),
15137 gen_lowpart (QImode, outval)));
15138 emit_insn (gen_lshrsi3 (scratch,
15139 gen_rtx_SUBREG (SImode, outval, 0),
15140 GEN_INT (8)));
15141 emit_insn (gen_movqi (gen_rtx_MEM (QImode,
15142 plus_constant (Pmode, base,
15143 offset + 1)),
15144 gen_lowpart (QImode, scratch)));
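/* For example, storing outval = 0x1234 on a little-endian target writes
   the low byte 0x34 to [base + offset], copies outval >> 8 into SCRATCH
   and writes 0x12 to [base + offset + 1]; the big-endian branch emits
   the same two byte stores with the addresses swapped.  */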
15148 /* Return true if a type must be passed in memory. For AAPCS, small aggregates
15149 (padded to the size of a word) should be passed in a register. */
15151 static bool
15152 arm_must_pass_in_stack (machine_mode mode, const_tree type)
15154 if (TARGET_AAPCS_BASED)
15155 return must_pass_in_stack_var_size (mode, type);
15156 else
15157 return must_pass_in_stack_var_size_or_pad (mode, type);
15161 /* Implement TARGET_FUNCTION_ARG_PADDING; return PAD_UPWARD if the lowest
15162 byte of a stack argument has useful data. For legacy APCS ABIs we use
15163 the default. For AAPCS based ABIs small aggregate types are placed
15164 in the lowest memory address. */
15166 static pad_direction
15167 arm_function_arg_padding (machine_mode mode, const_tree type)
15169 if (!TARGET_AAPCS_BASED)
15170 return default_function_arg_padding (mode, type);
15172 if (type && BYTES_BIG_ENDIAN && INTEGRAL_TYPE_P (type))
15173 return PAD_DOWNWARD;
15175 return PAD_UPWARD;
15179 /* Similarly, for use by BLOCK_REG_PADDING (MODE, TYPE, FIRST).
15180 Return !BYTES_BIG_ENDIAN if the least significant byte of the
15181 register has useful data, and return the opposite if the most
15182 significant byte does. */
15184 bool
15185 arm_pad_reg_upward (machine_mode mode,
15186 tree type, int first ATTRIBUTE_UNUSED)
15188 if (TARGET_AAPCS_BASED && BYTES_BIG_ENDIAN)
15190 /* For AAPCS, small aggregates, small fixed-point types,
15191 and small complex types are always padded upwards. */
15192 if (type)
15194 if ((AGGREGATE_TYPE_P (type)
15195 || TREE_CODE (type) == COMPLEX_TYPE
15196 || FIXED_POINT_TYPE_P (type))
15197 && int_size_in_bytes (type) <= 4)
15198 return true;
15200 else
15202 if ((COMPLEX_MODE_P (mode) || ALL_FIXED_POINT_MODE_P (mode))
15203 && GET_MODE_SIZE (mode) <= 4)
15204 return true;
15208 /* Otherwise, use default padding. */
15209 return !BYTES_BIG_ENDIAN;
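/* For instance, on a big-endian AAPCS target a three-byte struct or a
   _Complex char argument is padded upwards (we return true), whereas
   other types fall through to !BYTES_BIG_ENDIAN and are padded
   downwards.  */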
15212 /* Returns true iff OFFSET is valid for use in an LDRD/STRD instruction,
15213 assuming that the address in the base register is word aligned. */
15214 bool
15215 offset_ok_for_ldrd_strd (HOST_WIDE_INT offset)
15217 HOST_WIDE_INT max_offset;
15219 /* Offset must be a multiple of 4 in Thumb mode. */
15220 if (TARGET_THUMB2 && ((offset & 3) != 0))
15221 return false;
15223 if (TARGET_THUMB2)
15224 max_offset = 1020;
15225 else if (TARGET_ARM)
15226 max_offset = 255;
15227 else
15228 return false;
15230 return ((offset <= max_offset) && (offset >= -max_offset));
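/* For example, an offset of 256 is accepted for Thumb-2 (a multiple of
   4 within +/- 1020) but rejected for ARM-state LDRD/STRD, whose
   immediate range is only +/- 255.  */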
15233 /* Checks whether the operands are valid for use in an LDRD/STRD instruction.
15234 Assumes that RT, RT2, and RN are REG. This is guaranteed by the patterns.
15235 Assumes that the address in the base register RN is word aligned. Pattern
15236 guarantees that both memory accesses use the same base register,
15237 the offsets are constants within the range, and the gap between the offsets is 4.
15238 If reload is complete, check that the registers are legal. WBACK indicates whether
15239 address is updated. LOAD indicates whether memory access is load or store. */
15240 bool
15241 operands_ok_ldrd_strd (rtx rt, rtx rt2, rtx rn, HOST_WIDE_INT offset,
15242 bool wback, bool load)
15244 unsigned int t, t2, n;
15246 if (!reload_completed)
15247 return true;
15249 if (!offset_ok_for_ldrd_strd (offset))
15250 return false;
15252 t = REGNO (rt);
15253 t2 = REGNO (rt2);
15254 n = REGNO (rn);
15256 if ((TARGET_THUMB2)
15257 && ((wback && (n == t || n == t2))
15258 || (t == SP_REGNUM)
15259 || (t == PC_REGNUM)
15260 || (t2 == SP_REGNUM)
15261 || (t2 == PC_REGNUM)
15262 || (!load && (n == PC_REGNUM))
15263 || (load && (t == t2))
15264 /* Triggers Cortex-M3 LDRD errata. */
15265 || (!wback && load && fix_cm3_ldrd && (n == t))))
15266 return false;
15268 if ((TARGET_ARM)
15269 && ((wback && (n == t || n == t2))
15270 || (t2 == PC_REGNUM)
15271 || (t % 2 != 0) /* First destination register is not even. */
15272 || (t2 != t + 1)
15273 /* PC can be used as base register (for offset addressing only),
15274 but it is deprecated. */
15275 || (n == PC_REGNUM)))
15276 return false;
15278 return true;
15281 /* Helper for gen_operands_ldrd_strd. Returns true iff the memory
15282 operand MEM's address contains an immediate offset from the base
15283 register and has no side effects, in which case it sets BASE and
15284 OFFSET accordingly. */
15285 static bool
15286 mem_ok_for_ldrd_strd (rtx mem, rtx *base, rtx *offset)
15288 rtx addr;
15290 gcc_assert (base != NULL && offset != NULL);
15292 /* TODO: Handle more general memory operand patterns, such as
15293 PRE_DEC and PRE_INC. */
15295 if (side_effects_p (mem))
15296 return false;
15298 /* Can't deal with subregs. */
15299 if (GET_CODE (mem) == SUBREG)
15300 return false;
15302 gcc_assert (MEM_P (mem));
15304 *offset = const0_rtx;
15306 addr = XEXP (mem, 0);
15308 /* If addr isn't valid for DImode, then we can't handle it. */
15309 if (!arm_legitimate_address_p (DImode, addr,
15310 reload_in_progress || reload_completed))
15311 return false;
15313 if (REG_P (addr))
15315 *base = addr;
15316 return true;
15318 else if (GET_CODE (addr) == PLUS || GET_CODE (addr) == MINUS)
15320 *base = XEXP (addr, 0);
15321 *offset = XEXP (addr, 1);
15322 return (REG_P (*base) && CONST_INT_P (*offset));
15325 return false;
15328 /* Called from a peephole2 to replace two word-size accesses with a
15329 single LDRD/STRD instruction. Returns true iff we can generate a
15330 new instruction sequence. That is, both accesses use the same base
15331 register and the gap between constant offsets is 4. This function
15332 may reorder its operands to match ldrd/strd RTL templates.
15333 OPERANDS are the operands found by the peephole matcher;
15334 OPERANDS[0,1] are register operands, and OPERANDS[2,3] are the
15335 corresponding memory operands. LOAD indicates whether the access
15336 is load or store. CONST_STORE indicates a store of constant
15337 integer values held in OPERANDS[4,5] and assumes that the pattern
15338 is 4 insns long, for the purpose of checking dead registers.
15339 COMMUTE indicates that register operands may be reordered. */
15340 bool
15341 gen_operands_ldrd_strd (rtx *operands, bool load,
15342 bool const_store, bool commute)
15344 int nops = 2;
15345 HOST_WIDE_INT offsets[2], offset;
15346 rtx base = NULL_RTX;
15347 rtx cur_base, cur_offset, tmp;
15348 int i, gap;
15349 HARD_REG_SET regset;
15351 gcc_assert (!const_store || !load);
15352 /* Check that the memory references are immediate offsets from the
15353 same base register. Extract the base register, the destination
15354 registers, and the corresponding memory offsets. */
15355 for (i = 0; i < nops; i++)
15357 if (!mem_ok_for_ldrd_strd (operands[nops+i], &cur_base, &cur_offset))
15358 return false;
15360 if (i == 0)
15361 base = cur_base;
15362 else if (REGNO (base) != REGNO (cur_base))
15363 return false;
15365 offsets[i] = INTVAL (cur_offset);
15366 if (GET_CODE (operands[i]) == SUBREG)
15368 tmp = SUBREG_REG (operands[i]);
15369 gcc_assert (GET_MODE (operands[i]) == GET_MODE (tmp));
15370 operands[i] = tmp;
15374 /* Make sure there is no dependency between the individual loads. */
15375 if (load && REGNO (operands[0]) == REGNO (base))
15376 return false; /* RAW */
15378 if (load && REGNO (operands[0]) == REGNO (operands[1]))
15379 return false; /* WAW */
15381 /* If the same input register is used in both stores
15382 when storing different constants, try to find a free register.
15383 For example, the code
15384 mov r0, 0
15385 str r0, [r2]
15386 mov r0, 1
15387 str r0, [r2, #4]
15388 can be transformed into
15389 mov r1, 0
15390 mov r0, 1
15391 strd r1, r0, [r2]
15392 in Thumb mode assuming that r1 is free.
15393 For ARM mode do the same but only if the starting register
15394 can be made to be even. */
15395 if (const_store
15396 && REGNO (operands[0]) == REGNO (operands[1])
15397 && INTVAL (operands[4]) != INTVAL (operands[5]))
15399 if (TARGET_THUMB2)
15401 CLEAR_HARD_REG_SET (regset);
15402 tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
15403 if (tmp == NULL_RTX)
15404 return false;
15406 /* Use the new register in the first load to ensure that
15407 if the original input register is not dead after peephole,
15408 then it will have the correct constant value. */
15409 operands[0] = tmp;
15411 else if (TARGET_ARM)
15413 int regno = REGNO (operands[0]);
15414 if (!peep2_reg_dead_p (4, operands[0]))
15416 /* When the input register is even and is not dead after the
15417 pattern, it has to hold the second constant but we cannot
15418 form a legal STRD in ARM mode with this register as the second
15419 register. */
15420 if (regno % 2 == 0)
15421 return false;
15423 /* Is regno-1 free? */
15424 SET_HARD_REG_SET (regset);
15425 CLEAR_HARD_REG_BIT(regset, regno - 1);
15426 tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
15427 if (tmp == NULL_RTX)
15428 return false;
15430 operands[0] = tmp;
15432 else
15434 /* Find a DImode register. */
15435 CLEAR_HARD_REG_SET (regset);
15436 tmp = peep2_find_free_register (0, 4, "r", DImode, &regset);
15437 if (tmp != NULL_RTX)
15439 operands[0] = simplify_gen_subreg (SImode, tmp, DImode, 0);
15440 operands[1] = simplify_gen_subreg (SImode, tmp, DImode, 4);
15442 else
15444 /* Can we use the input register to form a DI register? */
15445 SET_HARD_REG_SET (regset);
15446 CLEAR_HARD_REG_BIT(regset,
15447 regno % 2 == 0 ? regno + 1 : regno - 1);
15448 tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
15449 if (tmp == NULL_RTX)
15450 return false;
15451 operands[regno % 2 == 1 ? 0 : 1] = tmp;
15455 gcc_assert (operands[0] != NULL_RTX);
15456 gcc_assert (operands[1] != NULL_RTX);
15457 gcc_assert (REGNO (operands[0]) % 2 == 0);
15458 gcc_assert (REGNO (operands[1]) == REGNO (operands[0]) + 1);
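/* For instance, if the only input register is r3 (odd), the search above
   is restricted to r2, so that the final operands form the even/odd
   pair r2/r3 that an ARM-state STRD requires.  */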
15462 /* Make sure the instructions are ordered with lower memory access first. */
15463 if (offsets[0] > offsets[1])
15465 gap = offsets[0] - offsets[1];
15466 offset = offsets[1];
15468 /* Swap the instructions such that lower memory is accessed first. */
15469 std::swap (operands[0], operands[1]);
15470 std::swap (operands[2], operands[3]);
15471 if (const_store)
15472 std::swap (operands[4], operands[5]);
15474 else
15476 gap = offsets[1] - offsets[0];
15477 offset = offsets[0];
15480 /* Make sure accesses are to consecutive memory locations. */
15481 if (gap != 4)
15482 return false;
15484 /* Make sure we generate legal instructions. */
15485 if (operands_ok_ldrd_strd (operands[0], operands[1], base, offset,
15486 false, load))
15487 return true;
15489 /* In Thumb state, where registers are almost unconstrained, there
15490 is little hope to fix it. */
15491 if (TARGET_THUMB2)
15492 return false;
15494 if (load && commute)
15496 /* Try reordering registers. */
15497 std::swap (operands[0], operands[1]);
15498 if (operands_ok_ldrd_strd (operands[0], operands[1], base, offset,
15499 false, load))
15500 return true;
15503 if (const_store)
15505 /* If input registers are dead after this pattern, they can be
15506 reordered or replaced by other registers that are free in the
15507 current pattern. */
15508 if (!peep2_reg_dead_p (4, operands[0])
15509 || !peep2_reg_dead_p (4, operands[1]))
15510 return false;
15512 /* Try to reorder the input registers. */
15513 /* For example, the code
15514 mov r0, 0
15515 mov r1, 1
15516 str r1, [r2]
15517 str r0, [r2, #4]
15518 can be transformed into
15519 mov r1, 0
15520 mov r0, 1
15521 strd r0, r1, [r2]
15523 if (operands_ok_ldrd_strd (operands[1], operands[0], base, offset,
15524 false, false))
15526 std::swap (operands[0], operands[1]);
15527 return true;
15530 /* Try to find a free DI register. */
15531 CLEAR_HARD_REG_SET (regset);
15532 add_to_hard_reg_set (&regset, SImode, REGNO (operands[0]));
15533 add_to_hard_reg_set (&regset, SImode, REGNO (operands[1]));
15534 while (true)
15536 tmp = peep2_find_free_register (0, 4, "r", DImode, &regset);
15537 if (tmp == NULL_RTX)
15538 return false;
15540 /* DREG must be an even-numbered register in DImode.
15541 Split it into SI registers. */
15542 operands[0] = simplify_gen_subreg (SImode, tmp, DImode, 0);
15543 operands[1] = simplify_gen_subreg (SImode, tmp, DImode, 4);
15544 gcc_assert (operands[0] != NULL_RTX);
15545 gcc_assert (operands[1] != NULL_RTX);
15546 gcc_assert (REGNO (operands[0]) % 2 == 0);
15547 gcc_assert (REGNO (operands[0]) + 1 == REGNO (operands[1]));
15549 return (operands_ok_ldrd_strd (operands[0], operands[1],
15550 base, offset,
15551 false, load));
15555 return false;
15561 /* Print a symbolic form of X to the debug file, F. */
15562 static void
15563 arm_print_value (FILE *f, rtx x)
15565 switch (GET_CODE (x))
15567 case CONST_INT:
15568 fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (x));
15569 return;
15571 case CONST_DOUBLE:
15572 fprintf (f, "<0x%lx,0x%lx>", (long)XWINT (x, 2), (long)XWINT (x, 3));
15573 return;
15575 case CONST_VECTOR:
15577 int i;
15579 fprintf (f, "<");
15580 for (i = 0; i < CONST_VECTOR_NUNITS (x); i++)
15582 fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (CONST_VECTOR_ELT (x, i)));
15583 if (i < (CONST_VECTOR_NUNITS (x) - 1))
15584 fputc (',', f);
15586 fprintf (f, ">");
15588 return;
15590 case CONST_STRING:
15591 fprintf (f, "\"%s\"", XSTR (x, 0));
15592 return;
15594 case SYMBOL_REF:
15595 fprintf (f, "`%s'", XSTR (x, 0));
15596 return;
15598 case LABEL_REF:
15599 fprintf (f, "L%d", INSN_UID (XEXP (x, 0)));
15600 return;
15602 case CONST:
15603 arm_print_value (f, XEXP (x, 0));
15604 return;
15606 case PLUS:
15607 arm_print_value (f, XEXP (x, 0));
15608 fprintf (f, "+");
15609 arm_print_value (f, XEXP (x, 1));
15610 return;
15612 case PC:
15613 fprintf (f, "pc");
15614 return;
15616 default:
15617 fprintf (f, "????");
15618 return;
15622 /* Routines for manipulation of the constant pool. */
15624 /* Arm instructions cannot load a large constant directly into a
15625 register; they have to come from a pc relative load. The constant
15626 must therefore be placed in the addressable range of the pc
15627 relative load. Depending on the precise pc relative load
15628 instruction the range is somewhere between 256 bytes and 4k. This
15629 means that we often have to dump a constant inside a function, and
15630 generate code to branch around it.
15632 It is important to minimize this, since the branches will slow
15633 things down and make the code larger.
15635 Normally we can hide the table after an existing unconditional
15636 branch so that there is no interruption of the flow, but in the
15637 worst case the code looks like this:
15639 ldr rn, L1
15641 b L2
15642 align
15643 L1: .long value
15647 ldr rn, L3
15649 b L4
15650 align
15651 L3: .long value
15655 We fix this by performing a scan after scheduling, which notices
15656 which instructions need to have their operands fetched from the
15657 constant table and builds the table.
15659 The algorithm starts by building a table of all the constants that
15660 need fixing up and all the natural barriers in the function (places
15661 where a constant table can be dropped without breaking the flow).
15662 For each fixup we note how far the pc-relative replacement will be
15663 able to reach and the offset of the instruction into the function.
15665 Having built the table we then group the fixes together to form
15666 tables that are as large as possible (subject to addressing
15667 constraints) and emit each table of constants after the last
15668 barrier that is within range of all the instructions in the group.
15669 If a group does not contain a barrier, then we forcibly create one
15670 by inserting a jump instruction into the flow. Once the table has
15671 been inserted, the insns are then modified to reference the
15672 relevant entry in the pool.
15674 Possible enhancements to the algorithm (not implemented) are:
15676 1) For some processors and object formats, there may be benefit in
15677 aligning the pools to the start of cache lines; this alignment
15678 would need to be taken into account when calculating addressability
15679 of a pool. */
15681 /* These typedefs are located at the start of this file, so that
15682 they can be used in the prototypes there. This comment is to
15683 remind readers of that fact so that the following structures
15684 can be understood more easily.
15686 typedef struct minipool_node Mnode;
15687 typedef struct minipool_fixup Mfix; */
15689 struct minipool_node
15691 /* Doubly linked chain of entries. */
15692 Mnode * next;
15693 Mnode * prev;
15694 /* The maximum offset into the code that this entry can be placed. While
15695 pushing fixes for forward references, all entries are sorted in order
15696 of increasing max_address. */
15697 HOST_WIDE_INT max_address;
15698 /* Similarly for an entry inserted for a backwards ref. */
15699 HOST_WIDE_INT min_address;
15700 /* The number of fixes referencing this entry. This can become zero
15701 if we "unpush" an entry. In this case we ignore the entry when we
15702 come to emit the code. */
15703 int refcount;
15704 /* The offset from the start of the minipool. */
15705 HOST_WIDE_INT offset;
15706 /* The value in the table. */
15707 rtx value;
15708 /* The mode of value. */
15709 machine_mode mode;
15710 /* The size of the value. With iWMMXt enabled
15711 sizes > 4 also imply an alignment of 8-bytes. */
15712 int fix_size;
15715 struct minipool_fixup
15717 Mfix * next;
15718 rtx_insn * insn;
15719 HOST_WIDE_INT address;
15720 rtx * loc;
15721 machine_mode mode;
15722 int fix_size;
15723 rtx value;
15724 Mnode * minipool;
15725 HOST_WIDE_INT forwards;
15726 HOST_WIDE_INT backwards;
15729 /* Fixes less than a word need padding out to a word boundary. */
15730 #define MINIPOOL_FIX_SIZE(mode) \
15731 (GET_MODE_SIZE ((mode)) >= 4 ? GET_MODE_SIZE ((mode)) : 4)
15733 static Mnode * minipool_vector_head;
15734 static Mnode * minipool_vector_tail;
15735 static rtx_code_label *minipool_vector_label;
15736 static int minipool_pad;
15738 /* The linked list of all minipool fixes required for this function. */
15739 Mfix * minipool_fix_head;
15740 Mfix * minipool_fix_tail;
15741 /* The fix entry for the current minipool, once it has been placed. */
15742 Mfix * minipool_barrier;
15744 #ifndef JUMP_TABLES_IN_TEXT_SECTION
15745 #define JUMP_TABLES_IN_TEXT_SECTION 0
15746 #endif
15748 static HOST_WIDE_INT
15749 get_jump_table_size (rtx_jump_table_data *insn)
15751 /* ADDR_VECs only take room if read-only data goes into the text
15752 section. */
15753 if (JUMP_TABLES_IN_TEXT_SECTION || readonly_data_section == text_section)
15755 rtx body = PATTERN (insn);
15756 int elt = GET_CODE (body) == ADDR_DIFF_VEC ? 1 : 0;
15757 HOST_WIDE_INT size;
15758 HOST_WIDE_INT modesize;
15760 modesize = GET_MODE_SIZE (GET_MODE (body));
15761 size = modesize * XVECLEN (body, elt);
15762 switch (modesize)
15764 case 1:
15765 /* Round up size of TBB table to a halfword boundary. */
15766 size = (size + 1) & ~HOST_WIDE_INT_1;
15767 break;
15768 case 2:
15769 /* No padding necessary for TBH. */
15770 break;
15771 case 4:
15772 /* Add two bytes for alignment on Thumb. */
15773 if (TARGET_THUMB)
15774 size += 2;
15775 break;
15776 default:
15777 gcc_unreachable ();
15779 return size;
15782 return 0;
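/* For example, a TBB dispatch table (QImode entries) with five cases
   takes 5 bytes and is rounded up to 6 to keep the following code
   halfword aligned, while a word-sized ADDR_DIFF_VEC with five entries
   takes 20 bytes plus 2 bytes of alignment when TARGET_THUMB.  */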
15785 /* Return the maximum amount of padding that will be inserted before
15786 label LABEL. */
15788 static HOST_WIDE_INT
15789 get_label_padding (rtx label)
15791 HOST_WIDE_INT align, min_insn_size;
15793 align = 1 << label_to_alignment (label);
15794 min_insn_size = TARGET_THUMB ? 2 : 4;
15795 return align > min_insn_size ? align - min_insn_size : 0;
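/* For instance, a label aligned to an 8-byte boundary on Thumb (minimum
   insn size 2) may be preceded by up to 6 bytes of padding; that worst
   case is what gets added to the running insn count in
   create_fix_barrier below.  */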
15798 /* Move a minipool fix MP from its current location to before MAX_MP.
15799 If MAX_MP is NULL, then MP doesn't need moving, but the addressing
15800 constraints may need updating. */
15801 static Mnode *
15802 move_minipool_fix_forward_ref (Mnode *mp, Mnode *max_mp,
15803 HOST_WIDE_INT max_address)
15805 /* The code below assumes these are different. */
15806 gcc_assert (mp != max_mp);
15808 if (max_mp == NULL)
15810 if (max_address < mp->max_address)
15811 mp->max_address = max_address;
15813 else
15815 if (max_address > max_mp->max_address - mp->fix_size)
15816 mp->max_address = max_mp->max_address - mp->fix_size;
15817 else
15818 mp->max_address = max_address;
15820 /* Unlink MP from its current position. Since max_mp is non-null,
15821 mp->prev must be non-null. */
15822 mp->prev->next = mp->next;
15823 if (mp->next != NULL)
15824 mp->next->prev = mp->prev;
15825 else
15826 minipool_vector_tail = mp->prev;
15828 /* Re-insert it before MAX_MP. */
15829 mp->next = max_mp;
15830 mp->prev = max_mp->prev;
15831 max_mp->prev = mp;
15833 if (mp->prev != NULL)
15834 mp->prev->next = mp;
15835 else
15836 minipool_vector_head = mp;
15839 /* Save the new entry. */
15840 max_mp = mp;
15842 /* Scan over the preceding entries and adjust their addresses as
15843 required. */
15844 while (mp->prev != NULL
15845 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
15847 mp->prev->max_address = mp->max_address - mp->prev->fix_size;
15848 mp = mp->prev;
15851 return max_mp;
15854 /* Add a constant to the minipool for a forward reference. Returns the
15855 node added or NULL if the constant will not fit in this pool. */
15856 static Mnode *
15857 add_minipool_forward_ref (Mfix *fix)
15859 /* If set, max_mp is the first pool_entry that has a lower
15860 constraint than the one we are trying to add. */
15861 Mnode * max_mp = NULL;
15862 HOST_WIDE_INT max_address = fix->address + fix->forwards - minipool_pad;
15863 Mnode * mp;
15865 /* If the minipool starts before the end of FIX->INSN then this FIX
15866 can not be placed into the current pool. Furthermore, adding the
15867 new constant pool entry may cause the pool to start FIX_SIZE bytes
15868 earlier. */
15869 if (minipool_vector_head &&
15870 (fix->address + get_attr_length (fix->insn)
15871 >= minipool_vector_head->max_address - fix->fix_size))
15872 return NULL;
15874 /* Scan the pool to see if a constant with the same value has
15875 already been added. While we are doing this, also note the
15876 location where we must insert the constant if it doesn't already
15877 exist. */
15878 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
15880 if (GET_CODE (fix->value) == GET_CODE (mp->value)
15881 && fix->mode == mp->mode
15882 && (!LABEL_P (fix->value)
15883 || (CODE_LABEL_NUMBER (fix->value)
15884 == CODE_LABEL_NUMBER (mp->value)))
15885 && rtx_equal_p (fix->value, mp->value))
15887 /* More than one fix references this entry. */
15888 mp->refcount++;
15889 return move_minipool_fix_forward_ref (mp, max_mp, max_address);
15892 /* Note the insertion point if necessary. */
15893 if (max_mp == NULL
15894 && mp->max_address > max_address)
15895 max_mp = mp;
15897 /* If we are inserting an 8-byte aligned quantity and
15898 we have not already found an insertion point, then
15899 make sure that all such 8-byte aligned quantities are
15900 placed at the start of the pool. */
15901 if (ARM_DOUBLEWORD_ALIGN
15902 && max_mp == NULL
15903 && fix->fix_size >= 8
15904 && mp->fix_size < 8)
15906 max_mp = mp;
15907 max_address = mp->max_address;
15911 /* The value is not currently in the minipool, so we need to create
15912 a new entry for it. If MAX_MP is NULL, the entry will be put on
15913 the end of the list since the placement is less constrained than
15914 any existing entry. Otherwise, we insert the new fix before
15915 MAX_MP and, if necessary, adjust the constraints on the other
15916 entries. */
15917 mp = XNEW (Mnode);
15918 mp->fix_size = fix->fix_size;
15919 mp->mode = fix->mode;
15920 mp->value = fix->value;
15921 mp->refcount = 1;
15922 /* Not yet required for a backwards ref. */
15923 mp->min_address = -65536;
15925 if (max_mp == NULL)
15927 mp->max_address = max_address;
15928 mp->next = NULL;
15929 mp->prev = minipool_vector_tail;
15931 if (mp->prev == NULL)
15933 minipool_vector_head = mp;
15934 minipool_vector_label = gen_label_rtx ();
15936 else
15937 mp->prev->next = mp;
15939 minipool_vector_tail = mp;
15941 else
15943 if (max_address > max_mp->max_address - mp->fix_size)
15944 mp->max_address = max_mp->max_address - mp->fix_size;
15945 else
15946 mp->max_address = max_address;
15948 mp->next = max_mp;
15949 mp->prev = max_mp->prev;
15950 max_mp->prev = mp;
15951 if (mp->prev != NULL)
15952 mp->prev->next = mp;
15953 else
15954 minipool_vector_head = mp;
15957 /* Save the new entry. */
15958 max_mp = mp;
15960 /* Scan over the preceding entries and adjust their addresses as
15961 required. */
15962 while (mp->prev != NULL
15963 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
15965 mp->prev->max_address = mp->max_address - mp->prev->fix_size;
15966 mp = mp->prev;
15969 return max_mp;
15972 static Mnode *
15973 move_minipool_fix_backward_ref (Mnode *mp, Mnode *min_mp,
15974 HOST_WIDE_INT min_address)
15976 HOST_WIDE_INT offset;
15978 /* The code below assumes these are different. */
15979 gcc_assert (mp != min_mp);
15981 if (min_mp == NULL)
15983 if (min_address > mp->min_address)
15984 mp->min_address = min_address;
15986 else
15988 /* We will adjust this below if it is too loose. */
15989 mp->min_address = min_address;
15991 /* Unlink MP from its current position. Since min_mp is non-null,
15992 mp->next must be non-null. */
15993 mp->next->prev = mp->prev;
15994 if (mp->prev != NULL)
15995 mp->prev->next = mp->next;
15996 else
15997 minipool_vector_head = mp->next;
15999 /* Reinsert it after MIN_MP. */
16000 mp->prev = min_mp;
16001 mp->next = min_mp->next;
16002 min_mp->next = mp;
16003 if (mp->next != NULL)
16004 mp->next->prev = mp;
16005 else
16006 minipool_vector_tail = mp;
16009 min_mp = mp;
16011 offset = 0;
16012 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
16014 mp->offset = offset;
16015 if (mp->refcount > 0)
16016 offset += mp->fix_size;
16018 if (mp->next && mp->next->min_address < mp->min_address + mp->fix_size)
16019 mp->next->min_address = mp->min_address + mp->fix_size;
16022 return min_mp;
16025 /* Add a constant to the minipool for a backward reference. Returns the
16026 node added or NULL if the constant will not fit in this pool.
16028 Note that the code for insertion for a backwards reference can be
16029 somewhat confusing because the calculated offsets for each fix do
16030 not take into account the size of the pool (which is still under
16031 construction). */
16032 static Mnode *
16033 add_minipool_backward_ref (Mfix *fix)
16035 /* If set, min_mp is the last pool_entry that has a lower constraint
16036 than the one we are trying to add. */
16037 Mnode *min_mp = NULL;
16038 /* This can be negative, since it is only a constraint. */
16039 HOST_WIDE_INT min_address = fix->address - fix->backwards;
16040 Mnode *mp;
16042 /* If we can't reach the current pool from this insn, or if we can't
16043 insert this entry at the end of the pool without pushing other
16044 fixes out of range, then we don't try. This ensures that we
16045 can't fail later on. */
16046 if (min_address >= minipool_barrier->address
16047 || (minipool_vector_tail->min_address + fix->fix_size
16048 >= minipool_barrier->address))
16049 return NULL;
16051 /* Scan the pool to see if a constant with the same value has
16052 already been added. While we are doing this, also note the
16053 location where we must insert the constant if it doesn't already
16054 exist. */
16055 for (mp = minipool_vector_tail; mp != NULL; mp = mp->prev)
16057 if (GET_CODE (fix->value) == GET_CODE (mp->value)
16058 && fix->mode == mp->mode
16059 && (!LABEL_P (fix->value)
16060 || (CODE_LABEL_NUMBER (fix->value)
16061 == CODE_LABEL_NUMBER (mp->value)))
16062 && rtx_equal_p (fix->value, mp->value)
16063 /* Check that there is enough slack to move this entry to the
16064 end of the table (this is conservative). */
16065 && (mp->max_address
16066 > (minipool_barrier->address
16067 + minipool_vector_tail->offset
16068 + minipool_vector_tail->fix_size)))
16070 mp->refcount++;
16071 return move_minipool_fix_backward_ref (mp, min_mp, min_address);
16074 if (min_mp != NULL)
16075 mp->min_address += fix->fix_size;
16076 else
16078 /* Note the insertion point if necessary. */
16079 if (mp->min_address < min_address)
16081 /* For now, we do not allow the insertion of nodes requiring 8-byte
16082 alignment anywhere but at the start of the pool. */
16083 if (ARM_DOUBLEWORD_ALIGN
16084 && fix->fix_size >= 8 && mp->fix_size < 8)
16085 return NULL;
16086 else
16087 min_mp = mp;
16089 else if (mp->max_address
16090 < minipool_barrier->address + mp->offset + fix->fix_size)
16092 /* Inserting before this entry would push the fix beyond
16093 its maximum address (which can happen if we have
16094 re-located a forwards fix); force the new fix to come
16095 after it. */
16096 if (ARM_DOUBLEWORD_ALIGN
16097 && fix->fix_size >= 8 && mp->fix_size < 8)
16098 return NULL;
16099 else
16101 min_mp = mp;
16102 min_address = mp->min_address + fix->fix_size;
16105 /* Do not insert a non-8-byte aligned quantity before 8-byte
16106 aligned quantities. */
16107 else if (ARM_DOUBLEWORD_ALIGN
16108 && fix->fix_size < 8
16109 && mp->fix_size >= 8)
16111 min_mp = mp;
16112 min_address = mp->min_address + fix->fix_size;
16117 /* We need to create a new entry. */
16118 mp = XNEW (Mnode);
16119 mp->fix_size = fix->fix_size;
16120 mp->mode = fix->mode;
16121 mp->value = fix->value;
16122 mp->refcount = 1;
16123 mp->max_address = minipool_barrier->address + 65536;
16125 mp->min_address = min_address;
16127 if (min_mp == NULL)
16129 mp->prev = NULL;
16130 mp->next = minipool_vector_head;
16132 if (mp->next == NULL)
16134 minipool_vector_tail = mp;
16135 minipool_vector_label = gen_label_rtx ();
16137 else
16138 mp->next->prev = mp;
16140 minipool_vector_head = mp;
16142 else
16144 mp->next = min_mp->next;
16145 mp->prev = min_mp;
16146 min_mp->next = mp;
16148 if (mp->next != NULL)
16149 mp->next->prev = mp;
16150 else
16151 minipool_vector_tail = mp;
16154 /* Save the new entry. */
16155 min_mp = mp;
16157 if (mp->prev)
16158 mp = mp->prev;
16159 else
16160 mp->offset = 0;
16162 /* Scan over the following entries and adjust their offsets. */
16163 while (mp->next != NULL)
16165 if (mp->next->min_address < mp->min_address + mp->fix_size)
16166 mp->next->min_address = mp->min_address + mp->fix_size;
16168 if (mp->refcount)
16169 mp->next->offset = mp->offset + mp->fix_size;
16170 else
16171 mp->next->offset = mp->offset;
16173 mp = mp->next;
16176 return min_mp;
16179 static void
16180 assign_minipool_offsets (Mfix *barrier)
16182 HOST_WIDE_INT offset = 0;
16183 Mnode *mp;
16185 minipool_barrier = barrier;
16187 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
16189 mp->offset = offset;
16191 if (mp->refcount > 0)
16192 offset += mp->fix_size;
16196 /* Output the literal table */
16197 static void
16198 dump_minipool (rtx_insn *scan)
16200 Mnode * mp;
16201 Mnode * nmp;
16202 int align64 = 0;
16204 if (ARM_DOUBLEWORD_ALIGN)
16205 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
16206 if (mp->refcount > 0 && mp->fix_size >= 8)
16208 align64 = 1;
16209 break;
16212 if (dump_file)
16213 fprintf (dump_file,
16214 ";; Emitting minipool after insn %u; address %ld; align %d (bytes)\n",
16215 INSN_UID (scan), (unsigned long) minipool_barrier->address, align64 ? 8 : 4);
16217 scan = emit_label_after (gen_label_rtx (), scan);
16218 scan = emit_insn_after (align64 ? gen_align_8 () : gen_align_4 (), scan);
16219 scan = emit_label_after (minipool_vector_label, scan);
16221 for (mp = minipool_vector_head; mp != NULL; mp = nmp)
16223 if (mp->refcount > 0)
16225 if (dump_file)
16227 fprintf (dump_file,
16228 ";; Offset %u, min %ld, max %ld ",
16229 (unsigned) mp->offset, (unsigned long) mp->min_address,
16230 (unsigned long) mp->max_address);
16231 arm_print_value (dump_file, mp->value);
16232 fputc ('\n', dump_file);
16235 rtx val = copy_rtx (mp->value);
16237 switch (GET_MODE_SIZE (mp->mode))
16239 #ifdef HAVE_consttable_1
16240 case 1:
16241 scan = emit_insn_after (gen_consttable_1 (val), scan);
16242 break;
16244 #endif
16245 #ifdef HAVE_consttable_2
16246 case 2:
16247 scan = emit_insn_after (gen_consttable_2 (val), scan);
16248 break;
16250 #endif
16251 #ifdef HAVE_consttable_4
16252 case 4:
16253 scan = emit_insn_after (gen_consttable_4 (val), scan);
16254 break;
16256 #endif
16257 #ifdef HAVE_consttable_8
16258 case 8:
16259 scan = emit_insn_after (gen_consttable_8 (val), scan);
16260 break;
16262 #endif
16263 #ifdef HAVE_consttable_16
16264 case 16:
16265 scan = emit_insn_after (gen_consttable_16 (val), scan);
16266 break;
16268 #endif
16269 default:
16270 gcc_unreachable ();
16274 nmp = mp->next;
16275 free (mp);
16278 minipool_vector_head = minipool_vector_tail = NULL;
16279 scan = emit_insn_after (gen_consttable_end (), scan);
16280 scan = emit_barrier_after (scan);
16283 /* Return the cost of forcibly inserting a barrier after INSN. */
16284 static int
16285 arm_barrier_cost (rtx_insn *insn)
16287 /* Basing the location of the pool on the loop depth is preferable,
16288 but at the moment, the basic block information seems to be
16289 corrupt by this stage of the compilation. */
16290 int base_cost = 50;
16291 rtx_insn *next = next_nonnote_insn (insn);
16293 if (next != NULL && LABEL_P (next))
16294 base_cost -= 20;
16296 switch (GET_CODE (insn))
16298 case CODE_LABEL:
16299 /* It will always be better to place the table before the label, rather
16300 than after it. */
16301 return 50;
16303 case INSN:
16304 case CALL_INSN:
16305 return base_cost;
16307 case JUMP_INSN:
16308 return base_cost - 10;
16310 default:
16311 return base_cost + 10;
16315 /* Find the best place in the insn stream in the range
16316 (FIX->address,MAX_ADDRESS) to forcibly insert a minipool barrier.
16317 Create the barrier by inserting a jump and add a new fix entry for
16318 it. */
16319 static Mfix *
16320 create_fix_barrier (Mfix *fix, HOST_WIDE_INT max_address)
16322 HOST_WIDE_INT count = 0;
16323 rtx_barrier *barrier;
16324 rtx_insn *from = fix->insn;
16325 /* The instruction after which we will insert the jump. */
16326 rtx_insn *selected = NULL;
16327 int selected_cost;
16328 /* The address at which the jump instruction will be placed. */
16329 HOST_WIDE_INT selected_address;
16330 Mfix * new_fix;
16331 HOST_WIDE_INT max_count = max_address - fix->address;
16332 rtx_code_label *label = gen_label_rtx ();
16334 selected_cost = arm_barrier_cost (from);
16335 selected_address = fix->address;
16337 while (from && count < max_count)
16339 rtx_jump_table_data *tmp;
16340 int new_cost;
16342 /* This code shouldn't have been called if there was a natural barrier
16343 within range. */
16344 gcc_assert (!BARRIER_P (from));
16346 /* Count the length of this insn. This must stay in sync with the
16347 code that pushes minipool fixes. */
16348 if (LABEL_P (from))
16349 count += get_label_padding (from);
16350 else
16351 count += get_attr_length (from);
16353 /* If there is a jump table, add its length. */
16354 if (tablejump_p (from, NULL, &tmp))
16356 count += get_jump_table_size (tmp);
16358 /* Jump tables aren't in a basic block, so base the cost on
16359 the dispatch insn. If we select this location, we will
16360 still put the pool after the table. */
16361 new_cost = arm_barrier_cost (from);
16363 if (count < max_count
16364 && (!selected || new_cost <= selected_cost))
16366 selected = tmp;
16367 selected_cost = new_cost;
16368 selected_address = fix->address + count;
16371 /* Continue after the dispatch table. */
16372 from = NEXT_INSN (tmp);
16373 continue;
16376 new_cost = arm_barrier_cost (from);
16378 if (count < max_count
16379 && (!selected || new_cost <= selected_cost))
16381 selected = from;
16382 selected_cost = new_cost;
16383 selected_address = fix->address + count;
16386 from = NEXT_INSN (from);
16389 /* Make sure that we found a place to insert the jump. */
16390 gcc_assert (selected);
16392 /* Make sure we do not split a call and its corresponding
16393 CALL_ARG_LOCATION note. */
16394 if (CALL_P (selected))
16396 rtx_insn *next = NEXT_INSN (selected);
16397 if (next && NOTE_P (next)
16398 && NOTE_KIND (next) == NOTE_INSN_CALL_ARG_LOCATION)
16399 selected = next;
16402 /* Create a new JUMP_INSN that branches around a barrier. */
16403 from = emit_jump_insn_after (gen_jump (label), selected);
16404 JUMP_LABEL (from) = label;
16405 barrier = emit_barrier_after (from);
16406 emit_label_after (label, barrier);
16408 /* Create a minipool barrier entry for the new barrier. */
16409 new_fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* new_fix));
16410 new_fix->insn = barrier;
16411 new_fix->address = selected_address;
16412 new_fix->next = fix->next;
16413 fix->next = new_fix;
16415 return new_fix;
16418 /* Record that there is a natural barrier in the insn stream at
16419 ADDRESS. */
16420 static void
16421 push_minipool_barrier (rtx_insn *insn, HOST_WIDE_INT address)
16423 Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));
16425 fix->insn = insn;
16426 fix->address = address;
16428 fix->next = NULL;
16429 if (minipool_fix_head != NULL)
16430 minipool_fix_tail->next = fix;
16431 else
16432 minipool_fix_head = fix;
16434 minipool_fix_tail = fix;
16437 /* Record INSN, which will need fixing up to load a value from the
16438 minipool. ADDRESS is the offset of the insn since the start of the
16439 function; LOC is a pointer to the part of the insn which requires
16440 fixing; VALUE is the constant that must be loaded, which is of type
16441 MODE. */
16442 static void
16443 push_minipool_fix (rtx_insn *insn, HOST_WIDE_INT address, rtx *loc,
16444 machine_mode mode, rtx value)
16446 gcc_assert (!arm_disable_literal_pool);
16447 Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));
16449 fix->insn = insn;
16450 fix->address = address;
16451 fix->loc = loc;
16452 fix->mode = mode;
16453 fix->fix_size = MINIPOOL_FIX_SIZE (mode);
16454 fix->value = value;
16455 fix->forwards = get_attr_pool_range (insn);
16456 fix->backwards = get_attr_neg_pool_range (insn);
16457 fix->minipool = NULL;
16459 /* If an insn doesn't have a range defined for it, then it isn't
16460 expecting to be reworked by this code. Better to stop now than
16461 to generate duff assembly code. */
16462 gcc_assert (fix->forwards || fix->backwards);
16464 /* If an entry requires 8-byte alignment then assume all constant pools
16465 require 4 bytes of padding. Trying to do this later on a per-pool
16466 basis is awkward because existing pool entries have to be modified. */
16467 if (ARM_DOUBLEWORD_ALIGN && fix->fix_size >= 8)
16468 minipool_pad = 4;
16470 if (dump_file)
16472 fprintf (dump_file,
16473 ";; %smode fixup for i%d; addr %lu, range (%ld,%ld): ",
16474 GET_MODE_NAME (mode),
16475 INSN_UID (insn), (unsigned long) address,
16476 -1 * (long)fix->backwards, (long)fix->forwards);
16477 arm_print_value (dump_file, fix->value);
16478 fprintf (dump_file, "\n");
16481 /* Add it to the chain of fixes. */
16482 fix->next = NULL;
16484 if (minipool_fix_head != NULL)
16485 minipool_fix_tail->next = fix;
16486 else
16487 minipool_fix_head = fix;
16489 minipool_fix_tail = fix;
16492 /* Return maximum allowed cost of synthesizing a 64-bit constant VAL inline.
16493 Returns the number of insns needed, or 99 if we always want to synthesize
16494 the value. */
16496 arm_max_const_double_inline_cost ()
16498 return ((optimize_size || arm_ld_sched) ? 3 : 4);
16501 /* Return the cost of synthesizing a 64-bit constant VAL inline.
16502 Returns the number of insns needed, or 99 if we don't know how to
16503 do it. */
16505 arm_const_double_inline_cost (rtx val)
16507 rtx lowpart, highpart;
16508 machine_mode mode;
16510 mode = GET_MODE (val);
16512 if (mode == VOIDmode)
16513 mode = DImode;
16515 gcc_assert (GET_MODE_SIZE (mode) == 8);
16517 lowpart = gen_lowpart (SImode, val);
16518 highpart = gen_highpart_mode (SImode, mode, val);
16520 gcc_assert (CONST_INT_P (lowpart));
16521 gcc_assert (CONST_INT_P (highpart));
16523 return (arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (lowpart),
16524 NULL_RTX, NULL_RTX, 0, 0)
16525 + arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (highpart),
16526 NULL_RTX, NULL_RTX, 0, 0));
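/* As an illustration, the 64-bit constant 0x100000001 splits into two
   SImode halves of 1, each of which arm_gen_constant can build with a
   single insn, so the cost reported here is 2, below the limit returned
   by arm_max_const_double_inline_cost.  */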
16529 /* Cost of loading a SImode constant. */
16530 static inline int
16531 arm_const_inline_cost (enum rtx_code code, rtx val)
16533 return arm_gen_constant (code, SImode, NULL_RTX, INTVAL (val),
16534 NULL_RTX, NULL_RTX, 1, 0);
16537 /* Return true if it is worthwhile to split a 64-bit constant into two
16538 32-bit operations. This is the case if optimizing for size, or
16539 if we have load delay slots, or if one 32-bit part can be done with
16540 a single data operation. */
16541 bool
16542 arm_const_double_by_parts (rtx val)
16544 machine_mode mode = GET_MODE (val);
16545 rtx part;
16547 if (optimize_size || arm_ld_sched)
16548 return true;
16550 if (mode == VOIDmode)
16551 mode = DImode;
16553 part = gen_highpart_mode (SImode, mode, val);
16555 gcc_assert (CONST_INT_P (part));
16557 if (const_ok_for_arm (INTVAL (part))
16558 || const_ok_for_arm (~INTVAL (part)))
16559 return true;
16561 part = gen_lowpart (SImode, val);
16563 gcc_assert (CONST_INT_P (part));
16565 if (const_ok_for_arm (INTVAL (part))
16566 || const_ok_for_arm (~INTVAL (part)))
16567 return true;
16569 return false;
16572 /* Return true if it is possible to inline both the high and low parts
16573 of a 64-bit constant into 32-bit data processing instructions. */
16574 bool
16575 arm_const_double_by_immediates (rtx val)
16577 machine_mode mode = GET_MODE (val);
16578 rtx part;
16580 if (mode == VOIDmode)
16581 mode = DImode;
16583 part = gen_highpart_mode (SImode, mode, val);
16585 gcc_assert (CONST_INT_P (part));
16587 if (!const_ok_for_arm (INTVAL (part)))
16588 return false;
16590 part = gen_lowpart (SImode, val);
16592 gcc_assert (CONST_INT_P (part));
16594 if (!const_ok_for_arm (INTVAL (part)))
16595 return false;
16597 return true;
16600 /* Scan INSN and note any of its operands that need fixing.
16601 If DO_PUSHES is false we do not actually push any of the fixups
16602 needed. */
16603 static void
16604 note_invalid_constants (rtx_insn *insn, HOST_WIDE_INT address, int do_pushes)
16606 int opno;
16608 extract_constrain_insn (insn);
16610 if (recog_data.n_alternatives == 0)
16611 return;
16613 /* Fill in recog_op_alt with information about the constraints of
16614 this insn. */
16615 preprocess_constraints (insn);
16617 const operand_alternative *op_alt = which_op_alt ();
16618 for (opno = 0; opno < recog_data.n_operands; opno++)
16620 /* Things we need to fix can only occur in inputs. */
16621 if (recog_data.operand_type[opno] != OP_IN)
16622 continue;
16624 /* If this alternative is a memory reference, then any mention
16625 of constants in this alternative is really to fool reload
16626 into allowing us to accept one there. We need to fix them up
16627 now so that we output the right code. */
16628 if (op_alt[opno].memory_ok)
16630 rtx op = recog_data.operand[opno];
16632 if (CONSTANT_P (op))
16634 if (do_pushes)
16635 push_minipool_fix (insn, address, recog_data.operand_loc[opno],
16636 recog_data.operand_mode[opno], op);
16638 else if (MEM_P (op)
16639 && GET_CODE (XEXP (op, 0)) == SYMBOL_REF
16640 && CONSTANT_POOL_ADDRESS_P (XEXP (op, 0)))
16642 if (do_pushes)
16644 rtx cop = avoid_constant_pool_reference (op);
16646 /* Casting the address of something to a mode narrower
16647 than a word can cause avoid_constant_pool_reference()
16648 to return the pool reference itself. That's no good to
16649 us here. Let's just hope that we can use the
16650 constant pool value directly. */
16651 if (op == cop)
16652 cop = get_pool_constant (XEXP (op, 0));
16654 push_minipool_fix (insn, address,
16655 recog_data.operand_loc[opno],
16656 recog_data.operand_mode[opno], cop);
16663 return;
16666 /* This function computes the clear mask and PADDING_BITS_TO_CLEAR for structs
16667 and unions in the context of ARMv8-M Security Extensions. It is used as a
16668 helper function for both 'cmse_nonsecure_call' and 'cmse_nonsecure_entry'
16669 functions. The PADDING_BITS_TO_CLEAR pointer can be the base of either one
16670 or four masks, depending on whether it is being computed for a
16671 'cmse_nonsecure_entry' return value or a 'cmse_nonsecure_call' argument
16672 respectively. The tree for the type of the argument or a field within an
16673 argument is passed in ARG_TYPE, the current register this argument or field
16674 starts in is kept in the pointer REGNO and updated accordingly, the bit this
16675 argument or field starts at is passed in STARTING_BIT and the last used bit
16676 is kept in LAST_USED_BIT which is also updated accordingly. */
16678 static unsigned HOST_WIDE_INT
16679 comp_not_to_clear_mask_str_un (tree arg_type, int * regno,
16680 uint32_t * padding_bits_to_clear,
16681 unsigned starting_bit, int * last_used_bit)
16684 unsigned HOST_WIDE_INT not_to_clear_reg_mask = 0;
16686 if (TREE_CODE (arg_type) == RECORD_TYPE)
16688 unsigned current_bit = starting_bit;
16689 tree field;
16690 long int offset, size;
16693 field = TYPE_FIELDS (arg_type);
16694 while (field)
16696 /* The offset within a structure is always an offset from
16697 the start of that structure. Make sure we take that into account in the
16698 calculation of the register-based offset that we use here. */
16699 offset = starting_bit;
16700 offset += TREE_INT_CST_ELT (DECL_FIELD_BIT_OFFSET (field), 0);
16701 offset %= 32;
16703 /* This is the actual size of the field, for bitfields this is the
16704 bitfield width and not the container size. */
16705 size = TREE_INT_CST_ELT (DECL_SIZE (field), 0);
16707 if (*last_used_bit != offset)
16709 if (offset < *last_used_bit)
16711 /* This field's offset is before the 'last_used_bit', that
16712 means this field goes on the next register. So we need to
16713 pad the rest of the current register and increase the
16714 register number. */
16715 uint32_t mask;
16716 mask = ((uint32_t)-1) - ((uint32_t) 1 << *last_used_bit);
16717 mask++;
16719 padding_bits_to_clear[*regno] |= mask;
16720 not_to_clear_reg_mask |= HOST_WIDE_INT_1U << *regno;
16721 (*regno)++;
16723 else
16725 /* Otherwise we pad the bits between the last field's end and
16726 the start of the new field. */
16727 uint32_t mask;
16729 mask = ((uint32_t)-1) >> (32 - offset);
16730 mask -= ((uint32_t) 1 << *last_used_bit) - 1;
16731 padding_bits_to_clear[*regno] |= mask;
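/* To illustrate the two masks: with *last_used_bit == 8 the branch
   above produces 0xffffff00 (mark bits 8-31 of the old register as
   padding), while with offset == 16 the mask just computed is
   0x0000ff00 (mark only bits 8-15, the gap before the new field).  */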
16733 current_bit = offset;
16736 /* Calculate further padding bits for inner structs/unions too. */
16737 if (RECORD_OR_UNION_TYPE_P (TREE_TYPE (field)))
16739 *last_used_bit = current_bit;
16740 not_to_clear_reg_mask
16741 |= comp_not_to_clear_mask_str_un (TREE_TYPE (field), regno,
16742 padding_bits_to_clear, offset,
16743 last_used_bit);
16745 else
16747 /* Update 'current_bit' with this field's size. If the
16748 'current_bit' lies in a subsequent register, update 'regno' and
16749 reset 'current_bit' to point to the current bit in that new
16750 register. */
16751 current_bit += size;
16752 while (current_bit >= 32)
16754 current_bit-=32;
16755 not_to_clear_reg_mask |= HOST_WIDE_INT_1U << *regno;
16756 (*regno)++;
16758 *last_used_bit = current_bit;
16761 field = TREE_CHAIN (field);
16763 not_to_clear_reg_mask |= HOST_WIDE_INT_1U << *regno;
16765 else if (TREE_CODE (arg_type) == UNION_TYPE)
16767 tree field, field_t;
16768 int i, regno_t, field_size;
16769 int max_reg = -1;
16770 int max_bit = -1;
16771 uint32_t mask;
16772 uint32_t padding_bits_to_clear_res[NUM_ARG_REGS]
16773 = {-1, -1, -1, -1};
16775 /* To compute the padding bits in a union we only consider bits as
16776 padding bits if they are always either a padding bit or fall outside a
16777 field's size for all fields in the union. */
16778 field = TYPE_FIELDS (arg_type);
16779 while (field)
16781 uint32_t padding_bits_to_clear_t[NUM_ARG_REGS]
16782 = {0U, 0U, 0U, 0U};
16783 int last_used_bit_t = *last_used_bit;
16784 regno_t = *regno;
16785 field_t = TREE_TYPE (field);
16787 /* If the field's type is either a record or a union make sure to
16788 compute their padding bits too. */
16789 if (RECORD_OR_UNION_TYPE_P (field_t))
16790 not_to_clear_reg_mask
16791 |= comp_not_to_clear_mask_str_un (field_t, &regno_t,
16792 &padding_bits_to_clear_t[0],
16793 starting_bit, &last_used_bit_t);
16794 else
16796 field_size = TREE_INT_CST_ELT (DECL_SIZE (field), 0);
16797 regno_t = (field_size / 32) + *regno;
16798 last_used_bit_t = (starting_bit + field_size) % 32;
16801 for (i = *regno; i < regno_t; i++)
16803 /* For all but the last register used by this field only keep the
16804 padding bits that were padding bits in this field. */
16805 padding_bits_to_clear_res[i] &= padding_bits_to_clear_t[i];
16808 /* For the last register, keep all padding bits that were padding
16809 bits in this field and any padding bits that are still valid
16810 as padding bits but fall outside of this field's size. */
16811 mask = (((uint32_t) -1) - ((uint32_t) 1 << last_used_bit_t)) + 1;
16812 padding_bits_to_clear_res[regno_t]
16813 &= padding_bits_to_clear_t[regno_t] | mask;
16815 /* Update the maximum size of the fields in terms of registers used
16816 ('max_reg') and the 'last_used_bit' in said register. */
16817 if (max_reg < regno_t)
16819 max_reg = regno_t;
16820 max_bit = last_used_bit_t;
16822 else if (max_reg == regno_t && max_bit < last_used_bit_t)
16823 max_bit = last_used_bit_t;
16825 field = TREE_CHAIN (field);
16828 /* Update the current padding_bits_to_clear using the intersection of the
16829 padding bits of all the fields. */
16830 for (i=*regno; i < max_reg; i++)
16831 padding_bits_to_clear[i] |= padding_bits_to_clear_res[i];
16833 /* Do not keep trailing padding bits, we do not know yet whether this
16834 is the end of the argument. */
16835 mask = ((uint32_t) 1 << max_bit) - 1;
16836 padding_bits_to_clear[max_reg]
16837 |= padding_bits_to_clear_res[max_reg] & mask;
16839 *regno = max_reg;
16840 *last_used_bit = max_bit;
16842 else
16843 /* This function should only be used for structs and unions. */
16844 gcc_unreachable ();
16846 return not_to_clear_reg_mask;
16849 /* In the context of ARMv8-M Security Extensions, this function is used for both
16850 'cmse_nonsecure_call' and 'cmse_nonsecure_entry' functions to compute what
16851 registers are used when returning or passing arguments, which is then
16852 returned as a mask. It will also compute a mask to indicate padding/unused
16853 bits for each of these registers, and passes this through the
16854 PADDING_BITS_TO_CLEAR pointer. The tree of the argument type is passed in
16855 ARG_TYPE, the rtl representation of the argument is passed in ARG_RTX and
16856 the starting register used to pass this argument or return value is passed
16857 in REGNO. It makes use of 'comp_not_to_clear_mask_str_un' to compute these
16858 for struct and union types. */
16860 static unsigned HOST_WIDE_INT
16861 compute_not_to_clear_mask (tree arg_type, rtx arg_rtx, int regno,
16862 uint32_t * padding_bits_to_clear)
16865 int last_used_bit = 0;
16866 unsigned HOST_WIDE_INT not_to_clear_mask;
16868 if (RECORD_OR_UNION_TYPE_P (arg_type))
16870 not_to_clear_mask
16871 = comp_not_to_clear_mask_str_un (arg_type, &regno,
16872 padding_bits_to_clear, 0,
16873 &last_used_bit);
16876 /* If the 'last_used_bit' is not zero, that means we are still using a
16877 part of the last 'regno'. In such cases we must clear the trailing
16878 bits. Otherwise we are not using regno and should mark it as to be
16879 cleared. */
16880 if (last_used_bit != 0)
16881 padding_bits_to_clear[regno]
16882 |= ((uint32_t)-1) - ((uint32_t) 1 << last_used_bit) + 1;
16883 else
16884 not_to_clear_mask &= ~(HOST_WIDE_INT_1U << regno);
16886 else
16888 not_to_clear_mask = 0;
16889 /* We are not dealing with structs or unions, so these arguments may be
16890 passed in floating point registers too. In some cases a BLKmode is
16891 used when returning or passing arguments in multiple VFP registers. */
16892 if (GET_MODE (arg_rtx) == BLKmode)
16894 int i, arg_regs;
16895 rtx reg;
16897 /* This should really only occur when dealing with the hard-float
16898 ABI. */
16899 gcc_assert (TARGET_HARD_FLOAT_ABI);
16901 for (i = 0; i < XVECLEN (arg_rtx, 0); i++)
16903 reg = XEXP (XVECEXP (arg_rtx, 0, i), 0);
16904 gcc_assert (REG_P (reg));
16906 not_to_clear_mask |= HOST_WIDE_INT_1U << REGNO (reg);
16908 /* If we are dealing with DF mode, make sure we don't
16909 clear either of the registers it addresses. */
16910 arg_regs = ARM_NUM_REGS (GET_MODE (reg));
16911 if (arg_regs > 1)
16913 unsigned HOST_WIDE_INT mask;
16914 mask = HOST_WIDE_INT_1U << (REGNO (reg) + arg_regs);
16915 mask -= HOST_WIDE_INT_1U << REGNO (reg);
16916 not_to_clear_mask |= mask;
16920 else
16922 /* Otherwise we can rely on the MODE to determine how many registers
16923 are being used by this argument. */
16924 int arg_regs = ARM_NUM_REGS (GET_MODE (arg_rtx));
16925 not_to_clear_mask |= HOST_WIDE_INT_1U << REGNO (arg_rtx);
16926 if (arg_regs > 1)
16928 unsigned HOST_WIDE_INT
16929 mask = HOST_WIDE_INT_1U << (REGNO (arg_rtx) + arg_regs);
16930 mask -= HOST_WIDE_INT_1U << REGNO (arg_rtx);
16931 not_to_clear_mask |= mask;
16936 return not_to_clear_mask;
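/* For example, under the hard-float ABI a double argument arriving in
   d0 occupies two consecutive single-precision registers (s0 and s1),
   so the ARM_NUM_REGS adjustment above keeps both of them out of the
   clearing mask.  */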
16939 /* Clears caller saved registers not used to pass arguments before a
16940 cmse_nonsecure_call. Saving, clearing and restoring of callee saved
16941 registers is done in __gnu_cmse_nonsecure_call libcall.
16942 See libgcc/config/arm/cmse_nonsecure_call.S. */
16944 static void
16945 cmse_nonsecure_call_clear_caller_saved (void)
16947 basic_block bb;
16949 FOR_EACH_BB_FN (bb, cfun)
16951 rtx_insn *insn;
16953 FOR_BB_INSNS (bb, insn)
16955 uint64_t to_clear_mask, float_mask;
16956 rtx_insn *seq;
16957 rtx pat, call, unspec, reg, cleared_reg, tmp;
16958 unsigned int regno, maxregno;
16959 rtx address;
16960 CUMULATIVE_ARGS args_so_far_v;
16961 cumulative_args_t args_so_far;
16962 tree arg_type, fntype;
16963 bool using_r4, first_param = true;
16964 function_args_iterator args_iter;
16965 uint32_t padding_bits_to_clear[4] = {0U, 0U, 0U, 0U};
16966 uint32_t * padding_bits_to_clear_ptr = &padding_bits_to_clear[0];
16968 if (!NONDEBUG_INSN_P (insn))
16969 continue;
16971 if (!CALL_P (insn))
16972 continue;
16974 pat = PATTERN (insn);
16975 gcc_assert (GET_CODE (pat) == PARALLEL && XVECLEN (pat, 0) > 0);
16976 call = XVECEXP (pat, 0, 0);
16978 /* Get the real call RTX if the insn sets a value, i.e. returns. */
16979 if (GET_CODE (call) == SET)
16980 call = SET_SRC (call);
16982 /* Check if it is a cmse_nonsecure_call. */
16983 unspec = XEXP (call, 0);
16984 if (GET_CODE (unspec) != UNSPEC
16985 || XINT (unspec, 1) != UNSPEC_NONSECURE_MEM)
16986 continue;
16988 /* Determine the caller-saved registers we need to clear. */
16989 to_clear_mask = (1LL << (NUM_ARG_REGS)) - 1;
16990 maxregno = NUM_ARG_REGS - 1;
16991 /* Only look at the caller-saved floating point registers in case of
16992 -mfloat-abi=hard. For -mfloat-abi=softfp we will be using the
16993 lazy stores and loads, which clear both caller- and callee-saved
16994 registers. */
16995 if (TARGET_HARD_FLOAT_ABI)
16997 float_mask = (1LL << (D7_VFP_REGNUM + 1)) - 1;
16998 float_mask &= ~((1LL << FIRST_VFP_REGNUM) - 1);
16999 to_clear_mask |= float_mask;
17000 maxregno = D7_VFP_REGNUM;
17003 /* Make sure the register used to hold the function address is not
17004 cleared. */
17005 address = RTVEC_ELT (XVEC (unspec, 0), 0);
17006 gcc_assert (MEM_P (address));
17007 gcc_assert (REG_P (XEXP (address, 0)));
17008 to_clear_mask &= ~(1LL << REGNO (XEXP (address, 0)));
17010 /* Set basic block of call insn so that df rescan is performed on
17011 insns inserted here. */
17012 set_block_for_insn (insn, bb);
17013 df_set_flags (DF_DEFER_INSN_RESCAN);
17014 start_sequence ();
17016 /* Make sure the scheduler doesn't schedule other insns beyond
17017 here. */
17018 emit_insn (gen_blockage ());
17020 /* Walk through all arguments and clear registers appropriately.  */
17022 fntype = TREE_TYPE (MEM_EXPR (address));
17023 arm_init_cumulative_args (&args_so_far_v, fntype, NULL_RTX,
17024 NULL_TREE);
17025 args_so_far = pack_cumulative_args (&args_so_far_v);
17026 FOREACH_FUNCTION_ARGS (fntype, arg_type, args_iter)
17028 rtx arg_rtx;
17029 machine_mode arg_mode = TYPE_MODE (arg_type);
17031 if (VOID_TYPE_P (arg_type))
17032 continue;
17034 if (!first_param)
17035 arm_function_arg_advance (args_so_far, arg_mode, arg_type,
17036 true);
17038 arg_rtx = arm_function_arg (args_so_far, arg_mode, arg_type,
17039 true);
17040 gcc_assert (REG_P (arg_rtx));
17041 to_clear_mask
17042 &= ~compute_not_to_clear_mask (arg_type, arg_rtx,
17043 REGNO (arg_rtx),
17044 padding_bits_to_clear_ptr);
17046 first_param = false;
17049 /* Clear padding bits where needed. */
17050 cleared_reg = XEXP (address, 0);
17051 reg = gen_rtx_REG (SImode, IP_REGNUM);
17052 using_r4 = false;
17053 for (regno = R0_REGNUM; regno < NUM_ARG_REGS; regno++)
17055 if (padding_bits_to_clear[regno] == 0)
17056 continue;
17058 /* If this is a Thumb-1 target, copy the address of the function
17059 we are calling from 'r4' into 'ip' so that we can use r4 to
17060 clear the unused bits in the arguments. */
17061 if (TARGET_THUMB1 && !using_r4)
17063 using_r4 = true;
17064 reg = cleared_reg;
17065 emit_move_insn (gen_rtx_REG (SImode, IP_REGNUM),
17066 reg);
17069 tmp = GEN_INT ((((~padding_bits_to_clear[regno]) << 16u) >> 16u));
17070 emit_move_insn (reg, tmp);
17071 /* Also fill the top half of the negated
17072 padding_bits_to_clear. */
17073 if (((~padding_bits_to_clear[regno]) >> 16) > 0)
17075 tmp = GEN_INT ((~padding_bits_to_clear[regno]) >> 16);
17076 emit_insn (gen_rtx_SET (gen_rtx_ZERO_EXTRACT (SImode, reg,
17077 GEN_INT (16),
17078 GEN_INT (16)),
17079 tmp));
17082 emit_insn (gen_andsi3 (gen_rtx_REG (SImode, regno),
17083 gen_rtx_REG (SImode, regno),
17084 reg));
17087 if (using_r4)
17088 emit_move_insn (cleared_reg,
17089 gen_rtx_REG (SImode, IP_REGNUM));
17091 /* We use right shift and left shift to clear the LSB of the address
17092 we jump to instead of using bic, to avoid having to use an extra
17093 register on Thumb-1. */
17094 tmp = gen_rtx_LSHIFTRT (SImode, cleared_reg, const1_rtx);
17095 emit_insn (gen_rtx_SET (cleared_reg, tmp));
17096 tmp = gen_rtx_ASHIFT (SImode, cleared_reg, const1_rtx);
17097 emit_insn (gen_rtx_SET (cleared_reg, tmp));
17099 /* Clear all registers that could leak before doing a non-secure
17100 call. */
17101 for (regno = R0_REGNUM; regno <= maxregno; regno++)
17103 if (!(to_clear_mask & (1LL << regno)))
17104 continue;
17106 /* If regno is an even vfp register and its successor is also to
17107 be cleared, use vmov. */
17108 if (IS_VFP_REGNUM (regno))
17110 if (TARGET_VFP_DOUBLE
17111 && VFP_REGNO_OK_FOR_DOUBLE (regno)
17112 && to_clear_mask & (1LL << (regno + 1)))
17113 emit_move_insn (gen_rtx_REG (DFmode, regno++),
17114 CONST0_RTX (DFmode));
17115 else
17116 emit_move_insn (gen_rtx_REG (SFmode, regno),
17117 CONST0_RTX (SFmode));
17119 else
17120 emit_move_insn (gen_rtx_REG (SImode, regno), cleared_reg);
17123 seq = get_insns ();
17124 end_sequence ();
17125 emit_insn_before (seq, insn);
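/* Rough sketch of the code emitted above (assuming a non-secure call
   through r4 that passes a single argument in r0, no struct padding and
   a soft-float ABI):

       lsrs    r4, r4, #1      @ clear the Thumb bit of the target address
       lsls    r4, r4, #1
       movs    r1, r4          @ overwrite the unused argument registers
       movs    r2, r4          @ with the (non-secret) target address
       movs    r3, r4

   With -mfloat-abi=hard the caller-saved VFP registers are cleared as
   well, using moves of a zero constant.  */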
17131 /* Rewrite a move insn into a subtract of 0 if the condition codes will
17132 be useful in the next conditional jump insn. */
17134 static void
17135 thumb1_reorg (void)
17137 basic_block bb;
17139 FOR_EACH_BB_FN (bb, cfun)
17141 rtx dest, src;
17142 rtx cmp, op0, op1, set = NULL;
17143 rtx_insn *prev, *insn = BB_END (bb);
17144 bool insn_clobbered = false;
17146 while (insn != BB_HEAD (bb) && !NONDEBUG_INSN_P (insn))
17147 insn = PREV_INSN (insn);
17149 /* Find the last cbranchsi4_insn in basic block BB. */
17150 if (insn == BB_HEAD (bb)
17151 || INSN_CODE (insn) != CODE_FOR_cbranchsi4_insn)
17152 continue;
17154 /* Get the register with which we are comparing. */
17155 cmp = XEXP (SET_SRC (PATTERN (insn)), 0);
17156 op0 = XEXP (cmp, 0);
17157 op1 = XEXP (cmp, 1);
17159 /* Check that comparison is against ZERO. */
17160 if (!CONST_INT_P (op1) || INTVAL (op1) != 0)
17161 continue;
17163 /* Find the first flag setting insn before INSN in basic block BB. */
17164 gcc_assert (insn != BB_HEAD (bb));
17165 for (prev = PREV_INSN (insn);
17166 (!insn_clobbered
17167 && prev != BB_HEAD (bb)
17168 && (NOTE_P (prev)
17169 || DEBUG_INSN_P (prev)
17170 || ((set = single_set (prev)) != NULL
17171 && get_attr_conds (prev) == CONDS_NOCOND)));
17172 prev = PREV_INSN (prev))
17174 if (reg_set_p (op0, prev))
17175 insn_clobbered = true;
17178 /* Skip if op0 is clobbered by insn other than prev. */
17179 if (insn_clobbered)
17180 continue;
17182 if (!set)
17183 continue;
17185 dest = SET_DEST (set);
17186 src = SET_SRC (set);
17187 if (!low_register_operand (dest, SImode)
17188 || !low_register_operand (src, SImode))
17189 continue;
17191 /* Rewrite move into subtract of 0 if its operand is compared with ZERO
17192 in INSN. Both src and dest of the move insn are checked. */
17193 if (REGNO (op0) == REGNO (src) || REGNO (op0) == REGNO (dest))
17195 dest = copy_rtx (dest);
17196 src = copy_rtx (src);
17197 src = gen_rtx_MINUS (SImode, src, const0_rtx);
17198 PATTERN (prev) = gen_rtx_SET (dest, src);
17199 INSN_CODE (prev) = -1;
17200 /* Set test register in INSN to dest. */
17201 XEXP (cmp, 0) = copy_rtx (dest);
17202 INSN_CODE (insn) = -1;
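/* For illustration: given a Thumb-1 sequence whose basic block ends in a
   conditional branch comparing r1 with zero,

       (set (reg r1) (reg r0))                  ; the move found above
       ...
       cbranchsi4_insn on (reg r1) against 0

   the move is rewritten as (set (reg r1) (minus (reg r0) (const_int 0))),
   i.e. SUBS r1, r0, #0, so the condition codes the branch needs are
   already set by the subtract.  */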
17207 /* Convert instructions to their cc-clobbering variant if possible, since
17208 that allows us to use smaller encodings. */
17210 static void
17211 thumb2_reorg (void)
17213 basic_block bb;
17214 regset_head live;
17216 INIT_REG_SET (&live);
17218 /* We are freeing block_for_insn in the toplev to keep compatibility
17219 with old MDEP_REORGS that are not CFG based. Recompute it now. */
17220 compute_bb_for_insn ();
17221 df_analyze ();
17223 enum Convert_Action {SKIP, CONV, SWAP_CONV};
17225 FOR_EACH_BB_FN (bb, cfun)
17227 if ((current_tune->disparage_flag_setting_t16_encodings
17228 == tune_params::DISPARAGE_FLAGS_ALL)
17229 && optimize_bb_for_speed_p (bb))
17230 continue;
17232 rtx_insn *insn;
17233 Convert_Action action = SKIP;
17234 Convert_Action action_for_partial_flag_setting
17235 = ((current_tune->disparage_flag_setting_t16_encodings
17236 != tune_params::DISPARAGE_FLAGS_NEITHER)
17237 && optimize_bb_for_speed_p (bb))
17238 ? SKIP : CONV;
17240 COPY_REG_SET (&live, DF_LR_OUT (bb));
17241 df_simulate_initialize_backwards (bb, &live);
17242 FOR_BB_INSNS_REVERSE (bb, insn)
17244 if (NONJUMP_INSN_P (insn)
17245 && !REGNO_REG_SET_P (&live, CC_REGNUM)
17246 && GET_CODE (PATTERN (insn)) == SET)
17248 action = SKIP;
17249 rtx pat = PATTERN (insn);
17250 rtx dst = XEXP (pat, 0);
17251 rtx src = XEXP (pat, 1);
17252 rtx op0 = NULL_RTX, op1 = NULL_RTX;
17254 if (UNARY_P (src) || BINARY_P (src))
17255 op0 = XEXP (src, 0);
17257 if (BINARY_P (src))
17258 op1 = XEXP (src, 1);
17260 if (low_register_operand (dst, SImode))
17262 switch (GET_CODE (src))
17264 case PLUS:
17265 /* Adding two registers and storing the result
17266 in the first source is already a 16-bit
17267 operation. */
17268 if (rtx_equal_p (dst, op0)
17269 && register_operand (op1, SImode))
17270 break;
17272 if (low_register_operand (op0, SImode))
17274 /* ADDS <Rd>,<Rn>,<Rm> */
17275 if (low_register_operand (op1, SImode))
17276 action = CONV;
17277 /* ADDS <Rdn>,#<imm8> */
17278 /* SUBS <Rdn>,#<imm8> */
17279 else if (rtx_equal_p (dst, op0)
17280 && CONST_INT_P (op1)
17281 && IN_RANGE (INTVAL (op1), -255, 255))
17282 action = CONV;
17283 /* ADDS <Rd>,<Rn>,#<imm3> */
17284 /* SUBS <Rd>,<Rn>,#<imm3> */
17285 else if (CONST_INT_P (op1)
17286 && IN_RANGE (INTVAL (op1), -7, 7))
17287 action = CONV;
17289 /* ADCS <Rd>, <Rn> */
17290 else if (GET_CODE (XEXP (src, 0)) == PLUS
17291 && rtx_equal_p (XEXP (XEXP (src, 0), 0), dst)
17292 && low_register_operand (XEXP (XEXP (src, 0), 1),
17293 SImode)
17294 && COMPARISON_P (op1)
17295 && cc_register (XEXP (op1, 0), VOIDmode)
17296 && maybe_get_arm_condition_code (op1) == ARM_CS
17297 && XEXP (op1, 1) == const0_rtx)
17298 action = CONV;
17299 break;
17301 case MINUS:
17302 /* RSBS <Rd>,<Rn>,#0
17303 Not handled here: see NEG below. */
17304 /* SUBS <Rd>,<Rn>,#<imm3>
17305 SUBS <Rdn>,#<imm8>
17306 Not handled here: see PLUS above. */
17307 /* SUBS <Rd>,<Rn>,<Rm> */
17308 if (low_register_operand (op0, SImode)
17309 && low_register_operand (op1, SImode))
17310 action = CONV;
17311 break;
17313 case MULT:
17314 /* MULS <Rdm>,<Rn>,<Rdm>
17315 As an exception to the rule, this is only used
17316 when optimizing for size since MULS is slow on all
17317 known implementations. We do not even want to use
17318 MULS in cold code, if optimizing for speed, so we
17319 test the global flag here. */
17320 if (!optimize_size)
17321 break;
17322 /* Fall through. */
17323 case AND:
17324 case IOR:
17325 case XOR:
17326 /* ANDS <Rdn>,<Rm> */
17327 if (rtx_equal_p (dst, op0)
17328 && low_register_operand (op1, SImode))
17329 action = action_for_partial_flag_setting;
17330 else if (rtx_equal_p (dst, op1)
17331 && low_register_operand (op0, SImode))
17332 action = action_for_partial_flag_setting == SKIP
17333 ? SKIP : SWAP_CONV;
17334 break;
17336 case ASHIFTRT:
17337 case ASHIFT:
17338 case LSHIFTRT:
17339 /* ASRS <Rdn>,<Rm> */
17340 /* LSRS <Rdn>,<Rm> */
17341 /* LSLS <Rdn>,<Rm> */
17342 if (rtx_equal_p (dst, op0)
17343 && low_register_operand (op1, SImode))
17344 action = action_for_partial_flag_setting;
17345 /* ASRS <Rd>,<Rm>,#<imm5> */
17346 /* LSRS <Rd>,<Rm>,#<imm5> */
17347 /* LSLS <Rd>,<Rm>,#<imm5> */
17348 else if (low_register_operand (op0, SImode)
17349 && CONST_INT_P (op1)
17350 && IN_RANGE (INTVAL (op1), 0, 31))
17351 action = action_for_partial_flag_setting;
17352 break;
17354 case ROTATERT:
17355 /* RORS <Rdn>,<Rm> */
17356 if (rtx_equal_p (dst, op0)
17357 && low_register_operand (op1, SImode))
17358 action = action_for_partial_flag_setting;
17359 break;
17361 case NOT:
17362 /* MVNS <Rd>,<Rm> */
17363 if (low_register_operand (op0, SImode))
17364 action = action_for_partial_flag_setting;
17365 break;
17367 case NEG:
17368 /* NEGS <Rd>,<Rm> (a.k.a RSBS) */
17369 if (low_register_operand (op0, SImode))
17370 action = CONV;
17371 break;
17373 case CONST_INT:
17374 /* MOVS <Rd>,#<imm8> */
17375 if (CONST_INT_P (src)
17376 && IN_RANGE (INTVAL (src), 0, 255))
17377 action = action_for_partial_flag_setting;
17378 break;
17380 case REG:
17381 /* MOVS and MOV<c> with registers have different
17382 encodings, so are not relevant here. */
17383 break;
17385 default:
17386 break;
17390 if (action != SKIP)
17392 rtx ccreg = gen_rtx_REG (CCmode, CC_REGNUM);
17393 rtx clobber = gen_rtx_CLOBBER (VOIDmode, ccreg);
17394 rtvec vec;
17396 if (action == SWAP_CONV)
17398 src = copy_rtx (src);
17399 XEXP (src, 0) = op1;
17400 XEXP (src, 1) = op0;
17401 pat = gen_rtx_SET (dst, src);
17402 vec = gen_rtvec (2, pat, clobber);
17404 else /* action == CONV */
17405 vec = gen_rtvec (2, pat, clobber);
17407 PATTERN (insn) = gen_rtx_PARALLEL (VOIDmode, vec);
17408 INSN_CODE (insn) = -1;
17412 if (NONDEBUG_INSN_P (insn))
17413 df_simulate_one_insn_backwards (bb, insn, &live);
17417 CLEAR_REG_SET (&live);
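/* For illustration: when the condition codes are dead after an insn such
   as

       add     r0, r1, r2              @ 32-bit encoding in Thumb-2

   the loop above rewrites its pattern into a PARALLEL of the SET and a
   clobber of CC_REGNUM, matching the flag-setting form

       adds    r0, r1, r2              @ 16-bit encoding

   For commutative operations whose 16-bit form requires the destination
   to equal the first operand, SWAP_CONV exchanges the two operands before
   adding the clobber.  */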
17420 /* Gcc puts the pool in the wrong place for ARM, since we can only
17421 load addresses a limited distance around the pc. We do some
17422 special munging to move the constant pool values to the correct
17423 point in the code. */
17424 static void
17425 arm_reorg (void)
17427 rtx_insn *insn;
17428 HOST_WIDE_INT address = 0;
17429 Mfix * fix;
17431 if (use_cmse)
17432 cmse_nonsecure_call_clear_caller_saved ();
17433 if (TARGET_THUMB1)
17434 thumb1_reorg ();
17435 else if (TARGET_THUMB2)
17436 thumb2_reorg ();
17438 /* Ensure all insns that must be split have been split at this point.
17439 Otherwise, the pool placement code below may compute incorrect
17440 insn lengths. Note that when optimizing, all insns have already
17441 been split at this point. */
17442 if (!optimize)
17443 split_all_insns_noflow ();
17445 /* Make sure we do not attempt to create a literal pool even though it should
17446 no longer be necessary to create any. */
17447 if (arm_disable_literal_pool)
17448 return;
17450 minipool_fix_head = minipool_fix_tail = NULL;
17452 /* The first insn must always be a note, or the code below won't
17453 scan it properly. */
17454 insn = get_insns ();
17455 gcc_assert (NOTE_P (insn));
17456 minipool_pad = 0;
17458 /* Scan all the insns and record the operands that will need fixing. */
17459 for (insn = next_nonnote_insn (insn); insn; insn = next_nonnote_insn (insn))
17461 if (BARRIER_P (insn))
17462 push_minipool_barrier (insn, address);
17463 else if (INSN_P (insn))
17465 rtx_jump_table_data *table;
17467 note_invalid_constants (insn, address, true);
17468 address += get_attr_length (insn);
17470 /* If the insn is a vector jump, add the size of the table
17471 and skip the table. */
17472 if (tablejump_p (insn, NULL, &table))
17474 address += get_jump_table_size (table);
17475 insn = table;
17478 else if (LABEL_P (insn))
17479 /* Add the worst-case padding due to alignment. We don't add
17480 the _current_ padding because the minipool insertions
17481 themselves might change it. */
17482 address += get_label_padding (insn);
17485 fix = minipool_fix_head;
17487 /* Now scan the fixups and perform the required changes. */
17488 while (fix)
17490 Mfix * ftmp;
17491 Mfix * fdel;
17492 Mfix * last_added_fix;
17493 Mfix * last_barrier = NULL;
17494 Mfix * this_fix;
17496 /* Skip any further barriers before the next fix. */
17497 while (fix && BARRIER_P (fix->insn))
17498 fix = fix->next;
17500 /* No more fixes. */
17501 if (fix == NULL)
17502 break;
17504 last_added_fix = NULL;
17506 for (ftmp = fix; ftmp; ftmp = ftmp->next)
17508 if (BARRIER_P (ftmp->insn))
17510 if (ftmp->address >= minipool_vector_head->max_address)
17511 break;
17513 last_barrier = ftmp;
17515 else if ((ftmp->minipool = add_minipool_forward_ref (ftmp)) == NULL)
17516 break;
17518 last_added_fix = ftmp; /* Keep track of the last fix added. */
17521 /* If we found a barrier, drop back to that; any fixes that we
17522 could have reached but come after the barrier will now go in
17523 the next mini-pool. */
17524 if (last_barrier != NULL)
17526 /* Reduce the refcount for those fixes that won't go into this
17527 pool after all. */
17528 for (fdel = last_barrier->next;
17529 fdel && fdel != ftmp;
17530 fdel = fdel->next)
17532 fdel->minipool->refcount--;
17533 fdel->minipool = NULL;
17536 ftmp = last_barrier;
17538 else
17540 /* ftmp is the first fix that we can't fit into this pool and
17541 there are no natural barriers that we could use. Insert a
17542 new barrier in the code somewhere between the previous
17543 fix and this one, and arrange to jump around it. */
17544 HOST_WIDE_INT max_address;
17546 /* The last item on the list of fixes must be a barrier, so
17547 we can never run off the end of the list of fixes without
17548 last_barrier being set. */
17549 gcc_assert (ftmp);
17551 max_address = minipool_vector_head->max_address;
17552 /* Check that there isn't another fix that is in range that
17553 we couldn't fit into this pool because the pool was
17554 already too large: we need to put the pool before such an
17555 instruction. The pool itself may come just after the
17556 fix because create_fix_barrier also allows space for a
17557 jump instruction. */
17558 if (ftmp->address < max_address)
17559 max_address = ftmp->address + 1;
17561 last_barrier = create_fix_barrier (last_added_fix, max_address);
17564 assign_minipool_offsets (last_barrier);
17566 while (ftmp)
17568 if (!BARRIER_P (ftmp->insn)
17569 && ((ftmp->minipool = add_minipool_backward_ref (ftmp))
17570 == NULL))
17571 break;
17573 ftmp = ftmp->next;
17576 /* Scan over the fixes we have identified for this pool, fixing them
17577 up and adding the constants to the pool itself. */
17578 for (this_fix = fix; this_fix && ftmp != this_fix;
17579 this_fix = this_fix->next)
17580 if (!BARRIER_P (this_fix->insn))
17582 rtx addr
17583 = plus_constant (Pmode,
17584 gen_rtx_LABEL_REF (VOIDmode,
17585 minipool_vector_label),
17586 this_fix->minipool->offset);
17587 *this_fix->loc = gen_rtx_MEM (this_fix->mode, addr);
17590 dump_minipool (last_barrier->insn);
17591 fix = ftmp;
17594 /* From now on we must synthesize any constants that we can't handle
17595 directly. This can happen if the RTL gets split during final
17596 instruction generation. */
17597 cfun->machine->after_arm_reorg = 1;
17599 /* Free the minipool memory. */
17600 obstack_free (&minipool_obstack, minipool_startobj);
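/* For illustration: a load of a constant that cannot be built from
   immediates, e.g.

       ldr     r0, .L5

   requires the minipool entry .L5 to lie within the load's addressing
   range (roughly +/- 4KB for an ARM-state LDR, much less for Thumb-1),
   so the pool is dumped at an existing barrier inside that range, or a
   branch around a freshly created pool is inserted by create_fix_barrier.
   (Label and register names here are hypothetical.)  */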
17603 /* Routines to output assembly language. */
17605 /* Return the string representation of the passed-in real value. */
17606 static const char *
17607 fp_const_from_val (REAL_VALUE_TYPE *r)
17609 if (!fp_consts_inited)
17610 init_fp_table ();
17612 gcc_assert (real_equal (r, &value_fp0));
17613 return "0";
17616 /* OPERANDS[0] is the entire list of insns that constitute the pop,
17617 OPERANDS[1] is the base register, RETURN_PC is true iff the return insn
17618 is in the list, UPDATE is true iff the list contains an explicit
17619 update of the base register. */
17620 void
17621 arm_output_multireg_pop (rtx *operands, bool return_pc, rtx cond, bool reverse,
17622 bool update)
17624 int i;
17625 char pattern[100];
17626 int offset;
17627 const char *conditional;
17628 int num_saves = XVECLEN (operands[0], 0);
17629 unsigned int regno;
17630 unsigned int regno_base = REGNO (operands[1]);
17631 bool interrupt_p = IS_INTERRUPT (arm_current_func_type ());
17633 offset = 0;
17634 offset += update ? 1 : 0;
17635 offset += return_pc ? 1 : 0;
17637 /* Is the base register in the list? */
17638 for (i = offset; i < num_saves; i++)
17640 regno = REGNO (XEXP (XVECEXP (operands[0], 0, i), 0));
17641 /* If SP is in the list, then the base register must be SP. */
17642 gcc_assert ((regno != SP_REGNUM) || (regno_base == SP_REGNUM));
17643 /* If base register is in the list, there must be no explicit update. */
17644 if (regno == regno_base)
17645 gcc_assert (!update);
17648 conditional = reverse ? "%?%D0" : "%?%d0";
17649 /* Can't use POP if returning from an interrupt. */
17650 if ((regno_base == SP_REGNUM) && update && !(interrupt_p && return_pc))
17651 sprintf (pattern, "pop%s\t{", conditional);
17652 else
17654 /* Output ldmfd when the base register is SP, otherwise output ldmia.
17655 It's just a convention, their semantics are identical. */
17656 if (regno_base == SP_REGNUM)
17657 sprintf (pattern, "ldmfd%s\t", conditional);
17658 else if (update)
17659 sprintf (pattern, "ldmia%s\t", conditional);
17660 else
17661 sprintf (pattern, "ldm%s\t", conditional);
17663 strcat (pattern, reg_names[regno_base]);
17664 if (update)
17665 strcat (pattern, "!, {");
17666 else
17667 strcat (pattern, ", {");
17670 /* Output the first destination register. */
17671 strcat (pattern,
17672 reg_names[REGNO (XEXP (XVECEXP (operands[0], 0, offset), 0))]);
17674 /* Output the rest of the destination registers. */
17675 for (i = offset + 1; i < num_saves; i++)
17677 strcat (pattern, ", ");
17678 strcat (pattern,
17679 reg_names[REGNO (XEXP (XVECEXP (operands[0], 0, i), 0))]);
17682 strcat (pattern, "}");
17684 if (interrupt_p && return_pc)
17685 strcat (pattern, "^");
17687 output_asm_insn (pattern, &cond);
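/* Example outputs (illustrative): popping {r4, r5, pc} from SP with
   writeback gives

       pop     {r4, r5, pc}

   whereas the same pop when returning from an interrupt handler gives

       ldmfd   sp!, {r4, r5, pc}^

   because the exception-return form ('^') is not available with POP.  */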
17691 /* Output the assembly for a store multiple. */
17693 const char *
17694 vfp_output_vstmd (rtx * operands)
17696 char pattern[100];
17697 int p;
17698 int base;
17699 int i;
17700 rtx addr_reg = REG_P (XEXP (operands[0], 0))
17701 ? XEXP (operands[0], 0)
17702 : XEXP (XEXP (operands[0], 0), 0);
17703 bool push_p = REGNO (addr_reg) == SP_REGNUM;
17705 if (push_p)
17706 strcpy (pattern, "vpush%?.64\t{%P1");
17707 else
17708 strcpy (pattern, "vstmdb%?.64\t%m0!, {%P1");
17710 p = strlen (pattern);
17712 gcc_assert (REG_P (operands[1]));
17714 base = (REGNO (operands[1]) - FIRST_VFP_REGNUM) / 2;
17715 for (i = 1; i < XVECLEN (operands[2], 0); i++)
17717 p += sprintf (&pattern[p], ", d%d", base + i);
17719 strcpy (&pattern[p], "}");
17721 output_asm_insn (pattern, operands);
17722 return "";
17726 /* Emit RTL to save block of VFP register pairs to the stack. Returns the
17727 number of bytes pushed. */
17729 static int
17730 vfp_emit_fstmd (int base_reg, int count)
17732 rtx par;
17733 rtx dwarf;
17734 rtx tmp, reg;
17735 int i;
17737 /* Workaround ARM10 VFPr1 bug. Data corruption can occur when exactly two
17738 register pairs are stored by a store multiple insn. We avoid this
17739 by pushing an extra pair. */
17740 if (count == 2 && !arm_arch6)
17742 if (base_reg == LAST_VFP_REGNUM - 3)
17743 base_reg -= 2;
17744 count++;
17747 /* FSTMD may not store more than 16 doubleword registers at once. Split
17748 larger stores into multiple parts (up to a maximum of two, in
17749 practice). */
17750 if (count > 16)
17752 int saved;
17753 /* NOTE: base_reg is an internal register number, so each D register
17754 counts as 2. */
17755 saved = vfp_emit_fstmd (base_reg + 32, count - 16);
17756 saved += vfp_emit_fstmd (base_reg, 16);
17757 return saved;
17760 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (count));
17761 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (count + 1));
17763 reg = gen_rtx_REG (DFmode, base_reg);
17764 base_reg += 2;
17766 XVECEXP (par, 0, 0)
17767 = gen_rtx_SET (gen_frame_mem
17768 (BLKmode,
17769 gen_rtx_PRE_MODIFY (Pmode,
17770 stack_pointer_rtx,
17771 plus_constant
17772 (Pmode, stack_pointer_rtx,
17773 - (count * 8)))
17775 gen_rtx_UNSPEC (BLKmode,
17776 gen_rtvec (1, reg),
17777 UNSPEC_PUSH_MULT));
17779 tmp = gen_rtx_SET (stack_pointer_rtx,
17780 plus_constant (Pmode, stack_pointer_rtx, -(count * 8)));
17781 RTX_FRAME_RELATED_P (tmp) = 1;
17782 XVECEXP (dwarf, 0, 0) = tmp;
17784 tmp = gen_rtx_SET (gen_frame_mem (DFmode, stack_pointer_rtx), reg);
17785 RTX_FRAME_RELATED_P (tmp) = 1;
17786 XVECEXP (dwarf, 0, 1) = tmp;
17788 for (i = 1; i < count; i++)
17790 reg = gen_rtx_REG (DFmode, base_reg);
17791 base_reg += 2;
17792 XVECEXP (par, 0, i) = gen_rtx_USE (VOIDmode, reg);
17794 tmp = gen_rtx_SET (gen_frame_mem (DFmode,
17795 plus_constant (Pmode,
17796 stack_pointer_rtx,
17797 i * 8)),
17798 reg);
17799 RTX_FRAME_RELATED_P (tmp) = 1;
17800 XVECEXP (dwarf, 0, i + 1) = tmp;
17803 par = emit_insn (par);
17804 add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
17805 RTX_FRAME_RELATED_P (par) = 1;
17807 return count * 8;
17810 /* Return TRUE if -mcmse has been passed and the type of the function pointed
17811 to by ADDR has the cmse_nonsecure_call attribute; return FALSE otherwise. */
17813 bool
17814 detect_cmse_nonsecure_call (tree addr)
17816 if (!addr)
17817 return FALSE;
17819 tree fntype = TREE_TYPE (addr);
17820 if (use_cmse && lookup_attribute ("cmse_nonsecure_call",
17821 TYPE_ATTRIBUTES (fntype)))
17822 return TRUE;
17823 return FALSE;
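/* For reference, such calls typically come from source along the lines of
   (assumed example; requires the ARMv8-M Security Extensions and -mcmse):

     typedef int __attribute__ ((cmse_nonsecure_call)) (*ns_fn) (int);
     int f (ns_fn fp) { return fp (42); }

   where the attribute is attached to the function pointer type.  */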
17827 /* Emit a call instruction with pattern PAT. ADDR is the address of
17828 the call target. */
17830 void
17831 arm_emit_call_insn (rtx pat, rtx addr, bool sibcall)
17833 rtx insn;
17835 insn = emit_call_insn (pat);
17837 /* The PIC register is live on entry to VxWorks PIC PLT entries.
17838 If the call might use such an entry, add a use of the PIC register
17839 to the instruction's CALL_INSN_FUNCTION_USAGE. */
17840 if (TARGET_VXWORKS_RTP
17841 && flag_pic
17842 && !sibcall
17843 && GET_CODE (addr) == SYMBOL_REF
17844 && (SYMBOL_REF_DECL (addr)
17845 ? !targetm.binds_local_p (SYMBOL_REF_DECL (addr))
17846 : !SYMBOL_REF_LOCAL_P (addr)))
17848 require_pic_register ();
17849 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), cfun->machine->pic_reg);
17852 if (TARGET_AAPCS_BASED)
17854 /* For AAPCS, IP and CC can be clobbered by veneers inserted by the
17855 linker. We need to add an IP clobber to allow setting
17856 TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS to true. A CC clobber
17857 is not needed since it's a fixed register. */
17858 rtx *fusage = &CALL_INSN_FUNCTION_USAGE (insn);
17859 clobber_reg (fusage, gen_rtx_REG (word_mode, IP_REGNUM));
17863 /* Output a 'call' insn. */
17864 const char *
17865 output_call (rtx *operands)
17867 gcc_assert (!arm_arch5); /* Patterns should call blx <reg> directly. */
17869 /* Handle calls to lr using ip (which may be clobbered in subr anyway). */
17870 if (REGNO (operands[0]) == LR_REGNUM)
17872 operands[0] = gen_rtx_REG (SImode, IP_REGNUM);
17873 output_asm_insn ("mov%?\t%0, %|lr", operands);
17876 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
17878 if (TARGET_INTERWORK || arm_arch4t)
17879 output_asm_insn ("bx%?\t%0", operands);
17880 else
17881 output_asm_insn ("mov%?\t%|pc, %0", operands);
17883 return "";
17886 /* Output a move from arm registers to arm registers of a long double
17887 OPERANDS[0] is the destination.
17888 OPERANDS[1] is the source. */
17889 const char *
17890 output_mov_long_double_arm_from_arm (rtx *operands)
17892 /* We have to be careful here because the two might overlap. */
17893 int dest_start = REGNO (operands[0]);
17894 int src_start = REGNO (operands[1]);
17895 rtx ops[2];
17896 int i;
17898 if (dest_start < src_start)
17900 for (i = 0; i < 3; i++)
17902 ops[0] = gen_rtx_REG (SImode, dest_start + i);
17903 ops[1] = gen_rtx_REG (SImode, src_start + i);
17904 output_asm_insn ("mov%?\t%0, %1", ops);
17907 else
17909 for (i = 2; i >= 0; i--)
17911 ops[0] = gen_rtx_REG (SImode, dest_start + i);
17912 ops[1] = gen_rtx_REG (SImode, src_start + i);
17913 output_asm_insn ("mov%?\t%0, %1", ops);
17917 return "";
17920 void
17921 arm_emit_movpair (rtx dest, rtx src)
17923 /* If the src is an immediate, simplify it. */
17924 if (CONST_INT_P (src))
17926 HOST_WIDE_INT val = INTVAL (src);
17927 emit_set_insn (dest, GEN_INT (val & 0x0000ffff));
17928 if ((val >> 16) & 0x0000ffff)
17930 emit_set_insn (gen_rtx_ZERO_EXTRACT (SImode, dest, GEN_INT (16),
17931 GEN_INT (16)),
17932 GEN_INT ((val >> 16) & 0x0000ffff));
17933 rtx_insn *insn = get_last_insn ();
17934 set_unique_reg_note (insn, REG_EQUAL, copy_rtx (src));
17936 return;
17938 emit_set_insn (dest, gen_rtx_HIGH (SImode, src));
17939 emit_set_insn (dest, gen_rtx_LO_SUM (SImode, dest, src));
17940 rtx_insn *insn = get_last_insn ();
17941 set_unique_reg_note (insn, REG_EQUAL, copy_rtx (src));
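/* For illustration: moving the constant 0x12345678 into a register emits
   a set of the low half followed by a ZERO_EXTRACT set of the top half,
   which normally assembles to

       movw    rd, #22136              @ 0x5678
       movt    rd, #4660               @ 0x1234

   (rd is a placeholder).  When the upper 16 bits of the constant are
   zero, only the first move is emitted.  */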
17944 /* Output a move between double words. It must be REG<-MEM
17945 or MEM<-REG. */
17946 const char *
17947 output_move_double (rtx *operands, bool emit, int *count)
17949 enum rtx_code code0 = GET_CODE (operands[0]);
17950 enum rtx_code code1 = GET_CODE (operands[1]);
17951 rtx otherops[3];
17952 if (count)
17953 *count = 1;
17955 /* The only case when this might happen is when
17956 you are looking at the length of a DImode instruction
17957 that has an invalid constant in it. */
17958 if (code0 == REG && code1 != MEM)
17960 gcc_assert (!emit);
17961 *count = 2;
17962 return "";
17965 if (code0 == REG)
17967 unsigned int reg0 = REGNO (operands[0]);
17969 otherops[0] = gen_rtx_REG (SImode, 1 + reg0);
17971 gcc_assert (code1 == MEM); /* Constraints should ensure this. */
17973 switch (GET_CODE (XEXP (operands[1], 0)))
17975 case REG:
17977 if (emit)
17979 if (TARGET_LDRD
17980 && !(fix_cm3_ldrd && reg0 == REGNO(XEXP (operands[1], 0))))
17981 output_asm_insn ("ldrd%?\t%0, [%m1]", operands);
17982 else
17983 output_asm_insn ("ldmia%?\t%m1, %M0", operands);
17985 break;
17987 case PRE_INC:
17988 gcc_assert (TARGET_LDRD);
17989 if (emit)
17990 output_asm_insn ("ldrd%?\t%0, [%m1, #8]!", operands);
17991 break;
17993 case PRE_DEC:
17994 if (emit)
17996 if (TARGET_LDRD)
17997 output_asm_insn ("ldrd%?\t%0, [%m1, #-8]!", operands);
17998 else
17999 output_asm_insn ("ldmdb%?\t%m1!, %M0", operands);
18001 break;
18003 case POST_INC:
18004 if (emit)
18006 if (TARGET_LDRD)
18007 output_asm_insn ("ldrd%?\t%0, [%m1], #8", operands);
18008 else
18009 output_asm_insn ("ldmia%?\t%m1!, %M0", operands);
18011 break;
18013 case POST_DEC:
18014 gcc_assert (TARGET_LDRD);
18015 if (emit)
18016 output_asm_insn ("ldrd%?\t%0, [%m1], #-8", operands);
18017 break;
18019 case PRE_MODIFY:
18020 case POST_MODIFY:
18021 /* Autoincrement addressing modes should never have overlapping
18022 base and destination registers, and overlapping index registers
18023 are already prohibited, so this doesn't need to worry about
18024 fix_cm3_ldrd. */
18025 otherops[0] = operands[0];
18026 otherops[1] = XEXP (XEXP (XEXP (operands[1], 0), 1), 0);
18027 otherops[2] = XEXP (XEXP (XEXP (operands[1], 0), 1), 1);
18029 if (GET_CODE (XEXP (operands[1], 0)) == PRE_MODIFY)
18031 if (reg_overlap_mentioned_p (otherops[0], otherops[2]))
18033 /* Registers overlap so split out the increment. */
18034 if (emit)
18036 output_asm_insn ("add%?\t%1, %1, %2", otherops);
18037 output_asm_insn ("ldrd%?\t%0, [%1] @split", otherops);
18039 if (count)
18040 *count = 2;
18042 else
18044 /* Use a single insn if we can.
18045 FIXME: IWMMXT allows offsets larger than ldrd can
18046 handle, fix these up with a pair of ldr. */
18047 if (TARGET_THUMB2
18048 || !CONST_INT_P (otherops[2])
18049 || (INTVAL (otherops[2]) > -256
18050 && INTVAL (otherops[2]) < 256))
18052 if (emit)
18053 output_asm_insn ("ldrd%?\t%0, [%1, %2]!", otherops);
18055 else
18057 if (emit)
18059 output_asm_insn ("ldr%?\t%0, [%1, %2]!", otherops);
18060 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
18062 if (count)
18063 *count = 2;
18068 else
18070 /* Use a single insn if we can.
18071 FIXME: IWMMXT allows offsets larger than ldrd can handle,
18072 fix these up with a pair of ldr. */
18073 if (TARGET_THUMB2
18074 || !CONST_INT_P (otherops[2])
18075 || (INTVAL (otherops[2]) > -256
18076 && INTVAL (otherops[2]) < 256))
18078 if (emit)
18079 output_asm_insn ("ldrd%?\t%0, [%1], %2", otherops);
18081 else
18083 if (emit)
18085 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
18086 output_asm_insn ("ldr%?\t%0, [%1], %2", otherops);
18088 if (count)
18089 *count = 2;
18092 break;
18094 case LABEL_REF:
18095 case CONST:
18096 /* We might be able to use ldrd %0, %1 here. However the range is
18097 different to ldr/adr, and it is broken on some ARMv7-M
18098 implementations. */
18099 /* Use the second register of the pair to avoid problematic
18100 overlap. */
18101 otherops[1] = operands[1];
18102 if (emit)
18103 output_asm_insn ("adr%?\t%0, %1", otherops);
18104 operands[1] = otherops[0];
18105 if (emit)
18107 if (TARGET_LDRD)
18108 output_asm_insn ("ldrd%?\t%0, [%1]", operands);
18109 else
18110 output_asm_insn ("ldmia%?\t%1, %M0", operands);
18113 if (count)
18114 *count = 2;
18115 break;
18117 /* ??? This needs checking for thumb2. */
18118 default:
18119 if (arm_add_operand (XEXP (XEXP (operands[1], 0), 1),
18120 GET_MODE (XEXP (XEXP (operands[1], 0), 1))))
18122 otherops[0] = operands[0];
18123 otherops[1] = XEXP (XEXP (operands[1], 0), 0);
18124 otherops[2] = XEXP (XEXP (operands[1], 0), 1);
18126 if (GET_CODE (XEXP (operands[1], 0)) == PLUS)
18128 if (CONST_INT_P (otherops[2]) && !TARGET_LDRD)
18130 switch ((int) INTVAL (otherops[2]))
18132 case -8:
18133 if (emit)
18134 output_asm_insn ("ldmdb%?\t%1, %M0", otherops);
18135 return "";
18136 case -4:
18137 if (TARGET_THUMB2)
18138 break;
18139 if (emit)
18140 output_asm_insn ("ldmda%?\t%1, %M0", otherops);
18141 return "";
18142 case 4:
18143 if (TARGET_THUMB2)
18144 break;
18145 if (emit)
18146 output_asm_insn ("ldmib%?\t%1, %M0", otherops);
18147 return "";
18150 otherops[0] = gen_rtx_REG(SImode, REGNO(operands[0]) + 1);
18151 operands[1] = otherops[0];
18152 if (TARGET_LDRD
18153 && (REG_P (otherops[2])
18154 || TARGET_THUMB2
18155 || (CONST_INT_P (otherops[2])
18156 && INTVAL (otherops[2]) > -256
18157 && INTVAL (otherops[2]) < 256)))
18159 if (reg_overlap_mentioned_p (operands[0],
18160 otherops[2]))
18162 /* Swap base and index registers over to
18163 avoid a conflict. */
18164 std::swap (otherops[1], otherops[2]);
18166 /* If both registers conflict, it will usually
18167 have been fixed by a splitter. */
18168 if (reg_overlap_mentioned_p (operands[0], otherops[2])
18169 || (fix_cm3_ldrd && reg0 == REGNO (otherops[1])))
18171 if (emit)
18173 output_asm_insn ("add%?\t%0, %1, %2", otherops);
18174 output_asm_insn ("ldrd%?\t%0, [%1]", operands);
18176 if (count)
18177 *count = 2;
18179 else
18181 otherops[0] = operands[0];
18182 if (emit)
18183 output_asm_insn ("ldrd%?\t%0, [%1, %2]", otherops);
18185 return "";
18188 if (CONST_INT_P (otherops[2]))
18190 if (emit)
18192 if (!(const_ok_for_arm (INTVAL (otherops[2]))))
18193 output_asm_insn ("sub%?\t%0, %1, #%n2", otherops);
18194 else
18195 output_asm_insn ("add%?\t%0, %1, %2", otherops);
18198 else
18200 if (emit)
18201 output_asm_insn ("add%?\t%0, %1, %2", otherops);
18204 else
18206 if (emit)
18207 output_asm_insn ("sub%?\t%0, %1, %2", otherops);
18210 if (count)
18211 *count = 2;
18213 if (TARGET_LDRD)
18214 return "ldrd%?\t%0, [%1]";
18216 return "ldmia%?\t%1, %M0";
18218 else
18220 otherops[1] = adjust_address (operands[1], SImode, 4);
18221 /* Take care of overlapping base/data reg. */
18222 if (reg_mentioned_p (operands[0], operands[1]))
18224 if (emit)
18226 output_asm_insn ("ldr%?\t%0, %1", otherops);
18227 output_asm_insn ("ldr%?\t%0, %1", operands);
18229 if (count)
18230 *count = 2;
18233 else
18235 if (emit)
18237 output_asm_insn ("ldr%?\t%0, %1", operands);
18238 output_asm_insn ("ldr%?\t%0, %1", otherops);
18240 if (count)
18241 *count = 2;
18246 else
18248 /* Constraints should ensure this. */
18249 gcc_assert (code0 == MEM && code1 == REG);
18250 gcc_assert ((REGNO (operands[1]) != IP_REGNUM)
18251 || (TARGET_ARM && TARGET_LDRD));
18253 switch (GET_CODE (XEXP (operands[0], 0)))
18255 case REG:
18256 if (emit)
18258 if (TARGET_LDRD)
18259 output_asm_insn ("strd%?\t%1, [%m0]", operands);
18260 else
18261 output_asm_insn ("stm%?\t%m0, %M1", operands);
18263 break;
18265 case PRE_INC:
18266 gcc_assert (TARGET_LDRD);
18267 if (emit)
18268 output_asm_insn ("strd%?\t%1, [%m0, #8]!", operands);
18269 break;
18271 case PRE_DEC:
18272 if (emit)
18274 if (TARGET_LDRD)
18275 output_asm_insn ("strd%?\t%1, [%m0, #-8]!", operands);
18276 else
18277 output_asm_insn ("stmdb%?\t%m0!, %M1", operands);
18279 break;
18281 case POST_INC:
18282 if (emit)
18284 if (TARGET_LDRD)
18285 output_asm_insn ("strd%?\t%1, [%m0], #8", operands);
18286 else
18287 output_asm_insn ("stm%?\t%m0!, %M1", operands);
18289 break;
18291 case POST_DEC:
18292 gcc_assert (TARGET_LDRD);
18293 if (emit)
18294 output_asm_insn ("strd%?\t%1, [%m0], #-8", operands);
18295 break;
18297 case PRE_MODIFY:
18298 case POST_MODIFY:
18299 otherops[0] = operands[1];
18300 otherops[1] = XEXP (XEXP (XEXP (operands[0], 0), 1), 0);
18301 otherops[2] = XEXP (XEXP (XEXP (operands[0], 0), 1), 1);
18303 /* IWMMXT allows offsets larger than ldrd can handle,
18304 fix these up with a pair of ldr. */
18305 if (!TARGET_THUMB2
18306 && CONST_INT_P (otherops[2])
18307 && (INTVAL(otherops[2]) <= -256
18308 || INTVAL(otherops[2]) >= 256))
18310 if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
18312 if (emit)
18314 output_asm_insn ("str%?\t%0, [%1, %2]!", otherops);
18315 output_asm_insn ("str%?\t%H0, [%1, #4]", otherops);
18317 if (count)
18318 *count = 2;
18320 else
18322 if (emit)
18324 output_asm_insn ("str%?\t%H0, [%1, #4]", otherops);
18325 output_asm_insn ("str%?\t%0, [%1], %2", otherops);
18327 if (count)
18328 *count = 2;
18331 else if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
18333 if (emit)
18334 output_asm_insn ("strd%?\t%0, [%1, %2]!", otherops);
18336 else
18338 if (emit)
18339 output_asm_insn ("strd%?\t%0, [%1], %2", otherops);
18341 break;
18343 case PLUS:
18344 otherops[2] = XEXP (XEXP (operands[0], 0), 1);
18345 if (CONST_INT_P (otherops[2]) && !TARGET_LDRD)
18347 switch ((int) INTVAL (XEXP (XEXP (operands[0], 0), 1)))
18349 case -8:
18350 if (emit)
18351 output_asm_insn ("stmdb%?\t%m0, %M1", operands);
18352 return "";
18354 case -4:
18355 if (TARGET_THUMB2)
18356 break;
18357 if (emit)
18358 output_asm_insn ("stmda%?\t%m0, %M1", operands);
18359 return "";
18361 case 4:
18362 if (TARGET_THUMB2)
18363 break;
18364 if (emit)
18365 output_asm_insn ("stmib%?\t%m0, %M1", operands);
18366 return "";
18369 if (TARGET_LDRD
18370 && (REG_P (otherops[2])
18371 || TARGET_THUMB2
18372 || (CONST_INT_P (otherops[2])
18373 && INTVAL (otherops[2]) > -256
18374 && INTVAL (otherops[2]) < 256)))
18376 otherops[0] = operands[1];
18377 otherops[1] = XEXP (XEXP (operands[0], 0), 0);
18378 if (emit)
18379 output_asm_insn ("strd%?\t%0, [%1, %2]", otherops);
18380 return "";
18382 /* Fall through */
18384 default:
18385 otherops[0] = adjust_address (operands[0], SImode, 4);
18386 otherops[1] = operands[1];
18387 if (emit)
18389 output_asm_insn ("str%?\t%1, %0", operands);
18390 output_asm_insn ("str%?\t%H1, %0", otherops);
18392 if (count)
18393 *count = 2;
18397 return "";
18400 /* Output a move, load or store for quad-word vectors in ARM registers. Only
18401 handles MEMs accepted by neon_vector_mem_operand with TYPE=1. */
18403 const char *
18404 output_move_quad (rtx *operands)
18406 if (REG_P (operands[0]))
18408 /* Load, or reg->reg move. */
18410 if (MEM_P (operands[1]))
18412 switch (GET_CODE (XEXP (operands[1], 0)))
18414 case REG:
18415 output_asm_insn ("ldmia%?\t%m1, %M0", operands);
18416 break;
18418 case LABEL_REF:
18419 case CONST:
18420 output_asm_insn ("adr%?\t%0, %1", operands);
18421 output_asm_insn ("ldmia%?\t%0, %M0", operands);
18422 break;
18424 default:
18425 gcc_unreachable ();
18428 else
18430 rtx ops[2];
18431 int dest, src, i;
18433 gcc_assert (REG_P (operands[1]));
18435 dest = REGNO (operands[0]);
18436 src = REGNO (operands[1]);
18438 /* This seems pretty dumb, but hopefully GCC won't try to do it
18439 very often. */
18440 if (dest < src)
18441 for (i = 0; i < 4; i++)
18443 ops[0] = gen_rtx_REG (SImode, dest + i);
18444 ops[1] = gen_rtx_REG (SImode, src + i);
18445 output_asm_insn ("mov%?\t%0, %1", ops);
18447 else
18448 for (i = 3; i >= 0; i--)
18450 ops[0] = gen_rtx_REG (SImode, dest + i);
18451 ops[1] = gen_rtx_REG (SImode, src + i);
18452 output_asm_insn ("mov%?\t%0, %1", ops);
18456 else
18458 gcc_assert (MEM_P (operands[0]));
18459 gcc_assert (REG_P (operands[1]));
18460 gcc_assert (!reg_overlap_mentioned_p (operands[1], operands[0]));
18462 switch (GET_CODE (XEXP (operands[0], 0)))
18464 case REG:
18465 output_asm_insn ("stm%?\t%m0, %M1", operands);
18466 break;
18468 default:
18469 gcc_unreachable ();
18473 return "";
18476 /* Output a VFP load or store instruction. */
18478 const char *
18479 output_move_vfp (rtx *operands)
18481 rtx reg, mem, addr, ops[2];
18482 int load = REG_P (operands[0]);
18483 int dp = GET_MODE_SIZE (GET_MODE (operands[0])) == 8;
18484 int sp = (!TARGET_VFP_FP16INST
18485 || GET_MODE_SIZE (GET_MODE (operands[0])) == 4);
18486 int integer_p = GET_MODE_CLASS (GET_MODE (operands[0])) == MODE_INT;
18487 const char *templ;
18488 char buff[50];
18489 machine_mode mode;
18491 reg = operands[!load];
18492 mem = operands[load];
18494 mode = GET_MODE (reg);
18496 gcc_assert (REG_P (reg));
18497 gcc_assert (IS_VFP_REGNUM (REGNO (reg)));
18498 gcc_assert ((mode == HFmode && TARGET_HARD_FLOAT)
18499 || mode == SFmode
18500 || mode == DFmode
18501 || mode == HImode
18502 || mode == SImode
18503 || mode == DImode
18504 || (TARGET_NEON && VALID_NEON_DREG_MODE (mode)));
18505 gcc_assert (MEM_P (mem));
18507 addr = XEXP (mem, 0);
18509 switch (GET_CODE (addr))
18511 case PRE_DEC:
18512 templ = "v%smdb%%?.%s\t%%0!, {%%%s1}%s";
18513 ops[0] = XEXP (addr, 0);
18514 ops[1] = reg;
18515 break;
18517 case POST_INC:
18518 templ = "v%smia%%?.%s\t%%0!, {%%%s1}%s";
18519 ops[0] = XEXP (addr, 0);
18520 ops[1] = reg;
18521 break;
18523 default:
18524 templ = "v%sr%%?.%s\t%%%s0, %%1%s";
18525 ops[0] = reg;
18526 ops[1] = mem;
18527 break;
18530 sprintf (buff, templ,
18531 load ? "ld" : "st",
18532 dp ? "64" : sp ? "32" : "16",
18533 dp ? "P" : "",
18534 integer_p ? "\t%@ int" : "");
18535 output_asm_insn (buff, ops);
18537 return "";
18540 /* Output a Neon double-word or quad-word load or store, or a load
18541 or store for larger structure modes.
18543 WARNING: The ordering of elements is weird in big-endian mode,
18544 because the EABI requires that vectors stored in memory appear
18545 as though they were stored by a VSTM instruction.
18546 GCC RTL defines element ordering based on in-memory order.
18547 This can be different from the architectural ordering of elements
18548 within a NEON register. The intrinsics defined in arm_neon.h use the
18549 NEON register element ordering, not the GCC RTL element ordering.
18551 For example, the in-memory ordering of a big-endian quadword
18552 vector with 16-bit elements when stored from register pair {d0,d1}
18553 will be (lowest address first, d0[N] is NEON register element N):
18555 [d0[3], d0[2], d0[1], d0[0], d1[7], d1[6], d1[5], d1[4]]
18557 When necessary, quadword registers (dN, dN+1) are moved to ARM
18558 registers from rN in the order:
18560 dN -> (rN+1, rN), dN+1 -> (rN+3, rN+2)
18562 So that STM/LDM can be used on vectors in ARM registers, and the
18563 same memory layout will result as if VSTM/VLDM were used.
18565 Instead of VSTM/VLDM we prefer to use VST1.64/VLD1.64 where
18566 possible, which allows use of appropriate alignment tags.
18567 Note that the choice of "64" is independent of the actual vector
18568 element size; this size simply ensures that the behavior is
18569 equivalent to VSTM/VLDM in both little-endian and big-endian mode.
18571 Due to limitations of those instructions, use of VST1.64/VLD1.64
18572 is not possible if:
18573 - the address contains PRE_DEC, or
18574 - the mode refers to more than 4 double-word registers
18576 In those cases, it would be possible to replace VSTM/VLDM by a
18577 sequence of instructions; this is not currently implemented since
18578 this is not certain to actually improve performance. */
18580 const char *
18581 output_move_neon (rtx *operands)
18583 rtx reg, mem, addr, ops[2];
18584 int regno, nregs, load = REG_P (operands[0]);
18585 const char *templ;
18586 char buff[50];
18587 machine_mode mode;
18589 reg = operands[!load];
18590 mem = operands[load];
18592 mode = GET_MODE (reg);
18594 gcc_assert (REG_P (reg));
18595 regno = REGNO (reg);
18596 nregs = HARD_REGNO_NREGS (regno, mode) / 2;
18597 gcc_assert (VFP_REGNO_OK_FOR_DOUBLE (regno)
18598 || NEON_REGNO_OK_FOR_QUAD (regno));
18599 gcc_assert (VALID_NEON_DREG_MODE (mode)
18600 || VALID_NEON_QREG_MODE (mode)
18601 || VALID_NEON_STRUCT_MODE (mode));
18602 gcc_assert (MEM_P (mem));
18604 addr = XEXP (mem, 0);
18606 /* Strip off const from addresses like (const (plus (...))). */
18607 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS)
18608 addr = XEXP (addr, 0);
18610 switch (GET_CODE (addr))
18612 case POST_INC:
18613 /* We have to use vldm / vstm for too-large modes. */
18614 if (nregs > 4)
18616 templ = "v%smia%%?\t%%0!, %%h1";
18617 ops[0] = XEXP (addr, 0);
18619 else
18621 templ = "v%s1.64\t%%h1, %%A0";
18622 ops[0] = mem;
18624 ops[1] = reg;
18625 break;
18627 case PRE_DEC:
18628 /* We have to use vldm / vstm in this case, since there is no
18629 pre-decrement form of the vld1 / vst1 instructions. */
18630 templ = "v%smdb%%?\t%%0!, %%h1";
18631 ops[0] = XEXP (addr, 0);
18632 ops[1] = reg;
18633 break;
18635 case POST_MODIFY:
18636 /* FIXME: Not currently enabled in neon_vector_mem_operand. */
18637 gcc_unreachable ();
18639 case REG:
18640 /* We have to use vldm / vstm for too-large modes. */
18641 if (nregs > 1)
18643 if (nregs > 4)
18644 templ = "v%smia%%?\t%%m0, %%h1";
18645 else
18646 templ = "v%s1.64\t%%h1, %%A0";
18648 ops[0] = mem;
18649 ops[1] = reg;
18650 break;
18652 /* Fall through. */
18653 case LABEL_REF:
18654 case PLUS:
18656 int i;
18657 int overlap = -1;
18658 for (i = 0; i < nregs; i++)
18660 /* We're only using DImode here because it's a convenient size. */
18661 ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * i);
18662 ops[1] = adjust_address (mem, DImode, 8 * i);
18663 if (reg_overlap_mentioned_p (ops[0], mem))
18665 gcc_assert (overlap == -1);
18666 overlap = i;
18668 else
18670 sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
18671 output_asm_insn (buff, ops);
18674 if (overlap != -1)
18676 ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * overlap);
18677 ops[1] = adjust_address (mem, SImode, 8 * overlap);
18678 sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
18679 output_asm_insn (buff, ops);
18682 return "";
18685 default:
18686 gcc_unreachable ();
18689 sprintf (buff, templ, load ? "ld" : "st");
18690 output_asm_insn (buff, ops);
18692 return "";
18695 /* Compute and return the length of neon_mov<mode>, where <mode> is
18696 one of VSTRUCT modes: EI, OI, CI or XI. */
18697 int
18698 arm_attr_length_move_neon (rtx_insn *insn)
18700 rtx reg, mem, addr;
18701 int load;
18702 machine_mode mode;
18704 extract_insn_cached (insn);
18706 if (REG_P (recog_data.operand[0]) && REG_P (recog_data.operand[1]))
18708 mode = GET_MODE (recog_data.operand[0]);
18709 switch (mode)
18711 case E_EImode:
18712 case E_OImode:
18713 return 8;
18714 case E_CImode:
18715 return 12;
18716 case E_XImode:
18717 return 16;
18718 default:
18719 gcc_unreachable ();
18723 load = REG_P (recog_data.operand[0]);
18724 reg = recog_data.operand[!load];
18725 mem = recog_data.operand[load];
18727 gcc_assert (MEM_P (mem));
18729 mode = GET_MODE (reg);
18730 addr = XEXP (mem, 0);
18732 /* Strip off const from addresses like (const (plus (...))). */
18733 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS)
18734 addr = XEXP (addr, 0);
18736 if (GET_CODE (addr) == LABEL_REF || GET_CODE (addr) == PLUS)
18738 int insns = HARD_REGNO_NREGS (REGNO (reg), mode) / 2;
18739 return insns * 4;
18741 else
18742 return 4;
18745 /* Return nonzero if the offset in the address is an immediate. Otherwise,
18746 return zero. */
18748 int
18749 arm_address_offset_is_imm (rtx_insn *insn)
18751 rtx mem, addr;
18753 extract_insn_cached (insn);
18755 if (REG_P (recog_data.operand[0]))
18756 return 0;
18758 mem = recog_data.operand[0];
18760 gcc_assert (MEM_P (mem));
18762 addr = XEXP (mem, 0);
18764 if (REG_P (addr)
18765 || (GET_CODE (addr) == PLUS
18766 && REG_P (XEXP (addr, 0))
18767 && CONST_INT_P (XEXP (addr, 1))))
18768 return 1;
18769 else
18770 return 0;
18773 /* Output an ADD r, s, #n where n may be too big for one instruction.
18774 If adding zero to one register, output nothing. */
18775 const char *
18776 output_add_immediate (rtx *operands)
18778 HOST_WIDE_INT n = INTVAL (operands[2]);
18780 if (n != 0 || REGNO (operands[0]) != REGNO (operands[1]))
18782 if (n < 0)
18783 output_multi_immediate (operands,
18784 "sub%?\t%0, %1, %2", "sub%?\t%0, %0, %2", 2,
18785 -n);
18786 else
18787 output_multi_immediate (operands,
18788 "add%?\t%0, %1, %2", "add%?\t%0, %0, %2", 2,
18792 return "";
18795 /* Output a multiple immediate operation.
18796 OPERANDS is the vector of operands referred to in the output patterns.
18797 INSTR1 is the output pattern to use for the first constant.
18798 INSTR2 is the output pattern to use for subsequent constants.
18799 IMMED_OP is the index of the constant slot in OPERANDS.
18800 N is the constant value. */
18801 static const char *
18802 output_multi_immediate (rtx *operands, const char *instr1, const char *instr2,
18803 int immed_op, HOST_WIDE_INT n)
18805 #if HOST_BITS_PER_WIDE_INT > 32
18806 n &= 0xffffffff;
18807 #endif
18809 if (n == 0)
18811 /* Quick and easy output. */
18812 operands[immed_op] = const0_rtx;
18813 output_asm_insn (instr1, operands);
18815 else
18817 int i;
18818 const char * instr = instr1;
18820 /* Note that n is never zero here (which would give no output). */
18821 for (i = 0; i < 32; i += 2)
18823 if (n & (3 << i))
18825 operands[immed_op] = GEN_INT (n & (255 << i));
18826 output_asm_insn (instr, operands);
18827 instr = instr2;
18828 i += 6;
18833 return "";
18836 /* Return the name of a shifter operation. */
18837 static const char *
18838 arm_shift_nmem(enum rtx_code code)
18840 switch (code)
18842 case ASHIFT:
18843 return ARM_LSL_NAME;
18845 case ASHIFTRT:
18846 return "asr";
18848 case LSHIFTRT:
18849 return "lsr";
18851 case ROTATERT:
18852 return "ror";
18854 default:
18855 abort();
18859 /* Return the appropriate ARM instruction for the operation code.
18860 The returned result should not be overwritten. OP is the rtx of the
18861 operation. SHIFT_FIRST_ARG is TRUE if the first argument of the operator
18862 was shifted. */
18863 const char *
18864 arithmetic_instr (rtx op, int shift_first_arg)
18866 switch (GET_CODE (op))
18868 case PLUS:
18869 return "add";
18871 case MINUS:
18872 return shift_first_arg ? "rsb" : "sub";
18874 case IOR:
18875 return "orr";
18877 case XOR:
18878 return "eor";
18880 case AND:
18881 return "and";
18883 case ASHIFT:
18884 case ASHIFTRT:
18885 case LSHIFTRT:
18886 case ROTATERT:
18887 return arm_shift_nmem(GET_CODE(op));
18889 default:
18890 gcc_unreachable ();
18894 /* Ensure valid constant shifts and return the appropriate shift mnemonic
18895 for the operation code. The returned result should not be overwritten.
18896 OP is the rtx code of the shift.
18897 On exit, *AMOUNTP will be -1 if the shift is by a register, or the shift
18898 amount if the shift is by a constant. */
18899 static const char *
18900 shift_op (rtx op, HOST_WIDE_INT *amountp)
18902 const char * mnem;
18903 enum rtx_code code = GET_CODE (op);
18905 switch (code)
18907 case ROTATE:
18908 if (!CONST_INT_P (XEXP (op, 1)))
18910 output_operand_lossage ("invalid shift operand");
18911 return NULL;
18914 code = ROTATERT;
18915 *amountp = 32 - INTVAL (XEXP (op, 1));
18916 mnem = "ror";
18917 break;
18919 case ASHIFT:
18920 case ASHIFTRT:
18921 case LSHIFTRT:
18922 case ROTATERT:
18923 mnem = arm_shift_nmem(code);
18924 if (CONST_INT_P (XEXP (op, 1)))
18926 *amountp = INTVAL (XEXP (op, 1));
18928 else if (REG_P (XEXP (op, 1)))
18930 *amountp = -1;
18931 return mnem;
18933 else
18935 output_operand_lossage ("invalid shift operand");
18936 return NULL;
18938 break;
18940 case MULT:
18941 /* We never have to worry about the amount being other than a
18942 power of 2, since this case can never be reloaded from a reg. */
18943 if (!CONST_INT_P (XEXP (op, 1)))
18945 output_operand_lossage ("invalid shift operand");
18946 return NULL;
18949 *amountp = INTVAL (XEXP (op, 1)) & 0xFFFFFFFF;
18951 /* Amount must be a power of two. */
18952 if (*amountp & (*amountp - 1))
18954 output_operand_lossage ("invalid shift operand");
18955 return NULL;
18958 *amountp = exact_log2 (*amountp);
18959 gcc_assert (IN_RANGE (*amountp, 0, 31));
18960 return ARM_LSL_NAME;
18962 default:
18963 output_operand_lossage ("invalid shift operand");
18964 return NULL;
18967 /* This is not 100% correct, but follows from the desire to merge
18968 multiplication by a power of 2 with the recognizer for a
18969 shift. >=32 is not a valid shift for "lsl", so we must try to
18970 output a shift that produces the correct arithmetical result.
18971 Using lsr #32 is identical except for the fact that the carry bit
18972 is not set correctly if we set the flags; but we never use the
18973 carry bit from such an operation, so we can ignore that. */
18974 if (code == ROTATERT)
18975 /* Rotate is just modulo 32. */
18976 *amountp &= 31;
18977 else if (*amountp != (*amountp & 31))
18979 if (code == ASHIFT)
18980 mnem = "lsr";
18981 *amountp = 32;
18984 /* Shifts of 0 are no-ops. */
18985 if (*amountp == 0)
18986 return NULL;
18988 return mnem;
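/* For illustration: (ashift (reg) (const_int 3)) yields "lsl" with
   *AMOUNTP == 3; (mult (reg) (const_int 8)) is treated as the same shift
   because 8 == 1 << 3; and (rotate (reg) (const_int 8)) becomes "ror"
   with *AMOUNTP == 24, since a left rotate by 8 is a right rotate by
   32 - 8.  */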
18991 /* Output a .ascii pseudo-op, keeping track of lengths. This is
18992 because /bin/as is horribly restrictive. The judgement about
18993 whether or not each character is 'printable' (and can be output as
18994 is) or not (and must be printed with an octal escape) must be made
18995 with reference to the *host* character set -- the situation is
18996 similar to that discussed in the comments above pp_c_char in
18997 c-pretty-print.c. */
18999 #define MAX_ASCII_LEN 51
19001 void
19002 output_ascii_pseudo_op (FILE *stream, const unsigned char *p, int len)
19004 int i;
19005 int len_so_far = 0;
19007 fputs ("\t.ascii\t\"", stream);
19009 for (i = 0; i < len; i++)
19011 int c = p[i];
19013 if (len_so_far >= MAX_ASCII_LEN)
19015 fputs ("\"\n\t.ascii\t\"", stream);
19016 len_so_far = 0;
19019 if (ISPRINT (c))
19021 if (c == '\\' || c == '\"')
19023 putc ('\\', stream);
19024 len_so_far++;
19026 putc (c, stream);
19027 len_so_far++;
19029 else
19031 fprintf (stream, "\\%03o", c);
19032 len_so_far += 4;
19036 fputs ("\"\n", stream);
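/* For illustration: the byte sequence { 'a', '"', 'b', '\n' } is emitted
   as

       .ascii  "a\"b\012"

   printable characters are copied (with backslash and quote escaped),
   everything else is printed as a three-digit octal escape, and a fresh
   .ascii directive is started once MAX_ASCII_LEN characters have been
   written.  */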
19039 /* Whether a register is callee saved or not. This is necessary because high
19040 registers are marked as caller saved when optimizing for size on Thumb-1
19041 targets, despite being callee saved, in order to avoid using them. */
19042 #define callee_saved_reg_p(reg) \
19043 (!call_used_regs[reg] \
19044 || (TARGET_THUMB1 && optimize_size \
19045 && reg >= FIRST_HI_REGNUM && reg <= LAST_HI_REGNUM))
19047 /* Compute the register save mask for registers 0 through 12
19048 inclusive. This code is used by arm_compute_save_core_reg_mask (). */
19050 static unsigned long
19051 arm_compute_save_reg0_reg12_mask (void)
19053 unsigned long func_type = arm_current_func_type ();
19054 unsigned long save_reg_mask = 0;
19055 unsigned int reg;
19057 if (IS_INTERRUPT (func_type))
19059 unsigned int max_reg;
19060 /* Interrupt functions must not corrupt any registers,
19061 even call clobbered ones. If this is a leaf function
19062 we can just examine the registers used by the RTL, but
19063 otherwise we have to assume that whatever function is
19064 called might clobber anything, and so we have to save
19065 all the call-clobbered registers as well. */
19066 if (ARM_FUNC_TYPE (func_type) == ARM_FT_FIQ)
19067 /* FIQ handlers have registers r8 - r12 banked, so
19068 we only need to check r0 - r7.  Normal ISRs only
19069 bank r14 and r15, so we must check up to r12.
19070 r13 is the stack pointer which is always preserved,
19071 so we do not need to consider it here. */
19072 max_reg = 7;
19073 else
19074 max_reg = 12;
19076 for (reg = 0; reg <= max_reg; reg++)
19077 if (df_regs_ever_live_p (reg)
19078 || (! crtl->is_leaf && call_used_regs[reg]))
19079 save_reg_mask |= (1 << reg);
19081 /* Also save the pic base register if necessary. */
19082 if (flag_pic
19083 && !TARGET_SINGLE_PIC_BASE
19084 && arm_pic_register != INVALID_REGNUM
19085 && crtl->uses_pic_offset_table)
19086 save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
19088 else if (IS_VOLATILE(func_type))
19090 /* For noreturn functions we historically omitted register saves
19091 altogether. However this really messes up debugging. As a
19092 compromise save just the frame pointers. Combined with the link
19093 register saved elsewhere this should be sufficient to get
19094 a backtrace. */
19095 if (frame_pointer_needed)
19096 save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;
19097 if (df_regs_ever_live_p (ARM_HARD_FRAME_POINTER_REGNUM))
19098 save_reg_mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;
19099 if (df_regs_ever_live_p (THUMB_HARD_FRAME_POINTER_REGNUM))
19100 save_reg_mask |= 1 << THUMB_HARD_FRAME_POINTER_REGNUM;
19102 else
19104 /* In the normal case we only need to save those registers
19105 which are call saved and which are used by this function. */
19106 for (reg = 0; reg <= 11; reg++)
19107 if (df_regs_ever_live_p (reg) && callee_saved_reg_p (reg))
19108 save_reg_mask |= (1 << reg);
19110 /* Handle the frame pointer as a special case. */
19111 if (frame_pointer_needed)
19112 save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;
19114 /* If we aren't loading the PIC register,
19115 don't stack it even though it may be live. */
19116 if (flag_pic
19117 && !TARGET_SINGLE_PIC_BASE
19118 && arm_pic_register != INVALID_REGNUM
19119 && (df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM)
19120 || crtl->uses_pic_offset_table))
19121 save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
19123 /* The prologue will copy SP into R0, so save it. */
19124 if (IS_STACKALIGN (func_type))
19125 save_reg_mask |= 1;
19128 /* Save registers so the exception handler can modify them. */
19129 if (crtl->calls_eh_return)
19131 unsigned int i;
19133 for (i = 0; ; i++)
19135 reg = EH_RETURN_DATA_REGNO (i);
19136 if (reg == INVALID_REGNUM)
19137 break;
19138 save_reg_mask |= 1 << reg;
19142 return save_reg_mask;
19145 /* Return true if r3 is live at the start of the function. */
19147 static bool
19148 arm_r3_live_at_start_p (void)
19150 /* Just look at cfg info, which is still close enough to correct at this
19151 point. This gives false positives for broken functions that might use
19152 uninitialized data that happens to be allocated in r3, but who cares? */
19153 return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun)), 3);
19156 /* Compute the number of bytes used to store the static chain register on the
19157 stack, above the stack frame. We need to know this accurately to get the
19158 alignment of the rest of the stack frame correct. */
19160 static int
19161 arm_compute_static_chain_stack_bytes (void)
19163 /* See the defining assertion in arm_expand_prologue. */
19164 if (IS_NESTED (arm_current_func_type ())
19165 && ((TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
19166 || (flag_stack_check == STATIC_BUILTIN_STACK_CHECK
19167 && !df_regs_ever_live_p (LR_REGNUM)))
19168 && arm_r3_live_at_start_p ()
19169 && crtl->args.pretend_args_size == 0)
19170 return 4;
19172 return 0;
19175 /* Compute a bit mask of which core registers need to be
19176 saved on the stack for the current function.
19177 This is used by arm_compute_frame_layout, which may add extra registers. */
19179 static unsigned long
19180 arm_compute_save_core_reg_mask (void)
19182 unsigned int save_reg_mask = 0;
19183 unsigned long func_type = arm_current_func_type ();
19184 unsigned int reg;
19186 if (IS_NAKED (func_type))
19187 /* This should never really happen. */
19188 return 0;
19190 /* If we are creating a stack frame, then we must save the frame pointer,
19191 IP (which will hold the old stack pointer), LR and the PC. */
19192 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
19193 save_reg_mask |=
19194 (1 << ARM_HARD_FRAME_POINTER_REGNUM)
19195 | (1 << IP_REGNUM)
19196 | (1 << LR_REGNUM)
19197 | (1 << PC_REGNUM);
19199 save_reg_mask |= arm_compute_save_reg0_reg12_mask ();
19201 /* Decide if we need to save the link register.
19202 Interrupt routines have their own banked link register,
19203 so they never need to save it.
19204 Otherwise if we do not use the link register we do not need to save
19205 it. If we are pushing other registers onto the stack however, we
19206 can save an instruction in the epilogue by pushing the link register
19207 now and then popping it back into the PC. This incurs extra memory
19208 accesses though, so we only do it when optimizing for size, and only
19209 if we know that we will not need a fancy return sequence. */
19210 if (df_regs_ever_live_p (LR_REGNUM)
19211 || (save_reg_mask
19212 && optimize_size
19213 && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
19214 && !crtl->tail_call_emit
19215 && !crtl->calls_eh_return))
19216 save_reg_mask |= 1 << LR_REGNUM;
19218 if (cfun->machine->lr_save_eliminated)
19219 save_reg_mask &= ~ (1 << LR_REGNUM);
19221 if (TARGET_REALLY_IWMMXT
19222 && ((bit_count (save_reg_mask)
19223 + ARM_NUM_INTS (crtl->args.pretend_args_size +
19224 arm_compute_static_chain_stack_bytes())
19225 ) % 2) != 0)
19227 /* The total number of registers that are going to be pushed
19228 onto the stack is odd. We need to ensure that the stack
19229 is 64-bit aligned before we start to save iWMMXt registers,
19230 and also before we start to create locals. (A local variable
19231 might be a double or long long which we will load/store using
19232 an iWMMXt instruction). Therefore we need to push another
19233 ARM register, so that the stack will be 64-bit aligned. We
19234 try to avoid using the arg registers (r0 - r3) as they might be
19235 used to pass values in a tail call. */
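/* Worked example (illustrative only): if three core registers are already in
   save_reg_mask and there are no pretend args or static chain bytes, the
   count is odd, so the loop below adds the first free register in the
   r4-r12 range, making the total even and keeping the stack 64-bit
   aligned.  */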
19236 for (reg = 4; reg <= 12; reg++)
19237 if ((save_reg_mask & (1 << reg)) == 0)
19238 break;
19240 if (reg <= 12)
19241 save_reg_mask |= (1 << reg);
19242 else
19244 cfun->machine->sibcall_blocked = 1;
19245 save_reg_mask |= (1 << 3);
19249 /* We may need to push an additional register for use initializing the
19250 PIC base register. */
19251 if (TARGET_THUMB2 && IS_NESTED (func_type) && flag_pic
19252 && (save_reg_mask & THUMB2_WORK_REGS) == 0)
19254 reg = thumb_find_work_register (1 << 4);
19255 if (!call_used_regs[reg])
19256 save_reg_mask |= (1 << reg);
19259 return save_reg_mask;
19262 /* Compute a bit mask of which core registers need to be
19263 saved on the stack for the current function. */
19264 static unsigned long
19265 thumb1_compute_save_core_reg_mask (void)
19267 unsigned long mask;
19268 unsigned reg;
19270 mask = 0;
19271 for (reg = 0; reg < 12; reg ++)
19272 if (df_regs_ever_live_p (reg) && callee_saved_reg_p (reg))
19273 mask |= 1 << reg;
19275 /* Handle the frame pointer as a special case. */
19276 if (frame_pointer_needed)
19277 mask |= 1 << HARD_FRAME_POINTER_REGNUM;
19279 if (flag_pic
19280 && !TARGET_SINGLE_PIC_BASE
19281 && arm_pic_register != INVALID_REGNUM
19282 && crtl->uses_pic_offset_table)
19283 mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
19285 /* See if we might need r11 for calls to _interwork_r11_call_via_rN(). */
19286 if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
19287 mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;
19289 /* LR will also be pushed if any lo regs are pushed. */
19290 if (mask & 0xff || thumb_force_lr_save ())
19291 mask |= (1 << LR_REGNUM);
19293 /* Make sure we have a low work register if we need one.
19294 We will need one if we are going to push a high register,
19295 but we are not currently intending to push a low register. */
19296 if ((mask & 0xff) == 0
19297 && ((mask & 0x0f00) || TARGET_BACKTRACE))
19299 /* Use thumb_find_work_register to choose which register
19300 we will use. If the register is live then we will
19301 have to push it. Use LAST_LO_REGNUM as our fallback
19302 choice for the register to select. */
19303 reg = thumb_find_work_register (1 << LAST_LO_REGNUM);
19304 /* Make sure the register returned by thumb_find_work_register is
19305 not part of the return value. */
19306 if (reg * UNITS_PER_WORD <= (unsigned) arm_size_return_regs ())
19307 reg = LAST_LO_REGNUM;
19309 if (callee_saved_reg_p (reg))
19310 mask |= 1 << reg;
19313 /* The 504 below is 8 bytes less than 512 because there are two possible
19314 alignment words. We can't tell here if they will be present or not so we
19315 have to play it safe and assume that they are. */
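/* Worked example (illustrative only): with a 4-byte interworking slot, a
   396-byte frame and 104 bytes of outgoing arguments the total is 504, so
   the check below reserves a low register for the stack decrement.  */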
19316 if ((CALLER_INTERWORKING_SLOT_SIZE +
19317 ROUND_UP_WORD (get_frame_size ()) +
19318 crtl->outgoing_args_size) >= 504)
19320 /* This is the same as the code in thumb1_expand_prologue() which
19321 determines which register to use for stack decrement. */
19322 for (reg = LAST_ARG_REGNUM + 1; reg <= LAST_LO_REGNUM; reg++)
19323 if (mask & (1 << reg))
19324 break;
19326 if (reg > LAST_LO_REGNUM)
19328 /* Make sure we have a register available for stack decrement. */
19329 mask |= 1 << LAST_LO_REGNUM;
19333 return mask;
19337 /* Return the number of bytes required to save VFP registers. */
19338 static int
19339 arm_get_vfp_saved_size (void)
19341 unsigned int regno;
19342 int count;
19343 int saved;
19345 saved = 0;
19346 /* Space for saved VFP registers. */
19347 if (TARGET_HARD_FLOAT)
19349 count = 0;
19350 for (regno = FIRST_VFP_REGNUM;
19351 regno < LAST_VFP_REGNUM;
19352 regno += 2)
19354 if ((!df_regs_ever_live_p (regno) || call_used_regs[regno])
19355 && (!df_regs_ever_live_p (regno + 1) || call_used_regs[regno + 1]))
19357 if (count > 0)
19359 /* Workaround ARM10 VFPr1 bug. */
19360 if (count == 2 && !arm_arch6)
19361 count++;
19362 saved += count * 8;
19364 count = 0;
19366 else
19367 count++;
19369 if (count > 0)
19371 if (count == 2 && !arm_arch6)
19372 count++;
19373 saved += count * 8;
19376 return saved;
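/* Rough example (not from the original source): if only the call-saved
   registers d8-d15 are live, the loop above finds a single run of eight
   D registers and returns 8 * 8 = 64 bytes; a run of exactly two D
   registers on a core without arm_arch6 is padded to three because of the
   ARM10 VFPr1 workaround.  */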
19380 /* Generate a function exit sequence. If REALLY_RETURN is false, then do
19381 everything bar the final return instruction. If SIMPLE_RETURN is true,
19382 then do not output the epilogue, because it has already been emitted in RTL. */
19383 const char *
19384 output_return_instruction (rtx operand, bool really_return, bool reverse,
19385 bool simple_return)
19387 char conditional[10];
19388 char instr[100];
19389 unsigned reg;
19390 unsigned long live_regs_mask;
19391 unsigned long func_type;
19392 arm_stack_offsets *offsets;
19394 func_type = arm_current_func_type ();
19396 if (IS_NAKED (func_type))
19397 return "";
19399 if (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN)
19401 /* If this function was declared non-returning, and we have
19402 found a tail call, then we have to trust that the called
19403 function won't return. */
19404 if (really_return)
19406 rtx ops[2];
19408 /* Otherwise, trap an attempted return by aborting. */
19409 ops[0] = operand;
19410 ops[1] = gen_rtx_SYMBOL_REF (Pmode, NEED_PLT_RELOC ? "abort(PLT)"
19411 : "abort");
19412 assemble_external_libcall (ops[1]);
19413 output_asm_insn (reverse ? "bl%D0\t%a1" : "bl%d0\t%a1", ops);
19416 return "";
19419 gcc_assert (!cfun->calls_alloca || really_return);
19421 sprintf (conditional, "%%?%%%c0", reverse ? 'D' : 'd');
19423 cfun->machine->return_used_this_function = 1;
19425 offsets = arm_get_frame_offsets ();
19426 live_regs_mask = offsets->saved_regs_mask;
19428 if (!simple_return && live_regs_mask)
19430 const char * return_reg;
19432 /* If we do not have any special requirements for function exit
19433 (e.g. interworking) then we can load the return address
19434 directly into the PC. Otherwise we must load it into LR. */
19435 if (really_return
19436 && !IS_CMSE_ENTRY (func_type)
19437 && (IS_INTERRUPT (func_type) || !TARGET_INTERWORK))
19438 return_reg = reg_names[PC_REGNUM];
19439 else
19440 return_reg = reg_names[LR_REGNUM];
19442 if ((live_regs_mask & (1 << IP_REGNUM)) == (1 << IP_REGNUM))
19444 /* There are three possible reasons for the IP register
19445 being saved. 1) a stack frame was created, in which case
19446 IP contains the old stack pointer, or 2) an ISR routine
19447 corrupted it, or 3) it was saved to align the stack on
19448 iWMMXt. In case 1, restore IP into SP, otherwise just
19449 restore IP. */
19450 if (frame_pointer_needed)
19452 live_regs_mask &= ~ (1 << IP_REGNUM);
19453 live_regs_mask |= (1 << SP_REGNUM);
19455 else
19456 gcc_assert (IS_INTERRUPT (func_type) || TARGET_REALLY_IWMMXT);
19459 /* On some ARM architectures it is faster to use LDR rather than
19460 LDM to load a single register. On other architectures, the
19461 cost is the same. In 26 bit mode, or for exception handlers,
19462 we have to use LDM to load the PC so that the CPSR is also
19463 restored. */
19464 for (reg = 0; reg <= LAST_ARM_REGNUM; reg++)
19465 if (live_regs_mask == (1U << reg))
19466 break;
19468 if (reg <= LAST_ARM_REGNUM
19469 && (reg != LR_REGNUM
19470 || ! really_return
19471 || ! IS_INTERRUPT (func_type)))
19473 sprintf (instr, "ldr%s\t%%|%s, [%%|sp], #4", conditional,
19474 (reg == LR_REGNUM) ? return_reg : reg_names[reg]);
19476 else
19478 char *p;
19479 int first = 1;
19481 /* Generate the load multiple instruction to restore the
19482 registers. Note we can get here, even if
19483 frame_pointer_needed is true, but only if sp already
19484 points to the base of the saved core registers. */
19485 if (live_regs_mask & (1 << SP_REGNUM))
19487 unsigned HOST_WIDE_INT stack_adjust;
19489 stack_adjust = offsets->outgoing_args - offsets->saved_regs;
19490 gcc_assert (stack_adjust == 0 || stack_adjust == 4);
19492 if (stack_adjust && arm_arch5 && TARGET_ARM)
19493 sprintf (instr, "ldmib%s\t%%|sp, {", conditional);
19494 else
19496 /* If we can't use ldmib (SA110 bug),
19497 then try to pop r3 instead. */
19498 if (stack_adjust)
19499 live_regs_mask |= 1 << 3;
19501 sprintf (instr, "ldmfd%s\t%%|sp, {", conditional);
19504 /* For interrupt returns we have to use an LDM rather than
19505 a POP so that we can use the exception return variant. */
19506 else if (IS_INTERRUPT (func_type))
19507 sprintf (instr, "ldmfd%s\t%%|sp!, {", conditional);
19508 else
19509 sprintf (instr, "pop%s\t{", conditional);
19511 p = instr + strlen (instr);
19513 for (reg = 0; reg <= SP_REGNUM; reg++)
19514 if (live_regs_mask & (1 << reg))
19516 int l = strlen (reg_names[reg]);
19518 if (first)
19519 first = 0;
19520 else
19522 memcpy (p, ", ", 2);
19523 p += 2;
19526 memcpy (p, "%|", 2);
19527 memcpy (p + 2, reg_names[reg], l);
19528 p += l + 2;
19531 if (live_regs_mask & (1 << LR_REGNUM))
19533 sprintf (p, "%s%%|%s}", first ? "" : ", ", return_reg);
19534 /* If returning from an interrupt, restore the CPSR. */
19535 if (IS_INTERRUPT (func_type))
19536 strcat (p, "^");
19538 else
19539 strcpy (p, "}");
19542 output_asm_insn (instr, & operand);
19544 /* See if we need to generate an extra instruction to
19545 perform the actual function return. */
19546 if (really_return
19547 && func_type != ARM_FT_INTERWORKED
19548 && (live_regs_mask & (1 << LR_REGNUM)) != 0)
19550 /* The return has already been handled
19551 by loading the LR into the PC. */
19552 return "";
19556 if (really_return)
19558 switch ((int) ARM_FUNC_TYPE (func_type))
19560 case ARM_FT_ISR:
19561 case ARM_FT_FIQ:
19562 /* ??? This is wrong for unified assembly syntax. */
19563 sprintf (instr, "sub%ss\t%%|pc, %%|lr, #4", conditional);
19564 break;
19566 case ARM_FT_INTERWORKED:
19567 gcc_assert (arm_arch5 || arm_arch4t);
19568 sprintf (instr, "bx%s\t%%|lr", conditional);
19569 break;
19571 case ARM_FT_EXCEPTION:
19572 /* ??? This is wrong for unified assembly syntax. */
19573 sprintf (instr, "mov%ss\t%%|pc, %%|lr", conditional);
19574 break;
19576 default:
19577 if (IS_CMSE_ENTRY (func_type))
19579 /* Check if we have to clear the 'GE bits', which are only used if
19580 parallel add and subtraction instructions are available. */
19581 if (TARGET_INT_SIMD)
19582 snprintf (instr, sizeof (instr),
19583 "msr%s\tAPSR_nzcvqg, %%|lr", conditional);
19584 else
19585 snprintf (instr, sizeof (instr),
19586 "msr%s\tAPSR_nzcvq, %%|lr", conditional);
19588 output_asm_insn (instr, & operand);
19589 if (TARGET_HARD_FLOAT && !TARGET_THUMB1)
19591 /* Clear the cumulative exception-status bits (0-4,7) and the
19592 condition code bits (28-31) of the FPSCR. We need to
19593 remember to clear the first scratch register used (IP) and
19594 save and restore the second (r4). */
19595 snprintf (instr, sizeof (instr), "push\t{%%|r4}");
19596 output_asm_insn (instr, & operand);
19597 snprintf (instr, sizeof (instr), "vmrs\t%%|ip, fpscr");
19598 output_asm_insn (instr, & operand);
19599 snprintf (instr, sizeof (instr), "movw\t%%|r4, #65376");
19600 output_asm_insn (instr, & operand);
19601 snprintf (instr, sizeof (instr), "movt\t%%|r4, #4095");
19602 output_asm_insn (instr, & operand);
19603 snprintf (instr, sizeof (instr), "and\t%%|ip, %%|r4");
19604 output_asm_insn (instr, & operand);
19605 snprintf (instr, sizeof (instr), "vmsr\tfpscr, %%|ip");
19606 output_asm_insn (instr, & operand);
19607 snprintf (instr, sizeof (instr), "pop\t{%%|r4}");
19608 output_asm_insn (instr, & operand);
19609 snprintf (instr, sizeof (instr), "mov\t%%|ip, %%|lr");
19610 output_asm_insn (instr, & operand);
19612 snprintf (instr, sizeof (instr), "bxns\t%%|lr");
19614 /* Use bx if it's available. */
19615 else if (arm_arch5 || arm_arch4t)
19616 sprintf (instr, "bx%s\t%%|lr", conditional);
19617 else
19618 sprintf (instr, "mov%s\t%%|pc, %%|lr", conditional);
19619 break;
19622 output_asm_insn (instr, & operand);
19625 return "";
19628 /* Output in FILE asm statements needed to declare the NAME of the function
19629 defined by its DECL node. */
19631 void
19632 arm_asm_declare_function_name (FILE *file, const char *name, tree decl)
19634 size_t cmse_name_len;
19635 char *cmse_name = 0;
19636 char cmse_prefix[] = "__acle_se_";
19638 /* When compiling with ARMv8-M Security Extensions enabled, we should print an
19639 extra function label for each function with the 'cmse_nonsecure_entry'
19640 attribute. This extra function label should be prepended with
19641 '__acle_se_', telling the linker that it needs to create secure gateway
19642 veneers for this function. */
19643 if (use_cmse && lookup_attribute ("cmse_nonsecure_entry",
19644 DECL_ATTRIBUTES (decl)))
19646 cmse_name_len = sizeof (cmse_prefix) + strlen (name);
19647 cmse_name = XALLOCAVEC (char, cmse_name_len);
19648 snprintf (cmse_name, cmse_name_len, "%s%s", cmse_prefix, name);
19649 targetm.asm_out.globalize_label (file, cmse_name);
19651 ARM_DECLARE_FUNCTION_NAME (file, cmse_name, decl);
19652 ASM_OUTPUT_TYPE_DIRECTIVE (file, cmse_name, "function");
19655 ARM_DECLARE_FUNCTION_NAME (file, name, decl);
19656 ASM_OUTPUT_TYPE_DIRECTIVE (file, name, "function");
19657 ASM_DECLARE_RESULT (file, DECL_RESULT (decl));
19658 ASM_OUTPUT_LABEL (file, name);
19660 if (cmse_name)
19661 ASM_OUTPUT_LABEL (file, cmse_name);
19663 ARM_OUTPUT_FN_UNWIND (file, TRUE);
19666 /* Write the function name into the code section, directly preceding
19667 the function prologue.
19669 Code will be output similar to this:
19670 t0
19671 .ascii "arm_poke_function_name", 0
19672 .align
19673 t1
19674 .word 0xff000000 + (t1 - t0)
19675 arm_poke_function_name
19676 mov ip, sp
19677 stmfd sp!, {fp, ip, lr, pc}
19678 sub fp, ip, #4
19680 When performing a stack backtrace, code can inspect the value
19681 of 'pc' stored at 'fp' + 0. If the trace function then looks
19682 at location pc - 12 and the top 8 bits are set, then we know
19683 that there is a function name embedded immediately preceding this
19684 location, and that its length is ((pc[-3]) & ~0xff000000).
19686 We assume that pc is declared as a pointer to an unsigned long.
19688 It is of no benefit to output the function name if we are assembling
19689 a leaf function. These function types will not contain a stack
19690 backtrace structure; therefore it is not possible to determine the
19691 function name. */
19692 void
19693 arm_poke_function_name (FILE *stream, const char *name)
19695 unsigned long alignlength;
19696 unsigned long length;
19697 rtx x;
19699 length = strlen (name) + 1;
19700 alignlength = ROUND_UP_WORD (length);
19702 ASM_OUTPUT_ASCII (stream, name, length);
19703 ASM_OUTPUT_ALIGN (stream, 2);
19704 x = GEN_INT ((unsigned HOST_WIDE_INT) 0xff000000 + alignlength);
19705 assemble_aligned_integer (UNITS_PER_WORD, x);
19708 /* Place some comments into the assembler stream
19709 describing the current function. */
19710 static void
19711 arm_output_function_prologue (FILE *f)
19713 unsigned long func_type;
19715 /* Sanity check. */
19716 gcc_assert (!arm_ccfsm_state && !arm_target_insn);
19718 func_type = arm_current_func_type ();
19720 switch ((int) ARM_FUNC_TYPE (func_type))
19722 default:
19723 case ARM_FT_NORMAL:
19724 break;
19725 case ARM_FT_INTERWORKED:
19726 asm_fprintf (f, "\t%@ Function supports interworking.\n");
19727 break;
19728 case ARM_FT_ISR:
19729 asm_fprintf (f, "\t%@ Interrupt Service Routine.\n");
19730 break;
19731 case ARM_FT_FIQ:
19732 asm_fprintf (f, "\t%@ Fast Interrupt Service Routine.\n");
19733 break;
19734 case ARM_FT_EXCEPTION:
19735 asm_fprintf (f, "\t%@ ARM Exception Handler.\n");
19736 break;
19739 if (IS_NAKED (func_type))
19740 asm_fprintf (f, "\t%@ Naked Function: prologue and epilogue provided by programmer.\n");
19742 if (IS_VOLATILE (func_type))
19743 asm_fprintf (f, "\t%@ Volatile: function does not return.\n");
19745 if (IS_NESTED (func_type))
19746 asm_fprintf (f, "\t%@ Nested: function declared inside another function.\n");
19747 if (IS_STACKALIGN (func_type))
19748 asm_fprintf (f, "\t%@ Stack Align: May be called with mis-aligned SP.\n");
19749 if (IS_CMSE_ENTRY (func_type))
19750 asm_fprintf (f, "\t%@ Non-secure entry function: called from non-secure code.\n");
19752 asm_fprintf (f, "\t%@ args = %d, pretend = %d, frame = %wd\n",
19753 crtl->args.size,
19754 crtl->args.pretend_args_size,
19755 (HOST_WIDE_INT) get_frame_size ());
19757 asm_fprintf (f, "\t%@ frame_needed = %d, uses_anonymous_args = %d\n",
19758 frame_pointer_needed,
19759 cfun->machine->uses_anonymous_args);
19761 if (cfun->machine->lr_save_eliminated)
19762 asm_fprintf (f, "\t%@ link register save eliminated.\n");
19764 if (crtl->calls_eh_return)
19765 asm_fprintf (f, "\t@ Calls __builtin_eh_return.\n");
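/* Representative output (illustrative only) for a plain function with a
   frame pointer, 8 bytes of locals and no anonymous arguments:

	@ args = 0, pretend = 0, frame = 8
	@ frame_needed = 1, uses_anonymous_args = 0

   preceded, where applicable, by the attribute notes printed above.  */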
19769 static void
19770 arm_output_function_epilogue (FILE *)
19772 arm_stack_offsets *offsets;
19774 if (TARGET_THUMB1)
19776 int regno;
19778 /* Emit any call-via-reg trampolines that are needed for v4t support
19779 of call_reg and call_value_reg type insns. */
19780 for (regno = 0; regno < LR_REGNUM; regno++)
19782 rtx label = cfun->machine->call_via[regno];
19784 if (label != NULL)
19786 switch_to_section (function_section (current_function_decl));
19787 targetm.asm_out.internal_label (asm_out_file, "L",
19788 CODE_LABEL_NUMBER (label));
19789 asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
19793 /* ??? Probably not safe to set this here, since it assumes that a
19794 function will be emitted as assembly immediately after we generate
19795 RTL for it. This does not happen for inline functions. */
19796 cfun->machine->return_used_this_function = 0;
19798 else /* TARGET_32BIT */
19800 /* We need to take into account any stack-frame rounding. */
19801 offsets = arm_get_frame_offsets ();
19803 gcc_assert (!use_return_insn (FALSE, NULL)
19804 || (cfun->machine->return_used_this_function != 0)
19805 || offsets->saved_regs == offsets->outgoing_args
19806 || frame_pointer_needed);
19810 /* Generate and emit a sequence of insns equivalent to PUSH, but using
19811 STR and STRD. If an even number of registers are being pushed, one
19812 or more STRD patterns are created for each register pair. If an
19813 odd number of registers are pushed, emit an initial STR followed by
19814 as many STRD instructions as are needed. This works best when the
19815 stack is initially 64-bit aligned (the normal case), since it
19816 ensures that each STRD is also 64-bit aligned. */
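/* Rough sketch of the effect (illustrative, not from the original source):
   for a SAVED_REGS_MASK covering {r4, r5, r6} the emitted sequence is
   approximately

	str	r4, [sp, #-12]!
	strd	r5, r6, [sp, #4]

   i.e. the odd register allocates the whole area with writeback and the
   remaining pair uses plain offset addressing.  */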
19817 static void
19818 thumb2_emit_strd_push (unsigned long saved_regs_mask)
19820 int num_regs = 0;
19821 int i;
19822 int regno;
19823 rtx par = NULL_RTX;
19824 rtx dwarf = NULL_RTX;
19825 rtx tmp;
19826 bool first = true;
19828 num_regs = bit_count (saved_regs_mask);
19830 /* Must be at least one register to save, and can't save SP or PC. */
19831 gcc_assert (num_regs > 0 && num_regs <= 14);
19832 gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
19833 gcc_assert (!(saved_regs_mask & (1 << PC_REGNUM)));
19835 /* Create sequence for DWARF info. All the frame-related data for
19836 debugging is held in this wrapper. */
19837 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_regs + 1));
19839 /* Describe the stack adjustment. */
19840 tmp = gen_rtx_SET (stack_pointer_rtx,
19841 plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
19842 RTX_FRAME_RELATED_P (tmp) = 1;
19843 XVECEXP (dwarf, 0, 0) = tmp;
19845 /* Find the first register. */
19846 for (regno = 0; (saved_regs_mask & (1 << regno)) == 0; regno++)
19847 continue;
19849 i = 0;
19851 /* If there's an odd number of registers to push, start off by
19852 pushing a single register. This ensures that subsequent strd
19853 operations are dword aligned (assuming that SP was originally
19854 64-bit aligned). */
19855 if ((num_regs & 1) != 0)
19857 rtx reg, mem, insn;
19859 reg = gen_rtx_REG (SImode, regno);
19860 if (num_regs == 1)
19861 mem = gen_frame_mem (Pmode, gen_rtx_PRE_DEC (Pmode,
19862 stack_pointer_rtx));
19863 else
19864 mem = gen_frame_mem (Pmode,
19865 gen_rtx_PRE_MODIFY
19866 (Pmode, stack_pointer_rtx,
19867 plus_constant (Pmode, stack_pointer_rtx,
19868 -4 * num_regs)));
19870 tmp = gen_rtx_SET (mem, reg);
19871 RTX_FRAME_RELATED_P (tmp) = 1;
19872 insn = emit_insn (tmp);
19873 RTX_FRAME_RELATED_P (insn) = 1;
19874 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
19875 tmp = gen_rtx_SET (gen_frame_mem (Pmode, stack_pointer_rtx), reg);
19876 RTX_FRAME_RELATED_P (tmp) = 1;
19877 i++;
19878 regno++;
19879 XVECEXP (dwarf, 0, i) = tmp;
19880 first = false;
19883 while (i < num_regs)
19884 if (saved_regs_mask & (1 << regno))
19886 rtx reg1, reg2, mem1, mem2;
19887 rtx tmp0, tmp1, tmp2;
19888 int regno2;
19890 /* Find the register to pair with this one. */
19891 for (regno2 = regno + 1; (saved_regs_mask & (1 << regno2)) == 0;
19892 regno2++)
19893 continue;
19895 reg1 = gen_rtx_REG (SImode, regno);
19896 reg2 = gen_rtx_REG (SImode, regno2);
19898 if (first)
19900 rtx insn;
19902 first = false;
19903 mem1 = gen_frame_mem (Pmode, plus_constant (Pmode,
19904 stack_pointer_rtx,
19905 -4 * num_regs));
19906 mem2 = gen_frame_mem (Pmode, plus_constant (Pmode,
19907 stack_pointer_rtx,
19908 -4 * (num_regs - 1)));
19909 tmp0 = gen_rtx_SET (stack_pointer_rtx,
19910 plus_constant (Pmode, stack_pointer_rtx,
19911 -4 * (num_regs)));
19912 tmp1 = gen_rtx_SET (mem1, reg1);
19913 tmp2 = gen_rtx_SET (mem2, reg2);
19914 RTX_FRAME_RELATED_P (tmp0) = 1;
19915 RTX_FRAME_RELATED_P (tmp1) = 1;
19916 RTX_FRAME_RELATED_P (tmp2) = 1;
19917 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (3));
19918 XVECEXP (par, 0, 0) = tmp0;
19919 XVECEXP (par, 0, 1) = tmp1;
19920 XVECEXP (par, 0, 2) = tmp2;
19921 insn = emit_insn (par);
19922 RTX_FRAME_RELATED_P (insn) = 1;
19923 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
19925 else
19927 mem1 = gen_frame_mem (Pmode, plus_constant (Pmode,
19928 stack_pointer_rtx,
19929 4 * i));
19930 mem2 = gen_frame_mem (Pmode, plus_constant (Pmode,
19931 stack_pointer_rtx,
19932 4 * (i + 1)));
19933 tmp1 = gen_rtx_SET (mem1, reg1);
19934 tmp2 = gen_rtx_SET (mem2, reg2);
19935 RTX_FRAME_RELATED_P (tmp1) = 1;
19936 RTX_FRAME_RELATED_P (tmp2) = 1;
19937 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
19938 XVECEXP (par, 0, 0) = tmp1;
19939 XVECEXP (par, 0, 1) = tmp2;
19940 emit_insn (par);
19943 /* Create unwind information. This is an approximation. */
19944 tmp1 = gen_rtx_SET (gen_frame_mem (Pmode,
19945 plus_constant (Pmode,
19946 stack_pointer_rtx,
19947 4 * i)),
19948 reg1);
19949 tmp2 = gen_rtx_SET (gen_frame_mem (Pmode,
19950 plus_constant (Pmode,
19951 stack_pointer_rtx,
19952 4 * (i + 1))),
19953 reg2);
19955 RTX_FRAME_RELATED_P (tmp1) = 1;
19956 RTX_FRAME_RELATED_P (tmp2) = 1;
19957 XVECEXP (dwarf, 0, i + 1) = tmp1;
19958 XVECEXP (dwarf, 0, i + 2) = tmp2;
19959 i += 2;
19960 regno = regno2 + 1;
19962 else
19963 regno++;
19965 return;
19968 /* STRD in ARM mode requires consecutive registers. This function emits STRD
19969 whenever possible, otherwise it emits single-word stores. The first store
19970 also allocates stack space for all saved registers, using writeback with
19971 post-addressing mode. All other stores use offset addressing. If no STRD
19972 can be emitted, this function emits a sequence of single-word stores,
19973 and not an STM as before, because single-word stores provide more freedom
19974 for scheduling and can be turned into an STM by peephole optimizations. */
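/* Rough sketch (illustrative only): for a SAVED_REGS_MASK covering
   {r4, r5, r7} the loop below produces approximately

	strd	r4, r5, [sp, #-12]!
	str	r7, [sp, #8]

   since r4/r5 form an even-starting consecutive pair while r7 does not.  */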
19975 static void
19976 arm_emit_strd_push (unsigned long saved_regs_mask)
19978 int num_regs = 0;
19979 int i, j, dwarf_index = 0;
19980 int offset = 0;
19981 rtx dwarf = NULL_RTX;
19982 rtx insn = NULL_RTX;
19983 rtx tmp, mem;
19985 /* TODO: More efficient code can be emitted by changing the
19986 layout, e.g., first push all pairs that can use STRD to keep the
19987 stack aligned, and then push all other registers. */
19988 for (i = 0; i <= LAST_ARM_REGNUM; i++)
19989 if (saved_regs_mask & (1 << i))
19990 num_regs++;
19992 gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
19993 gcc_assert (!(saved_regs_mask & (1 << PC_REGNUM)));
19994 gcc_assert (num_regs > 0);
19996 /* Create sequence for DWARF info. */
19997 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_regs + 1));
19999 /* For dwarf info, we generate explicit stack update. */
20000 tmp = gen_rtx_SET (stack_pointer_rtx,
20001 plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
20002 RTX_FRAME_RELATED_P (tmp) = 1;
20003 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
20005 /* Save registers. */
20006 offset = - 4 * num_regs;
20007 j = 0;
20008 while (j <= LAST_ARM_REGNUM)
20009 if (saved_regs_mask & (1 << j))
20011 if ((j % 2 == 0)
20012 && (saved_regs_mask & (1 << (j + 1))))
20014 /* Current register and next register form a register pair for
20015 which STRD can be generated. */
20016 if (offset < 0)
20018 /* Allocate stack space for all saved registers. */
20019 tmp = plus_constant (Pmode, stack_pointer_rtx, offset);
20020 tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
20021 mem = gen_frame_mem (DImode, tmp);
20022 offset = 0;
20024 else if (offset > 0)
20025 mem = gen_frame_mem (DImode,
20026 plus_constant (Pmode,
20027 stack_pointer_rtx,
20028 offset));
20029 else
20030 mem = gen_frame_mem (DImode, stack_pointer_rtx);
20032 tmp = gen_rtx_SET (mem, gen_rtx_REG (DImode, j));
20033 RTX_FRAME_RELATED_P (tmp) = 1;
20034 tmp = emit_insn (tmp);
20036 /* Record the first store insn. */
20037 if (dwarf_index == 1)
20038 insn = tmp;
20040 /* Generate dwarf info. */
20041 mem = gen_frame_mem (SImode,
20042 plus_constant (Pmode,
20043 stack_pointer_rtx,
20044 offset));
20045 tmp = gen_rtx_SET (mem, gen_rtx_REG (SImode, j));
20046 RTX_FRAME_RELATED_P (tmp) = 1;
20047 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
20049 mem = gen_frame_mem (SImode,
20050 plus_constant (Pmode,
20051 stack_pointer_rtx,
20052 offset + 4));
20053 tmp = gen_rtx_SET (mem, gen_rtx_REG (SImode, j + 1));
20054 RTX_FRAME_RELATED_P (tmp) = 1;
20055 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
20057 offset += 8;
20058 j += 2;
20060 else
20062 /* Emit a single word store. */
20063 if (offset < 0)
20065 /* Allocate stack space for all saved registers. */
20066 tmp = plus_constant (Pmode, stack_pointer_rtx, offset);
20067 tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
20068 mem = gen_frame_mem (SImode, tmp);
20069 offset = 0;
20071 else if (offset > 0)
20072 mem = gen_frame_mem (SImode,
20073 plus_constant (Pmode,
20074 stack_pointer_rtx,
20075 offset));
20076 else
20077 mem = gen_frame_mem (SImode, stack_pointer_rtx);
20079 tmp = gen_rtx_SET (mem, gen_rtx_REG (SImode, j));
20080 RTX_FRAME_RELATED_P (tmp) = 1;
20081 tmp = emit_insn (tmp);
20083 /* Record the first store insn. */
20084 if (dwarf_index == 1)
20085 insn = tmp;
20087 /* Generate dwarf info. */
20088 mem = gen_frame_mem (SImode,
20089 plus_constant(Pmode,
20090 stack_pointer_rtx,
20091 offset));
20092 tmp = gen_rtx_SET (mem, gen_rtx_REG (SImode, j));
20093 RTX_FRAME_RELATED_P (tmp) = 1;
20094 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
20096 offset += 4;
20097 j += 1;
20100 else
20101 j++;
20103 /* Attach dwarf info to the first insn we generate. */
20104 gcc_assert (insn != NULL_RTX);
20105 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
20106 RTX_FRAME_RELATED_P (insn) = 1;
20109 /* Generate and emit an insn that we will recognize as a push_multi.
20110 Unfortunately, since this insn does not reflect very well the actual
20111 semantics of the operation, we need to annotate the insn for the benefit
20112 of DWARF2 frame unwind information. DWARF_REGS_MASK is a subset of
20113 MASK for registers that should be annotated for DWARF2 frame unwind
20114 information. */
20115 static rtx
20116 emit_multi_reg_push (unsigned long mask, unsigned long dwarf_regs_mask)
20118 int num_regs = 0;
20119 int num_dwarf_regs = 0;
20120 int i, j;
20121 rtx par;
20122 rtx dwarf;
20123 int dwarf_par_index;
20124 rtx tmp, reg;
20126 /* We don't record the PC in the dwarf frame information. */
20127 dwarf_regs_mask &= ~(1 << PC_REGNUM);
20129 for (i = 0; i <= LAST_ARM_REGNUM; i++)
20131 if (mask & (1 << i))
20132 num_regs++;
20133 if (dwarf_regs_mask & (1 << i))
20134 num_dwarf_regs++;
20137 gcc_assert (num_regs && num_regs <= 16);
20138 gcc_assert ((dwarf_regs_mask & ~mask) == 0);
20140 /* For the body of the insn we are going to generate an UNSPEC in
20141 parallel with several USEs. This allows the insn to be recognized
20142 by the push_multi pattern in the arm.md file.
20144 The body of the insn looks something like this:
20146 (parallel [
20147 (set (mem:BLK (pre_modify:SI (reg:SI sp)
20148 (const_int:SI <num>)))
20149 (unspec:BLK [(reg:SI r4)] UNSPEC_PUSH_MULT))
20150 (use (reg:SI XX))
20151 (use (reg:SI YY))
20155 For the frame note however, we try to be more explicit and actually
20156 show each register being stored into the stack frame, plus a (single)
20157 decrement of the stack pointer. We do it this way in order to be
20158 friendly to the stack unwinding code, which only wants to see a single
20159 stack decrement per instruction. The RTL we generate for the note looks
20160 something like this:
20162 (sequence [
20163 (set (reg:SI sp) (plus:SI (reg:SI sp) (const_int -20)))
20164 (set (mem:SI (reg:SI sp)) (reg:SI r4))
20165 (set (mem:SI (plus:SI (reg:SI sp) (const_int 4))) (reg:SI XX))
20166 (set (mem:SI (plus:SI (reg:SI sp) (const_int 8))) (reg:SI YY))
20170 FIXME:: In an ideal world the PRE_MODIFY would not exist and
20171 instead we'd have a parallel expression detailing all
20172 the stores to the various memory addresses so that debug
20173 information is more up-to-date. Remember however while writing
20174 this to take care of the constraints with the push instruction.
20176 Note also that this has to be taken care of for the VFP registers.
20178 For more see PR43399. */
20180 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs));
20181 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_dwarf_regs + 1));
20182 dwarf_par_index = 1;
20184 for (i = 0; i <= LAST_ARM_REGNUM; i++)
20186 if (mask & (1 << i))
20188 reg = gen_rtx_REG (SImode, i);
20190 XVECEXP (par, 0, 0)
20191 = gen_rtx_SET (gen_frame_mem
20192 (BLKmode,
20193 gen_rtx_PRE_MODIFY (Pmode,
20194 stack_pointer_rtx,
20195 plus_constant
20196 (Pmode, stack_pointer_rtx,
20197 -4 * num_regs))
20199 gen_rtx_UNSPEC (BLKmode,
20200 gen_rtvec (1, reg),
20201 UNSPEC_PUSH_MULT));
20203 if (dwarf_regs_mask & (1 << i))
20205 tmp = gen_rtx_SET (gen_frame_mem (SImode, stack_pointer_rtx),
20206 reg);
20207 RTX_FRAME_RELATED_P (tmp) = 1;
20208 XVECEXP (dwarf, 0, dwarf_par_index++) = tmp;
20211 break;
20215 for (j = 1, i++; j < num_regs; i++)
20217 if (mask & (1 << i))
20219 reg = gen_rtx_REG (SImode, i);
20221 XVECEXP (par, 0, j) = gen_rtx_USE (VOIDmode, reg);
20223 if (dwarf_regs_mask & (1 << i))
20225 tmp
20226 = gen_rtx_SET (gen_frame_mem
20227 (SImode,
20228 plus_constant (Pmode, stack_pointer_rtx,
20229 4 * j)),
20230 reg);
20231 RTX_FRAME_RELATED_P (tmp) = 1;
20232 XVECEXP (dwarf, 0, dwarf_par_index++) = tmp;
20235 j++;
20239 par = emit_insn (par);
20241 tmp = gen_rtx_SET (stack_pointer_rtx,
20242 plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
20243 RTX_FRAME_RELATED_P (tmp) = 1;
20244 XVECEXP (dwarf, 0, 0) = tmp;
20246 add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
20248 return par;
20251 /* Add a REG_CFA_ADJUST_CFA REG note to INSN.
20252 SIZE is the offset to be adjusted.
20253 DEST and SRC might be stack_pointer_rtx or hard_frame_pointer_rtx. */
20254 static void
20255 arm_add_cfa_adjust_cfa_note (rtx insn, int size, rtx dest, rtx src)
20257 rtx dwarf;
20259 RTX_FRAME_RELATED_P (insn) = 1;
20260 dwarf = gen_rtx_SET (dest, plus_constant (Pmode, src, size));
20261 add_reg_note (insn, REG_CFA_ADJUST_CFA, dwarf);
20264 /* Generate and emit an insn pattern that we will recognize as a pop_multi.
20265 SAVED_REGS_MASK shows which registers need to be restored.
20267 Unfortunately, since this insn does not reflect very well the actual
20268 semantics of the operation, we need to annotate the insn for the benefit
20269 of DWARF2 frame unwind information. */
20270 static void
20271 arm_emit_multi_reg_pop (unsigned long saved_regs_mask)
20273 int num_regs = 0;
20274 int i, j;
20275 rtx par;
20276 rtx dwarf = NULL_RTX;
20277 rtx tmp, reg;
20278 bool return_in_pc = saved_regs_mask & (1 << PC_REGNUM);
20279 int offset_adj;
20280 int emit_update;
20282 offset_adj = return_in_pc ? 1 : 0;
20283 for (i = 0; i <= LAST_ARM_REGNUM; i++)
20284 if (saved_regs_mask & (1 << i))
20285 num_regs++;
20287 gcc_assert (num_regs && num_regs <= 16);
20289 /* If SP is in reglist, then we don't emit SP update insn. */
20290 emit_update = (saved_regs_mask & (1 << SP_REGNUM)) ? 0 : 1;
20292 /* The parallel needs to hold num_regs SETs
20293 and one SET for the stack update. */
20294 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs + emit_update + offset_adj));
20296 if (return_in_pc)
20297 XVECEXP (par, 0, 0) = ret_rtx;
20299 if (emit_update)
20301 /* Increment the stack pointer, based on there being
20302 num_regs 4-byte registers to restore. */
20303 tmp = gen_rtx_SET (stack_pointer_rtx,
20304 plus_constant (Pmode,
20305 stack_pointer_rtx,
20306 4 * num_regs));
20307 RTX_FRAME_RELATED_P (tmp) = 1;
20308 XVECEXP (par, 0, offset_adj) = tmp;
20311 /* Now restore every reg, which may include PC. */
20312 for (j = 0, i = 0; j < num_regs; i++)
20313 if (saved_regs_mask & (1 << i))
20315 reg = gen_rtx_REG (SImode, i);
20316 if ((num_regs == 1) && emit_update && !return_in_pc)
20318 /* Emit single load with writeback. */
20319 tmp = gen_frame_mem (SImode,
20320 gen_rtx_POST_INC (Pmode,
20321 stack_pointer_rtx));
20322 tmp = emit_insn (gen_rtx_SET (reg, tmp));
20323 REG_NOTES (tmp) = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20324 return;
20327 tmp = gen_rtx_SET (reg,
20328 gen_frame_mem
20329 (SImode,
20330 plus_constant (Pmode, stack_pointer_rtx, 4 * j)));
20331 RTX_FRAME_RELATED_P (tmp) = 1;
20332 XVECEXP (par, 0, j + emit_update + offset_adj) = tmp;
20334 /* We need to maintain a sequence for DWARF info too. As dwarf info
20335 should not have PC, skip PC. */
20336 if (i != PC_REGNUM)
20337 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20339 j++;
20342 if (return_in_pc)
20343 par = emit_jump_insn (par);
20344 else
20345 par = emit_insn (par);
20347 REG_NOTES (par) = dwarf;
20348 if (!return_in_pc)
20349 arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD * num_regs,
20350 stack_pointer_rtx, stack_pointer_rtx);
20353 /* Generate and emit an insn pattern that we will recognize as a pop_multi
20354 of NUM_REGS consecutive VFP regs, starting at FIRST_REG.
20356 Unfortunately, since this insn does not reflect very well the actual
20357 semantics of the operation, we need to annotate the insn for the benefit
20358 of DWARF2 frame unwind information. */
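/* As an illustration (not in the original source): restoring d8-d15 with
   BASE_REG being the stack pointer corresponds roughly to

	vldm	sp!, {d8-d15}

   with one REG_CFA_RESTORE note per D register; more than 16 D registers
   are split into two such pops by the recursion below.  */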
20359 static void
20360 arm_emit_vfp_multi_reg_pop (int first_reg, int num_regs, rtx base_reg)
20362 int i, j;
20363 rtx par;
20364 rtx dwarf = NULL_RTX;
20365 rtx tmp, reg;
20367 gcc_assert (num_regs && num_regs <= 32);
20369 /* Workaround ARM10 VFPr1 bug. */
20370 if (num_regs == 2 && !arm_arch6)
20372 if (first_reg == 15)
20373 first_reg--;
20375 num_regs++;
20378 /* We can emit at most 16 D-registers in a single pop_multi instruction, and
20379 there could be up to 32 D-registers to restore.
20380 If there are more than 16 D-registers, make two recursive calls,
20381 each of which emits one pop_multi instruction. */
20382 if (num_regs > 16)
20384 arm_emit_vfp_multi_reg_pop (first_reg, 16, base_reg);
20385 arm_emit_vfp_multi_reg_pop (first_reg + 16, num_regs - 16, base_reg);
20386 return;
20389 /* The parallel needs to hold num_regs SETs
20390 and one SET for the stack update. */
20391 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs + 1));
20393 /* Increment the stack pointer, based on there being
20394 num_regs 8-byte registers to restore. */
20395 tmp = gen_rtx_SET (base_reg, plus_constant (Pmode, base_reg, 8 * num_regs));
20396 RTX_FRAME_RELATED_P (tmp) = 1;
20397 XVECEXP (par, 0, 0) = tmp;
20399 /* Now show every reg that will be restored, using a SET for each. */
20400 for (j = 0, i=first_reg; j < num_regs; i += 2)
20402 reg = gen_rtx_REG (DFmode, i);
20404 tmp = gen_rtx_SET (reg,
20405 gen_frame_mem
20406 (DFmode,
20407 plus_constant (Pmode, base_reg, 8 * j)));
20408 RTX_FRAME_RELATED_P (tmp) = 1;
20409 XVECEXP (par, 0, j + 1) = tmp;
20411 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20413 j++;
20416 par = emit_insn (par);
20417 REG_NOTES (par) = dwarf;
20419 /* Make sure the CFA doesn't leave with IP_REGNUM, to allow unwinding from FP. */
20420 if (REGNO (base_reg) == IP_REGNUM)
20422 RTX_FRAME_RELATED_P (par) = 1;
20423 add_reg_note (par, REG_CFA_DEF_CFA, hard_frame_pointer_rtx);
20425 else
20426 arm_add_cfa_adjust_cfa_note (par, 2 * UNITS_PER_WORD * num_regs,
20427 base_reg, base_reg);
20430 /* Generate and emit a pattern that will be recognized as an LDRD pattern. If an
20431 even number of registers is being popped, multiple LDRD patterns are created
20432 for all register pairs. If an odd number of registers is popped, the last
20433 register is loaded using an LDR pattern. */
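/* Rough sketch (illustrative only): popping {r4, r5, r6, pc} comes out
   approximately as

	ldrd	r4, r5, [sp]
	add	sp, sp, #8
	pop	{r6, pc}

   with the odd leftover register and PC handled after the LDRD pairs.  */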
20434 static void
20435 thumb2_emit_ldrd_pop (unsigned long saved_regs_mask)
20437 int num_regs = 0;
20438 int i, j;
20439 rtx par = NULL_RTX;
20440 rtx dwarf = NULL_RTX;
20441 rtx tmp, reg, tmp1;
20442 bool return_in_pc = saved_regs_mask & (1 << PC_REGNUM);
20444 for (i = 0; i <= LAST_ARM_REGNUM; i++)
20445 if (saved_regs_mask & (1 << i))
20446 num_regs++;
20448 gcc_assert (num_regs && num_regs <= 16);
20450 /* We cannot generate an LDRD for PC, so reduce the count if PC is
20451 to be popped. Thus, if num_regs was even it now becomes odd and
20452 we can generate a pop ending in PC; if num_regs was odd it becomes
20453 even and an LDR with return can be generated for PC. */
20454 if (return_in_pc)
20455 num_regs--;
20457 gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
20459 /* Var j iterates over all the registers to gather all the registers in
20460 saved_regs_mask. Var i gives the index of saved registers in the stack
20461 frame. A PARALLEL RTX of a register pair is created here, so that the
20462 pattern for LDRD can be matched. As PC is always the last register to be
20463 popped, and we have already decremented num_regs if PC is present, we
20464 don't have to worry about PC in this loop. */
20465 for (i = 0, j = 0; i < (num_regs - (num_regs % 2)); j++)
20466 if (saved_regs_mask & (1 << j))
20468 /* Create RTX for memory load. */
20469 reg = gen_rtx_REG (SImode, j);
20470 tmp = gen_rtx_SET (reg,
20471 gen_frame_mem (SImode,
20472 plus_constant (Pmode,
20473 stack_pointer_rtx, 4 * i)));
20474 RTX_FRAME_RELATED_P (tmp) = 1;
20476 if (i % 2 == 0)
20478 /* When saved-register index (i) is even, the RTX to be emitted is
20479 yet to be created. Hence create it first. The LDRD pattern we
20480 are generating is :
20481 [ (SET (reg_t0) (MEM (PLUS (SP) (NUM))))
20482 (SET (reg_t1) (MEM (PLUS (SP) (NUM + 4)))) ]
20483 where target registers need not be consecutive. */
20484 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
20485 dwarf = NULL_RTX;
20488 /* ith register is added in PARALLEL RTX. If i is even, the reg_i is
20489 added as 0th element and if i is odd, reg_i is added as 1st element
20490 of LDRD pattern shown above. */
20491 XVECEXP (par, 0, (i % 2)) = tmp;
20492 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20494 if ((i % 2) == 1)
20496 /* When saved-register index (i) is odd, RTXs for both the registers
20497 to be loaded are generated in above given LDRD pattern, and the
20498 pattern can be emitted now. */
20499 par = emit_insn (par);
20500 REG_NOTES (par) = dwarf;
20501 RTX_FRAME_RELATED_P (par) = 1;
20504 i++;
20507 /* If the number of registers pushed is odd and return_in_pc is false, or the
20508 number of registers is even and return_in_pc is true, the last register is
20509 popped using LDR. It can be PC as well. Hence, adjust the stack first and
20510 then use LDR with post-increment. */
20512 /* Increment the stack pointer, based on there being
20513 num_regs 4-byte registers to restore. */
20514 tmp = gen_rtx_SET (stack_pointer_rtx,
20515 plus_constant (Pmode, stack_pointer_rtx, 4 * i));
20516 RTX_FRAME_RELATED_P (tmp) = 1;
20517 tmp = emit_insn (tmp);
20518 if (!return_in_pc)
20520 arm_add_cfa_adjust_cfa_note (tmp, UNITS_PER_WORD * i,
20521 stack_pointer_rtx, stack_pointer_rtx);
20524 dwarf = NULL_RTX;
20526 if (((num_regs % 2) == 1 && !return_in_pc)
20527 || ((num_regs % 2) == 0 && return_in_pc))
20529 /* Scan for the single register to be popped. Skip until the saved
20530 register is found. */
20531 for (; (saved_regs_mask & (1 << j)) == 0; j++);
20533 /* Gen LDR with post increment here. */
20534 tmp1 = gen_rtx_MEM (SImode,
20535 gen_rtx_POST_INC (SImode,
20536 stack_pointer_rtx));
20537 set_mem_alias_set (tmp1, get_frame_alias_set ());
20539 reg = gen_rtx_REG (SImode, j);
20540 tmp = gen_rtx_SET (reg, tmp1);
20541 RTX_FRAME_RELATED_P (tmp) = 1;
20542 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20544 if (return_in_pc)
20546 /* If return_in_pc, j must be PC_REGNUM. */
20547 gcc_assert (j == PC_REGNUM);
20548 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
20549 XVECEXP (par, 0, 0) = ret_rtx;
20550 XVECEXP (par, 0, 1) = tmp;
20551 par = emit_jump_insn (par);
20553 else
20555 par = emit_insn (tmp);
20556 REG_NOTES (par) = dwarf;
20557 arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD,
20558 stack_pointer_rtx, stack_pointer_rtx);
20562 else if ((num_regs % 2) == 1 && return_in_pc)
20564 /* There are 2 registers to be popped. So, generate the pattern
20565 pop_multiple_with_stack_update_and_return to pop in PC. */
20566 arm_emit_multi_reg_pop (saved_regs_mask & (~((1 << j) - 1)));
20569 return;
20572 /* LDRD in ARM mode needs consecutive registers as operands. This function
20573 emits LDRD whenever possible, otherwise it emits single-word loads. It uses
20574 offset addressing and then generates one separate stack update. This provides
20575 more scheduling freedom, compared to writeback on every load. However,
20576 if the function returns using load into PC directly
20577 (i.e., if PC is in SAVED_REGS_MASK), the stack needs to be updated
20578 before the last load. TODO: Add a peephole optimization to recognize
20579 the new epilogue sequence as an LDM instruction whenever possible. TODO: Add
20580 peephole optimization to merge the load at stack-offset zero
20581 with the stack update instruction using load with writeback
20582 in post-index addressing mode. */
20583 static void
20584 arm_emit_ldrd_pop (unsigned long saved_regs_mask)
20586 int j = 0;
20587 int offset = 0;
20588 rtx par = NULL_RTX;
20589 rtx dwarf = NULL_RTX;
20590 rtx tmp, mem;
20592 /* Restore saved registers. */
20593 gcc_assert (!((saved_regs_mask & (1 << SP_REGNUM))));
20594 j = 0;
20595 while (j <= LAST_ARM_REGNUM)
20596 if (saved_regs_mask & (1 << j))
20598 if ((j % 2) == 0
20599 && (saved_regs_mask & (1 << (j + 1)))
20600 && (j + 1) != PC_REGNUM)
20602 /* Current register and next register form register pair for which
20603 LDRD can be generated. PC is always the last register popped, and
20604 we handle it separately. */
20605 if (offset > 0)
20606 mem = gen_frame_mem (DImode,
20607 plus_constant (Pmode,
20608 stack_pointer_rtx,
20609 offset));
20610 else
20611 mem = gen_frame_mem (DImode, stack_pointer_rtx);
20613 tmp = gen_rtx_SET (gen_rtx_REG (DImode, j), mem);
20614 tmp = emit_insn (tmp);
20615 RTX_FRAME_RELATED_P (tmp) = 1;
20617 /* Generate dwarf info. */
20619 dwarf = alloc_reg_note (REG_CFA_RESTORE,
20620 gen_rtx_REG (SImode, j),
20621 NULL_RTX);
20622 dwarf = alloc_reg_note (REG_CFA_RESTORE,
20623 gen_rtx_REG (SImode, j + 1),
20624 dwarf);
20626 REG_NOTES (tmp) = dwarf;
20628 offset += 8;
20629 j += 2;
20631 else if (j != PC_REGNUM)
20633 /* Emit a single word load. */
20634 if (offset > 0)
20635 mem = gen_frame_mem (SImode,
20636 plus_constant (Pmode,
20637 stack_pointer_rtx,
20638 offset));
20639 else
20640 mem = gen_frame_mem (SImode, stack_pointer_rtx);
20642 tmp = gen_rtx_SET (gen_rtx_REG (SImode, j), mem);
20643 tmp = emit_insn (tmp);
20644 RTX_FRAME_RELATED_P (tmp) = 1;
20646 /* Generate dwarf info. */
20647 REG_NOTES (tmp) = alloc_reg_note (REG_CFA_RESTORE,
20648 gen_rtx_REG (SImode, j),
20649 NULL_RTX);
20651 offset += 4;
20652 j += 1;
20654 else /* j == PC_REGNUM */
20655 j++;
20657 else
20658 j++;
20660 /* Update the stack. */
20661 if (offset > 0)
20663 tmp = gen_rtx_SET (stack_pointer_rtx,
20664 plus_constant (Pmode,
20665 stack_pointer_rtx,
20666 offset));
20667 tmp = emit_insn (tmp);
20668 arm_add_cfa_adjust_cfa_note (tmp, offset,
20669 stack_pointer_rtx, stack_pointer_rtx);
20670 offset = 0;
20673 if (saved_regs_mask & (1 << PC_REGNUM))
20675 /* Only PC is to be popped. */
20676 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
20677 XVECEXP (par, 0, 0) = ret_rtx;
20678 tmp = gen_rtx_SET (gen_rtx_REG (SImode, PC_REGNUM),
20679 gen_frame_mem (SImode,
20680 gen_rtx_POST_INC (SImode,
20681 stack_pointer_rtx)));
20682 RTX_FRAME_RELATED_P (tmp) = 1;
20683 XVECEXP (par, 0, 1) = tmp;
20684 par = emit_jump_insn (par);
20686 /* Generate dwarf info. */
20687 dwarf = alloc_reg_note (REG_CFA_RESTORE,
20688 gen_rtx_REG (SImode, PC_REGNUM),
20689 NULL_RTX);
20690 REG_NOTES (par) = dwarf;
20691 arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD,
20692 stack_pointer_rtx, stack_pointer_rtx);
20696 /* Calculate the size of the return value that is passed in registers. */
20697 static unsigned
20698 arm_size_return_regs (void)
20700 machine_mode mode;
20702 if (crtl->return_rtx != 0)
20703 mode = GET_MODE (crtl->return_rtx);
20704 else
20705 mode = DECL_MODE (DECL_RESULT (current_function_decl));
20707 return GET_MODE_SIZE (mode);
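/* For example (illustrative only): a function returning a 64-bit long long
   has DImode as its return mode, so this yields 8, i.e. the value comes
   back in r0 and r1.  */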
20710 /* Return true if the current function needs to save/restore LR. */
20711 static bool
20712 thumb_force_lr_save (void)
20714 return !cfun->machine->lr_save_eliminated
20715 && (!crtl->is_leaf
20716 || thumb_far_jump_used_p ()
20717 || df_regs_ever_live_p (LR_REGNUM));
20720 /* We do not know whether r3 will be available when
20721 there is an indirect tail call happening in this
20722 particular case. */
20723 static bool
20724 is_indirect_tailcall_p (rtx call)
20726 rtx pat = PATTERN (call);
20728 /* Indirect tail call. */
20729 pat = XVECEXP (pat, 0, 0);
20730 if (GET_CODE (pat) == SET)
20731 pat = SET_SRC (pat);
20733 pat = XEXP (XEXP (pat, 0), 0);
20734 return REG_P (pat);
20737 /* Return true if r3 is used by any of the tail call insns in the
20738 current function. */
20739 static bool
20740 any_sibcall_could_use_r3 (void)
20742 edge_iterator ei;
20743 edge e;
20745 if (!crtl->tail_call_emit)
20746 return false;
20747 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
20748 if (e->flags & EDGE_SIBCALL)
20750 rtx_insn *call = BB_END (e->src);
20751 if (!CALL_P (call))
20752 call = prev_nonnote_nondebug_insn (call);
20753 gcc_assert (CALL_P (call) && SIBLING_CALL_P (call));
20754 if (find_regno_fusage (call, USE, 3)
20755 || is_indirect_tailcall_p (call))
20756 return true;
20758 return false;
20762 /* Compute the distance from register FROM to register TO.
20763 These can be the arg pointer (26), the soft frame pointer (25),
20764 the stack pointer (13) or the hard frame pointer (11).
20765 In thumb mode r7 is used as the soft frame pointer, if needed.
20766 Typical stack layout looks like this:
20768 old stack pointer -> | |
20769 ----
20770 | | \
20771 | | saved arguments for
20772 | | vararg functions
20773 | | /
20775 hard FP & arg pointer -> | | \
20776 | | stack
20777 | | frame
20778 | | /
20780 | | \
20781 | | call saved
20782 | | registers
20783 soft frame pointer -> | | /
20785 | | \
20786 | | local
20787 | | variables
20788 locals base pointer -> | | /
20790 | | \
20791 | | outgoing
20792 | | arguments
20793 current stack pointer -> | | /
20796 For a given function some or all of these stack components
20797 may not be needed, giving rise to the possibility of
20798 eliminating some of the registers.
20800 The values returned by this function must reflect the behavior
20801 of arm_expand_prologue () and arm_compute_save_core_reg_mask ().
20803 The sign of the number returned reflects the direction of stack
20804 growth, so the values are positive for all eliminations except
20805 from the soft frame pointer to the hard frame pointer.
20807 SFP may point just inside the local variables block to ensure correct
20808 alignment. */
20811 /* Return cached stack offsets. */
20813 static arm_stack_offsets *
20814 arm_get_frame_offsets (void)
20816 struct arm_stack_offsets *offsets;
20818 offsets = &cfun->machine->stack_offsets;
20820 return offsets;
20824 /* Calculate stack offsets. These are used to calculate register elimination
20825 offsets and in prologue/epilogue code. Also calculates which registers
20826 should be saved. */
20828 static void
20829 arm_compute_frame_layout (void)
20831 struct arm_stack_offsets *offsets;
20832 unsigned long func_type;
20833 int saved;
20834 int core_saved;
20835 HOST_WIDE_INT frame_size;
20836 int i;
20838 offsets = &cfun->machine->stack_offsets;
20840 /* Initially this is the size of the local variables. It will be translated
20841 into an offset once we have determined the size of preceding data. */
20842 frame_size = ROUND_UP_WORD (get_frame_size ());
20844 /* Space for variadic functions. */
20845 offsets->saved_args = crtl->args.pretend_args_size;
20847 /* In Thumb mode this is incorrect, but never used. */
20848 offsets->frame
20849 = (offsets->saved_args
20850 + arm_compute_static_chain_stack_bytes ()
20851 + (frame_pointer_needed ? 4 : 0));
20853 if (TARGET_32BIT)
20855 unsigned int regno;
20857 offsets->saved_regs_mask = arm_compute_save_core_reg_mask ();
20858 core_saved = bit_count (offsets->saved_regs_mask) * 4;
20859 saved = core_saved;
20861 /* We know that SP will be doubleword aligned on entry, and we must
20862 preserve that condition at any subroutine call. We also require the
20863 soft frame pointer to be doubleword aligned. */
20865 if (TARGET_REALLY_IWMMXT)
20867 /* Check for the call-saved iWMMXt registers. */
20868 for (regno = FIRST_IWMMXT_REGNUM;
20869 regno <= LAST_IWMMXT_REGNUM;
20870 regno++)
20871 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
20872 saved += 8;
20875 func_type = arm_current_func_type ();
20876 /* Space for saved VFP registers. */
20877 if (! IS_VOLATILE (func_type)
20878 && TARGET_HARD_FLOAT)
20879 saved += arm_get_vfp_saved_size ();
20881 else /* TARGET_THUMB1 */
20883 offsets->saved_regs_mask = thumb1_compute_save_core_reg_mask ();
20884 core_saved = bit_count (offsets->saved_regs_mask) * 4;
20885 saved = core_saved;
20886 if (TARGET_BACKTRACE)
20887 saved += 16;
20890 /* Saved registers include the stack frame. */
20891 offsets->saved_regs
20892 = offsets->saved_args + arm_compute_static_chain_stack_bytes () + saved;
20893 offsets->soft_frame = offsets->saved_regs + CALLER_INTERWORKING_SLOT_SIZE;
20895 /* A leaf function does not need any stack alignment if it has nothing
20896 on the stack. */
20897 if (crtl->is_leaf && frame_size == 0
20898 /* However if it calls alloca(), we have a dynamically allocated
20899 block of BIGGEST_ALIGNMENT on stack, so still do stack alignment. */
20900 && ! cfun->calls_alloca)
20902 offsets->outgoing_args = offsets->soft_frame;
20903 offsets->locals_base = offsets->soft_frame;
20904 return;
20907 /* Ensure SFP has the correct alignment. */
20908 if (ARM_DOUBLEWORD_ALIGN
20909 && (offsets->soft_frame & 7))
20911 offsets->soft_frame += 4;
20912 /* Try to align stack by pushing an extra reg. Don't bother doing this
20913 when there is a stack frame as the alignment will be rolled into
20914 the normal stack adjustment. */
20915 if (frame_size + crtl->outgoing_args_size == 0)
20917 int reg = -1;
20919 /* Register r3 is caller-saved. Normally it does not need to be
20920 saved on entry by the prologue. However if we choose to save
20921 it for padding then we may confuse the compiler into thinking
20922 a prologue sequence is required when in fact it is not. This
20923 will occur when shrink-wrapping if r3 is used as a scratch
20924 register and there are no other callee-saved writes.
20926 This situation can be avoided by choosing a callee-saved register
20927 for the padding instead, when one is available, since saving r3
20928 is not mandatory. */
20929 bool prefer_callee_reg_p = false;
20931 /* If it is safe to use r3, then do so. This sometimes
20932 generates better code on Thumb-2 by avoiding the need to
20933 use 32-bit push/pop instructions. */
20934 if (! any_sibcall_could_use_r3 ()
20935 && arm_size_return_regs () <= 12
20936 && (offsets->saved_regs_mask & (1 << 3)) == 0
20937 && (TARGET_THUMB2
20938 || !(TARGET_LDRD && current_tune->prefer_ldrd_strd)))
20940 reg = 3;
20941 if (!TARGET_THUMB2)
20942 prefer_callee_reg_p = true;
20944 if (reg == -1
20945 || prefer_callee_reg_p)
20947 for (i = 4; i <= (TARGET_THUMB1 ? LAST_LO_REGNUM : 11); i++)
20949 /* Avoid fixed registers; they may be changed at
20950 arbitrary times so it's unsafe to restore them
20951 during the epilogue. */
20952 if (!fixed_regs[i]
20953 && (offsets->saved_regs_mask & (1 << i)) == 0)
20955 reg = i;
20956 break;
20961 if (reg != -1)
20963 offsets->saved_regs += 4;
20964 offsets->saved_regs_mask |= (1 << reg);
20969 offsets->locals_base = offsets->soft_frame + frame_size;
20970 offsets->outgoing_args = (offsets->locals_base
20971 + crtl->outgoing_args_size);
20973 if (ARM_DOUBLEWORD_ALIGN)
20975 /* Ensure SP remains doubleword aligned. */
20976 if (offsets->outgoing_args & 7)
20977 offsets->outgoing_args += 4;
20978 gcc_assert (!(offsets->outgoing_args & 7));
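/* As an illustrative sketch only (all values hypothetical): a 32-bit
   function with no pretend args, no static chain slot and a zero
   CALLER_INTERWORKING_SLOT_SIZE that saves {r4, r5, r6, lr} (16 bytes),
   uses 16 bytes of locals and 8 bytes of outgoing arguments ends up with
   saved_args == 0, saved_regs == 16, soft_frame == 16, locals_base == 32
   and outgoing_args == 40; every boundary is already doubleword aligned,
   so no padding word or padding register is needed.  */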
20983 /* Calculate the relative offsets for the different stack pointers. Positive
20984 offsets are in the direction of stack growth. */
20986 HOST_WIDE_INT
20987 arm_compute_initial_elimination_offset (unsigned int from, unsigned int to)
20989 arm_stack_offsets *offsets;
20991 offsets = arm_get_frame_offsets ();
20993 /* OK, now we have enough information to compute the distances.
20994 There must be an entry in these switch tables for each pair
20995 of registers in ELIMINABLE_REGS, even if some of the entries
20996 seem to be redundant or useless. */
20997 switch (from)
20999 case ARG_POINTER_REGNUM:
21000 switch (to)
21002 case THUMB_HARD_FRAME_POINTER_REGNUM:
21003 return 0;
21005 case FRAME_POINTER_REGNUM:
21006 /* This is the reverse of the soft frame pointer
21007 to hard frame pointer elimination below. */
21008 return offsets->soft_frame - offsets->saved_args;
21010 case ARM_HARD_FRAME_POINTER_REGNUM:
21011 /* This is only non-zero in the case where the static chain register
21012 is stored above the frame. */
21013 return offsets->frame - offsets->saved_args - 4;
21015 case STACK_POINTER_REGNUM:
21016 /* If nothing has been pushed on the stack at all
21017 then this will return -4. This *is* correct! */
21018 return offsets->outgoing_args - (offsets->saved_args + 4);
21020 default:
21021 gcc_unreachable ();
21023 gcc_unreachable ();
21025 case FRAME_POINTER_REGNUM:
21026 switch (to)
21028 case THUMB_HARD_FRAME_POINTER_REGNUM:
21029 return 0;
21031 case ARM_HARD_FRAME_POINTER_REGNUM:
21032 /* The hard frame pointer points to the top entry in the
21033 stack frame. The soft frame pointer points to the bottom entry
21034 in the stack frame. If there is no stack frame at all,
21035 then they are identical. */
21037 return offsets->frame - offsets->soft_frame;
21039 case STACK_POINTER_REGNUM:
21040 return offsets->outgoing_args - offsets->soft_frame;
21042 default:
21043 gcc_unreachable ();
21045 gcc_unreachable ();
21047 default:
21048 /* You cannot eliminate from the stack pointer.
21049 In theory you could eliminate from the hard frame
21050 pointer to the stack pointer, but this will never
21051 happen, since if a stack frame is not needed the
21052 hard frame pointer will never be used. */
21053 gcc_unreachable ();
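/* For illustration (hypothetical offsets): with saved_args == 0,
   soft_frame == 16 and outgoing_args == 40, eliminating ARG_POINTER_REGNUM
   into STACK_POINTER_REGNUM yields 40 - (0 + 4) == 36, while eliminating
   FRAME_POINTER_REGNUM into STACK_POINTER_REGNUM yields 40 - 16 == 24.  */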
21057 /* Given FROM and TO register numbers, say whether this elimination is
21058 allowed. Frame pointer elimination is automatically handled.
21060 All eliminations are permissible. Note that ARG_POINTER_REGNUM and
21061 HARD_FRAME_POINTER_REGNUM are in fact the same thing. If we need a frame
21062 pointer, we must eliminate FRAME_POINTER_REGNUM into
21063 HARD_FRAME_POINTER_REGNUM and not into STACK_POINTER_REGNUM or
21064 ARG_POINTER_REGNUM. */
21066 bool
21067 arm_can_eliminate (const int from, const int to)
21069 return ((to == FRAME_POINTER_REGNUM && from == ARG_POINTER_REGNUM) ? false :
21070 (to == STACK_POINTER_REGNUM && frame_pointer_needed) ? false :
21071 (to == ARM_HARD_FRAME_POINTER_REGNUM && TARGET_THUMB) ? false :
21072 (to == THUMB_HARD_FRAME_POINTER_REGNUM && TARGET_ARM) ? false :
21073 true);
21076 /* Emit RTL to save coprocessor registers on function entry. Returns the
21077 number of bytes pushed. */
21079 static int
21080 arm_save_coproc_regs(void)
21082 int saved_size = 0;
21083 unsigned reg;
21084 unsigned start_reg;
21085 rtx insn;
21087 for (reg = LAST_IWMMXT_REGNUM; reg >= FIRST_IWMMXT_REGNUM; reg--)
21088 if (df_regs_ever_live_p (reg) && ! call_used_regs[reg])
21090 insn = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
21091 insn = gen_rtx_MEM (V2SImode, insn);
21092 insn = emit_set_insn (insn, gen_rtx_REG (V2SImode, reg));
21093 RTX_FRAME_RELATED_P (insn) = 1;
21094 saved_size += 8;
21097 if (TARGET_HARD_FLOAT)
21099 start_reg = FIRST_VFP_REGNUM;
21101 for (reg = FIRST_VFP_REGNUM; reg < LAST_VFP_REGNUM; reg += 2)
21103 if ((!df_regs_ever_live_p (reg) || call_used_regs[reg])
21104 && (!df_regs_ever_live_p (reg + 1) || call_used_regs[reg + 1]))
21106 if (start_reg != reg)
21107 saved_size += vfp_emit_fstmd (start_reg,
21108 (reg - start_reg) / 2);
21109 start_reg = reg + 2;
21112 if (start_reg != reg)
21113 saved_size += vfp_emit_fstmd (start_reg,
21114 (reg - start_reg) / 2);
21116 return saved_size;
21120 /* Set the Thumb frame pointer from the stack pointer. */
21122 static void
21123 thumb_set_frame_pointer (arm_stack_offsets *offsets)
21125 HOST_WIDE_INT amount;
21126 rtx insn, dwarf;
21128 amount = offsets->outgoing_args - offsets->locals_base;
21129 if (amount < 1024)
21130 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
21131 stack_pointer_rtx, GEN_INT (amount)));
21132 else
21134 emit_insn (gen_movsi (hard_frame_pointer_rtx, GEN_INT (amount)));
21135 /* Thumb-2 RTL patterns expect sp as the first input. Thumb-1
21136 expects the first two operands to be the same. */
21137 if (TARGET_THUMB2)
21139 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
21140 stack_pointer_rtx,
21141 hard_frame_pointer_rtx));
21143 else
21145 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
21146 hard_frame_pointer_rtx,
21147 stack_pointer_rtx));
21149 dwarf = gen_rtx_SET (hard_frame_pointer_rtx,
21150 plus_constant (Pmode, stack_pointer_rtx, amount));
21151 RTX_FRAME_RELATED_P (dwarf) = 1;
21152 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
21155 RTX_FRAME_RELATED_P (insn) = 1;
21158 struct scratch_reg {
21159 rtx reg;
21160 bool saved;
21163 /* Return a short-lived scratch register for use as a 2nd scratch register on
21164 function entry after the registers are saved in the prologue. This register
21165 must be released by means of release_scratch_register_on_entry. IP is not
21166 considered since it is always used as the 1st scratch register if available.
21168 REGNO1 is the index number of the 1st scratch register and LIVE_REGS is the
21169 mask of live registers. */
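/* For illustration, not an exhaustive description: if REGNO1 is IP and LR
   is set in LIVE_REGS, LR is handed back and nothing is spilled; otherwise
   the first of r4-r10 that is live and different from REGNO1 is used; as a
   last resort r2 or r3 is chosen and, if live on entry, is pushed here with
   a pre-decrement of SP and reloaded by release_scratch_register_on_entry.  */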
21171 static void
21172 get_scratch_register_on_entry (struct scratch_reg *sr, unsigned int regno1,
21173 unsigned long live_regs)
21175 int regno = -1;
21177 sr->saved = false;
21179 if (regno1 != LR_REGNUM && (live_regs & (1 << LR_REGNUM)) != 0)
21180 regno = LR_REGNUM;
21181 else
21183 unsigned int i;
21185 for (i = 4; i < 11; i++)
21186 if (regno1 != i && (live_regs & (1 << i)) != 0)
21188 regno = i;
21189 break;
21192 if (regno < 0)
21194 /* If IP is used as the 1st scratch register for a nested function,
21195 then either r3 wasn't available or is used to preserve IP. */
21196 if (regno1 == IP_REGNUM && IS_NESTED (arm_current_func_type ()))
21197 regno1 = 3;
21198 regno = (regno1 == 3 ? 2 : 3);
21199 sr->saved
21200 = REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun)),
21201 regno);
21205 sr->reg = gen_rtx_REG (SImode, regno);
21206 if (sr->saved)
21208 rtx addr = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
21209 rtx insn = emit_set_insn (gen_frame_mem (SImode, addr), sr->reg);
21210 rtx x = gen_rtx_SET (stack_pointer_rtx,
21211 plus_constant (Pmode, stack_pointer_rtx, -4));
21212 RTX_FRAME_RELATED_P (insn) = 1;
21213 add_reg_note (insn, REG_FRAME_RELATED_EXPR, x);
21217 /* Release a scratch register obtained from the preceding function. */
21219 static void
21220 release_scratch_register_on_entry (struct scratch_reg *sr)
21222 if (sr->saved)
21224 rtx addr = gen_rtx_POST_INC (Pmode, stack_pointer_rtx);
21225 rtx insn = emit_set_insn (sr->reg, gen_frame_mem (SImode, addr));
21226 rtx x = gen_rtx_SET (stack_pointer_rtx,
21227 plus_constant (Pmode, stack_pointer_rtx, 4));
21228 RTX_FRAME_RELATED_P (insn) = 1;
21229 add_reg_note (insn, REG_FRAME_RELATED_EXPR, x);
21233 #define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)
21235 #if PROBE_INTERVAL > 4096
21236 #error Cannot use indexed addressing mode for stack probing
21237 #endif
21239 /* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
21240 inclusive. These are offsets from the current stack pointer. REGNO1
21241 is the index number of the 1st scratch register and LIVE_REGS is the
21242 mask of live registers. */
21244 static void
21245 arm_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size,
21246 unsigned int regno1, unsigned long live_regs)
21248 rtx reg1 = gen_rtx_REG (Pmode, regno1);
21250 /* See if we have a constant small number of probes to generate. If so,
21251 that's the easy case. */
21252 if (size <= PROBE_INTERVAL)
21254 emit_move_insn (reg1, GEN_INT (first + PROBE_INTERVAL));
21255 emit_set_insn (reg1, gen_rtx_MINUS (Pmode, stack_pointer_rtx, reg1));
21256 emit_stack_probe (plus_constant (Pmode, reg1, PROBE_INTERVAL - size));
21259 /* The run-time loop is made up of 10 insns in the generic case while the
21260 compile-time loop is made up of 4+2*(n-2) insns for n intervals. */
21261 else if (size <= 5 * PROBE_INTERVAL)
21263 HOST_WIDE_INT i, rem;
21265 emit_move_insn (reg1, GEN_INT (first + PROBE_INTERVAL));
21266 emit_set_insn (reg1, gen_rtx_MINUS (Pmode, stack_pointer_rtx, reg1));
21267 emit_stack_probe (reg1);
21269 /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 2 until
21270 it exceeds SIZE. If only two probes are needed, this will not
21271 generate any code. Then probe at FIRST + SIZE. */
21272 for (i = 2 * PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
21274 emit_set_insn (reg1, plus_constant (Pmode, reg1, -PROBE_INTERVAL));
21275 emit_stack_probe (reg1);
21278 rem = size - (i - PROBE_INTERVAL);
21279 if (rem > 4095 || (TARGET_THUMB2 && rem > 255))
21281 emit_set_insn (reg1, plus_constant (Pmode, reg1, -PROBE_INTERVAL));
21282 emit_stack_probe (plus_constant (Pmode, reg1, PROBE_INTERVAL - rem));
21284 else
21285 emit_stack_probe (plus_constant (Pmode, reg1, -rem));
21288 /* Otherwise, do the same as above, but in a loop. Note that we must be
21289 extra careful with variables wrapping around because we might be at
21290 the very top (or the very bottom) of the address space and we have
21291 to be able to handle this case properly; in particular, we use an
21292 equality test for the loop condition. */
21293 else
21295 HOST_WIDE_INT rounded_size;
21296 struct scratch_reg sr;
21298 get_scratch_register_on_entry (&sr, regno1, live_regs);
21300 emit_move_insn (reg1, GEN_INT (first));
21303 /* Step 1: round SIZE to the previous multiple of the interval. */
21305 rounded_size = size & -PROBE_INTERVAL;
21306 emit_move_insn (sr.reg, GEN_INT (rounded_size));
21309 /* Step 2: compute initial and final value of the loop counter. */
21311 /* TEST_ADDR = SP + FIRST. */
21312 emit_set_insn (reg1, gen_rtx_MINUS (Pmode, stack_pointer_rtx, reg1));
21314 /* LAST_ADDR = SP + FIRST + ROUNDED_SIZE. */
21315 emit_set_insn (sr.reg, gen_rtx_MINUS (Pmode, reg1, sr.reg));
21318 /* Step 3: the loop
21322 TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
21323 probe at TEST_ADDR
21325 while (TEST_ADDR != LAST_ADDR)
21327 probes at FIRST + N * PROBE_INTERVAL for values of N from 1
21328 until it is equal to ROUNDED_SIZE. */
21330 emit_insn (gen_probe_stack_range (reg1, reg1, sr.reg));
21333 /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
21334 that SIZE is equal to ROUNDED_SIZE. */
21336 if (size != rounded_size)
21338 HOST_WIDE_INT rem = size - rounded_size;
21340 if (rem > 4095 || (TARGET_THUMB2 && rem > 255))
21342 emit_set_insn (sr.reg,
21343 plus_constant (Pmode, sr.reg, -PROBE_INTERVAL));
21344 emit_stack_probe (plus_constant (Pmode, sr.reg,
21345 PROBE_INTERVAL - rem));
21347 else
21348 emit_stack_probe (plus_constant (Pmode, sr.reg, -rem));
21351 release_scratch_register_on_entry (&sr);
21354 /* Make sure nothing is scheduled before we are done. */
21355 emit_insn (gen_blockage ());
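/* Worked example, assuming the usual 4096-byte PROBE_INTERVAL: for
   FIRST == 4096 and SIZE == 10000 the unrolled case above probes at
   SP-8192, SP-12288 and finally SP-14096 (i.e. SP - FIRST - SIZE).  For
   larger sizes the loop form probes every PROBE_INTERVAL bytes from
   SP - FIRST - PROBE_INTERVAL down to SP - FIRST - ROUNDED_SIZE, with one
   extra probe at SP - FIRST - SIZE when SIZE is not a multiple of the
   interval.  */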
21358 /* Probe a range of stack addresses from REG1 to REG2 inclusive. These are
21359 absolute addresses. */
21361 const char *
21362 output_probe_stack_range (rtx reg1, rtx reg2)
21364 static int labelno = 0;
21365 char loop_lab[32];
21366 rtx xops[2];
21368 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno++);
21370 /* Loop. */
21371 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
21373 /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL. */
21374 xops[0] = reg1;
21375 xops[1] = GEN_INT (PROBE_INTERVAL);
21376 output_asm_insn ("sub\t%0, %0, %1", xops);
21378 /* Probe at TEST_ADDR. */
21379 output_asm_insn ("str\tr0, [%0, #0]", xops);
21381 /* Test if TEST_ADDR == LAST_ADDR. */
21382 xops[1] = reg2;
21383 output_asm_insn ("cmp\t%0, %1", xops);
21385 /* Branch. */
21386 fputs ("\tbne\t", asm_out_file);
21387 assemble_name_raw (asm_out_file, loop_lab);
21388 fputc ('\n', asm_out_file);
21390 return "";
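/* For illustration, with reg1 == r4, reg2 == r5 and a 4096-byte
   PROBE_INTERVAL (register choices are arbitrary; the label spelling
   follows ASM_GENERATE_INTERNAL_LABEL), the emitted loop looks like:

       .LPSRL0:
               sub     r4, r4, #4096
               str     r0, [r4, #0]
               cmp     r4, r5
               bne     .LPSRL0
*/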
21393 /* Generate the prologue instructions for entry into an ARM or Thumb-2
21394 function. */
21395 void
21396 arm_expand_prologue (void)
21398 rtx amount;
21399 rtx insn;
21400 rtx ip_rtx;
21401 unsigned long live_regs_mask;
21402 unsigned long func_type;
21403 int fp_offset = 0;
21404 int saved_pretend_args = 0;
21405 int saved_regs = 0;
21406 unsigned HOST_WIDE_INT args_to_push;
21407 HOST_WIDE_INT size;
21408 arm_stack_offsets *offsets;
21409 bool clobber_ip;
21411 func_type = arm_current_func_type ();
21413 /* Naked functions don't have prologues. */
21414 if (IS_NAKED (func_type))
21416 if (flag_stack_usage_info)
21417 current_function_static_stack_size = 0;
21418 return;
21421 /* Make a copy of c_f_p_a_s as we may need to modify it locally. */
21422 args_to_push = crtl->args.pretend_args_size;
21424 /* Compute which registers we will have to save onto the stack. */
21425 offsets = arm_get_frame_offsets ();
21426 live_regs_mask = offsets->saved_regs_mask;
21428 ip_rtx = gen_rtx_REG (SImode, IP_REGNUM);
21430 if (IS_STACKALIGN (func_type))
21432 rtx r0, r1;
21434 /* Handle a word-aligned stack pointer. We generate the following:
21436 mov r0, sp
21437 bic r1, r0, #7
21438 mov sp, r1
21439 <save and restore r0 in normal prologue/epilogue>
21440 mov sp, r0
21441 bx lr
21443 The unwinder doesn't need to know about the stack realignment.
21444 Just tell it we saved SP in r0. */
21445 gcc_assert (TARGET_THUMB2 && !arm_arch_notm && args_to_push == 0);
21447 r0 = gen_rtx_REG (SImode, R0_REGNUM);
21448 r1 = gen_rtx_REG (SImode, R1_REGNUM);
21450 insn = emit_insn (gen_movsi (r0, stack_pointer_rtx));
21451 RTX_FRAME_RELATED_P (insn) = 1;
21452 add_reg_note (insn, REG_CFA_REGISTER, NULL);
21454 emit_insn (gen_andsi3 (r1, r0, GEN_INT (~(HOST_WIDE_INT)7)));
21456 /* ??? The CFA changes here, which may cause GDB to conclude that it
21457 has entered a different function. That said, the unwind info is
21458 correct, individually, before and after this instruction because
21459 we've described the save of SP, which will override the default
21460 handling of SP as restoring from the CFA. */
21461 emit_insn (gen_movsi (stack_pointer_rtx, r1));
21464 /* The static chain register is the same as the IP register. If it is
21465 clobbered when creating the frame, we need to save and restore it. */
21466 clobber_ip = IS_NESTED (func_type)
21467 && ((TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
21468 || (flag_stack_check == STATIC_BUILTIN_STACK_CHECK
21469 && !df_regs_ever_live_p (LR_REGNUM)
21470 && arm_r3_live_at_start_p ()));
21472 /* Find somewhere to store IP whilst the frame is being created.
21473 We try the following places in order:
21475 1. The last argument register r3 if it is available.
21476 2. A slot on the stack above the frame if there are no
21477 arguments to push onto the stack.
21478 3. Register r3 again, after pushing the argument registers
21479 onto the stack, if this is a varargs function.
21480 4. The last slot on the stack created for the arguments to
21481 push, if this isn't a varargs function.
21483 Note - we only need to tell the dwarf2 backend about the SP
21484 adjustment in the second variant; the static chain register
21485 doesn't need to be unwound, as it doesn't contain a value
21486 inherited from the caller. */
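/* For illustration: in a nested function with an APCS frame where r3 is
   live on entry and there are no pretend args (the second variant above),
   IP is stored with a pre-decrement store below SP, fp_offset becomes 4
   and only that 4-byte SP adjustment is reported to the dwarf2 backend.  */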
21487 if (clobber_ip)
21489 if (!arm_r3_live_at_start_p ())
21490 insn = emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
21491 else if (args_to_push == 0)
21493 rtx addr, dwarf;
21495 gcc_assert(arm_compute_static_chain_stack_bytes() == 4);
21496 saved_regs += 4;
21498 addr = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
21499 insn = emit_set_insn (gen_frame_mem (SImode, addr), ip_rtx);
21500 fp_offset = 4;
21502 /* Just tell the dwarf backend that we adjusted SP. */
21503 dwarf = gen_rtx_SET (stack_pointer_rtx,
21504 plus_constant (Pmode, stack_pointer_rtx,
21505 -fp_offset));
21506 RTX_FRAME_RELATED_P (insn) = 1;
21507 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
21509 else
21511 /* Store the args on the stack. */
21512 if (cfun->machine->uses_anonymous_args)
21514 insn = emit_multi_reg_push ((0xf0 >> (args_to_push / 4)) & 0xf,
21515 (0xf0 >> (args_to_push / 4)) & 0xf);
21516 emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
21517 saved_pretend_args = 1;
21519 else
21521 rtx addr, dwarf;
21523 if (args_to_push == 4)
21524 addr = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
21525 else
21526 addr = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx,
21527 plus_constant (Pmode,
21528 stack_pointer_rtx,
21529 -args_to_push));
21531 insn = emit_set_insn (gen_frame_mem (SImode, addr), ip_rtx);
21533 /* Just tell the dwarf backend that we adjusted SP. */
21534 dwarf = gen_rtx_SET (stack_pointer_rtx,
21535 plus_constant (Pmode, stack_pointer_rtx,
21536 -args_to_push));
21537 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
21540 RTX_FRAME_RELATED_P (insn) = 1;
21541 fp_offset = args_to_push;
21542 args_to_push = 0;
21546 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
21548 if (IS_INTERRUPT (func_type))
21550 /* Interrupt functions must not corrupt any registers.
21551 Creating a frame pointer however, corrupts the IP
21552 register, so we must push it first. */
21553 emit_multi_reg_push (1 << IP_REGNUM, 1 << IP_REGNUM);
21555 /* Do not set RTX_FRAME_RELATED_P on this insn.
21556 The dwarf stack unwinding code only wants to see one
21557 stack decrement per function, and this is not it. If
21558 this instruction is labeled as being part of the frame
21559 creation sequence then dwarf2out_frame_debug_expr will
21560 die when it encounters the assignment of IP to FP
21561 later on, since the use of SP here establishes SP as
21562 the CFA register and not IP.
21564 Anyway this instruction is not really part of the stack
21565 frame creation although it is part of the prologue. */
21568 insn = emit_set_insn (ip_rtx,
21569 plus_constant (Pmode, stack_pointer_rtx,
21570 fp_offset));
21571 RTX_FRAME_RELATED_P (insn) = 1;
21574 if (args_to_push)
21576 /* Push the argument registers, or reserve space for them. */
21577 if (cfun->machine->uses_anonymous_args)
21578 insn = emit_multi_reg_push
21579 ((0xf0 >> (args_to_push / 4)) & 0xf,
21580 (0xf0 >> (args_to_push / 4)) & 0xf);
21581 else
21582 insn = emit_insn
21583 (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
21584 GEN_INT (- args_to_push)));
21585 RTX_FRAME_RELATED_P (insn) = 1;
21588 /* If this is an interrupt service routine, and the link register
21589 is going to be pushed, and we're not generating the extra
21590 push of IP (needed when a frame is required and the frame layout is APCS),
21591 then subtracting four from LR now means that the function return
21592 can be done with a single instruction. */
21593 if ((func_type == ARM_FT_ISR || func_type == ARM_FT_FIQ)
21594 && (live_regs_mask & (1 << LR_REGNUM)) != 0
21595 && !(frame_pointer_needed && TARGET_APCS_FRAME)
21596 && TARGET_ARM)
21598 rtx lr = gen_rtx_REG (SImode, LR_REGNUM);
21600 emit_set_insn (lr, plus_constant (SImode, lr, -4));
21603 if (live_regs_mask)
21605 unsigned long dwarf_regs_mask = live_regs_mask;
21607 saved_regs += bit_count (live_regs_mask) * 4;
21608 if (optimize_size && !frame_pointer_needed
21609 && saved_regs == offsets->saved_regs - offsets->saved_args)
21611 /* If no coprocessor registers are being pushed and we don't have
21612 to worry about a frame pointer then push extra registers to
21613 create the stack frame. This is done in a way that does not
21614 alter the frame layout, so is independent of the epilogue. */
21615 int n;
21616 int frame;
21617 n = 0;
21618 while (n < 8 && (live_regs_mask & (1 << n)) == 0)
21619 n++;
21620 frame = offsets->outgoing_args - (offsets->saved_args + saved_regs);
21621 if (frame && n * 4 >= frame)
21623 n = frame / 4;
21624 live_regs_mask |= (1 << n) - 1;
21625 saved_regs += frame;
21629 if (TARGET_LDRD
21630 && current_tune->prefer_ldrd_strd
21631 && !optimize_function_for_size_p (cfun))
21633 gcc_checking_assert (live_regs_mask == dwarf_regs_mask);
21634 if (TARGET_THUMB2)
21635 thumb2_emit_strd_push (live_regs_mask);
21636 else if (TARGET_ARM
21637 && !TARGET_APCS_FRAME
21638 && !IS_INTERRUPT (func_type))
21639 arm_emit_strd_push (live_regs_mask);
21640 else
21642 insn = emit_multi_reg_push (live_regs_mask, live_regs_mask);
21643 RTX_FRAME_RELATED_P (insn) = 1;
21646 else
21648 insn = emit_multi_reg_push (live_regs_mask, dwarf_regs_mask);
21649 RTX_FRAME_RELATED_P (insn) = 1;
21653 if (! IS_VOLATILE (func_type))
21654 saved_regs += arm_save_coproc_regs ();
21656 if (frame_pointer_needed && TARGET_ARM)
21658 /* Create the new frame pointer. */
21659 if (TARGET_APCS_FRAME)
21661 insn = GEN_INT (-(4 + args_to_push + fp_offset));
21662 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx, ip_rtx, insn));
21663 RTX_FRAME_RELATED_P (insn) = 1;
21665 else
21667 insn = GEN_INT (saved_regs - (4 + fp_offset));
21668 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
21669 stack_pointer_rtx, insn));
21670 RTX_FRAME_RELATED_P (insn) = 1;
21674 size = offsets->outgoing_args - offsets->saved_args;
21675 if (flag_stack_usage_info)
21676 current_function_static_stack_size = size;
21678 /* If this isn't an interrupt service routine and we have a frame, then do
21679 stack checking. We use IP as the first scratch register, except for the
21680 non-APCS nested functions if LR or r3 are available (see clobber_ip). */
21681 if (!IS_INTERRUPT (func_type)
21682 && flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
21684 unsigned int regno;
21686 if (!IS_NESTED (func_type) || clobber_ip)
21687 regno = IP_REGNUM;
21688 else if (df_regs_ever_live_p (LR_REGNUM))
21689 regno = LR_REGNUM;
21690 else
21691 regno = 3;
21693 if (crtl->is_leaf && !cfun->calls_alloca)
21695 if (size > PROBE_INTERVAL && size > STACK_CHECK_PROTECT)
21696 arm_emit_probe_stack_range (STACK_CHECK_PROTECT,
21697 size - STACK_CHECK_PROTECT,
21698 regno, live_regs_mask);
21700 else if (size > 0)
21701 arm_emit_probe_stack_range (STACK_CHECK_PROTECT, size,
21702 regno, live_regs_mask);
21705 /* Recover the static chain register. */
21706 if (clobber_ip)
21708 if (!arm_r3_live_at_start_p () || saved_pretend_args)
21709 insn = gen_rtx_REG (SImode, 3);
21710 else
21712 insn = plus_constant (Pmode, hard_frame_pointer_rtx, 4);
21713 insn = gen_frame_mem (SImode, insn);
21715 emit_set_insn (ip_rtx, insn);
21716 emit_insn (gen_force_register_use (ip_rtx));
21719 if (offsets->outgoing_args != offsets->saved_args + saved_regs)
21721 /* This add can produce multiple insns for a large constant, so we
21722 need to get tricky. */
21723 rtx_insn *last = get_last_insn ();
21725 amount = GEN_INT (offsets->saved_args + saved_regs
21726 - offsets->outgoing_args);
21728 insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
21729 amount));
21732 last = last ? NEXT_INSN (last) : get_insns ();
21733 RTX_FRAME_RELATED_P (last) = 1;
21735 while (last != insn);
21737 /* If the frame pointer is needed, emit a special barrier that
21738 will prevent the scheduler from moving stores to the frame
21739 before the stack adjustment. */
21740 if (frame_pointer_needed)
21741 emit_insn (gen_stack_tie (stack_pointer_rtx,
21742 hard_frame_pointer_rtx));
21746 if (frame_pointer_needed && TARGET_THUMB2)
21747 thumb_set_frame_pointer (offsets);
21749 if (flag_pic && arm_pic_register != INVALID_REGNUM)
21751 unsigned long mask;
21753 mask = live_regs_mask;
21754 mask &= THUMB2_WORK_REGS;
21755 if (!IS_NESTED (func_type))
21756 mask |= (1 << IP_REGNUM);
21757 arm_load_pic_register (mask);
21760 /* If we are profiling, make sure no instructions are scheduled before
21761 the call to mcount. Similarly if the user has requested no
21762 scheduling in the prolog. Similarly if we want non-call exceptions
21763 using the EABI unwinder, to prevent faulting instructions from being
21764 swapped with a stack adjustment. */
21765 if (crtl->profile || !TARGET_SCHED_PROLOG
21766 || (arm_except_unwind_info (&global_options) == UI_TARGET
21767 && cfun->can_throw_non_call_exceptions))
21768 emit_insn (gen_blockage ());
21770 /* If the link register is being kept alive, with the return address in it,
21771 then make sure that it does not get reused by the ce2 pass. */
21772 if ((live_regs_mask & (1 << LR_REGNUM)) == 0)
21773 cfun->machine->lr_save_eliminated = 1;
21776 /* Print condition code to STREAM. Helper function for arm_print_operand. */
21777 static void
21778 arm_print_condition (FILE *stream)
21780 if (arm_ccfsm_state == 3 || arm_ccfsm_state == 4)
21782 /* Branch conversion is not implemented for Thumb-2. */
21783 if (TARGET_THUMB)
21785 output_operand_lossage ("predicated Thumb instruction");
21786 return;
21788 if (current_insn_predicate != NULL)
21790 output_operand_lossage
21791 ("predicated instruction in conditional sequence");
21792 return;
21795 fputs (arm_condition_codes[arm_current_cc], stream);
21797 else if (current_insn_predicate)
21799 enum arm_cond_code code;
21801 if (TARGET_THUMB1)
21803 output_operand_lossage ("predicated Thumb instruction");
21804 return;
21807 code = get_arm_condition_code (current_insn_predicate);
21808 fputs (arm_condition_codes[code], stream);
21813 /* Globally reserved letters: acln
21814 Punctuation letters currently used: @_|?().!#
21815 Lower case letters currently used: bcdefhimpqtvwxyz
21816 Upper case letters currently used: ABCDFGHJKLMNOPQRSTU
21817 Letters previously used, but now deprecated/obsolete: sVWXYZ.
21819 Note that the global reservation for 'c' is only for CONSTANT_ADDRESS_P.
21821 If CODE is 'd', then the X is a condition operand and the instruction
21822 should only be executed if the condition is true.
21823 If CODE is 'D', then the X is a condition operand and the instruction
21824 should only be executed if the condition is false: however, if the mode
21825 of the comparison is CCFPEmode, then always execute the instruction -- we
21826 do this because in these circumstances !GE does not necessarily imply LT;
21827 in these cases the instruction pattern will take care to make sure that
21828 an instruction containing %d will follow, thereby undoing the effects of
21829 doing this instruction unconditionally.
21830 If CODE is 'N' then X is a floating point operand that must be negated
21831 before output.
21832 If CODE is 'B' then output a bitwise inverted value of X (a const int).
21833 If X is a REG and CODE is `M', output a ldm/stm style multi-reg. */
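/* A few illustrative expansions (operand values are hypothetical):
   %B of the constant 0 prints -1; %b of the constant 8 prints #3;
   %L of 0x12345678 prints 22136 (the low 16 bits in decimal);
   %x of the constant 10 prints #0xa; %M of r4 in DImode prints {r4-r5}.  */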
21834 static void
21835 arm_print_operand (FILE *stream, rtx x, int code)
21837 switch (code)
21839 case '@':
21840 fputs (ASM_COMMENT_START, stream);
21841 return;
21843 case '_':
21844 fputs (user_label_prefix, stream);
21845 return;
21847 case '|':
21848 fputs (REGISTER_PREFIX, stream);
21849 return;
21851 case '?':
21852 arm_print_condition (stream);
21853 return;
21855 case '.':
21856 /* The current condition code for a condition code setting instruction.
21857 Preceded by 's' in unified syntax, otherwise followed by 's'. */
21858 fputc('s', stream);
21859 arm_print_condition (stream);
21860 return;
21862 case '!':
21863 /* If the instruction is conditionally executed then print
21864 the current condition code, otherwise print 's'. */
21865 gcc_assert (TARGET_THUMB2);
21866 if (current_insn_predicate)
21867 arm_print_condition (stream);
21868 else
21869 fputc('s', stream);
21870 break;
21872 /* %# is a "break" sequence. It doesn't output anything, but is used to
21873 separate e.g. operand numbers from following text, if that text consists
21874 of further digits which we don't want to be part of the operand
21875 number. */
21876 case '#':
21877 return;
21879 case 'N':
21881 REAL_VALUE_TYPE r;
21882 r = real_value_negate (CONST_DOUBLE_REAL_VALUE (x));
21883 fprintf (stream, "%s", fp_const_from_val (&r));
21885 return;
21887 /* An integer or symbol address without a preceding # sign. */
21888 case 'c':
21889 switch (GET_CODE (x))
21891 case CONST_INT:
21892 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
21893 break;
21895 case SYMBOL_REF:
21896 output_addr_const (stream, x);
21897 break;
21899 case CONST:
21900 if (GET_CODE (XEXP (x, 0)) == PLUS
21901 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF)
21903 output_addr_const (stream, x);
21904 break;
21906 /* Fall through. */
21908 default:
21909 output_operand_lossage ("Unsupported operand for code '%c'", code);
21911 return;
21913 /* An integer that we want to print in HEX. */
21914 case 'x':
21915 switch (GET_CODE (x))
21917 case CONST_INT:
21918 fprintf (stream, "#" HOST_WIDE_INT_PRINT_HEX, INTVAL (x));
21919 break;
21921 default:
21922 output_operand_lossage ("Unsupported operand for code '%c'", code);
21924 return;
21926 case 'B':
21927 if (CONST_INT_P (x))
21929 HOST_WIDE_INT val;
21930 val = ARM_SIGN_EXTEND (~INTVAL (x));
21931 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, val);
21933 else
21935 putc ('~', stream);
21936 output_addr_const (stream, x);
21938 return;
21940 case 'b':
21941 /* Print the log2 of a CONST_INT. */
21943 HOST_WIDE_INT val;
21945 if (!CONST_INT_P (x)
21946 || (val = exact_log2 (INTVAL (x) & 0xffffffff)) < 0)
21947 output_operand_lossage ("Unsupported operand for code '%c'", code);
21948 else
21949 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, val);
21951 return;
21953 case 'L':
21954 /* The low 16 bits of an immediate constant. */
21955 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL(x) & 0xffff);
21956 return;
21958 case 'i':
21959 fprintf (stream, "%s", arithmetic_instr (x, 1));
21960 return;
21962 case 'I':
21963 fprintf (stream, "%s", arithmetic_instr (x, 0));
21964 return;
21966 case 'S':
21968 HOST_WIDE_INT val;
21969 const char *shift;
21971 shift = shift_op (x, &val);
21973 if (shift)
21975 fprintf (stream, ", %s ", shift);
21976 if (val == -1)
21977 arm_print_operand (stream, XEXP (x, 1), 0);
21978 else
21979 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, val);
21982 return;
21984 /* An explanation of the 'Q', 'R' and 'H' register operands:
21986 In a pair of registers containing a DI or DF value the 'Q'
21987 operand returns the register number of the register containing
21988 the least significant part of the value. The 'R' operand returns
21989 the register number of the register containing the most
21990 significant part of the value.
21992 The 'H' operand returns the higher of the two register numbers.
21993 On a run where WORDS_BIG_ENDIAN is true the 'H' operand is the
21994 same as the 'Q' operand, since the most significant part of the
21995 value is held in the lower number register. The reverse is true
21996 on systems where WORDS_BIG_ENDIAN is false.
21998 The purpose of these operands is to distinguish between cases
21999 where the endian-ness of the values is important (for example
22000 when they are added together), and cases where the endian-ness
22001 is irrelevant, but the order of register operations is important.
22002 For example when loading a value from memory into a register
22003 pair, the endian-ness does not matter. Provided that the value
22004 from the lower memory address is put into the lower numbered
22005 register, and the value from the higher address is put into the
22006 higher numbered register, the load will work regardless of whether
22007 the value being loaded is big-wordian or little-wordian. The
22008 order of the two register loads can matter however, if the address
22009 of the memory location is actually held in one of the registers
22010 being overwritten by the load.
22012 The 'Q' and 'R' constraints are also available for 64-bit
22013 constants. */
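/* For illustration, with a DImode value held in the pair r4/r5 on a target
   where WORDS_BIG_ENDIAN is false: %Q prints r4 (least significant word),
   %R prints r5 (most significant word) and %H prints r5 (the higher-numbered
   register).  When WORDS_BIG_ENDIAN is true, %Q and %R swap while %H still
   prints r5.  */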
22014 case 'Q':
22015 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
22017 rtx part = gen_lowpart (SImode, x);
22018 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, INTVAL (part));
22019 return;
22022 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
22024 output_operand_lossage ("invalid operand for code '%c'", code);
22025 return;
22028 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 1 : 0));
22029 return;
22031 case 'R':
22032 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
22034 machine_mode mode = GET_MODE (x);
22035 rtx part;
22037 if (mode == VOIDmode)
22038 mode = DImode;
22039 part = gen_highpart_mode (SImode, mode, x);
22040 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, INTVAL (part));
22041 return;
22044 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
22046 output_operand_lossage ("invalid operand for code '%c'", code);
22047 return;
22050 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 0 : 1));
22051 return;
22053 case 'H':
22054 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
22056 output_operand_lossage ("invalid operand for code '%c'", code);
22057 return;
22060 asm_fprintf (stream, "%r", REGNO (x) + 1);
22061 return;
22063 case 'J':
22064 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
22066 output_operand_lossage ("invalid operand for code '%c'", code);
22067 return;
22070 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 3 : 2));
22071 return;
22073 case 'K':
22074 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
22076 output_operand_lossage ("invalid operand for code '%c'", code);
22077 return;
22080 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 2 : 3));
22081 return;
22083 case 'm':
22084 asm_fprintf (stream, "%r",
22085 REG_P (XEXP (x, 0))
22086 ? REGNO (XEXP (x, 0)) : REGNO (XEXP (XEXP (x, 0), 0)));
22087 return;
22089 case 'M':
22090 asm_fprintf (stream, "{%r-%r}",
22091 REGNO (x),
22092 REGNO (x) + ARM_NUM_REGS (GET_MODE (x)) - 1);
22093 return;
22095 /* Like 'M', but writing doubleword vector registers, for use by Neon
22096 insns. */
22097 case 'h':
22099 int regno = (REGNO (x) - FIRST_VFP_REGNUM) / 2;
22100 int numregs = ARM_NUM_REGS (GET_MODE (x)) / 2;
22101 if (numregs == 1)
22102 asm_fprintf (stream, "{d%d}", regno);
22103 else
22104 asm_fprintf (stream, "{d%d-d%d}", regno, regno + numregs - 1);
22106 return;
22108 case 'd':
22109 /* CONST_TRUE_RTX means always -- that's the default. */
22110 if (x == const_true_rtx)
22111 return;
22113 if (!COMPARISON_P (x))
22115 output_operand_lossage ("invalid operand for code '%c'", code);
22116 return;
22119 fputs (arm_condition_codes[get_arm_condition_code (x)],
22120 stream);
22121 return;
22123 case 'D':
22124 /* CONST_TRUE_RTX means not always -- i.e. never. We shouldn't ever
22125 want to do that. */
22126 if (x == const_true_rtx)
22128 output_operand_lossage ("instruction never executed");
22129 return;
22131 if (!COMPARISON_P (x))
22133 output_operand_lossage ("invalid operand for code '%c'", code);
22134 return;
22137 fputs (arm_condition_codes[ARM_INVERSE_CONDITION_CODE
22138 (get_arm_condition_code (x))],
22139 stream);
22140 return;
22142 case 's':
22143 case 'V':
22144 case 'W':
22145 case 'X':
22146 case 'Y':
22147 case 'Z':
22148 /* Former Maverick support, removed after GCC-4.7. */
22149 output_operand_lossage ("obsolete Maverick format code '%c'", code);
22150 return;
22152 case 'U':
22153 if (!REG_P (x)
22154 || REGNO (x) < FIRST_IWMMXT_GR_REGNUM
22155 || REGNO (x) > LAST_IWMMXT_GR_REGNUM)
22156 /* Bad value for wCG register number. */
22158 output_operand_lossage ("invalid operand for code '%c'", code);
22159 return;
22162 else
22163 fprintf (stream, "%d", REGNO (x) - FIRST_IWMMXT_GR_REGNUM);
22164 return;
22166 /* Print an iWMMXt control register name. */
22167 case 'w':
22168 if (!CONST_INT_P (x)
22169 || INTVAL (x) < 0
22170 || INTVAL (x) >= 16)
22171 /* Bad value for wC register number. */
22173 output_operand_lossage ("invalid operand for code '%c'", code);
22174 return;
22177 else
22179 static const char * wc_reg_names [16] =
22181 "wCID", "wCon", "wCSSF", "wCASF",
22182 "wC4", "wC5", "wC6", "wC7",
22183 "wCGR0", "wCGR1", "wCGR2", "wCGR3",
22184 "wC12", "wC13", "wC14", "wC15"
22187 fputs (wc_reg_names [INTVAL (x)], stream);
22189 return;
22191 /* Print the high single-precision register of a VFP double-precision
22192 register. */
22193 case 'p':
22195 machine_mode mode = GET_MODE (x);
22196 int regno;
22198 if (GET_MODE_SIZE (mode) != 8 || !REG_P (x))
22200 output_operand_lossage ("invalid operand for code '%c'", code);
22201 return;
22204 regno = REGNO (x);
22205 if (!VFP_REGNO_OK_FOR_DOUBLE (regno))
22207 output_operand_lossage ("invalid operand for code '%c'", code);
22208 return;
22211 fprintf (stream, "s%d", regno - FIRST_VFP_REGNUM + 1);
22213 return;
22215 /* Print a VFP/Neon double precision or quad precision register name. */
22216 case 'P':
22217 case 'q':
22219 machine_mode mode = GET_MODE (x);
22220 int is_quad = (code == 'q');
22221 int regno;
22223 if (GET_MODE_SIZE (mode) != (is_quad ? 16 : 8))
22225 output_operand_lossage ("invalid operand for code '%c'", code);
22226 return;
22229 if (!REG_P (x)
22230 || !IS_VFP_REGNUM (REGNO (x)))
22232 output_operand_lossage ("invalid operand for code '%c'", code);
22233 return;
22236 regno = REGNO (x);
22237 if ((is_quad && !NEON_REGNO_OK_FOR_QUAD (regno))
22238 || (!is_quad && !VFP_REGNO_OK_FOR_DOUBLE (regno)))
22240 output_operand_lossage ("invalid operand for code '%c'", code);
22241 return;
22244 fprintf (stream, "%c%d", is_quad ? 'q' : 'd',
22245 (regno - FIRST_VFP_REGNUM) >> (is_quad ? 2 : 1));
22247 return;
22249 /* These two codes print the low/high doubleword register of a Neon quad
22250 register, respectively. For pair-structure types, can also print
22251 low/high quadword registers. */
22252 case 'e':
22253 case 'f':
22255 machine_mode mode = GET_MODE (x);
22256 int regno;
22258 if ((GET_MODE_SIZE (mode) != 16
22259 && GET_MODE_SIZE (mode) != 32) || !REG_P (x))
22261 output_operand_lossage ("invalid operand for code '%c'", code);
22262 return;
22265 regno = REGNO (x);
22266 if (!NEON_REGNO_OK_FOR_QUAD (regno))
22268 output_operand_lossage ("invalid operand for code '%c'", code);
22269 return;
22272 if (GET_MODE_SIZE (mode) == 16)
22273 fprintf (stream, "d%d", ((regno - FIRST_VFP_REGNUM) >> 1)
22274 + (code == 'f' ? 1 : 0));
22275 else
22276 fprintf (stream, "q%d", ((regno - FIRST_VFP_REGNUM) >> 2)
22277 + (code == 'f' ? 1 : 0));
22279 return;
22281 /* Print a VFPv3 floating-point constant, represented as an integer
22282 index. */
22283 case 'G':
22285 int index = vfp3_const_double_index (x);
22286 gcc_assert (index != -1);
22287 fprintf (stream, "%d", index);
22289 return;
22291 /* Print bits representing opcode features for Neon.
22293 Bit 0 is 1 for signed, 0 for unsigned. Floats count as signed
22294 and polynomials as unsigned.
22296 Bit 1 is 1 for floats and polynomials, 0 for ordinary integers.
22298 Bit 2 is 1 for rounding functions, 0 otherwise. */
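/* For illustration: an operand of 1 (signed integer) makes %T print 's',
   %F print 'i' and %t print 's'; an operand of 2 (polynomial) makes %T
   print 'p' and %t print 'u'; an operand of 5 (signed, rounding)
   additionally makes %O print 'r'.  */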
22300 /* Identify the type as 's', 'u', 'p' or 'f'. */
22301 case 'T':
22303 HOST_WIDE_INT bits = INTVAL (x);
22304 fputc ("uspf"[bits & 3], stream);
22306 return;
22308 /* Likewise, but signed and unsigned integers are both 'i'. */
22309 case 'F':
22311 HOST_WIDE_INT bits = INTVAL (x);
22312 fputc ("iipf"[bits & 3], stream);
22314 return;
22316 /* As for 'T', but emit 'u' instead of 'p'. */
22317 case 't':
22319 HOST_WIDE_INT bits = INTVAL (x);
22320 fputc ("usuf"[bits & 3], stream);
22322 return;
22324 /* Bit 2: rounding (vs none). */
22325 case 'O':
22327 HOST_WIDE_INT bits = INTVAL (x);
22328 fputs ((bits & 4) != 0 ? "r" : "", stream);
22330 return;
22332 /* Memory operand for vld1/vst1 instruction. */
22333 case 'A':
22335 rtx addr;
22336 bool postinc = FALSE;
22337 rtx postinc_reg = NULL;
22338 unsigned align, memsize, align_bits;
22340 gcc_assert (MEM_P (x));
22341 addr = XEXP (x, 0);
22342 if (GET_CODE (addr) == POST_INC)
22344 postinc = 1;
22345 addr = XEXP (addr, 0);
22347 if (GET_CODE (addr) == POST_MODIFY)
22349 postinc_reg = XEXP( XEXP (addr, 1), 1);
22350 addr = XEXP (addr, 0);
22352 asm_fprintf (stream, "[%r", REGNO (addr));
22354 /* We know the alignment of this access, so we can emit a hint in the
22355 instruction (for some alignments) as an aid to the memory subsystem
22356 of the target. */
22357 align = MEM_ALIGN (x) >> 3;
22358 memsize = MEM_SIZE (x);
22360 /* Only certain alignment specifiers are supported by the hardware. */
22361 if (memsize == 32 && (align % 32) == 0)
22362 align_bits = 256;
22363 else if ((memsize == 16 || memsize == 32) && (align % 16) == 0)
22364 align_bits = 128;
22365 else if (memsize >= 8 && (align % 8) == 0)
22366 align_bits = 64;
22367 else
22368 align_bits = 0;
22370 if (align_bits != 0)
22371 asm_fprintf (stream, ":%d", align_bits);
22373 asm_fprintf (stream, "]");
22375 if (postinc)
22376 fputs("!", stream);
22377 if (postinc_reg)
22378 asm_fprintf (stream, ", %r", REGNO (postinc_reg));
22380 return;
22382 case 'C':
22384 rtx addr;
22386 gcc_assert (MEM_P (x));
22387 addr = XEXP (x, 0);
22388 gcc_assert (REG_P (addr));
22389 asm_fprintf (stream, "[%r]", REGNO (addr));
22391 return;
22393 /* Translate an S register number into a D register number and element index. */
22394 case 'y':
22396 machine_mode mode = GET_MODE (x);
22397 int regno;
22399 if (GET_MODE_SIZE (mode) != 4 || !REG_P (x))
22401 output_operand_lossage ("invalid operand for code '%c'", code);
22402 return;
22405 regno = REGNO (x);
22406 if (!VFP_REGNO_OK_FOR_SINGLE (regno))
22408 output_operand_lossage ("invalid operand for code '%c'", code);
22409 return;
22412 regno = regno - FIRST_VFP_REGNUM;
22413 fprintf (stream, "d%d[%d]", regno / 2, regno % 2);
22415 return;
22417 case 'v':
22418 gcc_assert (CONST_DOUBLE_P (x));
22419 int result;
22420 result = vfp3_const_double_for_fract_bits (x);
22421 if (result == 0)
22422 result = vfp3_const_double_for_bits (x);
22423 fprintf (stream, "#%d", result);
22424 return;
22426 /* Register specifier for vld1.16/vst1.16. Translate the S register
22427 number into a D register number and element index. */
22428 case 'z':
22430 machine_mode mode = GET_MODE (x);
22431 int regno;
22433 if (GET_MODE_SIZE (mode) != 2 || !REG_P (x))
22435 output_operand_lossage ("invalid operand for code '%c'", code);
22436 return;
22439 regno = REGNO (x);
22440 if (!VFP_REGNO_OK_FOR_SINGLE (regno))
22442 output_operand_lossage ("invalid operand for code '%c'", code);
22443 return;
22446 regno = regno - FIRST_VFP_REGNUM;
22447 fprintf (stream, "d%d[%d]", regno/2, ((regno % 2) ? 2 : 0));
22449 return;
22451 default:
22452 if (x == 0)
22454 output_operand_lossage ("missing operand");
22455 return;
22458 switch (GET_CODE (x))
22460 case REG:
22461 asm_fprintf (stream, "%r", REGNO (x));
22462 break;
22464 case MEM:
22465 output_address (GET_MODE (x), XEXP (x, 0));
22466 break;
22468 case CONST_DOUBLE:
22470 char fpstr[20];
22471 real_to_decimal (fpstr, CONST_DOUBLE_REAL_VALUE (x),
22472 sizeof (fpstr), 0, 1);
22473 fprintf (stream, "#%s", fpstr);
22475 break;
22477 default:
22478 gcc_assert (GET_CODE (x) != NEG);
22479 fputc ('#', stream);
22480 if (GET_CODE (x) == HIGH)
22482 fputs (":lower16:", stream);
22483 x = XEXP (x, 0);
22486 output_addr_const (stream, x);
22487 break;
22492 /* Target hook for printing a memory address. */
22493 static void
22494 arm_print_operand_address (FILE *stream, machine_mode mode, rtx x)
22496 if (TARGET_32BIT)
22498 int is_minus = GET_CODE (x) == MINUS;
22500 if (REG_P (x))
22501 asm_fprintf (stream, "[%r]", REGNO (x));
22502 else if (GET_CODE (x) == PLUS || is_minus)
22504 rtx base = XEXP (x, 0);
22505 rtx index = XEXP (x, 1);
22506 HOST_WIDE_INT offset = 0;
22507 if (!REG_P (base)
22508 || (REG_P (index) && REGNO (index) == SP_REGNUM))
22510 /* Ensure that BASE is a register
22511 (one of them must be).
22512 Also ensure that SP is not used as an index register. */
22513 std::swap (base, index);
22515 switch (GET_CODE (index))
22517 case CONST_INT:
22518 offset = INTVAL (index);
22519 if (is_minus)
22520 offset = -offset;
22521 asm_fprintf (stream, "[%r, #%wd]",
22522 REGNO (base), offset);
22523 break;
22525 case REG:
22526 asm_fprintf (stream, "[%r, %s%r]",
22527 REGNO (base), is_minus ? "-" : "",
22528 REGNO (index));
22529 break;
22531 case MULT:
22532 case ASHIFTRT:
22533 case LSHIFTRT:
22534 case ASHIFT:
22535 case ROTATERT:
22537 asm_fprintf (stream, "[%r, %s%r",
22538 REGNO (base), is_minus ? "-" : "",
22539 REGNO (XEXP (index, 0)));
22540 arm_print_operand (stream, index, 'S');
22541 fputs ("]", stream);
22542 break;
22545 default:
22546 gcc_unreachable ();
22549 else if (GET_CODE (x) == PRE_INC || GET_CODE (x) == POST_INC
22550 || GET_CODE (x) == PRE_DEC || GET_CODE (x) == POST_DEC)
22552 gcc_assert (REG_P (XEXP (x, 0)));
22554 if (GET_CODE (x) == PRE_DEC || GET_CODE (x) == PRE_INC)
22555 asm_fprintf (stream, "[%r, #%s%d]!",
22556 REGNO (XEXP (x, 0)),
22557 GET_CODE (x) == PRE_DEC ? "-" : "",
22558 GET_MODE_SIZE (mode));
22559 else
22560 asm_fprintf (stream, "[%r], #%s%d",
22561 REGNO (XEXP (x, 0)),
22562 GET_CODE (x) == POST_DEC ? "-" : "",
22563 GET_MODE_SIZE (mode));
22565 else if (GET_CODE (x) == PRE_MODIFY)
22567 asm_fprintf (stream, "[%r, ", REGNO (XEXP (x, 0)));
22568 if (CONST_INT_P (XEXP (XEXP (x, 1), 1)))
22569 asm_fprintf (stream, "#%wd]!",
22570 INTVAL (XEXP (XEXP (x, 1), 1)));
22571 else
22572 asm_fprintf (stream, "%r]!",
22573 REGNO (XEXP (XEXP (x, 1), 1)));
22575 else if (GET_CODE (x) == POST_MODIFY)
22577 asm_fprintf (stream, "[%r], ", REGNO (XEXP (x, 0)));
22578 if (CONST_INT_P (XEXP (XEXP (x, 1), 1)))
22579 asm_fprintf (stream, "#%wd",
22580 INTVAL (XEXP (XEXP (x, 1), 1)));
22581 else
22582 asm_fprintf (stream, "%r",
22583 REGNO (XEXP (XEXP (x, 1), 1)));
22585 else output_addr_const (stream, x);
22587 else
22589 if (REG_P (x))
22590 asm_fprintf (stream, "[%r]", REGNO (x));
22591 else if (GET_CODE (x) == POST_INC)
22592 asm_fprintf (stream, "%r!", REGNO (XEXP (x, 0)));
22593 else if (GET_CODE (x) == PLUS)
22595 gcc_assert (REG_P (XEXP (x, 0)));
22596 if (CONST_INT_P (XEXP (x, 1)))
22597 asm_fprintf (stream, "[%r, #%wd]",
22598 REGNO (XEXP (x, 0)),
22599 INTVAL (XEXP (x, 1)));
22600 else
22601 asm_fprintf (stream, "[%r, %r]",
22602 REGNO (XEXP (x, 0)),
22603 REGNO (XEXP (x, 1)));
22605 else
22606 output_addr_const (stream, x);
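/* Some illustrative 32-bit address expansions (register choices are
   arbitrary): (reg r4) prints [r4]; (plus (reg r4) (const_int 8)) prints
   [r4, #8]; (minus (reg r4) (reg r5)) prints [r4, -r5]; a PRE_INC in SImode
   prints [r4, #4]! and a POST_DEC prints [r4], #-4.  */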
22610 /* Target hook for indicating whether a punctuation character for
22611 TARGET_PRINT_OPERAND is valid. */
22612 static bool
22613 arm_print_operand_punct_valid_p (unsigned char code)
22615 return (code == '@' || code == '|' || code == '.'
22616 || code == '(' || code == ')' || code == '#'
22617 || (TARGET_32BIT && (code == '?'))
22618 || (TARGET_THUMB2 && (code == '!'))
22619 || (TARGET_THUMB && (code == '_')));
22622 /* Target hook for assembling integer objects. The ARM version needs to
22623 handle word-sized values specially. */
22624 static bool
22625 arm_assemble_integer (rtx x, unsigned int size, int aligned_p)
22627 machine_mode mode;
22629 if (size == UNITS_PER_WORD && aligned_p)
22631 fputs ("\t.word\t", asm_out_file);
22632 output_addr_const (asm_out_file, x);
22634 /* Mark symbols as position independent. We only do this in the
22635 .text segment, not in the .data segment. */
22636 if (NEED_GOT_RELOC && flag_pic && making_const_table &&
22637 (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF))
22639 /* See legitimize_pic_address for an explanation of the
22640 TARGET_VXWORKS_RTP check. */
22641 /* References to weak symbols cannot be resolved locally:
22642 they may be overridden by a non-weak definition at link
22643 time. */
22644 if (!arm_pic_data_is_text_relative
22645 || (GET_CODE (x) == SYMBOL_REF
22646 && (!SYMBOL_REF_LOCAL_P (x)
22647 || (SYMBOL_REF_DECL (x)
22648 ? DECL_WEAK (SYMBOL_REF_DECL (x)) : 0))))
22649 fputs ("(GOT)", asm_out_file);
22650 else
22651 fputs ("(GOTOFF)", asm_out_file);
22653 fputc ('\n', asm_out_file);
22654 return true;
22657 mode = GET_MODE (x);
22659 if (arm_vector_mode_supported_p (mode))
22661 int i, units;
22663 gcc_assert (GET_CODE (x) == CONST_VECTOR);
22665 units = CONST_VECTOR_NUNITS (x);
22666 size = GET_MODE_UNIT_SIZE (mode);
22668 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
22669 for (i = 0; i < units; i++)
22671 rtx elt = CONST_VECTOR_ELT (x, i);
22672 assemble_integer
22673 (elt, size, i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT, 1);
22675 else
22676 for (i = 0; i < units; i++)
22678 rtx elt = CONST_VECTOR_ELT (x, i);
22679 assemble_real
22680 (*CONST_DOUBLE_REAL_VALUE (elt),
22681 as_a <scalar_float_mode> (GET_MODE_INNER (mode)),
22682 i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT);
22685 return true;
22688 return default_assemble_integer (x, size, aligned_p);
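/* For illustration, with a hypothetical symbol foo: a word-sized, aligned
   reference emitted into a PIC constant table comes out as
   ".word foo(GOTOFF)" when the data is text-relative and foo binds locally
   and is not weak, and as ".word foo(GOT)" otherwise.  */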
22691 static void
22692 arm_elf_asm_cdtor (rtx symbol, int priority, bool is_ctor)
22694 section *s;
22696 if (!TARGET_AAPCS_BASED)
22698 (is_ctor ?
22699 default_named_section_asm_out_constructor
22700 : default_named_section_asm_out_destructor) (symbol, priority);
22701 return;
22704 /* Put these in the .init_array section, using a special relocation. */
22705 if (priority != DEFAULT_INIT_PRIORITY)
22707 char buf[18];
22708 sprintf (buf, "%s.%.5u",
22709 is_ctor ? ".init_array" : ".fini_array",
22710 priority);
22711 s = get_section (buf, SECTION_WRITE | SECTION_NOTYPE, NULL_TREE);
22713 else if (is_ctor)
22714 s = ctors_section;
22715 else
22716 s = dtors_section;
22718 switch_to_section (s);
22719 assemble_align (POINTER_SIZE);
22720 fputs ("\t.word\t", asm_out_file);
22721 output_addr_const (asm_out_file, symbol);
22722 fputs ("(target1)\n", asm_out_file);
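/* For illustration: on an AAPCS-based target a constructor registered with
   priority 42 lands in a section named ".init_array.00042" (the %.5u format
   zero-pads the priority) and its entry is emitted as
   ".word <symbol>(target1)".  */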
22725 /* Add a function to the list of static constructors. */
22727 static void
22728 arm_elf_asm_constructor (rtx symbol, int priority)
22730 arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/true);
22733 /* Add a function to the list of static destructors. */
22735 static void
22736 arm_elf_asm_destructor (rtx symbol, int priority)
22738 arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/false);
22741 /* A finite state machine takes care of noticing whether or not instructions
22742 can be conditionally executed, and thus decrease execution time and code
22743 size by deleting branch instructions. The fsm is controlled by
22744 final_prescan_insn, and controls the actions of ASM_OUTPUT_OPCODE. */
22746 /* The states of the fsm controlling condition codes are:
22747 0: normal, do nothing special
22748 1: make ASM_OUTPUT_OPCODE not output this instruction
22749 2: make ASM_OUTPUT_OPCODE not output this instruction
22750 3: make instructions conditional
22751 4: make instructions conditional
22753 State transitions (state->state by whom under condition):
22754 0 -> 1 final_prescan_insn if the `target' is a label
22755 0 -> 2 final_prescan_insn if the `target' is an unconditional branch
22756 1 -> 3 ASM_OUTPUT_OPCODE after not having output the conditional branch
22757 2 -> 4 ASM_OUTPUT_OPCODE after not having output the conditional branch
22758 3 -> 0 (*targetm.asm_out.internal_label) if the `target' label is reached
22759 (the target label has CODE_LABEL_NUMBER equal to arm_target_label).
22760 4 -> 0 final_prescan_insn if the `target' unconditional branch is reached
22761 (the target insn is arm_target_insn).
22763 If the jump clobbers the conditions then we use states 2 and 4.
22765 A similar thing can be done with conditional return insns.
22767 XXX In case the `target' is an unconditional branch, this conditionalising
22768 of the instructions always reduces code size, but not always execution
22769 time. But then, I want to reduce the code size to somewhere near what
22770 /bin/cc produces. */
22772 /* In addition to this, state is maintained for Thumb-2 COND_EXEC
22773 instructions. When a COND_EXEC instruction is seen the subsequent
22774 instructions are scanned so that multiple conditional instructions can be
22775 combined into a single IT block. arm_condexec_count and arm_condexec_mask
22776 specify the length and true/false mask for the IT block. These will be
22777 decremented/zeroed by arm_asm_output_opcode as the insns are output. */
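/* As an illustrative sketch: three consecutive COND_EXEC insns predicated
   eq, eq and ne are gathered into one block whose mask records
   then/then/else, which arm_asm_output_opcode later emits as a single IT
   block (roughly "itte eq") covering all three instructions.  */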
22779 /* Returns the index of the ARM condition code string in
22780 `arm_condition_codes', or ARM_NV if the comparison is invalid.
22781 COMPARISON should be an rtx like `(eq (...) (...))'. */
22783 enum arm_cond_code
22784 maybe_get_arm_condition_code (rtx comparison)
22786 machine_mode mode = GET_MODE (XEXP (comparison, 0));
22787 enum arm_cond_code code;
22788 enum rtx_code comp_code = GET_CODE (comparison);
22790 if (GET_MODE_CLASS (mode) != MODE_CC)
22791 mode = SELECT_CC_MODE (comp_code, XEXP (comparison, 0),
22792 XEXP (comparison, 1));
22794 switch (mode)
22796 case E_CC_DNEmode: code = ARM_NE; goto dominance;
22797 case E_CC_DEQmode: code = ARM_EQ; goto dominance;
22798 case E_CC_DGEmode: code = ARM_GE; goto dominance;
22799 case E_CC_DGTmode: code = ARM_GT; goto dominance;
22800 case E_CC_DLEmode: code = ARM_LE; goto dominance;
22801 case E_CC_DLTmode: code = ARM_LT; goto dominance;
22802 case E_CC_DGEUmode: code = ARM_CS; goto dominance;
22803 case E_CC_DGTUmode: code = ARM_HI; goto dominance;
22804 case E_CC_DLEUmode: code = ARM_LS; goto dominance;
22805 case E_CC_DLTUmode: code = ARM_CC;
22807 dominance:
22808 if (comp_code == EQ)
22809 return ARM_INVERSE_CONDITION_CODE (code);
22810 if (comp_code == NE)
22811 return code;
22812 return ARM_NV;
22814 case E_CC_NOOVmode:
22815 switch (comp_code)
22817 case NE: return ARM_NE;
22818 case EQ: return ARM_EQ;
22819 case GE: return ARM_PL;
22820 case LT: return ARM_MI;
22821 default: return ARM_NV;
22824 case E_CC_Zmode:
22825 switch (comp_code)
22827 case NE: return ARM_NE;
22828 case EQ: return ARM_EQ;
22829 default: return ARM_NV;
22832 case E_CC_Nmode:
22833 switch (comp_code)
22835 case NE: return ARM_MI;
22836 case EQ: return ARM_PL;
22837 default: return ARM_NV;
22840 case E_CCFPEmode:
22841 case E_CCFPmode:
22842 /* We can handle all cases except UNEQ and LTGT. */
22843 switch (comp_code)
22845 case GE: return ARM_GE;
22846 case GT: return ARM_GT;
22847 case LE: return ARM_LS;
22848 case LT: return ARM_MI;
22849 case NE: return ARM_NE;
22850 case EQ: return ARM_EQ;
22851 case ORDERED: return ARM_VC;
22852 case UNORDERED: return ARM_VS;
22853 case UNLT: return ARM_LT;
22854 case UNLE: return ARM_LE;
22855 case UNGT: return ARM_HI;
22856 case UNGE: return ARM_PL;
22857 /* UNEQ and LTGT do not have a representation. */
22858 case UNEQ: /* Fall through. */
22859 case LTGT: /* Fall through. */
22860 default: return ARM_NV;
22863 case E_CC_SWPmode:
22864 switch (comp_code)
22866 case NE: return ARM_NE;
22867 case EQ: return ARM_EQ;
22868 case GE: return ARM_LE;
22869 case GT: return ARM_LT;
22870 case LE: return ARM_GE;
22871 case LT: return ARM_GT;
22872 case GEU: return ARM_LS;
22873 case GTU: return ARM_CC;
22874 case LEU: return ARM_CS;
22875 case LTU: return ARM_HI;
22876 default: return ARM_NV;
22879 case E_CC_Cmode:
22880 switch (comp_code)
22882 case LTU: return ARM_CS;
22883 case GEU: return ARM_CC;
22884 case NE: return ARM_CS;
22885 case EQ: return ARM_CC;
22886 default: return ARM_NV;
22889 case E_CC_CZmode:
22890 switch (comp_code)
22892 case NE: return ARM_NE;
22893 case EQ: return ARM_EQ;
22894 case GEU: return ARM_CS;
22895 case GTU: return ARM_HI;
22896 case LEU: return ARM_LS;
22897 case LTU: return ARM_CC;
22898 default: return ARM_NV;
22901 case E_CC_NCVmode:
22902 switch (comp_code)
22904 case GE: return ARM_GE;
22905 case LT: return ARM_LT;
22906 case GEU: return ARM_CS;
22907 case LTU: return ARM_CC;
22908 default: return ARM_NV;
22911 case E_CC_Vmode:
22912 switch (comp_code)
22914 case NE: return ARM_VS;
22915 case EQ: return ARM_VC;
22916 default: return ARM_NV;
22919 case E_CCmode:
22920 switch (comp_code)
22922 case NE: return ARM_NE;
22923 case EQ: return ARM_EQ;
22924 case GE: return ARM_GE;
22925 case GT: return ARM_GT;
22926 case LE: return ARM_LE;
22927 case LT: return ARM_LT;
22928 case GEU: return ARM_CS;
22929 case GTU: return ARM_HI;
22930 case LEU: return ARM_LS;
22931 case LTU: return ARM_CC;
22932 default: return ARM_NV;
22935 default: gcc_unreachable ();
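/* Illustrative note: in CC_SWPmode the flags were set by a comparison whose
   operands had been swapped, so the mapping above reverses the sense of the
   signed and unsigned tests; e.g. a GT comparison against such a CC value
   yields ARM_LT.  */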
22939 /* Like maybe_get_arm_condition_code, but never return ARM_NV. */
22940 static enum arm_cond_code
22941 get_arm_condition_code (rtx comparison)
22943 enum arm_cond_code code = maybe_get_arm_condition_code (comparison);
22944 gcc_assert (code != ARM_NV);
22945 return code;
22948 /* Implement TARGET_FIXED_CONDITION_CODE_REGS. We only have condition
22949 code registers when not targeting Thumb1. The VFP condition register
22950 only exists when generating hard-float code. */
22951 static bool
22952 arm_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
22954 if (!TARGET_32BIT)
22955 return false;
22957 *p1 = CC_REGNUM;
22958 *p2 = TARGET_HARD_FLOAT ? VFPCC_REGNUM : INVALID_REGNUM;
22959 return true;
22962 /* Tell arm_asm_output_opcode to output IT blocks for conditionally executed
22963 instructions. */
22964 void
22965 thumb2_final_prescan_insn (rtx_insn *insn)
22967 rtx_insn *first_insn = insn;
22968 rtx body = PATTERN (insn);
22969 rtx predicate;
22970 enum arm_cond_code code;
22971 int n;
22972 int mask;
22973 int max;
22975 /* max_insns_skipped in the tune was already taken into account in the
22976 cost model of ifcvt pass when generating COND_EXEC insns. At this stage
22977 just emit the IT blocks as best we can. It does not make sense to split
22978 the IT blocks. */
22979 max = MAX_INSN_PER_IT_BLOCK;
22981 /* Remove the previous insn from the count of insns to be output. */
22982 if (arm_condexec_count)
22983 arm_condexec_count--;
22985 /* Nothing to do if we are already inside a conditional block. */
22986 if (arm_condexec_count)
22987 return;
22989 if (GET_CODE (body) != COND_EXEC)
22990 return;
22992 /* Conditional jumps are implemented directly. */
22993 if (JUMP_P (insn))
22994 return;
22996 predicate = COND_EXEC_TEST (body);
22997 arm_current_cc = get_arm_condition_code (predicate);
22999 n = get_attr_ce_count (insn);
23000 arm_condexec_count = 1;
23001 arm_condexec_mask = (1 << n) - 1;
23002 arm_condexec_masklen = n;
23003 /* See if subsequent instructions can be combined into the same block. */
23004 for (;;)
23006 insn = next_nonnote_insn (insn);
23008 /* Jumping into the middle of an IT block is illegal, so a label or
23009 barrier terminates the block. */
23010 if (!NONJUMP_INSN_P (insn) && !JUMP_P (insn))
23011 break;
23013 body = PATTERN (insn);
23014 /* USE and CLOBBER aren't really insns, so just skip them. */
23015 if (GET_CODE (body) == USE
23016 || GET_CODE (body) == CLOBBER)
23017 continue;
23019 /* ??? Recognize conditional jumps, and combine them with IT blocks. */
23020 if (GET_CODE (body) != COND_EXEC)
23021 break;
23022 /* Maximum number of conditionally executed instructions in a block. */
23023 n = get_attr_ce_count (insn);
23024 if (arm_condexec_masklen + n > max)
23025 break;
23027 predicate = COND_EXEC_TEST (body);
23028 code = get_arm_condition_code (predicate);
23029 mask = (1 << n) - 1;
23030 if (arm_current_cc == code)
23031 arm_condexec_mask |= (mask << arm_condexec_masklen);
23032 else if (arm_current_cc != ARM_INVERSE_CONDITION_CODE(code))
23033 break;
23035 arm_condexec_count++;
23036 arm_condexec_masklen += n;
23038 /* A jump must be the last instruction in a conditional block. */
23039 if (JUMP_P (insn))
23040 break;
23042 /* Restore recog_data (getting the attributes of other insns can
23043 destroy this array, but final.c assumes that it remains intact
23044 across this call). */
23045 extract_constrain_insn_cached (first_insn);
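/* Illustrative example: three consecutive COND_EXEC insns predicated on
   eq, eq and ne (each expanding to one machine insn) leave
   arm_condexec_mask == 0x3 and arm_condexec_masklen == 3, which
   thumb2_asm_output_opcode below emits as a single "itte eq" block.  */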
23048 void
23049 arm_final_prescan_insn (rtx_insn *insn)
23051 /* BODY will hold the body of INSN. */
23052 rtx body = PATTERN (insn);
23054 /* This will be 1 if trying to repeat the trick, and things need to be
23055 reversed if it appears to fail. */
23056 int reverse = 0;
23058 /* If we start with a return insn, we only succeed if we find another one. */
23059 int seeking_return = 0;
23060 enum rtx_code return_code = UNKNOWN;
23062 /* START_INSN will hold the insn from where we start looking. This is the
23063 first insn after the following code_label if REVERSE is true. */
23064 rtx_insn *start_insn = insn;
23066 /* If in state 4, check if the target branch is reached, in order to
23067 change back to state 0. */
23068 if (arm_ccfsm_state == 4)
23070 if (insn == arm_target_insn)
23072 arm_target_insn = NULL;
23073 arm_ccfsm_state = 0;
23075 return;
23078 /* If in state 3, it is possible to repeat the trick, if this insn is an
23079 unconditional branch to a label, and immediately following this branch
23080 is the previous target label which is only used once, and the label this
23081 branch jumps to is not too far off. */
23082 if (arm_ccfsm_state == 3)
23084 if (simplejump_p (insn))
23086 start_insn = next_nonnote_insn (start_insn);
23087 if (BARRIER_P (start_insn))
23089 /* XXX Isn't this always a barrier? */
23090 start_insn = next_nonnote_insn (start_insn);
23092 if (LABEL_P (start_insn)
23093 && CODE_LABEL_NUMBER (start_insn) == arm_target_label
23094 && LABEL_NUSES (start_insn) == 1)
23095 reverse = TRUE;
23096 else
23097 return;
23099 else if (ANY_RETURN_P (body))
23101 start_insn = next_nonnote_insn (start_insn);
23102 if (BARRIER_P (start_insn))
23103 start_insn = next_nonnote_insn (start_insn);
23104 if (LABEL_P (start_insn)
23105 && CODE_LABEL_NUMBER (start_insn) == arm_target_label
23106 && LABEL_NUSES (start_insn) == 1)
23108 reverse = TRUE;
23109 seeking_return = 1;
23110 return_code = GET_CODE (body);
23112 else
23113 return;
23115 else
23116 return;
23119 gcc_assert (!arm_ccfsm_state || reverse);
23120 if (!JUMP_P (insn))
23121 return;
23123 /* This jump might be paralleled with a clobber of the condition codes;
23124 the jump should always come first. */
23125 if (GET_CODE (body) == PARALLEL && XVECLEN (body, 0) > 0)
23126 body = XVECEXP (body, 0, 0);
23128 if (reverse
23129 || (GET_CODE (body) == SET && GET_CODE (SET_DEST (body)) == PC
23130 && GET_CODE (SET_SRC (body)) == IF_THEN_ELSE))
23132 int insns_skipped;
23133 int fail = FALSE, succeed = FALSE;
23134 /* Flag which part of the IF_THEN_ELSE is the LABEL_REF. */
23135 int then_not_else = TRUE;
23136 rtx_insn *this_insn = start_insn;
23137 rtx label = 0;
23139 /* Register the insn jumped to. */
23140 if (reverse)
23142 if (!seeking_return)
23143 label = XEXP (SET_SRC (body), 0);
23145 else if (GET_CODE (XEXP (SET_SRC (body), 1)) == LABEL_REF)
23146 label = XEXP (XEXP (SET_SRC (body), 1), 0);
23147 else if (GET_CODE (XEXP (SET_SRC (body), 2)) == LABEL_REF)
23149 label = XEXP (XEXP (SET_SRC (body), 2), 0);
23150 then_not_else = FALSE;
23152 else if (ANY_RETURN_P (XEXP (SET_SRC (body), 1)))
23154 seeking_return = 1;
23155 return_code = GET_CODE (XEXP (SET_SRC (body), 1));
23157 else if (ANY_RETURN_P (XEXP (SET_SRC (body), 2)))
23159 seeking_return = 1;
23160 return_code = GET_CODE (XEXP (SET_SRC (body), 2));
23161 then_not_else = FALSE;
23163 else
23164 gcc_unreachable ();
23166 /* See how many insns this branch skips, and what kind of insns. If all
23167 insns are okay, and the label or unconditional branch to the same
23168 label is not too far away, succeed. */
23169 for (insns_skipped = 0;
23170 !fail && !succeed && insns_skipped++ < max_insns_skipped;)
23172 rtx scanbody;
23174 this_insn = next_nonnote_insn (this_insn);
23175 if (!this_insn)
23176 break;
23178 switch (GET_CODE (this_insn))
23180 case CODE_LABEL:
23181 /* Succeed if it is the target label, otherwise fail since
23182 control falls in from somewhere else. */
23183 if (this_insn == label)
23185 arm_ccfsm_state = 1;
23186 succeed = TRUE;
23188 else
23189 fail = TRUE;
23190 break;
23192 case BARRIER:
23193 /* Succeed if the following insn is the target label.
23194 Otherwise fail.
23195 If return insns are used then the last insn in a function
23196 will be a barrier. */
23197 this_insn = next_nonnote_insn (this_insn);
23198 if (this_insn && this_insn == label)
23200 arm_ccfsm_state = 1;
23201 succeed = TRUE;
23203 else
23204 fail = TRUE;
23205 break;
23207 case CALL_INSN:
23208 /* The AAPCS says that conditional calls should not be
23209 used since they make interworking inefficient (the
23210 linker can't transform BL<cond> into BLX). That's
23211 only a problem if the machine has BLX. */
23212 if (arm_arch5)
23214 fail = TRUE;
23215 break;
23218 /* Succeed if the following insn is the target label, or
23219 if the following two insns are a barrier and the
23220 target label. */
23221 this_insn = next_nonnote_insn (this_insn);
23222 if (this_insn && BARRIER_P (this_insn))
23223 this_insn = next_nonnote_insn (this_insn);
23225 if (this_insn && this_insn == label
23226 && insns_skipped < max_insns_skipped)
23228 arm_ccfsm_state = 1;
23229 succeed = TRUE;
23231 else
23232 fail = TRUE;
23233 break;
23235 case JUMP_INSN:
23236 /* If this is an unconditional branch to the same label, succeed.
23237 If it is to another label, do nothing. If it is conditional,
23238 fail. */
23239 /* XXX Probably, the tests for SET and the PC are
23240 unnecessary. */
23242 scanbody = PATTERN (this_insn);
23243 if (GET_CODE (scanbody) == SET
23244 && GET_CODE (SET_DEST (scanbody)) == PC)
23246 if (GET_CODE (SET_SRC (scanbody)) == LABEL_REF
23247 && XEXP (SET_SRC (scanbody), 0) == label && !reverse)
23249 arm_ccfsm_state = 2;
23250 succeed = TRUE;
23252 else if (GET_CODE (SET_SRC (scanbody)) == IF_THEN_ELSE)
23253 fail = TRUE;
23255 /* Fail if a conditional return is undesirable (e.g. on a
23256 StrongARM), but still allow this if optimizing for size. */
23257 else if (GET_CODE (scanbody) == return_code
23258 && !use_return_insn (TRUE, NULL)
23259 && !optimize_size)
23260 fail = TRUE;
23261 else if (GET_CODE (scanbody) == return_code)
23263 arm_ccfsm_state = 2;
23264 succeed = TRUE;
23266 else if (GET_CODE (scanbody) == PARALLEL)
23268 switch (get_attr_conds (this_insn))
23270 case CONDS_NOCOND:
23271 break;
23272 default:
23273 fail = TRUE;
23274 break;
23277 else
23278 fail = TRUE; /* Unrecognized jump (e.g. epilogue). */
23280 break;
23282 case INSN:
23283 /* Instructions using or affecting the condition codes make it
23284 fail. */
23285 scanbody = PATTERN (this_insn);
23286 if (!(GET_CODE (scanbody) == SET
23287 || GET_CODE (scanbody) == PARALLEL)
23288 || get_attr_conds (this_insn) != CONDS_NOCOND)
23289 fail = TRUE;
23290 break;
23292 default:
23293 break;
23296 if (succeed)
23298 if ((!seeking_return) && (arm_ccfsm_state == 1 || reverse))
23299 arm_target_label = CODE_LABEL_NUMBER (label);
23300 else
23302 gcc_assert (seeking_return || arm_ccfsm_state == 2);
23304 while (this_insn && GET_CODE (PATTERN (this_insn)) == USE)
23306 this_insn = next_nonnote_insn (this_insn);
23307 gcc_assert (!this_insn
23308 || (!BARRIER_P (this_insn)
23309 && !LABEL_P (this_insn)));
23311 if (!this_insn)
23313 /* Oh, dear! We ran off the end... give up. */
23314 extract_constrain_insn_cached (insn);
23315 arm_ccfsm_state = 0;
23316 arm_target_insn = NULL;
23317 return;
23319 arm_target_insn = this_insn;
23322 /* If REVERSE is true, ARM_CURRENT_CC needs to be inverted from
23323 what it was. */
23324 if (!reverse)
23325 arm_current_cc = get_arm_condition_code (XEXP (SET_SRC (body), 0));
23327 if (reverse || then_not_else)
23328 arm_current_cc = ARM_INVERSE_CONDITION_CODE (arm_current_cc);
23331 /* Restore recog_data (getting the attributes of other insns can
23332 destroy this array, but final.c assumes that it remains intact
23333 across this call). */
23334 extract_constrain_insn_cached (insn);
23338 /* Output IT instructions. */
23339 void
23340 thumb2_asm_output_opcode (FILE * stream)
23342 char buff[5];
23343 int n;
23345 if (arm_condexec_mask)
23347 for (n = 0; n < arm_condexec_masklen; n++)
23348 buff[n] = (arm_condexec_mask & (1 << n)) ? 't' : 'e';
23349 buff[n] = 0;
23350 asm_fprintf(stream, "i%s\t%s\n\t", buff,
23351 arm_condition_codes[arm_current_cc]);
23352 arm_condexec_mask = 0;
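/* Sketch of the encoding used above: bit 0 of arm_condexec_mask is always
   set for the first insn of the block, so buff always starts with 't' and
   the "i%s" format reconstructs the full mnemonic; e.g. buff == "tte"
   prints "itte" followed by the block's base condition.  */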
23356 /* Implement TARGET_HARD_REGNO_MODE_OK. */
23357 static bool
23358 arm_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
23360 if (GET_MODE_CLASS (mode) == MODE_CC)
23361 return (regno == CC_REGNUM
23362 || (TARGET_HARD_FLOAT
23363 && regno == VFPCC_REGNUM));
23365 if (regno == CC_REGNUM && GET_MODE_CLASS (mode) != MODE_CC)
23366 return false;
23368 if (TARGET_THUMB1)
23369 /* For the Thumb we only allow values bigger than SImode in
23370 registers 0 - 6, so that there is always a second low
23371 register available to hold the upper part of the value.
23372 We probably ought to ensure that the register is the
23373 start of an even numbered register pair. */
23374 return (ARM_NUM_REGS (mode) < 2) || (regno < LAST_LO_REGNUM);
23376 if (TARGET_HARD_FLOAT && IS_VFP_REGNUM (regno))
23378 if (mode == SFmode || mode == SImode)
23379 return VFP_REGNO_OK_FOR_SINGLE (regno);
23381 if (mode == DFmode)
23382 return VFP_REGNO_OK_FOR_DOUBLE (regno);
23384 if (mode == HFmode)
23385 return VFP_REGNO_OK_FOR_SINGLE (regno);
23387 /* VFP registers can hold HImode values. */
23388 if (mode == HImode)
23389 return VFP_REGNO_OK_FOR_SINGLE (regno);
23391 if (TARGET_NEON)
23392 return (VALID_NEON_DREG_MODE (mode) && VFP_REGNO_OK_FOR_DOUBLE (regno))
23393 || (VALID_NEON_QREG_MODE (mode)
23394 && NEON_REGNO_OK_FOR_QUAD (regno))
23395 || (mode == TImode && NEON_REGNO_OK_FOR_NREGS (regno, 2))
23396 || (mode == EImode && NEON_REGNO_OK_FOR_NREGS (regno, 3))
23397 || (mode == OImode && NEON_REGNO_OK_FOR_NREGS (regno, 4))
23398 || (mode == CImode && NEON_REGNO_OK_FOR_NREGS (regno, 6))
23399 || (mode == XImode && NEON_REGNO_OK_FOR_NREGS (regno, 8));
23401 return false;
23404 if (TARGET_REALLY_IWMMXT)
23406 if (IS_IWMMXT_GR_REGNUM (regno))
23407 return mode == SImode;
23409 if (IS_IWMMXT_REGNUM (regno))
23410 return VALID_IWMMXT_REG_MODE (mode);
23413 /* We allow almost any value to be stored in the general registers.
23414 Restrict doubleword quantities to even register pairs in ARM state
23415 so that we can use ldrd. Do not allow very large Neon structure
23416 opaque modes in general registers; they would use too many. */
23417 if (regno <= LAST_ARM_REGNUM)
23419 if (ARM_NUM_REGS (mode) > 4)
23420 return false;
23422 if (TARGET_THUMB2)
23423 return true;
23425 return !(TARGET_LDRD && GET_MODE_SIZE (mode) > 4 && (regno & 1) != 0);
23428 if (regno == FRAME_POINTER_REGNUM
23429 || regno == ARG_POINTER_REGNUM)
23430 /* We only allow integers in the fake hard registers. */
23431 return GET_MODE_CLASS (mode) == MODE_INT;
23433 return false;
23436 /* Implement TARGET_MODES_TIEABLE_P. */
23438 static bool
23439 arm_modes_tieable_p (machine_mode mode1, machine_mode mode2)
23441 if (GET_MODE_CLASS (mode1) == GET_MODE_CLASS (mode2))
23442 return true;
23444 /* We specifically want to allow elements of "structure" modes to
23445 be tieable to the structure. This more general condition allows
23446 other rarer situations too. */
23447 if (TARGET_NEON
23448 && (VALID_NEON_DREG_MODE (mode1)
23449 || VALID_NEON_QREG_MODE (mode1)
23450 || VALID_NEON_STRUCT_MODE (mode1))
23451 && (VALID_NEON_DREG_MODE (mode2)
23452 || VALID_NEON_QREG_MODE (mode2)
23453 || VALID_NEON_STRUCT_MODE (mode2)))
23454 return true;
23456 return false;
23459 /* For efficiency and historical reasons LO_REGS, HI_REGS and CC_REGS are
23460 not used in arm mode. */
23462 enum reg_class
23463 arm_regno_class (int regno)
23465 if (regno == PC_REGNUM)
23466 return NO_REGS;
23468 if (TARGET_THUMB1)
23470 if (regno == STACK_POINTER_REGNUM)
23471 return STACK_REG;
23472 if (regno == CC_REGNUM)
23473 return CC_REG;
23474 if (regno < 8)
23475 return LO_REGS;
23476 return HI_REGS;
23479 if (TARGET_THUMB2 && regno < 8)
23480 return LO_REGS;
23482 if ( regno <= LAST_ARM_REGNUM
23483 || regno == FRAME_POINTER_REGNUM
23484 || regno == ARG_POINTER_REGNUM)
23485 return TARGET_THUMB2 ? HI_REGS : GENERAL_REGS;
23487 if (regno == CC_REGNUM || regno == VFPCC_REGNUM)
23488 return TARGET_THUMB2 ? CC_REG : NO_REGS;
23490 if (IS_VFP_REGNUM (regno))
23492 if (regno <= D7_VFP_REGNUM)
23493 return VFP_D0_D7_REGS;
23494 else if (regno <= LAST_LO_VFP_REGNUM)
23495 return VFP_LO_REGS;
23496 else
23497 return VFP_HI_REGS;
23500 if (IS_IWMMXT_REGNUM (regno))
23501 return IWMMXT_REGS;
23503 if (IS_IWMMXT_GR_REGNUM (regno))
23504 return IWMMXT_GR_REGS;
23506 return NO_REGS;
23509 /* Handle a special case when computing the offset
23510 of an argument from the frame pointer. */
23511 int
23512 arm_debugger_arg_offset (int value, rtx addr)
23514 rtx_insn *insn;
23516 /* We are only interested if dbxout_parms() failed to compute the offset. */
23517 if (value != 0)
23518 return 0;
23520 /* We can only cope with the case where the address is held in a register. */
23521 if (!REG_P (addr))
23522 return 0;
23524 /* If we are using the frame pointer to point at the argument, then
23525 an offset of 0 is correct. */
23526 if (REGNO (addr) == (unsigned) HARD_FRAME_POINTER_REGNUM)
23527 return 0;
23529 /* If we are using the stack pointer to point at the
23530 argument, then an offset of 0 is correct. */
23531 /* ??? Check this is consistent with thumb2 frame layout. */
23532 if ((TARGET_THUMB || !frame_pointer_needed)
23533 && REGNO (addr) == SP_REGNUM)
23534 return 0;
23536 /* Oh dear. The argument is pointed to by a register rather
23537 than being held in a register, or being stored at a known
23538 offset from the frame pointer. Since GDB only understands
23539 those two kinds of argument we must translate the address
23540 held in the register into an offset from the frame pointer.
23541 We do this by searching through the insns for the function
23542 looking to see where this register gets its value. If the
23543 register is initialized from the frame pointer plus an offset
23544 then we are in luck and we can continue, otherwise we give up.
23546 This code is exercised by producing debugging information
23547 for a function with arguments like this:
23549 double func (double a, double b, int c, double d) {return d;}
23551 Without this code the stab for parameter 'd' will be set to
23552 an offset of 0 from the frame pointer, rather than 8. */
23554 /* The if() statement says:
23556 If the insn is a normal instruction
23557 and if the insn is setting the value in a register
23558 and if the register being set is the register holding the address of the argument
23559 and if the address is computed by an addition
23560 that involves adding to a register
23561 which is the frame pointer
23562 a constant integer
23564 then... */
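/* Schematically, the insn being searched for looks like
   (set (reg:SI Rn) (plus:SI (reg:SI HARD_FRAME_POINTER_REGNUM) (const_int 8)))
   where Rn is the register holding the argument's address; the register
   number and the offset here are only illustrative.  */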
23566 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
23568 if ( NONJUMP_INSN_P (insn)
23569 && GET_CODE (PATTERN (insn)) == SET
23570 && REGNO (XEXP (PATTERN (insn), 0)) == REGNO (addr)
23571 && GET_CODE (XEXP (PATTERN (insn), 1)) == PLUS
23572 && REG_P (XEXP (XEXP (PATTERN (insn), 1), 0))
23573 && REGNO (XEXP (XEXP (PATTERN (insn), 1), 0)) == (unsigned) HARD_FRAME_POINTER_REGNUM
23574 && CONST_INT_P (XEXP (XEXP (PATTERN (insn), 1), 1))
23577 value = INTVAL (XEXP (XEXP (PATTERN (insn), 1), 1));
23579 break;
23583 if (value == 0)
23585 debug_rtx (addr);
23586 warning (0, "unable to compute real location of stacked parameter");
23587 value = 8; /* XXX magic hack */
23590 return value;
23593 /* Implement TARGET_PROMOTED_TYPE. */
23595 static tree
23596 arm_promoted_type (const_tree t)
23598 if (SCALAR_FLOAT_TYPE_P (t)
23599 && TYPE_PRECISION (t) == 16
23600 && TYPE_MAIN_VARIANT (t) == arm_fp16_type_node)
23601 return float_type_node;
23602 return NULL_TREE;
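/* For example, given "__fp16 a, b;" the sum a + b is evaluated in float and
   only narrowed back to __fp16 when stored (an illustrative consequence of
   returning float_type_node here).  */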
23605 /* Implement TARGET_SCALAR_MODE_SUPPORTED_P.
23606 This simply adds HFmode as a supported mode; even though we don't
23607 implement arithmetic on this type directly, it's supported by
23608 optabs conversions, much the way the double-word arithmetic is
23609 special-cased in the default hook. */
23611 static bool
23612 arm_scalar_mode_supported_p (scalar_mode mode)
23614 if (mode == HFmode)
23615 return (arm_fp16_format != ARM_FP16_FORMAT_NONE);
23616 else if (ALL_FIXED_POINT_MODE_P (mode))
23617 return true;
23618 else
23619 return default_scalar_mode_supported_p (mode);
23622 /* Set the value of FLT_EVAL_METHOD.
23623 ISO/IEC TS 18661-3 defines two values that we'd like to make use of:
23625 0: evaluate all operations and constants, whose semantic type has at
23626 most the range and precision of type float, to the range and
23627 precision of float; evaluate all other operations and constants to
23628 the range and precision of the semantic type;
23630 N, where _FloatN is a supported interchange floating type
23631 evaluate all operations and constants, whose semantic type has at
23632 most the range and precision of _FloatN type, to the range and
23633 precision of the _FloatN type; evaluate all other operations and
23634 constants to the range and precision of the semantic type;
23636 If we have the ARMv8.2-A extensions then we support _Float16 in native
23637 precision, so we should set this to 16. Otherwise, we support the type,
23638 but want to evaluate expressions in float precision, so set this to
23639 0. */
23641 static enum flt_eval_method
23642 arm_excess_precision (enum excess_precision_type type)
23644 switch (type)
23646 case EXCESS_PRECISION_TYPE_FAST:
23647 case EXCESS_PRECISION_TYPE_STANDARD:
23648 /* We can calculate either in 16-bit range and precision or
23649 32-bit range and precision. Make that decision based on whether
23650 we have native support for the ARMv8.2-A 16-bit floating-point
23651 instructions or not. */
23652 return (TARGET_VFP_FP16INST
23653 ? FLT_EVAL_METHOD_PROMOTE_TO_FLOAT16
23654 : FLT_EVAL_METHOD_PROMOTE_TO_FLOAT);
23655 case EXCESS_PRECISION_TYPE_IMPLICIT:
23656 return FLT_EVAL_METHOD_PROMOTE_TO_FLOAT16;
23657 default:
23658 gcc_unreachable ();
23660 return FLT_EVAL_METHOD_UNPREDICTABLE;
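/* Illustrative consequence: without the ARMv8.2-A FP16 instructions
   "_Float16 x = a + b;" is evaluated as (float) a + (float) b and narrowed
   back to _Float16 on the assignment; with TARGET_VFP_FP16INST the addition
   may stay in 16-bit precision.  */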
23664 /* Implement TARGET_FLOATN_MODE. Make very sure that we don't provide
23665 _Float16 if we are using anything other than ieee format for 16-bit
23666 floating point. Otherwise, punt to the default implementation. */
23667 static opt_scalar_float_mode
23668 arm_floatn_mode (int n, bool extended)
23670 if (!extended && n == 16)
23672 if (arm_fp16_format == ARM_FP16_FORMAT_IEEE)
23673 return HFmode;
23674 return opt_scalar_float_mode ();
23677 return default_floatn_mode (n, extended);
23681 /* Set up OPERANDS for a register copy from SRC to DEST, taking care
23682 not to early-clobber SRC registers in the process.
23684 We assume that the operands described by SRC and DEST represent a
23685 decomposed copy of OPERANDS[1] into OPERANDS[0]. COUNT is the
23686 number of components into which the copy has been decomposed. */
23687 void
23688 neon_disambiguate_copy (rtx *operands, rtx *dest, rtx *src, unsigned int count)
23690 unsigned int i;
23692 if (!reg_overlap_mentioned_p (operands[0], operands[1])
23693 || REGNO (operands[0]) < REGNO (operands[1]))
23695 for (i = 0; i < count; i++)
23697 operands[2 * i] = dest[i];
23698 operands[2 * i + 1] = src[i];
23701 else
23703 for (i = 0; i < count; i++)
23705 operands[2 * i] = dest[count - i - 1];
23706 operands[2 * i + 1] = src[count - i - 1];
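/* Worked example (illustrative register numbers): copying the pair
   {d2, d3} into {d3, d4} overlaps and the destination starts at the higher
   register number, so the moves are ordered d4 <- d3 first and then
   d3 <- d2, avoiding a clobber of d3 before it has been read.  */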
23711 /* Split operands into moves from op[1] + op[2] into op[0]. */
23713 void
23714 neon_split_vcombine (rtx operands[3])
23716 unsigned int dest = REGNO (operands[0]);
23717 unsigned int src1 = REGNO (operands[1]);
23718 unsigned int src2 = REGNO (operands[2]);
23719 machine_mode halfmode = GET_MODE (operands[1]);
23720 unsigned int halfregs = HARD_REGNO_NREGS (src1, halfmode);
23721 rtx destlo, desthi;
23723 if (src1 == dest && src2 == dest + halfregs)
23725 /* No-op move. Can't split to nothing; emit something. */
23726 emit_note (NOTE_INSN_DELETED);
23727 return;
23730 /* Preserve register attributes for variable tracking. */
23731 destlo = gen_rtx_REG_offset (operands[0], halfmode, dest, 0);
23732 desthi = gen_rtx_REG_offset (operands[0], halfmode, dest + halfregs,
23733 GET_MODE_SIZE (halfmode));
23735 /* Special case of reversed high/low parts. Use VSWP. */
23736 if (src2 == dest && src1 == dest + halfregs)
23738 rtx x = gen_rtx_SET (destlo, operands[1]);
23739 rtx y = gen_rtx_SET (desthi, operands[2]);
23740 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, x, y)));
23741 return;
23744 if (!reg_overlap_mentioned_p (operands[2], destlo))
23746 /* Try to avoid unnecessary moves if part of the result
23747 is in the right place already. */
23748 if (src1 != dest)
23749 emit_move_insn (destlo, operands[1]);
23750 if (src2 != dest + halfregs)
23751 emit_move_insn (desthi, operands[2]);
23753 else
23755 if (src2 != dest + halfregs)
23756 emit_move_insn (desthi, operands[2]);
23757 if (src1 != dest)
23758 emit_move_insn (destlo, operands[1]);
23762 /* Return the number (counting from 0) of
23763 the least significant set bit in MASK. */
23765 inline static int
23766 number_of_first_bit_set (unsigned mask)
23768 return ctz_hwi (mask);
23771 /* Like emit_multi_reg_push, but allowing for a different set of
23772 registers to be described as saved. MASK is the set of registers
23773 to be saved; REAL_REGS is the set of registers to be described as
23774 saved. If REAL_REGS is 0, only describe the stack adjustment. */
23776 static rtx_insn *
23777 thumb1_emit_multi_reg_push (unsigned long mask, unsigned long real_regs)
23779 unsigned long regno;
23780 rtx par[10], tmp, reg;
23781 rtx_insn *insn;
23782 int i, j;
23784 /* Build the parallel of the registers actually being stored. */
23785 for (i = 0; mask; ++i, mask &= mask - 1)
23787 regno = ctz_hwi (mask);
23788 reg = gen_rtx_REG (SImode, regno);
23790 if (i == 0)
23791 tmp = gen_rtx_UNSPEC (BLKmode, gen_rtvec (1, reg), UNSPEC_PUSH_MULT);
23792 else
23793 tmp = gen_rtx_USE (VOIDmode, reg);
23795 par[i] = tmp;
23798 tmp = plus_constant (Pmode, stack_pointer_rtx, -4 * i);
23799 tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
23800 tmp = gen_frame_mem (BLKmode, tmp);
23801 tmp = gen_rtx_SET (tmp, par[0]);
23802 par[0] = tmp;
23804 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (i, par));
23805 insn = emit_insn (tmp);
23807 /* Always build the stack adjustment note for unwind info. */
23808 tmp = plus_constant (Pmode, stack_pointer_rtx, -4 * i);
23809 tmp = gen_rtx_SET (stack_pointer_rtx, tmp);
23810 par[0] = tmp;
23812 /* Build the parallel of the registers recorded as saved for unwind. */
23813 for (j = 0; real_regs; ++j, real_regs &= real_regs - 1)
23815 regno = ctz_hwi (real_regs);
23816 reg = gen_rtx_REG (SImode, regno);
23818 tmp = plus_constant (Pmode, stack_pointer_rtx, j * 4);
23819 tmp = gen_frame_mem (SImode, tmp);
23820 tmp = gen_rtx_SET (tmp, reg);
23821 RTX_FRAME_RELATED_P (tmp) = 1;
23822 par[j + 1] = tmp;
23825 if (j == 0)
23826 tmp = par[0];
23827 else
23829 RTX_FRAME_RELATED_P (par[0]) = 1;
23830 tmp = gen_rtx_SEQUENCE (VOIDmode, gen_rtvec_v (j + 1, par));
23833 add_reg_note (insn, REG_FRAME_RELATED_EXPR, tmp);
23835 return insn;
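/* Typical uses appear in thumb1_expand_prologue below: a normal push passes
   the same mask twice (e.g. {r4-r7, lr}) so the registers are described as
   saved, while the anonymous-argument push passes REAL_REGS == 0 so that
   only the stack adjustment is recorded for unwinding.  */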
23838 /* Emit code to push or pop registers to or from the stack. F is the
23839 assembly file. MASK is the registers to pop. */
23840 static void
23841 thumb_pop (FILE *f, unsigned long mask)
23843 int regno;
23844 int lo_mask = mask & 0xFF;
23846 gcc_assert (mask);
23848 if (lo_mask == 0 && (mask & (1 << PC_REGNUM)))
23850 /* Special case. Do not generate a POP PC statement here, do it in
23851 thumb_exit() */
23852 thumb_exit (f, -1);
23853 return;
23856 fprintf (f, "\tpop\t{");
23858 /* Look at the low registers first. */
23859 for (regno = 0; regno <= LAST_LO_REGNUM; regno++, lo_mask >>= 1)
23861 if (lo_mask & 1)
23863 asm_fprintf (f, "%r", regno);
23865 if ((lo_mask & ~1) != 0)
23866 fprintf (f, ", ");
23870 if (mask & (1 << PC_REGNUM))
23872 /* Catch popping the PC. */
23873 if (TARGET_INTERWORK || TARGET_BACKTRACE || crtl->calls_eh_return
23874 || IS_CMSE_ENTRY (arm_current_func_type ()))
23876 /* The PC is never popped directly; instead
23877 it is popped into r3 and then BX is used. */
23878 fprintf (f, "}\n");
23880 thumb_exit (f, -1);
23882 return;
23884 else
23886 if (mask & 0xFF)
23887 fprintf (f, ", ");
23889 asm_fprintf (f, "%r", PC_REGNUM);
23893 fprintf (f, "}\n");
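/* For instance, a call like thumb_pop (f, (1 << 4) | (1 << 5) | (1 << PC_REGNUM))
   prints "pop {r4, r5, pc}" in the simple case; with interworking,
   backtracing, EH return or CMSE entry the PC is instead handled by
   thumb_exit as above.  (Illustrative mask only.)  */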
23896 /* Generate code to return from a thumb function.
23897 If 'reg_containing_return_addr' is -1, then the return address is
23898 actually on the stack, at the stack pointer. */
23899 static void
23900 thumb_exit (FILE *f, int reg_containing_return_addr)
23902 unsigned regs_available_for_popping;
23903 unsigned regs_to_pop;
23904 int pops_needed;
23905 unsigned available;
23906 unsigned required;
23907 machine_mode mode;
23908 int size;
23909 int restore_a4 = FALSE;
23911 /* Compute the registers we need to pop. */
23912 regs_to_pop = 0;
23913 pops_needed = 0;
23915 if (reg_containing_return_addr == -1)
23917 regs_to_pop |= 1 << LR_REGNUM;
23918 ++pops_needed;
23921 if (TARGET_BACKTRACE)
23923 /* Restore the (ARM) frame pointer and stack pointer. */
23924 regs_to_pop |= (1 << ARM_HARD_FRAME_POINTER_REGNUM) | (1 << SP_REGNUM);
23925 pops_needed += 2;
23928 /* If there is nothing to pop then just emit the BX instruction and
23929 return. */
23930 if (pops_needed == 0)
23932 if (crtl->calls_eh_return)
23933 asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);
23935 if (IS_CMSE_ENTRY (arm_current_func_type ()))
23937 asm_fprintf (f, "\tmsr\tAPSR_nzcvq, %r\n",
23938 reg_containing_return_addr);
23939 asm_fprintf (f, "\tbxns\t%r\n", reg_containing_return_addr);
23941 else
23942 asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
23943 return;
23945 /* Otherwise if we are not supporting interworking and we have not created
23946 a backtrace structure and the function was not entered in ARM mode then
23947 just pop the return address straight into the PC. */
23948 else if (!TARGET_INTERWORK
23949 && !TARGET_BACKTRACE
23950 && !is_called_in_ARM_mode (current_function_decl)
23951 && !crtl->calls_eh_return
23952 && !IS_CMSE_ENTRY (arm_current_func_type ()))
23954 asm_fprintf (f, "\tpop\t{%r}\n", PC_REGNUM);
23955 return;
23958 /* Find out how many of the (return) argument registers we can corrupt. */
23959 regs_available_for_popping = 0;
23961 /* If returning via __builtin_eh_return, the bottom three registers
23962 all contain information needed for the return. */
23963 if (crtl->calls_eh_return)
23964 size = 12;
23965 else
23967 /* We can deduce the registers used from the function's
23968 return value. This is more reliable than examining
23969 df_regs_ever_live_p () because that will be set if the register is
23970 ever used in the function, not just if the register is used
23971 to hold a return value. */
23973 if (crtl->return_rtx != 0)
23974 mode = GET_MODE (crtl->return_rtx);
23975 else
23976 mode = DECL_MODE (DECL_RESULT (current_function_decl));
23978 size = GET_MODE_SIZE (mode);
23980 if (size == 0)
23982 /* In a void function we can use any argument register.
23983 In a function that returns a structure on the stack
23984 we can use the second and third argument registers. */
23985 if (mode == VOIDmode)
23986 regs_available_for_popping =
23987 (1 << ARG_REGISTER (1))
23988 | (1 << ARG_REGISTER (2))
23989 | (1 << ARG_REGISTER (3));
23990 else
23991 regs_available_for_popping =
23992 (1 << ARG_REGISTER (2))
23993 | (1 << ARG_REGISTER (3));
23995 else if (size <= 4)
23996 regs_available_for_popping =
23997 (1 << ARG_REGISTER (2))
23998 | (1 << ARG_REGISTER (3));
23999 else if (size <= 8)
24000 regs_available_for_popping =
24001 (1 << ARG_REGISTER (3));
24004 /* Match registers to be popped with registers into which we pop them. */
24005 for (available = regs_available_for_popping,
24006 required = regs_to_pop;
24007 required != 0 && available != 0;
24008 available &= ~(available & - available),
24009 required &= ~(required & - required))
24010 -- pops_needed;
24012 /* If we have any popping registers left over, remove them. */
24013 if (available > 0)
24014 regs_available_for_popping &= ~available;
24016 /* Otherwise if we need another popping register we can use
24017 the fourth argument register. */
24018 else if (pops_needed)
24020 /* If we have not found any free argument registers and
24021 reg a4 contains the return address, we must move it. */
24022 if (regs_available_for_popping == 0
24023 && reg_containing_return_addr == LAST_ARG_REGNUM)
24025 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
24026 reg_containing_return_addr = LR_REGNUM;
24028 else if (size > 12)
24030 /* Register a4 is being used to hold part of the return value,
24031 but we have dire need of a free, low register. */
24032 restore_a4 = TRUE;
24034 asm_fprintf (f, "\tmov\t%r, %r\n",IP_REGNUM, LAST_ARG_REGNUM);
24037 if (reg_containing_return_addr != LAST_ARG_REGNUM)
24039 /* The fourth argument register is available. */
24040 regs_available_for_popping |= 1 << LAST_ARG_REGNUM;
24042 --pops_needed;
24046 /* Pop as many registers as we can. */
24047 thumb_pop (f, regs_available_for_popping);
24049 /* Process the registers we popped. */
24050 if (reg_containing_return_addr == -1)
24052 /* The return address was popped into the lowest numbered register. */
24053 regs_to_pop &= ~(1 << LR_REGNUM);
24055 reg_containing_return_addr =
24056 number_of_first_bit_set (regs_available_for_popping);
24058 /* Remove this register from the mask of available registers, so that
24059 the return address will not be corrupted by further pops. */
24060 regs_available_for_popping &= ~(1 << reg_containing_return_addr);
24063 /* If we popped other registers then handle them here. */
24064 if (regs_available_for_popping)
24066 int frame_pointer;
24068 /* Work out which register currently contains the frame pointer. */
24069 frame_pointer = number_of_first_bit_set (regs_available_for_popping);
24071 /* Move it into the correct place. */
24072 asm_fprintf (f, "\tmov\t%r, %r\n",
24073 ARM_HARD_FRAME_POINTER_REGNUM, frame_pointer);
24075 /* (Temporarily) remove it from the mask of popped registers. */
24076 regs_available_for_popping &= ~(1 << frame_pointer);
24077 regs_to_pop &= ~(1 << ARM_HARD_FRAME_POINTER_REGNUM);
24079 if (regs_available_for_popping)
24081 int stack_pointer;
24083 /* We popped the stack pointer as well,
24084 find the register that contains it. */
24085 stack_pointer = number_of_first_bit_set (regs_available_for_popping);
24087 /* Move it into the stack register. */
24088 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, stack_pointer);
24090 /* At this point we have popped all necessary registers, so
24091 do not worry about restoring regs_available_for_popping
24092 to its correct value:
24094 assert (pops_needed == 0)
24095 assert (regs_available_for_popping == (1 << frame_pointer))
24096 assert (regs_to_pop == (1 << STACK_POINTER)) */
24098 else
24100 /* Since we have just moved the popped value into the frame
24101 pointer, the popping register is available for reuse, and
24102 we know that we still have the stack pointer left to pop. */
24103 regs_available_for_popping |= (1 << frame_pointer);
24107 /* If we still have registers left on the stack, but we no longer have
24108 any registers into which we can pop them, then we must move the return
24109 address into the link register and make available the register that
24110 contained it. */
24111 if (regs_available_for_popping == 0 && pops_needed > 0)
24113 regs_available_for_popping |= 1 << reg_containing_return_addr;
24115 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM,
24116 reg_containing_return_addr);
24118 reg_containing_return_addr = LR_REGNUM;
24121 /* If we have registers left on the stack then pop some more.
24122 We know that at most we will want to pop FP and SP. */
24123 if (pops_needed > 0)
24125 int popped_into;
24126 int move_to;
24128 thumb_pop (f, regs_available_for_popping);
24130 /* We have popped either FP or SP.
24131 Move whichever one it is into the correct register. */
24132 popped_into = number_of_first_bit_set (regs_available_for_popping);
24133 move_to = number_of_first_bit_set (regs_to_pop);
24135 asm_fprintf (f, "\tmov\t%r, %r\n", move_to, popped_into);
24136 --pops_needed;
24139 /* If we still have not popped everything then we must have only
24140 had one register available to us and we are now popping the SP. */
24141 if (pops_needed > 0)
24143 int popped_into;
24145 thumb_pop (f, regs_available_for_popping);
24147 popped_into = number_of_first_bit_set (regs_available_for_popping);
24149 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, popped_into);
24151 assert (regs_to_pop == (1 << STACK_POINTER))
24152 assert (pops_needed == 1)
24156 /* If necessary restore the a4 register. */
24157 if (restore_a4)
24159 if (reg_containing_return_addr != LR_REGNUM)
24161 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
24162 reg_containing_return_addr = LR_REGNUM;
24165 asm_fprintf (f, "\tmov\t%r, %r\n", LAST_ARG_REGNUM, IP_REGNUM);
24168 if (crtl->calls_eh_return)
24169 asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);
24171 /* Return to caller. */
24172 if (IS_CMSE_ENTRY (arm_current_func_type ()))
24174 /* This is for the cases where LR is not being used to contain the return
24175 address. It may therefore contain information that we might not want
24176 to leak, hence it must be cleared. The value in R0 will never be a
24177 secret at this point, so it is safe to use it, see the clearing code
24178 in 'cmse_nonsecure_entry_clear_before_return'. */
24179 if (reg_containing_return_addr != LR_REGNUM)
24180 asm_fprintf (f, "\tmov\tlr, r0\n");
24182 asm_fprintf (f, "\tmsr\tAPSR_nzcvq, %r\n", reg_containing_return_addr);
24183 asm_fprintf (f, "\tbxns\t%r\n", reg_containing_return_addr);
24185 else
24186 asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
24189 /* Scan INSN just before assembler is output for it.
24190 For Thumb-1, we track the status of the condition codes; this
24191 information is used in the cbranchsi4_insn pattern. */
24192 void
24193 thumb1_final_prescan_insn (rtx_insn *insn)
24195 if (flag_print_asm_name)
24196 asm_fprintf (asm_out_file, "%@ 0x%04x\n",
24197 INSN_ADDRESSES (INSN_UID (insn)));
24198 /* Don't overwrite the previous setter when we get to a cbranch. */
24199 if (INSN_CODE (insn) != CODE_FOR_cbranchsi4_insn)
24201 enum attr_conds conds;
24203 if (cfun->machine->thumb1_cc_insn)
24205 if (modified_in_p (cfun->machine->thumb1_cc_op0, insn)
24206 || modified_in_p (cfun->machine->thumb1_cc_op1, insn))
24207 CC_STATUS_INIT;
24209 conds = get_attr_conds (insn);
24210 if (conds == CONDS_SET)
24212 rtx set = single_set (insn);
24213 cfun->machine->thumb1_cc_insn = insn;
24214 cfun->machine->thumb1_cc_op0 = SET_DEST (set);
24215 cfun->machine->thumb1_cc_op1 = const0_rtx;
24216 cfun->machine->thumb1_cc_mode = CC_NOOVmode;
24217 if (INSN_CODE (insn) == CODE_FOR_thumb1_subsi3_insn)
24219 rtx src1 = XEXP (SET_SRC (set), 1);
24220 if (src1 == const0_rtx)
24221 cfun->machine->thumb1_cc_mode = CCmode;
24223 else if (REG_P (SET_DEST (set)) && REG_P (SET_SRC (set)))
24225 /* Record the src register operand instead of dest because
24226 cprop_hardreg pass propagates src. */
24227 cfun->machine->thumb1_cc_op0 = SET_SRC (set);
24230 else if (conds != CONDS_NOCOND)
24231 cfun->machine->thumb1_cc_insn = NULL_RTX;
24234 /* Check if unexpected far jump is used. */
24235 if (cfun->machine->lr_save_eliminated
24236 && get_attr_far_jump (insn) == FAR_JUMP_YES)
24237 internal_error("Unexpected thumb1 far jump");
24240 int
24241 thumb_shiftable_const (unsigned HOST_WIDE_INT val)
24243 unsigned HOST_WIDE_INT mask = 0xff;
24244 int i;
24246 val = val & (unsigned HOST_WIDE_INT)0xffffffffu;
24247 if (val == 0) /* XXX */
24248 return 0;
24250 for (i = 0; i < 25; i++)
24251 if ((val & (mask << i)) == val)
24252 return 1;
24254 return 0;
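/* Examples: 0x000000ff and 0x00ff0000 are accepted (an 8-bit value shifted
   left by 0 or by 16), while 0x000001ff and 0x00ff00ff are rejected because
   they need more than eight contiguous bits.  */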
24257 /* Returns nonzero if the current function contains,
24258 or might contain, a far jump. */
24259 static int
24260 thumb_far_jump_used_p (void)
24262 rtx_insn *insn;
24263 bool far_jump = false;
24264 unsigned int func_size = 0;
24266 /* If we have already decided that far jumps may be used,
24267 do not bother checking again, and always return true even if
24268 it turns out that they are not being used. Once we have made
24269 the decision that far jumps are present (and that hence the link
24270 register will be pushed onto the stack) we cannot go back on it. */
24271 if (cfun->machine->far_jump_used)
24272 return 1;
24274 /* If this function is not being called from the prologue/epilogue
24275 generation code then it must be being called from the
24276 INITIAL_ELIMINATION_OFFSET macro. */
24277 if (!(ARM_DOUBLEWORD_ALIGN || reload_completed))
24279 /* In this case we know that we are being asked about the elimination
24280 of the arg pointer register. If that register is not being used,
24281 then there are no arguments on the stack, and we do not have to
24282 worry that a far jump might force the prologue to push the link
24283 register, changing the stack offsets. In this case we can just
24284 return false, since the presence of far jumps in the function will
24285 not affect stack offsets.
24287 If the arg pointer is live (or if it was live, but has now been
24288 eliminated and so set to dead) then we do have to test to see if
24289 the function might contain a far jump. This test can lead to some
24290 false negatives, since before reload is completed the length of
24291 branch instructions is not known, so gcc defaults to returning their
24292 longest length, which in turn sets the far jump attribute to true.
24294 A false negative will not result in bad code being generated, but it
24295 will result in a needless push and pop of the link register. We
24296 hope that this does not occur too often.
24298 If we need doubleword stack alignment this could affect the other
24299 elimination offsets so we can't risk getting it wrong. */
24300 if (df_regs_ever_live_p (ARG_POINTER_REGNUM))
24301 cfun->machine->arg_pointer_live = 1;
24302 else if (!cfun->machine->arg_pointer_live)
24303 return 0;
24306 /* We should not change far_jump_used during or after reload, as there is
24307 no chance to change stack frame layout. */
24308 if (reload_in_progress || reload_completed)
24309 return 0;
24311 /* Check to see if the function contains a branch
24312 insn with the far jump attribute set. */
24313 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
24315 if (JUMP_P (insn) && get_attr_far_jump (insn) == FAR_JUMP_YES)
24317 far_jump = true;
24319 func_size += get_attr_length (insn);
24322 /* Attribute far_jump will always be true for thumb1 before
24323 shorten_branch pass. So checking far_jump attribute before
24324 shorten_branch isn't very useful.
24326 The following heuristic tries to estimate more accurately whether a far jump
24327 may finally be used. The heuristic is very conservative, as there is
24328 no chance to roll back a decision not to use a far jump.
24330 Thumb1 long branch offset is -2048 to 2046. The worst case is each
24331 2-byte insn is associated with a 4 byte constant pool. Using
24332 function size 2048/3 as the threshold is conservative enough. */
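/* Worked through: FUNC_SIZE counts roughly 2 bytes per insn and, in the
   worst case, each insn drags a 4-byte literal into a constant pool, so
   code plus pools can span about 3 * FUNC_SIZE bytes; once that reaches
   2048 (around 340 two-byte insns) the -2048..2046 branch range quoted
   above may no longer be enough.  */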
24333 if (far_jump)
24335 if ((func_size * 3) >= 2048)
24337 /* Record the fact that we have decided that
24338 the function does use far jumps. */
24339 cfun->machine->far_jump_used = 1;
24340 return 1;
24344 return 0;
24347 /* Return nonzero if FUNC must be entered in ARM mode. */
24348 static bool
24349 is_called_in_ARM_mode (tree func)
24351 gcc_assert (TREE_CODE (func) == FUNCTION_DECL);
24353 /* Ignore the problem about functions whose address is taken. */
24354 if (TARGET_CALLEE_INTERWORKING && TREE_PUBLIC (func))
24355 return true;
24357 #ifdef ARM_PE
24358 return lookup_attribute ("interfacearm", DECL_ATTRIBUTES (func)) != NULL_TREE;
24359 #else
24360 return false;
24361 #endif
24364 /* Given the stack offsets and register mask in OFFSETS, decide how
24365 many additional registers to push instead of subtracting a constant
24366 from SP. For epilogues the principle is the same except we use pop.
24367 FOR_PROLOGUE indicates which we're generating. */
24368 static int
24369 thumb1_extra_regs_pushed (arm_stack_offsets *offsets, bool for_prologue)
24371 HOST_WIDE_INT amount;
24372 unsigned long live_regs_mask = offsets->saved_regs_mask;
24373 /* Extract a mask of the ones we can give to the Thumb's push/pop
24374 instruction. */
24375 unsigned long l_mask = live_regs_mask & (for_prologue ? 0x40ff : 0xff);
24376 /* Then count how many other high registers will need to be pushed. */
24377 unsigned long high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
24378 int n_free, reg_base, size;
24380 if (!for_prologue && frame_pointer_needed)
24381 amount = offsets->locals_base - offsets->saved_regs;
24382 else
24383 amount = offsets->outgoing_args - offsets->saved_regs;
24385 /* If the stack frame size is 512 exactly, we can save one load
24386 instruction, which should make this a win even when optimizing
24387 for speed. */
24388 if (!optimize_size && amount != 512)
24389 return 0;
24391 /* Can't do this if there are high registers to push. */
24392 if (high_regs_pushed != 0)
24393 return 0;
24395 /* Shouldn't do it in the prologue if no registers would normally
24396 be pushed at all. In the epilogue, also allow it if we'll have
24397 a pop insn for the PC. */
24398 if (l_mask == 0
24399 && (for_prologue
24400 || TARGET_BACKTRACE
24401 || (live_regs_mask & 1 << LR_REGNUM) == 0
24402 || TARGET_INTERWORK
24403 || crtl->args.pretend_args_size != 0))
24404 return 0;
24406 /* Don't do this if thumb_expand_prologue wants to emit instructions
24407 between the push and the stack frame allocation. */
24408 if (for_prologue
24409 && ((flag_pic && arm_pic_register != INVALID_REGNUM)
24410 || (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)))
24411 return 0;
24413 reg_base = 0;
24414 n_free = 0;
24415 if (!for_prologue)
24417 size = arm_size_return_regs ();
24418 reg_base = ARM_NUM_INTS (size);
24419 live_regs_mask >>= reg_base;
24422 while (reg_base + n_free < 8 && !(live_regs_mask & 1)
24423 && (for_prologue || call_used_regs[reg_base + n_free]))
24425 live_regs_mask >>= 1;
24426 n_free++;
24429 if (n_free == 0)
24430 return 0;
24431 gcc_assert (amount / 4 * 4 == amount);
24433 if (amount >= 512 && (amount - n_free * 4) < 512)
24434 return (amount - 508) / 4;
24435 if (amount <= n_free * 4)
24436 return amount / 4;
24437 return 0;
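/* Worked example (illustrative numbers): when optimizing for size with
   AMOUNT == 16 and four suitable free low registers, the function returns 4,
   so the prologue pushes four extra registers instead of emitting a separate
   "sub sp, #16", and the epilogue pops them again.  */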
24440 /* The bits which aren't usefully expanded as rtl. */
24441 const char *
24442 thumb1_unexpanded_epilogue (void)
24444 arm_stack_offsets *offsets;
24445 int regno;
24446 unsigned long live_regs_mask = 0;
24447 int high_regs_pushed = 0;
24448 int extra_pop;
24449 int had_to_push_lr;
24450 int size;
24452 if (cfun->machine->return_used_this_function != 0)
24453 return "";
24455 if (IS_NAKED (arm_current_func_type ()))
24456 return "";
24458 offsets = arm_get_frame_offsets ();
24459 live_regs_mask = offsets->saved_regs_mask;
24460 high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
24462 /* We can deduce the registers used from the function's return value.
24463 This is more reliable than examining df_regs_ever_live_p () because that
24464 will be set if the register is ever used in the function, not just if
24465 the register is used to hold a return value. */
24466 size = arm_size_return_regs ();
24468 extra_pop = thumb1_extra_regs_pushed (offsets, false);
24469 if (extra_pop > 0)
24471 unsigned long extra_mask = (1 << extra_pop) - 1;
24472 live_regs_mask |= extra_mask << ARM_NUM_INTS (size);
24475 /* The prologue may have pushed some high registers to use as
24476 work registers. e.g. the testsuite file:
24477 gcc/testsuite/gcc/gcc.c-torture/execute/complex-2.c
24478 compiles to produce:
24479 push {r4, r5, r6, r7, lr}
24480 mov r7, r9
24481 mov r6, r8
24482 push {r6, r7}
24483 as part of the prologue. We have to undo that pushing here. */
24485 if (high_regs_pushed)
24487 unsigned long mask = live_regs_mask & 0xff;
24488 int next_hi_reg;
24490 /* The available low registers depend on the size of the value we are
24491 returning. */
24492 if (size <= 12)
24493 mask |= 1 << 3;
24494 if (size <= 8)
24495 mask |= 1 << 2;
24497 if (mask == 0)
24498 /* Oh dear! We have no low registers into which we can pop
24499 high registers! */
24500 internal_error
24501 ("no low registers available for popping high registers");
24503 for (next_hi_reg = 8; next_hi_reg < 13; next_hi_reg++)
24504 if (live_regs_mask & (1 << next_hi_reg))
24505 break;
24507 while (high_regs_pushed)
24509 /* Find lo register(s) into which the high register(s) can
24510 be popped. */
24511 for (regno = 0; regno <= LAST_LO_REGNUM; regno++)
24513 if (mask & (1 << regno))
24514 high_regs_pushed--;
24515 if (high_regs_pushed == 0)
24516 break;
24519 mask &= (2 << regno) - 1; /* A noop if regno == 8 */
24521 /* Pop the values into the low register(s). */
24522 thumb_pop (asm_out_file, mask);
24524 /* Move the value(s) into the high registers. */
24525 for (regno = 0; regno <= LAST_LO_REGNUM; regno++)
24527 if (mask & (1 << regno))
24529 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", next_hi_reg,
24530 regno);
24532 for (next_hi_reg++; next_hi_reg < 13; next_hi_reg++)
24533 if (live_regs_mask & (1 << next_hi_reg))
24534 break;
24538 live_regs_mask &= ~0x0f00;
24541 had_to_push_lr = (live_regs_mask & (1 << LR_REGNUM)) != 0;
24542 live_regs_mask &= 0xff;
24544 if (crtl->args.pretend_args_size == 0 || TARGET_BACKTRACE)
24546 /* Pop the return address into the PC. */
24547 if (had_to_push_lr)
24548 live_regs_mask |= 1 << PC_REGNUM;
24550 /* Either no argument registers were pushed or a backtrace
24551 structure was created which includes an adjusted stack
24552 pointer, so just pop everything. */
24553 if (live_regs_mask)
24554 thumb_pop (asm_out_file, live_regs_mask);
24556 /* We have either just popped the return address into the
24557 PC or it was kept in LR for the entire function.
24558 Note that thumb_pop has already called thumb_exit if the
24559 PC was in the list. */
24560 if (!had_to_push_lr)
24561 thumb_exit (asm_out_file, LR_REGNUM);
24563 else
24565 /* Pop everything but the return address. */
24566 if (live_regs_mask)
24567 thumb_pop (asm_out_file, live_regs_mask);
24569 if (had_to_push_lr)
24571 if (size > 12)
24573 /* We have no free low regs, so save one. */
24574 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", IP_REGNUM,
24575 LAST_ARG_REGNUM);
24578 /* Get the return address into a temporary register. */
24579 thumb_pop (asm_out_file, 1 << LAST_ARG_REGNUM);
24581 if (size > 12)
24583 /* Move the return address to lr. */
24584 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LR_REGNUM,
24585 LAST_ARG_REGNUM);
24586 /* Restore the low register. */
24587 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LAST_ARG_REGNUM,
24588 IP_REGNUM);
24589 regno = LR_REGNUM;
24591 else
24592 regno = LAST_ARG_REGNUM;
24594 else
24595 regno = LR_REGNUM;
24597 /* Remove the argument registers that were pushed onto the stack. */
24598 asm_fprintf (asm_out_file, "\tadd\t%r, %r, #%d\n",
24599 SP_REGNUM, SP_REGNUM,
24600 crtl->args.pretend_args_size);
24602 thumb_exit (asm_out_file, regno);
24605 return "";
24608 /* Functions to save and restore machine-specific function data. */
24609 static struct machine_function *
24610 arm_init_machine_status (void)
24612 struct machine_function *machine;
24613 machine = ggc_cleared_alloc<machine_function> ();
24615 #if ARM_FT_UNKNOWN != 0
24616 machine->func_type = ARM_FT_UNKNOWN;
24617 #endif
24618 return machine;
24621 /* Return an RTX indicating where the return address to the
24622 calling function can be found. */
24623 rtx
24624 arm_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
24626 if (count != 0)
24627 return NULL_RTX;
24629 return get_hard_reg_initial_val (Pmode, LR_REGNUM);
24632 /* Do anything needed before RTL is emitted for each function. */
24633 void
24634 arm_init_expanders (void)
24636 /* Arrange to initialize and mark the machine per-function status. */
24637 init_machine_status = arm_init_machine_status;
24639 /* This is to stop the combine pass optimizing away the alignment
24640 adjustment of va_arg. */
24641 /* ??? It is claimed that this should not be necessary. */
24642 if (cfun)
24643 mark_reg_pointer (arg_pointer_rtx, PARM_BOUNDARY);
24646 /* Check that FUNC is called with a different mode. */
24648 bool
24649 arm_change_mode_p (tree func)
24651 if (TREE_CODE (func) != FUNCTION_DECL)
24652 return false;
24654 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (func);
24656 if (!callee_tree)
24657 callee_tree = target_option_default_node;
24659 struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);
24660 int flags = callee_opts->x_target_flags;
24662 return (TARGET_THUMB_P (flags) != TARGET_THUMB);
24665 /* Like arm_compute_initial_elimination offset. Simpler because there
24666 isn't an ABI specified frame pointer for Thumb. Instead, we set it
24667 to point at the base of the local variables after static stack
24668 space for a function has been allocated. */
24670 HOST_WIDE_INT
24671 thumb_compute_initial_elimination_offset (unsigned int from, unsigned int to)
24673 arm_stack_offsets *offsets;
24675 offsets = arm_get_frame_offsets ();
24677 switch (from)
24679 case ARG_POINTER_REGNUM:
24680 switch (to)
24682 case STACK_POINTER_REGNUM:
24683 return offsets->outgoing_args - offsets->saved_args;
24685 case FRAME_POINTER_REGNUM:
24686 return offsets->soft_frame - offsets->saved_args;
24688 case ARM_HARD_FRAME_POINTER_REGNUM:
24689 return offsets->saved_regs - offsets->saved_args;
24691 case THUMB_HARD_FRAME_POINTER_REGNUM:
24692 return offsets->locals_base - offsets->saved_args;
24694 default:
24695 gcc_unreachable ();
24697 break;
24699 case FRAME_POINTER_REGNUM:
24700 switch (to)
24702 case STACK_POINTER_REGNUM:
24703 return offsets->outgoing_args - offsets->soft_frame;
24705 case ARM_HARD_FRAME_POINTER_REGNUM:
24706 return offsets->saved_regs - offsets->soft_frame;
24708 case THUMB_HARD_FRAME_POINTER_REGNUM:
24709 return offsets->locals_base - offsets->soft_frame;
24711 default:
24712 gcc_unreachable ();
24714 break;
24716 default:
24717 gcc_unreachable ();
24721 /* Generate the function's prologue. */
24723 void
24724 thumb1_expand_prologue (void)
24726 rtx_insn *insn;
24728 HOST_WIDE_INT amount;
24729 HOST_WIDE_INT size;
24730 arm_stack_offsets *offsets;
24731 unsigned long func_type;
24732 int regno;
24733 unsigned long live_regs_mask;
24734 unsigned long l_mask;
24735 unsigned high_regs_pushed = 0;
24736 bool lr_needs_saving;
24738 func_type = arm_current_func_type ();
24740 /* Naked functions don't have prologues. */
24741 if (IS_NAKED (func_type))
24743 if (flag_stack_usage_info)
24744 current_function_static_stack_size = 0;
24745 return;
24748 if (IS_INTERRUPT (func_type))
24750 error ("interrupt Service Routines cannot be coded in Thumb mode");
24751 return;
24754 if (is_called_in_ARM_mode (current_function_decl))
24755 emit_insn (gen_prologue_thumb1_interwork ());
24757 offsets = arm_get_frame_offsets ();
24758 live_regs_mask = offsets->saved_regs_mask;
24759 lr_needs_saving = live_regs_mask & (1 << LR_REGNUM);
24761 /* Extract a mask of the ones we can give to the Thumb's push instruction. */
24762 l_mask = live_regs_mask & 0x40ff;
24763 /* Then count how many other high registers will need to be pushed. */
24764 high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
24766 if (crtl->args.pretend_args_size)
24768 rtx x = GEN_INT (-crtl->args.pretend_args_size);
24770 if (cfun->machine->uses_anonymous_args)
24772 int num_pushes = ARM_NUM_INTS (crtl->args.pretend_args_size);
24773 unsigned long mask;
24775 mask = 1ul << (LAST_ARG_REGNUM + 1);
24776 mask -= 1ul << (LAST_ARG_REGNUM + 1 - num_pushes);
24778 insn = thumb1_emit_multi_reg_push (mask, 0);
24780 else
24782 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
24783 stack_pointer_rtx, x));
24785 RTX_FRAME_RELATED_P (insn) = 1;
24788 if (TARGET_BACKTRACE)
24790 HOST_WIDE_INT offset = 0;
24791 unsigned work_register;
24792 rtx work_reg, x, arm_hfp_rtx;
24794 /* We have been asked to create a stack backtrace structure.
24795 The code looks like this:
24797 0 .align 2
24798 0 func:
24799 0 sub SP, #16 Reserve space for 4 registers.
24800 2 push {R7} Push low registers.
24801 4 add R7, SP, #20 Get the stack pointer before the push.
24802 6 str R7, [SP, #8] Store the stack pointer
24803 (before reserving the space).
24804 8 mov R7, PC Get hold of the start of this code + 12.
24805 10 str R7, [SP, #16] Store it.
24806 12 mov R7, FP Get hold of the current frame pointer.
24807 14 str R7, [SP, #4] Store it.
24808 16 mov R7, LR Get hold of the current return address.
24809 18 str R7, [SP, #12] Store it.
24810 20 add R7, SP, #16 Point at the start of the
24811 backtrace structure.
24812 22 mov FP, R7 Put this value into the frame pointer. */
24814 work_register = thumb_find_work_register (live_regs_mask);
24815 work_reg = gen_rtx_REG (SImode, work_register);
24816 arm_hfp_rtx = gen_rtx_REG (SImode, ARM_HARD_FRAME_POINTER_REGNUM);
24818 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
24819 stack_pointer_rtx, GEN_INT (-16)));
24820 RTX_FRAME_RELATED_P (insn) = 1;
24822 if (l_mask)
24824 insn = thumb1_emit_multi_reg_push (l_mask, l_mask);
24825 RTX_FRAME_RELATED_P (insn) = 1;
24826 lr_needs_saving = false;
24828 offset = bit_count (l_mask) * UNITS_PER_WORD;
24831 x = GEN_INT (offset + 16 + crtl->args.pretend_args_size);
24832 emit_insn (gen_addsi3 (work_reg, stack_pointer_rtx, x));
24834 x = plus_constant (Pmode, stack_pointer_rtx, offset + 4);
24835 x = gen_frame_mem (SImode, x);
24836 emit_move_insn (x, work_reg);
24838 /* Make sure that the instruction fetching the PC is in the right place
24839 to calculate "start of backtrace creation code + 12". */
24840 /* ??? The stores using the common WORK_REG ought to be enough to
24841 prevent the scheduler from doing anything weird. Failing that
24842 we could always move all of the following into an UNSPEC_VOLATILE. */
24843 if (l_mask)
24845 x = gen_rtx_REG (SImode, PC_REGNUM);
24846 emit_move_insn (work_reg, x);
24848 x = plus_constant (Pmode, stack_pointer_rtx, offset + 12);
24849 x = gen_frame_mem (SImode, x);
24850 emit_move_insn (x, work_reg);
24852 emit_move_insn (work_reg, arm_hfp_rtx);
24854 x = plus_constant (Pmode, stack_pointer_rtx, offset);
24855 x = gen_frame_mem (SImode, x);
24856 emit_move_insn (x, work_reg);
24858 else
24860 emit_move_insn (work_reg, arm_hfp_rtx);
24862 x = plus_constant (Pmode, stack_pointer_rtx, offset);
24863 x = gen_frame_mem (SImode, x);
24864 emit_move_insn (x, work_reg);
24866 x = gen_rtx_REG (SImode, PC_REGNUM);
24867 emit_move_insn (work_reg, x);
24869 x = plus_constant (Pmode, stack_pointer_rtx, offset + 12);
24870 x = gen_frame_mem (SImode, x);
24871 emit_move_insn (x, work_reg);
24874 x = gen_rtx_REG (SImode, LR_REGNUM);
24875 emit_move_insn (work_reg, x);
24877 x = plus_constant (Pmode, stack_pointer_rtx, offset + 8);
24878 x = gen_frame_mem (SImode, x);
24879 emit_move_insn (x, work_reg);
24881 x = GEN_INT (offset + 12);
24882 emit_insn (gen_addsi3 (work_reg, stack_pointer_rtx, x));
24884 emit_move_insn (arm_hfp_rtx, work_reg);
24886 /* Optimization: If we are not pushing any low registers but we are going
24887 to push some high registers then delay our first push. This will just
24888 be a push of LR and we can combine it with the push of the first high
24889 register. */
24890 else if ((l_mask & 0xff) != 0
24891 || (high_regs_pushed == 0 && lr_needs_saving))
24893 unsigned long mask = l_mask;
24894 mask |= (1 << thumb1_extra_regs_pushed (offsets, true)) - 1;
24895 insn = thumb1_emit_multi_reg_push (mask, mask);
24896 RTX_FRAME_RELATED_P (insn) = 1;
24897 lr_needs_saving = false;
24900 if (high_regs_pushed)
24902 unsigned pushable_regs;
24903 unsigned next_hi_reg;
24904 unsigned arg_regs_num = TARGET_AAPCS_BASED ? crtl->args.info.aapcs_ncrn
24905 : crtl->args.info.nregs;
24906 unsigned arg_regs_mask = (1 << arg_regs_num) - 1;
24908 for (next_hi_reg = 12; next_hi_reg > LAST_LO_REGNUM; next_hi_reg--)
24909 if (live_regs_mask & (1 << next_hi_reg))
24910 break;
24912 /* Here we need to mask out registers used for passing arguments
24913 even if they can be pushed: using them to stash the high registers
24914 would clobber the incoming argument values. */
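/* For instance, a function taking two integer arguments under the AAPCS has
   arg_regs_num == 2 and arg_regs_mask == 0x3, so r0 and r1 are never used as
   temporaries for the high-register saves below. */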
24915 pushable_regs = l_mask & (~arg_regs_mask);
24916 if (lr_needs_saving)
24917 pushable_regs &= ~(1 << LR_REGNUM);
24919 if (pushable_regs == 0)
24920 pushable_regs = 1 << thumb_find_work_register (live_regs_mask);
24922 while (high_regs_pushed > 0)
24924 unsigned long real_regs_mask = 0;
24925 unsigned long push_mask = 0;
24927 for (regno = LR_REGNUM; regno >= 0; regno --)
24929 if (pushable_regs & (1 << regno))
24931 emit_move_insn (gen_rtx_REG (SImode, regno),
24932 gen_rtx_REG (SImode, next_hi_reg));
24934 high_regs_pushed --;
24935 real_regs_mask |= (1 << next_hi_reg);
24936 push_mask |= (1 << regno);
24938 if (high_regs_pushed)
24940 for (next_hi_reg --; next_hi_reg > LAST_LO_REGNUM;
24941 next_hi_reg --)
24942 if (live_regs_mask & (1 << next_hi_reg))
24943 break;
24945 else
24946 break;
24950 /* If we had to find a work register and we have not yet
24951 saved the LR then add it to the list of regs to push. */
24952 if (lr_needs_saving)
24954 push_mask |= 1 << LR_REGNUM;
24955 real_regs_mask |= 1 << LR_REGNUM;
24956 lr_needs_saving = false;
24959 insn = thumb1_emit_multi_reg_push (push_mask, real_regs_mask);
24960 RTX_FRAME_RELATED_P (insn) = 1;
24964 /* Load the pic register before setting the frame pointer,
24965 so we can use r7 as a temporary work register. */
24966 if (flag_pic && arm_pic_register != INVALID_REGNUM)
24967 arm_load_pic_register (live_regs_mask);
24969 if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
24970 emit_move_insn (gen_rtx_REG (Pmode, ARM_HARD_FRAME_POINTER_REGNUM),
24971 stack_pointer_rtx);
24973 size = offsets->outgoing_args - offsets->saved_args;
24974 if (flag_stack_usage_info)
24975 current_function_static_stack_size = size;
24977 /* If we have a frame, then do stack checking. FIXME: not implemented. */
24978 if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK && size)
24979 sorry ("-fstack-check=specific for Thumb-1");
24981 amount = offsets->outgoing_args - offsets->saved_regs;
24982 amount -= 4 * thumb1_extra_regs_pushed (offsets, true);
24983 if (amount)
24985 if (amount < 512)
24987 insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
24988 GEN_INT (- amount)));
24989 RTX_FRAME_RELATED_P (insn) = 1;
24991 else
24993 rtx reg, dwarf;
24995 /* The stack decrement is too big for an immediate value in a single
24996 insn. In theory we could issue multiple subtracts, but after
24997 three of them it becomes more space efficient to place the full
24998 value in the constant pool and load into a register. (Also the
24999 ARM debugger really likes to see only one stack decrement per
25000 function). So instead we look for a scratch register into which
25001 we can load the decrement, and then we subtract this from the
25002 stack pointer. Unfortunately on the thumb the only available
25003 scratch registers are the argument registers, and we cannot use
25004 these as they may hold arguments to the function. Instead we
25005 attempt to locate a call preserved register which is used by this
25006 function. If we can find one, then we know that it will have
25007 been pushed at the start of the prologue and so we can corrupt
25008 it now. */
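/* A rough sketch of the sequence emitted below, assuming amount == 1024 and
   that r4 was saved by this prologue:
       ldr  r4, .Lxxx   @ literal pool entry holding -1024
       add  sp, sp, r4
   (.Lxxx stands for a hypothetical literal pool label.) */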
25009 for (regno = LAST_ARG_REGNUM + 1; regno <= LAST_LO_REGNUM; regno++)
25010 if (live_regs_mask & (1 << regno))
25011 break;
25013 gcc_assert(regno <= LAST_LO_REGNUM);
25015 reg = gen_rtx_REG (SImode, regno);
25017 emit_insn (gen_movsi (reg, GEN_INT (- amount)));
25019 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
25020 stack_pointer_rtx, reg));
25022 dwarf = gen_rtx_SET (stack_pointer_rtx,
25023 plus_constant (Pmode, stack_pointer_rtx,
25024 -amount));
25025 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
25026 RTX_FRAME_RELATED_P (insn) = 1;
25030 if (frame_pointer_needed)
25031 thumb_set_frame_pointer (offsets);
25033 /* If we are profiling, make sure no instructions are scheduled before
25034 the call to mcount. Similarly if the user has requested no
25035 scheduling in the prolog. Similarly if we want non-call exceptions
25036 using the EABI unwinder, to prevent faulting instructions from being
25037 swapped with a stack adjustment. */
25038 if (crtl->profile || !TARGET_SCHED_PROLOG
25039 || (arm_except_unwind_info (&global_options) == UI_TARGET
25040 && cfun->can_throw_non_call_exceptions))
25041 emit_insn (gen_blockage ());
25043 cfun->machine->lr_save_eliminated = !thumb_force_lr_save ();
25044 if (live_regs_mask & 0xff)
25045 cfun->machine->lr_save_eliminated = 0;
25048 /* Clear caller-saved registers that are not used to pass return values, and
25049 clear leaked condition flags, before exiting a cmse_nonsecure_entry function. */
25051 void
25052 cmse_nonsecure_entry_clear_before_return (void)
25054 uint64_t to_clear_mask[2];
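/* to_clear_mask is indexed by regno / 64: element 0 covers hard register
   numbers 0-63 and element 1 covers 64-127, one bit per register. */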
25055 uint32_t padding_bits_to_clear = 0;
25056 uint32_t * padding_bits_to_clear_ptr = &padding_bits_to_clear;
25057 int regno, maxregno = IP_REGNUM;
25058 tree result_type;
25059 rtx result_rtl;
25061 to_clear_mask[0] = (1ULL << (NUM_ARG_REGS)) - 1;
25062 to_clear_mask[0] |= (1ULL << IP_REGNUM);
25064 /* If we are not dealing with -mfloat-abi=soft we will need to clear VFP
25065 registers. We also check that TARGET_HARD_FLOAT and !TARGET_THUMB1 hold
25066 to make sure the instructions used to clear them are present. */
25067 if (TARGET_HARD_FLOAT && !TARGET_THUMB1)
25069 uint64_t float_mask = (1ULL << (D7_VFP_REGNUM + 1)) - 1;
25070 maxregno = LAST_VFP_REGNUM;
25072 float_mask &= ~((1ULL << FIRST_VFP_REGNUM) - 1);
25073 to_clear_mask[0] |= float_mask;
25075 float_mask = (1ULL << (maxregno - 63)) - 1;
25076 to_clear_mask[1] = float_mask;
25078 /* Make sure we don't clear the two scratch registers used to clear the
25079 relevant FPSCR bits in output_return_instruction. */
25080 emit_use (gen_rtx_REG (SImode, IP_REGNUM));
25081 to_clear_mask[0] &= ~(1ULL << IP_REGNUM);
25082 emit_use (gen_rtx_REG (SImode, 4));
25083 to_clear_mask[0] &= ~(1ULL << 4);
25086 /* If the user has defined registers to be caller saved, these are no longer
25087 restored by the function before returning and must thus be cleared for
25088 security purposes. */
25089 for (regno = NUM_ARG_REGS; regno < LAST_VFP_REGNUM; regno++)
25091 /* We do not touch registers that can be used to pass arguments as per
25092 the AAPCS, since these should never be made callee-saved by user
25093 options. */
25094 if (IN_RANGE (regno, FIRST_VFP_REGNUM, D7_VFP_REGNUM))
25095 continue;
25096 if (IN_RANGE (regno, IP_REGNUM, PC_REGNUM))
25097 continue;
25098 if (call_used_regs[regno])
25099 to_clear_mask[regno / 64] |= (1ULL << (regno % 64));
25102 /* Make sure we do not clear the registers used to return the result in. */
25103 result_type = TREE_TYPE (DECL_RESULT (current_function_decl));
25104 if (!VOID_TYPE_P (result_type))
25106 result_rtl = arm_function_value (result_type, current_function_decl, 0);
25108 /* No need to check that we return in registers, because we don't
25109 support returning on stack yet. */
25110 to_clear_mask[0]
25111 &= ~compute_not_to_clear_mask (result_type, result_rtl, 0,
25112 padding_bits_to_clear_ptr);
25115 if (padding_bits_to_clear != 0)
25117 rtx reg_rtx;
25118 /* Padding bits to clear is not 0, so we know we are dealing with
25119 returning a composite type, which only uses r0. Let's make sure that
25120 r1-r3 are cleared too; we will use r1 as a scratch register. */
25121 gcc_assert ((to_clear_mask[0] & 0xe) == 0xe);
25123 reg_rtx = gen_rtx_REG (SImode, R1_REGNUM);
25125 /* Fill the lower half of the negated padding_bits_to_clear. */
25126 emit_move_insn (reg_rtx,
25127 GEN_INT ((((~padding_bits_to_clear) << 16u) >> 16u)));
25129 /* Also fill the top half of the negated padding_bits_to_clear. */
25130 if (((~padding_bits_to_clear) >> 16) > 0)
25131 emit_insn (gen_rtx_SET (gen_rtx_ZERO_EXTRACT (SImode, reg_rtx,
25132 GEN_INT (16),
25133 GEN_INT (16)),
25134 GEN_INT ((~padding_bits_to_clear) >> 16)));
25136 emit_insn (gen_andsi3 (gen_rtx_REG (SImode, R0_REGNUM),
25137 gen_rtx_REG (SImode, R0_REGNUM),
25138 reg_rtx));
25141 for (regno = R0_REGNUM; regno <= maxregno; regno++)
25143 if (!(to_clear_mask[regno / 64] & (1ULL << (regno % 64))))
25144 continue;
25146 if (IS_VFP_REGNUM (regno))
25148 /* If regno is an even vfp register and its successor is also to
25149 be cleared, use vmov. */
25150 if (TARGET_VFP_DOUBLE
25151 && VFP_REGNO_OK_FOR_DOUBLE (regno)
25152 && to_clear_mask[regno / 64] & (1ULL << ((regno % 64) + 1)))
25154 emit_move_insn (gen_rtx_REG (DFmode, regno),
25155 CONST1_RTX (DFmode));
25156 emit_use (gen_rtx_REG (DFmode, regno));
25157 regno++;
25159 else
25161 emit_move_insn (gen_rtx_REG (SFmode, regno),
25162 CONST1_RTX (SFmode));
25163 emit_use (gen_rtx_REG (SFmode, regno));
25166 else
25168 if (TARGET_THUMB1)
25170 if (regno == R0_REGNUM)
25171 emit_move_insn (gen_rtx_REG (SImode, regno),
25172 const0_rtx);
25173 else
25174 /* R0 has either been cleared before (see the code above) or it
25175 holds a return value; either way it is not secret
25176 information. */
25177 emit_move_insn (gen_rtx_REG (SImode, regno),
25178 gen_rtx_REG (SImode, R0_REGNUM));
25179 emit_use (gen_rtx_REG (SImode, regno));
25181 else
25183 emit_move_insn (gen_rtx_REG (SImode, regno),
25184 gen_rtx_REG (SImode, LR_REGNUM));
25185 emit_use (gen_rtx_REG (SImode, regno));
25191 /* Generate the pattern *pop_multiple_with_stack_update_and_return if a single
25192 POP instruction can be generated. LR should be replaced by PC. All
25193 the checks required are already done by USE_RETURN_INSN (). Hence,
25194 all we really need to check here is whether a single register or
25195 multiple registers are to be popped. */
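/* For example, a function that only saved LR can return with a single
   "pop {pc}", while one that saved {r4, r5, lr} returns with
   "pop {r4, r5, pc}". */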
25196 void
25197 thumb2_expand_return (bool simple_return)
25199 int i, num_regs;
25200 unsigned long saved_regs_mask;
25201 arm_stack_offsets *offsets;
25203 offsets = arm_get_frame_offsets ();
25204 saved_regs_mask = offsets->saved_regs_mask;
25206 for (i = 0, num_regs = 0; i <= LAST_ARM_REGNUM; i++)
25207 if (saved_regs_mask & (1 << i))
25208 num_regs++;
25210 if (!simple_return && saved_regs_mask)
25212 /* TODO: Verify that this path is never taken for cmse_nonsecure_entry
25213 functions or adapt code to handle according to ACLE. This path should
25214 not be reachable for cmse_nonsecure_entry functions though we prefer
25215 to assert it for now to ensure that future code changes do not silently
25216 change this behavior. */
25217 gcc_assert (!IS_CMSE_ENTRY (arm_current_func_type ()));
25218 if (num_regs == 1)
25220 rtx par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
25221 rtx reg = gen_rtx_REG (SImode, PC_REGNUM);
25222 rtx addr = gen_rtx_MEM (SImode,
25223 gen_rtx_POST_INC (SImode,
25224 stack_pointer_rtx));
25225 set_mem_alias_set (addr, get_frame_alias_set ());
25226 XVECEXP (par, 0, 0) = ret_rtx;
25227 XVECEXP (par, 0, 1) = gen_rtx_SET (reg, addr);
25228 RTX_FRAME_RELATED_P (XVECEXP (par, 0, 1)) = 1;
25229 emit_jump_insn (par);
25231 else
25233 saved_regs_mask &= ~ (1 << LR_REGNUM);
25234 saved_regs_mask |= (1 << PC_REGNUM);
25235 arm_emit_multi_reg_pop (saved_regs_mask);
25238 else
25240 if (IS_CMSE_ENTRY (arm_current_func_type ()))
25241 cmse_nonsecure_entry_clear_before_return ();
25242 emit_jump_insn (simple_return_rtx);
25246 void
25247 thumb1_expand_epilogue (void)
25249 HOST_WIDE_INT amount;
25250 arm_stack_offsets *offsets;
25251 int regno;
25253 /* Naked functions don't have epilogues. */
25254 if (IS_NAKED (arm_current_func_type ()))
25255 return;
25257 offsets = arm_get_frame_offsets ();
25258 amount = offsets->outgoing_args - offsets->saved_regs;
25260 if (frame_pointer_needed)
25262 emit_insn (gen_movsi (stack_pointer_rtx, hard_frame_pointer_rtx));
25263 amount = offsets->locals_base - offsets->saved_regs;
25265 amount -= 4 * thumb1_extra_regs_pushed (offsets, false);
25267 gcc_assert (amount >= 0);
25268 if (amount)
25270 emit_insn (gen_blockage ());
25272 if (amount < 512)
25273 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
25274 GEN_INT (amount)));
25275 else
25277 /* r3 is always free in the epilogue. */
25278 rtx reg = gen_rtx_REG (SImode, LAST_ARG_REGNUM);
25280 emit_insn (gen_movsi (reg, GEN_INT (amount)));
25281 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, reg));
25285 /* Emit a USE (stack_pointer_rtx), so that
25286 the stack adjustment will not be deleted. */
25287 emit_insn (gen_force_register_use (stack_pointer_rtx));
25289 if (crtl->profile || !TARGET_SCHED_PROLOG)
25290 emit_insn (gen_blockage ());
25292 /* Emit a clobber for each register that will be restored in the epilogue,
25293 so that flow2 will get register lifetimes correct. */
25294 for (regno = 0; regno < 13; regno++)
25295 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
25296 emit_clobber (gen_rtx_REG (SImode, regno));
25298 if (! df_regs_ever_live_p (LR_REGNUM))
25299 emit_use (gen_rtx_REG (SImode, LR_REGNUM));
25301 /* Clear all caller-saved regs that are not used to return. */
25302 if (IS_CMSE_ENTRY (arm_current_func_type ()))
25303 cmse_nonsecure_entry_clear_before_return ();
25306 /* Epilogue code for APCS frame. */
25307 static void
25308 arm_expand_epilogue_apcs_frame (bool really_return)
25310 unsigned long func_type;
25311 unsigned long saved_regs_mask;
25312 int num_regs = 0;
25313 int i;
25314 int floats_from_frame = 0;
25315 arm_stack_offsets *offsets;
25317 gcc_assert (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM);
25318 func_type = arm_current_func_type ();
25320 /* Get frame offsets for ARM. */
25321 offsets = arm_get_frame_offsets ();
25322 saved_regs_mask = offsets->saved_regs_mask;
25324 /* Find the offset of the floating-point save area in the frame. */
25325 floats_from_frame
25326 = (offsets->saved_args
25327 + arm_compute_static_chain_stack_bytes ()
25328 - offsets->frame);
25330 /* Compute how many core registers are saved and how far away the floats are. */
25331 for (i = 0; i <= LAST_ARM_REGNUM; i++)
25332 if (saved_regs_mask & (1 << i))
25334 num_regs++;
25335 floats_from_frame += 4;
25338 if (TARGET_HARD_FLOAT)
25340 int start_reg;
25341 rtx ip_rtx = gen_rtx_REG (SImode, IP_REGNUM);
25343 /* The offset is from IP_REGNUM. */
25344 int saved_size = arm_get_vfp_saved_size ();
25345 if (saved_size > 0)
25347 rtx_insn *insn;
25348 floats_from_frame += saved_size;
25349 insn = emit_insn (gen_addsi3 (ip_rtx,
25350 hard_frame_pointer_rtx,
25351 GEN_INT (-floats_from_frame)));
25352 arm_add_cfa_adjust_cfa_note (insn, -floats_from_frame,
25353 ip_rtx, hard_frame_pointer_rtx);
25356 /* Generate VFP register multi-pop. */
25357 start_reg = FIRST_VFP_REGNUM;
25359 for (i = FIRST_VFP_REGNUM; i < LAST_VFP_REGNUM; i += 2)
25360 /* Look for a case where a reg does not need restoring. */
25361 if ((!df_regs_ever_live_p (i) || call_used_regs[i])
25362 && (!df_regs_ever_live_p (i + 1)
25363 || call_used_regs[i + 1]))
25365 if (start_reg != i)
25366 arm_emit_vfp_multi_reg_pop (start_reg,
25367 (i - start_reg) / 2,
25368 gen_rtx_REG (SImode,
25369 IP_REGNUM));
25370 start_reg = i + 2;
25373 /* Restore the remaining regs that we have discovered (or possibly
25374 even all of them, if the conditional in the for loop never
25375 fired). */
25376 if (start_reg != i)
25377 arm_emit_vfp_multi_reg_pop (start_reg,
25378 (i - start_reg) / 2,
25379 gen_rtx_REG (SImode, IP_REGNUM));
25382 if (TARGET_IWMMXT)
25384 /* The frame pointer is guaranteed to be non-double-word aligned, as
25385 it is set to double-word-aligned old_stack_pointer - 4. */
25386 rtx_insn *insn;
25387 int lrm_count = (num_regs % 2) ? (num_regs + 2) : (num_regs + 1);
25389 for (i = LAST_IWMMXT_REGNUM; i >= FIRST_IWMMXT_REGNUM; i--)
25390 if (df_regs_ever_live_p (i) && !call_used_regs[i])
25392 rtx addr = gen_frame_mem (V2SImode,
25393 plus_constant (Pmode, hard_frame_pointer_rtx,
25394 - lrm_count * 4));
25395 insn = emit_insn (gen_movsi (gen_rtx_REG (V2SImode, i), addr));
25396 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
25397 gen_rtx_REG (V2SImode, i),
25398 NULL_RTX);
25399 lrm_count += 2;
25403 /* saved_regs_mask should contain IP, which holds the old stack pointer
25404 from the time the activation record was created. Since SP and IP are adjacent
25405 registers, we can restore the value directly into SP. */
25406 gcc_assert (saved_regs_mask & (1 << IP_REGNUM));
25407 saved_regs_mask &= ~(1 << IP_REGNUM);
25408 saved_regs_mask |= (1 << SP_REGNUM);
25410 /* There are two registers left in saved_regs_mask - LR and PC. We
25411 only need to restore LR (the return address), but to
25412 save time we can load it directly into PC, unless we need a
25413 special function exit sequence, or we are not really returning. */
25414 if (really_return
25415 && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
25416 && !crtl->calls_eh_return)
25417 /* Delete LR from the register mask, so that LR on
25418 the stack is loaded into the PC in the register mask. */
25419 saved_regs_mask &= ~(1 << LR_REGNUM);
25420 else
25421 saved_regs_mask &= ~(1 << PC_REGNUM);
25423 num_regs = bit_count (saved_regs_mask);
25424 if ((offsets->outgoing_args != (1 + num_regs)) || cfun->calls_alloca)
25426 rtx_insn *insn;
25427 emit_insn (gen_blockage ());
25428 /* Unwind the stack to just below the saved registers. */
25429 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
25430 hard_frame_pointer_rtx,
25431 GEN_INT (- 4 * num_regs)));
25433 arm_add_cfa_adjust_cfa_note (insn, - 4 * num_regs,
25434 stack_pointer_rtx, hard_frame_pointer_rtx);
25437 arm_emit_multi_reg_pop (saved_regs_mask);
25439 if (IS_INTERRUPT (func_type))
25441 /* Interrupt handlers will have pushed the
25442 IP onto the stack, so restore it now. */
25443 rtx_insn *insn;
25444 rtx addr = gen_rtx_MEM (SImode,
25445 gen_rtx_POST_INC (SImode,
25446 stack_pointer_rtx));
25447 set_mem_alias_set (addr, get_frame_alias_set ());
25448 insn = emit_insn (gen_movsi (gen_rtx_REG (SImode, IP_REGNUM), addr));
25449 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
25450 gen_rtx_REG (SImode, IP_REGNUM),
25451 NULL_RTX);
25454 if (!really_return || (saved_regs_mask & (1 << PC_REGNUM)))
25455 return;
25457 if (crtl->calls_eh_return)
25458 emit_insn (gen_addsi3 (stack_pointer_rtx,
25459 stack_pointer_rtx,
25460 gen_rtx_REG (SImode, ARM_EH_STACKADJ_REGNUM)));
25462 if (IS_STACKALIGN (func_type))
25463 /* Restore the original stack pointer. Before prologue, the stack was
25464 realigned and the original stack pointer saved in r0. For details,
25465 see comment in arm_expand_prologue. */
25466 emit_insn (gen_movsi (stack_pointer_rtx, gen_rtx_REG (SImode, R0_REGNUM)));
25468 emit_jump_insn (simple_return_rtx);
25471 /* Generate RTL to represent ARM epilogue. Really_return is true if the
25472 function is not a sibcall. */
25473 void
25474 arm_expand_epilogue (bool really_return)
25476 unsigned long func_type;
25477 unsigned long saved_regs_mask;
25478 int num_regs = 0;
25479 int i;
25480 int amount;
25481 arm_stack_offsets *offsets;
25483 func_type = arm_current_func_type ();
25485 /* Naked functions don't have epilogues. Hence, generate a return pattern, and
25486 let output_return_instruction take care of instruction emission if any. */
25487 if (IS_NAKED (func_type)
25488 || (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN))
25490 if (really_return)
25491 emit_jump_insn (simple_return_rtx);
25492 return;
25495 /* If we are throwing an exception, then we really must be doing a
25496 return, so we can't tail-call. */
25497 gcc_assert (!crtl->calls_eh_return || really_return);
25499 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
25501 arm_expand_epilogue_apcs_frame (really_return);
25502 return;
25505 /* Get frame offsets for ARM. */
25506 offsets = arm_get_frame_offsets ();
25507 saved_regs_mask = offsets->saved_regs_mask;
25508 num_regs = bit_count (saved_regs_mask);
25510 if (frame_pointer_needed)
25512 rtx_insn *insn;
25513 /* Restore stack pointer if necessary. */
25514 if (TARGET_ARM)
25516 /* In ARM mode, the frame pointer points to the first saved register.
25517 Restore the stack pointer to the last saved register. */
25518 amount = offsets->frame - offsets->saved_regs;
25520 /* Force out any pending memory operations that reference stacked data
25521 before stack de-allocation occurs. */
25522 emit_insn (gen_blockage ());
25523 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
25524 hard_frame_pointer_rtx,
25525 GEN_INT (amount)));
25526 arm_add_cfa_adjust_cfa_note (insn, amount,
25527 stack_pointer_rtx,
25528 hard_frame_pointer_rtx);
25530 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is not
25531 deleted. */
25532 emit_insn (gen_force_register_use (stack_pointer_rtx));
25534 else
25536 /* In Thumb-2 mode, the frame pointer points to the last saved
25537 register. */
25538 amount = offsets->locals_base - offsets->saved_regs;
25539 if (amount)
25541 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
25542 hard_frame_pointer_rtx,
25543 GEN_INT (amount)));
25544 arm_add_cfa_adjust_cfa_note (insn, amount,
25545 hard_frame_pointer_rtx,
25546 hard_frame_pointer_rtx);
25549 /* Force out any pending memory operations that reference stacked data
25550 before stack de-allocation occurs. */
25551 emit_insn (gen_blockage ());
25552 insn = emit_insn (gen_movsi (stack_pointer_rtx,
25553 hard_frame_pointer_rtx));
25554 arm_add_cfa_adjust_cfa_note (insn, 0,
25555 stack_pointer_rtx,
25556 hard_frame_pointer_rtx);
25557 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is not
25558 deleted. */
25559 emit_insn (gen_force_register_use (stack_pointer_rtx));
25562 else
25564 /* Pop off outgoing args and local frame to adjust stack pointer to
25565 last saved register. */
25566 amount = offsets->outgoing_args - offsets->saved_regs;
25567 if (amount)
25569 rtx_insn *tmp;
25570 /* Force out any pending memory operations that reference stacked data
25571 before stack de-allocation occurs. */
25572 emit_insn (gen_blockage ());
25573 tmp = emit_insn (gen_addsi3 (stack_pointer_rtx,
25574 stack_pointer_rtx,
25575 GEN_INT (amount)));
25576 arm_add_cfa_adjust_cfa_note (tmp, amount,
25577 stack_pointer_rtx, stack_pointer_rtx);
25578 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is
25579 not deleted. */
25580 emit_insn (gen_force_register_use (stack_pointer_rtx));
25584 if (TARGET_HARD_FLOAT)
25586 /* Generate VFP register multi-pop. */
25587 int end_reg = LAST_VFP_REGNUM + 1;
25589 /* Scan the registers in reverse order. We need to match
25590 any groupings made in the prologue and generate matching
25591 vldm operations. The need to match groups is because,
25592 unlike pop, vldm can only do consecutive regs. */
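/* E.g. if d8-d9 and d11-d12 were saved as two separate groups, the scan below
   pops d11-d12 with one vldm and d8-d9 with another, matching the groupings
   created by the prologue. */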
25593 for (i = LAST_VFP_REGNUM - 1; i >= FIRST_VFP_REGNUM; i -= 2)
25594 /* Look for a case where a reg does not need restoring. */
25595 if ((!df_regs_ever_live_p (i) || call_used_regs[i])
25596 && (!df_regs_ever_live_p (i + 1)
25597 || call_used_regs[i + 1]))
25599 /* Restore the regs discovered so far (from reg+2 to
25600 end_reg). */
25601 if (end_reg > i + 2)
25602 arm_emit_vfp_multi_reg_pop (i + 2,
25603 (end_reg - (i + 2)) / 2,
25604 stack_pointer_rtx);
25605 end_reg = i;
25608 /* Restore the remaining regs that we have discovered (or possibly
25609 even all of them, if the conditional in the for loop never
25610 fired). */
25611 if (end_reg > i + 2)
25612 arm_emit_vfp_multi_reg_pop (i + 2,
25613 (end_reg - (i + 2)) / 2,
25614 stack_pointer_rtx);
25617 if (TARGET_IWMMXT)
25618 for (i = FIRST_IWMMXT_REGNUM; i <= LAST_IWMMXT_REGNUM; i++)
25619 if (df_regs_ever_live_p (i) && !call_used_regs[i])
25621 rtx_insn *insn;
25622 rtx addr = gen_rtx_MEM (V2SImode,
25623 gen_rtx_POST_INC (SImode,
25624 stack_pointer_rtx));
25625 set_mem_alias_set (addr, get_frame_alias_set ());
25626 insn = emit_insn (gen_movsi (gen_rtx_REG (V2SImode, i), addr));
25627 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
25628 gen_rtx_REG (V2SImode, i),
25629 NULL_RTX);
25630 arm_add_cfa_adjust_cfa_note (insn, UNITS_PER_WORD,
25631 stack_pointer_rtx, stack_pointer_rtx);
25634 if (saved_regs_mask)
25636 rtx insn;
25637 bool return_in_pc = false;
25639 if (ARM_FUNC_TYPE (func_type) != ARM_FT_INTERWORKED
25640 && (TARGET_ARM || ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL)
25641 && !IS_CMSE_ENTRY (func_type)
25642 && !IS_STACKALIGN (func_type)
25643 && really_return
25644 && crtl->args.pretend_args_size == 0
25645 && saved_regs_mask & (1 << LR_REGNUM)
25646 && !crtl->calls_eh_return)
25648 saved_regs_mask &= ~(1 << LR_REGNUM);
25649 saved_regs_mask |= (1 << PC_REGNUM);
25650 return_in_pc = true;
25653 if (num_regs == 1 && (!IS_INTERRUPT (func_type) || !return_in_pc))
25655 for (i = 0; i <= LAST_ARM_REGNUM; i++)
25656 if (saved_regs_mask & (1 << i))
25658 rtx addr = gen_rtx_MEM (SImode,
25659 gen_rtx_POST_INC (SImode,
25660 stack_pointer_rtx));
25661 set_mem_alias_set (addr, get_frame_alias_set ());
25663 if (i == PC_REGNUM)
25665 insn = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
25666 XVECEXP (insn, 0, 0) = ret_rtx;
25667 XVECEXP (insn, 0, 1) = gen_rtx_SET (gen_rtx_REG (SImode, i),
25668 addr);
25669 RTX_FRAME_RELATED_P (XVECEXP (insn, 0, 1)) = 1;
25670 insn = emit_jump_insn (insn);
25672 else
25674 insn = emit_insn (gen_movsi (gen_rtx_REG (SImode, i),
25675 addr));
25676 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
25677 gen_rtx_REG (SImode, i),
25678 NULL_RTX);
25679 arm_add_cfa_adjust_cfa_note (insn, UNITS_PER_WORD,
25680 stack_pointer_rtx,
25681 stack_pointer_rtx);
25685 else
25687 if (TARGET_LDRD
25688 && current_tune->prefer_ldrd_strd
25689 && !optimize_function_for_size_p (cfun))
25691 if (TARGET_THUMB2)
25692 thumb2_emit_ldrd_pop (saved_regs_mask);
25693 else if (TARGET_ARM && !IS_INTERRUPT (func_type))
25694 arm_emit_ldrd_pop (saved_regs_mask);
25695 else
25696 arm_emit_multi_reg_pop (saved_regs_mask);
25698 else
25699 arm_emit_multi_reg_pop (saved_regs_mask);
25702 if (return_in_pc)
25703 return;
25706 amount
25707 = crtl->args.pretend_args_size + arm_compute_static_chain_stack_bytes();
25708 if (amount)
25710 int i, j;
25711 rtx dwarf = NULL_RTX;
25712 rtx_insn *tmp =
25713 emit_insn (gen_addsi3 (stack_pointer_rtx,
25714 stack_pointer_rtx,
25715 GEN_INT (amount)));
25717 RTX_FRAME_RELATED_P (tmp) = 1;
25719 if (cfun->machine->uses_anonymous_args)
25721 /* Restore pretend args. Refer to arm_expand_prologue for how
25722 pretend_args are saved on the stack. */
25723 int num_regs = crtl->args.pretend_args_size / 4;
25724 saved_regs_mask = (0xf0 >> num_regs) & 0xf;
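/* E.g. with 8 bytes of pretend args num_regs is 2 and the mask is
   (0xf0 >> 2) & 0xf == 0xc, i.e. r2 and r3, the registers the prologue pushed
   for the anonymous arguments. */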
25725 for (j = 0, i = 0; j < num_regs; i++)
25726 if (saved_regs_mask & (1 << i))
25728 rtx reg = gen_rtx_REG (SImode, i);
25729 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
25730 j++;
25732 REG_NOTES (tmp) = dwarf;
25734 arm_add_cfa_adjust_cfa_note (tmp, amount,
25735 stack_pointer_rtx, stack_pointer_rtx);
25738 /* Clear all caller-saved regs that are not used to return. */
25739 if (IS_CMSE_ENTRY (arm_current_func_type ()))
25741 /* CMSE_ENTRY always returns. */
25742 gcc_assert (really_return);
25743 cmse_nonsecure_entry_clear_before_return ();
25746 if (!really_return)
25747 return;
25749 if (crtl->calls_eh_return)
25750 emit_insn (gen_addsi3 (stack_pointer_rtx,
25751 stack_pointer_rtx,
25752 gen_rtx_REG (SImode, ARM_EH_STACKADJ_REGNUM)));
25754 if (IS_STACKALIGN (func_type))
25755 /* Restore the original stack pointer. Before prologue, the stack was
25756 realigned and the original stack pointer saved in r0. For details,
25757 see comment in arm_expand_prologue. */
25758 emit_insn (gen_movsi (stack_pointer_rtx, gen_rtx_REG (SImode, R0_REGNUM)));
25760 emit_jump_insn (simple_return_rtx);
25763 /* Implementation of insn prologue_thumb1_interwork. This is the first
25764 "instruction" of a function called in ARM mode. Swap to thumb mode. */
25766 const char *
25767 thumb1_output_interwork (void)
25769 const char * name;
25770 FILE *f = asm_out_file;
25772 gcc_assert (MEM_P (DECL_RTL (current_function_decl)));
25773 gcc_assert (GET_CODE (XEXP (DECL_RTL (current_function_decl), 0))
25774 == SYMBOL_REF);
25775 name = XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0);
25777 /* Generate code sequence to switch us into Thumb mode. */
25778 /* The .code 32 directive has already been emitted by
25779 ASM_DECLARE_FUNCTION_NAME. */
25780 asm_fprintf (f, "\torr\t%r, %r, #1\n", IP_REGNUM, PC_REGNUM);
25781 asm_fprintf (f, "\tbx\t%r\n", IP_REGNUM);
25783 /* Generate a label, so that the debugger will notice the
25784 change in instruction sets. This label is also used by
25785 the assembler to bypass the ARM code when this function
25786 is called from a Thumb encoded function elsewhere in the
25787 same file. Hence the definition of STUB_NAME here must
25788 agree with the definition in gas/config/tc-arm.c. */
25790 #define STUB_NAME ".real_start_of"
25792 fprintf (f, "\t.code\t16\n");
25793 #ifdef ARM_PE
25794 if (arm_dllexport_name_p (name))
25795 name = arm_strip_name_encoding (name);
25796 #endif
25797 asm_fprintf (f, "\t.globl %s%U%s\n", STUB_NAME, name);
25798 fprintf (f, "\t.thumb_func\n");
25799 asm_fprintf (f, "%s%U%s:\n", STUB_NAME, name);
25801 return "";
25804 /* Handle the case of a double word load into a low register from
25805 a computed memory address. The computed address may involve a
25806 register which is overwritten by the load. */
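/* For instance, loading a double word at [r0] into r0/r1 must fetch the high
   word first ("ldr r1, [r0, #4]" then "ldr r0, [r0]") so that the base
   register is not overwritten before its last use. */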
25807 const char *
25808 thumb_load_double_from_address (rtx *operands)
25810 rtx addr;
25811 rtx base;
25812 rtx offset;
25813 rtx arg1;
25814 rtx arg2;
25816 gcc_assert (REG_P (operands[0]));
25817 gcc_assert (MEM_P (operands[1]));
25819 /* Get the memory address. */
25820 addr = XEXP (operands[1], 0);
25822 /* Work out how the memory address is computed. */
25823 switch (GET_CODE (addr))
25825 case REG:
25826 operands[2] = adjust_address (operands[1], SImode, 4);
25828 if (REGNO (operands[0]) == REGNO (addr))
25830 output_asm_insn ("ldr\t%H0, %2", operands);
25831 output_asm_insn ("ldr\t%0, %1", operands);
25833 else
25835 output_asm_insn ("ldr\t%0, %1", operands);
25836 output_asm_insn ("ldr\t%H0, %2", operands);
25838 break;
25840 case CONST:
25841 /* Compute <address> + 4 for the high order load. */
25842 operands[2] = adjust_address (operands[1], SImode, 4);
25844 output_asm_insn ("ldr\t%0, %1", operands);
25845 output_asm_insn ("ldr\t%H0, %2", operands);
25846 break;
25848 case PLUS:
25849 arg1 = XEXP (addr, 0);
25850 arg2 = XEXP (addr, 1);
25852 if (CONSTANT_P (arg1))
25853 base = arg2, offset = arg1;
25854 else
25855 base = arg1, offset = arg2;
25857 gcc_assert (REG_P (base));
25859 /* Catch the case of <address> = <reg> + <reg> */
25860 if (REG_P (offset))
25862 int reg_offset = REGNO (offset);
25863 int reg_base = REGNO (base);
25864 int reg_dest = REGNO (operands[0]);
25866 /* Add the base and offset registers together into the
25867 higher destination register. */
25868 asm_fprintf (asm_out_file, "\tadd\t%r, %r, %r",
25869 reg_dest + 1, reg_base, reg_offset);
25871 /* Load the lower destination register from the address in
25872 the higher destination register. */
25873 asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #0]",
25874 reg_dest, reg_dest + 1);
25876 /* Load the higher destination register from its own address
25877 plus 4. */
25878 asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #4]",
25879 reg_dest + 1, reg_dest + 1);
25881 else
25883 /* Compute <address> + 4 for the high order load. */
25884 operands[2] = adjust_address (operands[1], SImode, 4);
25886 /* If the computed address is held in the low order register
25887 then load the high order register first, otherwise always
25888 load the low order register first. */
25889 if (REGNO (operands[0]) == REGNO (base))
25891 output_asm_insn ("ldr\t%H0, %2", operands);
25892 output_asm_insn ("ldr\t%0, %1", operands);
25894 else
25896 output_asm_insn ("ldr\t%0, %1", operands);
25897 output_asm_insn ("ldr\t%H0, %2", operands);
25900 break;
25902 case LABEL_REF:
25903 /* With no registers to worry about we can just load the value
25904 directly. */
25905 operands[2] = adjust_address (operands[1], SImode, 4);
25907 output_asm_insn ("ldr\t%H0, %2", operands);
25908 output_asm_insn ("ldr\t%0, %1", operands);
25909 break;
25911 default:
25912 gcc_unreachable ();
25915 return "";
25918 const char *
25919 thumb_output_move_mem_multiple (int n, rtx *operands)
25921 switch (n)
25923 case 2:
25924 if (REGNO (operands[4]) > REGNO (operands[5]))
25925 std::swap (operands[4], operands[5]);
25927 output_asm_insn ("ldmia\t%1!, {%4, %5}", operands);
25928 output_asm_insn ("stmia\t%0!, {%4, %5}", operands);
25929 break;
25931 case 3:
25932 if (REGNO (operands[4]) > REGNO (operands[5]))
25933 std::swap (operands[4], operands[5]);
25934 if (REGNO (operands[5]) > REGNO (operands[6]))
25935 std::swap (operands[5], operands[6]);
25936 if (REGNO (operands[4]) > REGNO (operands[5]))
25937 std::swap (operands[4], operands[5]);
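/* The three conditional swaps above form a small sorting network that orders
   operands[4..6] by increasing register number, so that the ldmia/stmia
   register lists below are ascending. */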
25939 output_asm_insn ("ldmia\t%1!, {%4, %5, %6}", operands);
25940 output_asm_insn ("stmia\t%0!, {%4, %5, %6}", operands);
25941 break;
25943 default:
25944 gcc_unreachable ();
25947 return "";
25950 /* Output a call-via instruction for thumb state. */
25951 const char *
25952 thumb_call_via_reg (rtx reg)
25954 int regno = REGNO (reg);
25955 rtx *labelp;
25957 gcc_assert (regno < LR_REGNUM);
25959 /* If we are in the normal text section we can use a single instance
25960 per compilation unit. If we are doing function sections, then we need
25961 an entry per section, since we can't rely on reachability. */
25962 if (in_section == text_section)
25964 thumb_call_reg_needed = 1;
25966 if (thumb_call_via_label[regno] == NULL)
25967 thumb_call_via_label[regno] = gen_label_rtx ();
25968 labelp = thumb_call_via_label + regno;
25970 else
25972 if (cfun->machine->call_via[regno] == NULL)
25973 cfun->machine->call_via[regno] = gen_label_rtx ();
25974 labelp = cfun->machine->call_via + regno;
25977 output_asm_insn ("bl\t%a0", labelp);
25978 return "";
25981 /* Routines for generating rtl. */
25982 void
25983 thumb_expand_movmemqi (rtx *operands)
25985 rtx out = copy_to_mode_reg (SImode, XEXP (operands[0], 0));
25986 rtx in = copy_to_mode_reg (SImode, XEXP (operands[1], 0));
25987 HOST_WIDE_INT len = INTVAL (operands[2]);
25988 HOST_WIDE_INT offset = 0;
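/* The copy is expanded greedily; e.g. a 23-byte copy becomes one 12-byte block
   move, one 8-byte block move, then a halfword and a byte. */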
25990 while (len >= 12)
25992 emit_insn (gen_movmem12b (out, in, out, in));
25993 len -= 12;
25996 if (len >= 8)
25998 emit_insn (gen_movmem8b (out, in, out, in));
25999 len -= 8;
26002 if (len >= 4)
26004 rtx reg = gen_reg_rtx (SImode);
26005 emit_insn (gen_movsi (reg, gen_rtx_MEM (SImode, in)));
26006 emit_insn (gen_movsi (gen_rtx_MEM (SImode, out), reg));
26007 len -= 4;
26008 offset += 4;
26011 if (len >= 2)
26013 rtx reg = gen_reg_rtx (HImode);
26014 emit_insn (gen_movhi (reg, gen_rtx_MEM (HImode,
26015 plus_constant (Pmode, in,
26016 offset))));
26017 emit_insn (gen_movhi (gen_rtx_MEM (HImode, plus_constant (Pmode, out,
26018 offset)),
26019 reg));
26020 len -= 2;
26021 offset += 2;
26024 if (len)
26026 rtx reg = gen_reg_rtx (QImode);
26027 emit_insn (gen_movqi (reg, gen_rtx_MEM (QImode,
26028 plus_constant (Pmode, in,
26029 offset))));
26030 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, out,
26031 offset)),
26032 reg));
26036 void
26037 thumb_reload_out_hi (rtx *operands)
26039 emit_insn (gen_thumb_movhi_clobber (operands[0], operands[1], operands[2]));
26042 /* Return the length of a function name prefix
26043 that starts with the character 'c'. */
26044 static int
26045 arm_get_strip_length (int c)
26047 switch (c)
26049 ARM_NAME_ENCODING_LENGTHS
26050 default: return 0;
26054 /* Return a pointer to a function's name with any
26055 and all prefix encodings stripped from it. */
26056 const char *
26057 arm_strip_name_encoding (const char *name)
26059 int skip;
26061 while ((skip = arm_get_strip_length (* name)))
26062 name += skip;
26064 return name;
26067 /* If there is a '*' anywhere in the name's prefix, then
26068 emit the stripped name verbatim, otherwise prepend an
26069 underscore if leading underscores are being used. */
26070 void
26071 arm_asm_output_labelref (FILE *stream, const char *name)
26073 int skip;
26074 int verbatim = 0;
26076 while ((skip = arm_get_strip_length (* name)))
26078 verbatim |= (*name == '*');
26079 name += skip;
26082 if (verbatim)
26083 fputs (name, stream);
26084 else
26085 asm_fprintf (stream, "%U%s", name);
26088 /* This function is used to emit an EABI tag and its associated value.
26089 We emit the numerical value of the tag in case the assembler does not
26090 support textual tags (e.g. gas prior to 2.20). If requested we include
26091 the tag name in a comment so that anyone reading the assembler output
26092 will know which tag is being set.
26094 This function is not static because arm-c.c needs it too. */
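/* For example, arm_emit_eabi_attribute ("Tag_ABI_FP_rounding", 19, 1) emits
   ".eabi_attribute 19, 1" followed by the tag name as an assembler comment
   when verbose assembly output is requested, and just the directive
   otherwise. */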
26096 void
26097 arm_emit_eabi_attribute (const char *name, int num, int val)
26099 asm_fprintf (asm_out_file, "\t.eabi_attribute %d, %d", num, val);
26100 if (flag_verbose_asm || flag_debug_asm)
26101 asm_fprintf (asm_out_file, "\t%s %s", ASM_COMMENT_START, name);
26102 asm_fprintf (asm_out_file, "\n");
26105 /* This function is used to print CPU tuning information as a comment
26106 in the assembler file. Pointers are not printed for now. */
26108 void
26109 arm_print_tune_info (void)
26111 asm_fprintf (asm_out_file, "\t" ASM_COMMENT_START ".tune parameters\n");
26112 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "constant_limit:\t%d\n",
26113 current_tune->constant_limit);
26114 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26115 "max_insns_skipped:\t%d\n", current_tune->max_insns_skipped);
26116 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26117 "prefetch.num_slots:\t%d\n", current_tune->prefetch.num_slots);
26118 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26119 "prefetch.l1_cache_size:\t%d\n",
26120 current_tune->prefetch.l1_cache_size);
26121 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26122 "prefetch.l1_cache_line_size:\t%d\n",
26123 current_tune->prefetch.l1_cache_line_size);
26124 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26125 "prefer_constant_pool:\t%d\n",
26126 (int) current_tune->prefer_constant_pool);
26127 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26128 "branch_cost:\t(s:speed, p:predictable)\n");
26129 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "\t\ts&p\tcost\n");
26130 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "\t\t00\t%d\n",
26131 current_tune->branch_cost (false, false));
26132 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "\t\t01\t%d\n",
26133 current_tune->branch_cost (false, true));
26134 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "\t\t10\t%d\n",
26135 current_tune->branch_cost (true, false));
26136 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "\t\t11\t%d\n",
26137 current_tune->branch_cost (true, true));
26138 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26139 "prefer_ldrd_strd:\t%d\n",
26140 (int) current_tune->prefer_ldrd_strd);
26141 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26142 "logical_op_non_short_circuit:\t[%d,%d]\n",
26143 (int) current_tune->logical_op_non_short_circuit_thumb,
26144 (int) current_tune->logical_op_non_short_circuit_arm);
26145 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26146 "prefer_neon_for_64bits:\t%d\n",
26147 (int) current_tune->prefer_neon_for_64bits);
26148 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26149 "disparage_flag_setting_t16_encodings:\t%d\n",
26150 (int) current_tune->disparage_flag_setting_t16_encodings);
26151 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26152 "string_ops_prefer_neon:\t%d\n",
26153 (int) current_tune->string_ops_prefer_neon);
26154 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26155 "max_insns_inline_memset:\t%d\n",
26156 current_tune->max_insns_inline_memset);
26157 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "fusible_ops:\t%u\n",
26158 current_tune->fusible_ops);
26159 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "sched_autopref:\t%d\n",
26160 (int) current_tune->sched_autopref);
26163 /* Print .arch and .arch_extension directives corresponding to the
26164 current architecture configuration. */
26165 static void
26166 arm_print_asm_arch_directives ()
26168 const arch_option *arch
26169 = arm_parse_arch_option_name (all_architectures, "-march",
26170 arm_active_target.arch_name);
26171 auto_sbitmap opt_bits (isa_num_bits);
26173 gcc_assert (arch);
26175 asm_fprintf (asm_out_file, "\t.arch %s\n", arm_active_target.arch_name);
26176 if (!arch->common.extensions)
26177 return;
26179 for (const struct cpu_arch_extension *opt = arch->common.extensions;
26180 opt->name != NULL;
26181 opt++)
26183 if (!opt->remove)
26185 arm_initialize_isa (opt_bits, opt->isa_bits);
26187 /* If every feature bit of this option is set in the target
26188 ISA specification, print out the option name. However,
26189 don't print anything if all the bits are part of the
26190 FPU specification. */
26191 if (bitmap_subset_p (opt_bits, arm_active_target.isa)
26192 && !bitmap_subset_p (opt_bits, isa_all_fpubits))
26193 asm_fprintf (asm_out_file, "\t.arch_extension %s\n", opt->name);
26198 static void
26199 arm_file_start (void)
26201 int val;
26203 if (TARGET_BPABI)
26205 /* We don't have a specified CPU. Use the architecture to
26206 generate the tags.
26208 Note: it might be better to do this unconditionally, then the
26209 assembler would not need to know about all new CPU names as
26210 they are added. */
26211 if (!arm_active_target.core_name)
26213 /* armv7ve doesn't support any extensions. */
26214 if (strcmp (arm_active_target.arch_name, "armv7ve") == 0)
26216 /* Keep backward compatibility for assemblers
26217 which don't support armv7ve. */
26218 asm_fprintf (asm_out_file, "\t.arch armv7-a\n");
26219 asm_fprintf (asm_out_file, "\t.arch_extension virt\n");
26220 asm_fprintf (asm_out_file, "\t.arch_extension idiv\n");
26221 asm_fprintf (asm_out_file, "\t.arch_extension sec\n");
26222 asm_fprintf (asm_out_file, "\t.arch_extension mp\n");
26224 else
26225 arm_print_asm_arch_directives ();
26227 else if (strncmp (arm_active_target.core_name, "generic", 7) == 0)
26228 asm_fprintf (asm_out_file, "\t.arch %s\n",
26229 arm_active_target.core_name + 8);
26230 else
26232 const char* truncated_name
26233 = arm_rewrite_selected_cpu (arm_active_target.core_name);
26234 asm_fprintf (asm_out_file, "\t.cpu %s\n", truncated_name);
26237 if (print_tune_info)
26238 arm_print_tune_info ();
26240 if (! TARGET_SOFT_FLOAT)
26242 if (TARGET_HARD_FLOAT && TARGET_VFP_SINGLE)
26243 arm_emit_eabi_attribute ("Tag_ABI_HardFP_use", 27, 1);
26245 if (TARGET_HARD_FLOAT_ABI)
26246 arm_emit_eabi_attribute ("Tag_ABI_VFP_args", 28, 1);
26249 /* Some of these attributes only apply when the corresponding features
26250 are used. However we don't have any easy way of figuring this out.
26251 Conservatively record the setting that would have been used. */
26253 if (flag_rounding_math)
26254 arm_emit_eabi_attribute ("Tag_ABI_FP_rounding", 19, 1);
26256 if (!flag_unsafe_math_optimizations)
26258 arm_emit_eabi_attribute ("Tag_ABI_FP_denormal", 20, 1);
26259 arm_emit_eabi_attribute ("Tag_ABI_FP_exceptions", 21, 1);
26261 if (flag_signaling_nans)
26262 arm_emit_eabi_attribute ("Tag_ABI_FP_user_exceptions", 22, 1);
26264 arm_emit_eabi_attribute ("Tag_ABI_FP_number_model", 23,
26265 flag_finite_math_only ? 1 : 3);
26267 arm_emit_eabi_attribute ("Tag_ABI_align8_needed", 24, 1);
26268 arm_emit_eabi_attribute ("Tag_ABI_align8_preserved", 25, 1);
26269 arm_emit_eabi_attribute ("Tag_ABI_enum_size", 26,
26270 flag_short_enums ? 1 : 2);
26272 /* Tag_ABI_optimization_goals. */
26273 if (optimize_size)
26274 val = 4;
26275 else if (optimize >= 2)
26276 val = 2;
26277 else if (optimize)
26278 val = 1;
26279 else
26280 val = 6;
26281 arm_emit_eabi_attribute ("Tag_ABI_optimization_goals", 30, val);
26283 arm_emit_eabi_attribute ("Tag_CPU_unaligned_access", 34,
26284 unaligned_access);
26286 if (arm_fp16_format)
26287 arm_emit_eabi_attribute ("Tag_ABI_FP_16bit_format", 38,
26288 (int) arm_fp16_format);
26290 if (arm_lang_output_object_attributes_hook)
26291 arm_lang_output_object_attributes_hook();
26294 default_file_start ();
26297 static void
26298 arm_file_end (void)
26300 int regno;
26302 if (NEED_INDICATE_EXEC_STACK)
26303 /* Add .note.GNU-stack. */
26304 file_end_indicate_exec_stack ();
26306 if (! thumb_call_reg_needed)
26307 return;
26309 switch_to_section (text_section);
26310 asm_fprintf (asm_out_file, "\t.code 16\n");
26311 ASM_OUTPUT_ALIGN (asm_out_file, 1);
26313 for (regno = 0; regno < LR_REGNUM; regno++)
26315 rtx label = thumb_call_via_label[regno];
26317 if (label != 0)
26319 targetm.asm_out.internal_label (asm_out_file, "L",
26320 CODE_LABEL_NUMBER (label));
26321 asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
26326 #ifndef ARM_PE
26327 /* Symbols in the text segment can be accessed without indirecting via the
26328 constant pool; it may take an extra binary operation, but this is still
26329 faster than indirecting via memory. Don't do this when not optimizing,
26330 since we won't be calculating all of the offsets necessary to do this
26331 simplification. */
26333 static void
26334 arm_encode_section_info (tree decl, rtx rtl, int first)
26336 if (optimize > 0 && TREE_CONSTANT (decl))
26337 SYMBOL_REF_FLAG (XEXP (rtl, 0)) = 1;
26339 default_encode_section_info (decl, rtl, first);
26341 #endif /* !ARM_PE */
26343 static void
26344 arm_internal_label (FILE *stream, const char *prefix, unsigned long labelno)
26346 if (arm_ccfsm_state == 3 && (unsigned) arm_target_label == labelno
26347 && !strcmp (prefix, "L"))
26349 arm_ccfsm_state = 0;
26350 arm_target_insn = NULL;
26352 default_internal_label (stream, prefix, labelno);
26355 /* Output code to add DELTA to the first argument, and then jump
26356 to FUNCTION. Used for C++ multiple inheritance. */
26358 static void
26359 arm_thumb1_mi_thunk (FILE *file, tree, HOST_WIDE_INT delta,
26360 HOST_WIDE_INT, tree function)
26362 static int thunk_label = 0;
26363 char label[256];
26364 char labelpc[256];
26365 int mi_delta = delta;
26366 const char *const mi_op = mi_delta < 0 ? "sub" : "add";
26367 int shift = 0;
26368 int this_regno = (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function)
26369 ? 1 : 0);
26370 if (mi_delta < 0)
26371 mi_delta = - mi_delta;
26373 final_start_function (emit_barrier (), file, 1);
26375 if (TARGET_THUMB1)
26377 int labelno = thunk_label++;
26378 ASM_GENERATE_INTERNAL_LABEL (label, "LTHUMBFUNC", labelno);
26379 /* Thunks are entered in arm mode when available. */
26380 if (TARGET_THUMB1_ONLY)
26382 /* push r3 so we can use it as a temporary. */
26383 /* TODO: Omit this save if r3 is not used. */
26384 fputs ("\tpush {r3}\n", file);
26385 fputs ("\tldr\tr3, ", file);
26387 else
26389 fputs ("\tldr\tr12, ", file);
26391 assemble_name (file, label);
26392 fputc ('\n', file);
26393 if (flag_pic)
26395 /* If we are generating PIC, the ldr instruction below loads
26396 "(target - 7) - .LTHUNKPCn" into r12. The pc reads as
26397 the address of the add + 8, so we have:
26399 r12 = (target - 7) - .LTHUNKPCn + (.LTHUNKPCn + 8)
26400 = target + 1.
26402 Note that we have "+ 1" because some versions of GNU ld
26403 don't set the low bit of the result for R_ARM_REL32
26404 relocations against thumb function symbols.
26405 On ARMv6M this is +4, not +8. */
26406 ASM_GENERATE_INTERNAL_LABEL (labelpc, "LTHUNKPC", labelno);
26407 assemble_name (file, labelpc);
26408 fputs (":\n", file);
26409 if (TARGET_THUMB1_ONLY)
26411 /* This is 2 insns after the start of the thunk, so we know it
26412 is 4-byte aligned. */
26413 fputs ("\tadd\tr3, pc, r3\n", file);
26414 fputs ("\tmov r12, r3\n", file);
26416 else
26417 fputs ("\tadd\tr12, pc, r12\n", file);
26419 else if (TARGET_THUMB1_ONLY)
26420 fputs ("\tmov r12, r3\n", file);
26422 if (TARGET_THUMB1_ONLY)
26424 if (mi_delta > 255)
26426 fputs ("\tldr\tr3, ", file);
26427 assemble_name (file, label);
26428 fputs ("+4\n", file);
26429 asm_fprintf (file, "\t%ss\t%r, %r, r3\n",
26430 mi_op, this_regno, this_regno);
26432 else if (mi_delta != 0)
26434 /* Thumb1 unified syntax requires an 's' suffix in the instruction name
26435 when one of the operands is an immediate. */
26436 asm_fprintf (file, "\t%ss\t%r, %r, #%d\n",
26437 mi_op, this_regno, this_regno,
26438 mi_delta);
26441 else
26443 /* TODO: Use movw/movt for large constants when available. */
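/* The loop below splits the delta into ARM add-immediates (8-bit values at
   even rotations); e.g. a delta of 0x1234 is emitted as
   "add rN, rN, #0x234" followed by "add rN, rN, #0x1000", where rN is the
   'this' register. */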
26444 while (mi_delta != 0)
26446 if ((mi_delta & (3 << shift)) == 0)
26447 shift += 2;
26448 else
26450 asm_fprintf (file, "\t%s\t%r, %r, #%d\n",
26451 mi_op, this_regno, this_regno,
26452 mi_delta & (0xff << shift));
26453 mi_delta &= ~(0xff << shift);
26454 shift += 8;
26458 if (TARGET_THUMB1)
26460 if (TARGET_THUMB1_ONLY)
26461 fputs ("\tpop\t{r3}\n", file);
26463 fprintf (file, "\tbx\tr12\n");
26464 ASM_OUTPUT_ALIGN (file, 2);
26465 assemble_name (file, label);
26466 fputs (":\n", file);
26467 if (flag_pic)
26469 /* Output ".word .LTHUNKn-[3,7]-.LTHUNKPCn". */
26470 rtx tem = XEXP (DECL_RTL (function), 0);
26471 /* For TARGET_THUMB1_ONLY the thunk is in Thumb mode, so the PC
26472 pipeline offset is four rather than eight. Adjust the offset
26473 accordingly. */
26474 tem = plus_constant (GET_MODE (tem), tem,
26475 TARGET_THUMB1_ONLY ? -3 : -7);
26476 tem = gen_rtx_MINUS (GET_MODE (tem),
26477 tem,
26478 gen_rtx_SYMBOL_REF (Pmode,
26479 ggc_strdup (labelpc)));
26480 assemble_integer (tem, 4, BITS_PER_WORD, 1);
26482 else
26483 /* Output ".word .LTHUNKn". */
26484 assemble_integer (XEXP (DECL_RTL (function), 0), 4, BITS_PER_WORD, 1);
26486 if (TARGET_THUMB1_ONLY && mi_delta > 255)
26487 assemble_integer (GEN_INT(mi_delta), 4, BITS_PER_WORD, 1);
26489 else
26491 fputs ("\tb\t", file);
26492 assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
26493 if (NEED_PLT_RELOC)
26494 fputs ("(PLT)", file);
26495 fputc ('\n', file);
26498 final_end_function ();
26501 /* MI thunk handling for TARGET_32BIT. */
26503 static void
26504 arm32_output_mi_thunk (FILE *file, tree, HOST_WIDE_INT delta,
26505 HOST_WIDE_INT vcall_offset, tree function)
26507 /* On ARM, this_regno is R0 or R1 depending on
26508 whether the function returns an aggregate or not. */
26510 int this_regno = (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)),
26511 function)
26512 ? R1_REGNUM : R0_REGNUM);
26514 rtx temp = gen_rtx_REG (Pmode, IP_REGNUM);
26515 rtx this_rtx = gen_rtx_REG (Pmode, this_regno);
26516 reload_completed = 1;
26517 emit_note (NOTE_INSN_PROLOGUE_END);
26519 /* Add DELTA to THIS_RTX. */
26520 if (delta != 0)
26521 arm_split_constant (PLUS, Pmode, NULL_RTX,
26522 delta, this_rtx, this_rtx, false);
26524 /* Add *(*THIS_RTX + VCALL_OFFSET) to THIS_RTX. */
26525 if (vcall_offset != 0)
26527 /* Load *THIS_RTX. */
26528 emit_move_insn (temp, gen_rtx_MEM (Pmode, this_rtx));
26529 /* Compute *THIS_RTX + VCALL_OFFSET. */
26530 arm_split_constant (PLUS, Pmode, NULL_RTX, vcall_offset, temp, temp,
26531 false);
26532 /* Compute *(*THIS_RTX + VCALL_OFFSET). */
26533 emit_move_insn (temp, gen_rtx_MEM (Pmode, temp));
26534 emit_insn (gen_add3_insn (this_rtx, this_rtx, temp));
26537 /* Generate a tail call to the target function. */
26538 if (!TREE_USED (function))
26540 assemble_external (function);
26541 TREE_USED (function) = 1;
26543 rtx funexp = XEXP (DECL_RTL (function), 0);
26544 funexp = gen_rtx_MEM (FUNCTION_MODE, funexp);
26545 rtx_insn * insn = emit_call_insn (gen_sibcall (funexp, const0_rtx, NULL_RTX));
26546 SIBLING_CALL_P (insn) = 1;
26548 insn = get_insns ();
26549 shorten_branches (insn);
26550 final_start_function (insn, file, 1);
26551 final (insn, file, 1);
26552 final_end_function ();
26554 /* Stop pretending this is a post-reload pass. */
26555 reload_completed = 0;
26558 /* Output code to add DELTA to the first argument, and then jump
26559 to FUNCTION. Used for C++ multiple inheritance. */
26561 static void
26562 arm_output_mi_thunk (FILE *file, tree thunk, HOST_WIDE_INT delta,
26563 HOST_WIDE_INT vcall_offset, tree function)
26565 if (TARGET_32BIT)
26566 arm32_output_mi_thunk (file, thunk, delta, vcall_offset, function);
26567 else
26568 arm_thumb1_mi_thunk (file, thunk, delta, vcall_offset, function);
26572 arm_emit_vector_const (FILE *file, rtx x)
26574 int i;
26575 const char * pattern;
26577 gcc_assert (GET_CODE (x) == CONST_VECTOR);
26579 switch (GET_MODE (x))
26581 case E_V2SImode: pattern = "%08x"; break;
26582 case E_V4HImode: pattern = "%04x"; break;
26583 case E_V8QImode: pattern = "%02x"; break;
26584 default: gcc_unreachable ();
26587 fprintf (file, "0x");
26588 for (i = CONST_VECTOR_NUNITS (x); i--;)
26590 rtx element;
26592 element = CONST_VECTOR_ELT (x, i);
26593 fprintf (file, pattern, INTVAL (element));
26596 return 1;
26599 /* Emit an fp16 constant appropriately padded to occupy a 4-byte word.
26600 HFmode constant pool entries are actually loaded with ldr. */
26601 void
26602 arm_emit_fp16_const (rtx c)
26604 long bits;
26606 bits = real_to_target (NULL, CONST_DOUBLE_REAL_VALUE (c), HFmode);
26607 if (WORDS_BIG_ENDIAN)
26608 assemble_zeros (2);
26609 assemble_integer (GEN_INT (bits), 2, BITS_PER_WORD, 1);
26610 if (!WORDS_BIG_ENDIAN)
26611 assemble_zeros (2);
26614 const char *
26615 arm_output_load_gr (rtx *operands)
26617 rtx reg;
26618 rtx offset;
26619 rtx wcgr;
26620 rtx sum;
26622 if (!MEM_P (operands [1])
26623 || GET_CODE (sum = XEXP (operands [1], 0)) != PLUS
26624 || !REG_P (reg = XEXP (sum, 0))
26625 || !CONST_INT_P (offset = XEXP (sum, 1))
26626 || ((INTVAL (offset) < 1024) && (INTVAL (offset) > -1024)))
26627 return "wldrw%?\t%0, %1";
26629 /* Fix up an out-of-range load of a GR register. */
26630 output_asm_insn ("str%?\t%0, [sp, #-4]!\t@ Start of GR load expansion", & reg);
26631 wcgr = operands[0];
26632 operands[0] = reg;
26633 output_asm_insn ("ldr%?\t%0, %1", operands);
26635 operands[0] = wcgr;
26636 operands[1] = reg;
26637 output_asm_insn ("tmcr%?\t%0, %1", operands);
26638 output_asm_insn ("ldr%?\t%0, [sp], #4\t@ End of GR load expansion", & reg);
26640 return "";
26643 /* Worker function for TARGET_SETUP_INCOMING_VARARGS.
26645 On the ARM, PRETEND_SIZE is set in order to have the prologue push the last
26646 named arg and all anonymous args onto the stack.
26647 XXX I know the prologue shouldn't be pushing registers, but it is faster
26648 that way. */
26650 static void
26651 arm_setup_incoming_varargs (cumulative_args_t pcum_v,
26652 machine_mode mode,
26653 tree type,
26654 int *pretend_size,
26655 int second_time ATTRIBUTE_UNUSED)
26657 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
26658 int nregs;
26660 cfun->machine->uses_anonymous_args = 1;
26661 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
26663 nregs = pcum->aapcs_ncrn;
26664 if (nregs & 1)
26666 int res = arm_needs_doubleword_align (mode, type);
26667 if (res < 0 && warn_psabi)
26668 inform (input_location, "parameter passing for argument of "
26669 "type %qT changed in GCC 7.1", type);
26670 else if (res > 0)
26671 nregs++;
26674 else
26675 nregs = pcum->nregs;
26677 if (nregs < NUM_ARG_REGS)
26678 *pretend_size = (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
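/* Worked example (illustration only): for a variadic function such as
   "int f (int a, ...)" the named argument A occupies r0, so NREGS is 1
   and the prologue is asked to push the remaining argument registers,
   i.e. *PRETEND_SIZE = (4 - 1) * 4 = 12 bytes for r1-r3.  A minimal
   standalone sketch of that computation (hypothetical helper, assuming
   NUM_ARG_REGS == 4 and UNITS_PER_WORD == 4):  */
static int
example_varargs_pretend_size (int nregs_used_by_named_args)
{
  const int num_arg_regs = 4;		/* r0-r3.  */
  const int units_per_word = 4;
  if (nregs_used_by_named_args < num_arg_regs)
    return (num_arg_regs - nregs_used_by_named_args) * units_per_word;
  return 0;
}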
26681 /* We can't rely on the caller doing the proper promotion when
26682 using APCS or ATPCS. */
26684 static bool
26685 arm_promote_prototypes (const_tree t ATTRIBUTE_UNUSED)
26687 return !TARGET_AAPCS_BASED;
26690 static machine_mode
26691 arm_promote_function_mode (const_tree type ATTRIBUTE_UNUSED,
26692 machine_mode mode,
26693 int *punsignedp ATTRIBUTE_UNUSED,
26694 const_tree fntype ATTRIBUTE_UNUSED,
26695 int for_return ATTRIBUTE_UNUSED)
26697 if (GET_MODE_CLASS (mode) == MODE_INT
26698 && GET_MODE_SIZE (mode) < 4)
26699 return SImode;
26701 return mode;
26705 static bool
26706 arm_default_short_enums (void)
26708 return ARM_DEFAULT_SHORT_ENUMS;
26712 /* AAPCS requires that anonymous bitfields affect structure alignment. */
26714 static bool
26715 arm_align_anon_bitfield (void)
26717 return TARGET_AAPCS_BASED;
26721 /* The generic C++ ABI says 64-bit (long long). The EABI says 32-bit. */
26723 static tree
26724 arm_cxx_guard_type (void)
26726 return TARGET_AAPCS_BASED ? integer_type_node : long_long_integer_type_node;
26730 /* The EABI says test the least significant bit of a guard variable. */
26732 static bool
26733 arm_cxx_guard_mask_bit (void)
26735 return TARGET_AAPCS_BASED;
26739 /* The EABI specifies that all array cookies are 8 bytes long. */
26741 static tree
26742 arm_get_cookie_size (tree type)
26744 tree size;
26746 if (!TARGET_AAPCS_BASED)
26747 return default_cxx_get_cookie_size (type);
26749 size = build_int_cst (sizetype, 8);
26750 return size;
26754 /* The EABI says that array cookies should also contain the element size. */
26756 static bool
26757 arm_cookie_has_size (void)
26759 return TARGET_AAPCS_BASED;
26763 /* The EABI says constructors and destructors should return a pointer to
26764 the object constructed/destroyed. */
26766 static bool
26767 arm_cxx_cdtor_returns_this (void)
26769 return TARGET_AAPCS_BASED;
26772 /* The EABI says that an inline function may never be the key
26773 method. */
26775 static bool
26776 arm_cxx_key_method_may_be_inline (void)
26778 return !TARGET_AAPCS_BASED;
26781 static void
26782 arm_cxx_determine_class_data_visibility (tree decl)
26784 if (!TARGET_AAPCS_BASED
26785 || !TARGET_DLLIMPORT_DECL_ATTRIBUTES)
26786 return;
26788 /* In general, \S 3.2.5.5 of the ARM EABI requires that class data
26789 is exported. However, on systems without dynamic vague linkage,
26790 \S 3.2.5.6 says that COMDAT class data has hidden linkage. */
26791 if (!TARGET_ARM_DYNAMIC_VAGUE_LINKAGE_P && DECL_COMDAT (decl))
26792 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
26793 else
26794 DECL_VISIBILITY (decl) = VISIBILITY_DEFAULT;
26795 DECL_VISIBILITY_SPECIFIED (decl) = 1;
26798 static bool
26799 arm_cxx_class_data_always_comdat (void)
26801 /* \S 3.2.5.4 of the ARM C++ ABI says that class data only have
26802 vague linkage if the class has no key function. */
26803 return !TARGET_AAPCS_BASED;
26807 /* The EABI says __aeabi_atexit should be used to register static
26808 destructors. */
26810 static bool
26811 arm_cxx_use_aeabi_atexit (void)
26813 return TARGET_AAPCS_BASED;
26817 void
26818 arm_set_return_address (rtx source, rtx scratch)
26820 arm_stack_offsets *offsets;
26821 HOST_WIDE_INT delta;
26822 rtx addr;
26823 unsigned long saved_regs;
26825 offsets = arm_get_frame_offsets ();
26826 saved_regs = offsets->saved_regs_mask;
26828 if ((saved_regs & (1 << LR_REGNUM)) == 0)
26829 emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
26830 else
26832 if (frame_pointer_needed)
26833 addr = plus_constant (Pmode, hard_frame_pointer_rtx, -4);
26834 else
26836 /* LR will be the first saved register. */
26837 delta = offsets->outgoing_args - (offsets->frame + 4);
26840 if (delta >= 4096)
26842 emit_insn (gen_addsi3 (scratch, stack_pointer_rtx,
26843 GEN_INT (delta & ~4095)));
26844 addr = scratch;
26845 delta &= 4095;
26847 else
26848 addr = stack_pointer_rtx;
26850 addr = plus_constant (Pmode, addr, delta);
26852 /* The store needs to be marked as frame related in order to prevent
26853 DSE from deleting it as dead if it is based on fp. */
26854 rtx insn = emit_move_insn (gen_frame_mem (Pmode, addr), source);
26855 RTX_FRAME_RELATED_P (insn) = 1;
26856 add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (Pmode, LR_REGNUM));
26861 void
26862 thumb_set_return_address (rtx source, rtx scratch)
26864 arm_stack_offsets *offsets;
26865 HOST_WIDE_INT delta;
26866 HOST_WIDE_INT limit;
26867 int reg;
26868 rtx addr;
26869 unsigned long mask;
26871 emit_use (source);
26873 offsets = arm_get_frame_offsets ();
26874 mask = offsets->saved_regs_mask;
26875 if (mask & (1 << LR_REGNUM))
26877 limit = 1024;
26878 /* Find the saved regs. */
26879 if (frame_pointer_needed)
26881 delta = offsets->soft_frame - offsets->saved_args;
26882 reg = THUMB_HARD_FRAME_POINTER_REGNUM;
26883 if (TARGET_THUMB1)
26884 limit = 128;
26886 else
26888 delta = offsets->outgoing_args - offsets->saved_args;
26889 reg = SP_REGNUM;
26891 /* Allow for the stack frame. */
26892 if (TARGET_THUMB1 && TARGET_BACKTRACE)
26893 delta -= 16;
26894 /* The link register is always the first saved register. */
26895 delta -= 4;
26897 /* Construct the address. */
26898 addr = gen_rtx_REG (SImode, reg);
26899 if (delta > limit)
26901 emit_insn (gen_movsi (scratch, GEN_INT (delta)));
26902 emit_insn (gen_addsi3 (scratch, scratch, stack_pointer_rtx));
26903 addr = scratch;
26905 else
26906 addr = plus_constant (Pmode, addr, delta);
26908 /* The store needs to be marked as frame related in order to prevent
26909 DSE from deleting it as dead if it is based on fp. */
26910 rtx insn = emit_move_insn (gen_frame_mem (Pmode, addr), source);
26911 RTX_FRAME_RELATED_P (insn) = 1;
26912 add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (Pmode, LR_REGNUM));
26914 else
26915 emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
26918 /* Implements target hook vector_mode_supported_p. */
26919 bool
26920 arm_vector_mode_supported_p (machine_mode mode)
26922 /* Neon also supports V2SImode, etc. listed in the clause below. */
26923 if (TARGET_NEON && (mode == V2SFmode || mode == V4SImode || mode == V8HImode
26924 || mode == V4HFmode || mode == V16QImode || mode == V4SFmode
26925 || mode == V2DImode || mode == V8HFmode))
26926 return true;
26928 if ((TARGET_NEON || TARGET_IWMMXT)
26929 && ((mode == V2SImode)
26930 || (mode == V4HImode)
26931 || (mode == V8QImode)))
26932 return true;
26934 if (TARGET_INT_SIMD && (mode == V4UQQmode || mode == V4QQmode
26935 || mode == V2UHQmode || mode == V2HQmode || mode == V2UHAmode
26936 || mode == V2HAmode))
26937 return true;
26939 return false;
26942 /* Implements target hook array_mode_supported_p. */
26944 static bool
26945 arm_array_mode_supported_p (machine_mode mode,
26946 unsigned HOST_WIDE_INT nelems)
26948 if (TARGET_NEON
26949 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
26950 && (nelems >= 2 && nelems <= 4))
26951 return true;
26953 return false;
26956 /* Use the option -mvectorize-with-neon-double to override the use of quadword
26957 registers when autovectorizing for Neon, at least until multiple vector
26958 widths are supported properly by the middle-end. */
26960 static machine_mode
26961 arm_preferred_simd_mode (scalar_mode mode)
26963 if (TARGET_NEON)
26964 switch (mode)
26966 case E_SFmode:
26967 return TARGET_NEON_VECTORIZE_DOUBLE ? V2SFmode : V4SFmode;
26968 case E_SImode:
26969 return TARGET_NEON_VECTORIZE_DOUBLE ? V2SImode : V4SImode;
26970 case E_HImode:
26971 return TARGET_NEON_VECTORIZE_DOUBLE ? V4HImode : V8HImode;
26972 case E_QImode:
26973 return TARGET_NEON_VECTORIZE_DOUBLE ? V8QImode : V16QImode;
26974 case E_DImode:
26975 if (!TARGET_NEON_VECTORIZE_DOUBLE)
26976 return V2DImode;
26977 break;
26979 default:;
26982 if (TARGET_REALLY_IWMMXT)
26983 switch (mode)
26985 case E_SImode:
26986 return V2SImode;
26987 case E_HImode:
26988 return V4HImode;
26989 case E_QImode:
26990 return V8QImode;
26992 default:;
26995 return word_mode;
26998 /* Implement TARGET_CLASS_LIKELY_SPILLED_P.
27000 We need to define this for LO_REGS on Thumb-1. Otherwise we can end up
27001 using r0-r4 for function arguments, r7 for the stack frame, and not have
27002 enough left over to do doubleword arithmetic. For Thumb-2 all the
27003 potentially problematic instructions accept high registers so this is not
27004 necessary. Care needs to be taken to avoid adding new Thumb-2 patterns
27005 that require many low registers. */
27006 static bool
27007 arm_class_likely_spilled_p (reg_class_t rclass)
27009 if ((TARGET_THUMB1 && rclass == LO_REGS)
27010 || rclass == CC_REG)
27011 return true;
27013 return false;
27016 /* Implements target hook small_register_classes_for_mode_p. */
27017 bool
27018 arm_small_register_classes_for_mode_p (machine_mode mode ATTRIBUTE_UNUSED)
27020 return TARGET_THUMB1;
27023 /* Implement TARGET_SHIFT_TRUNCATION_MASK. SImode shifts use normal
27024 ARM insns and therefore guarantee that the shift count is modulo 256.
27025 DImode shifts (those implemented by lib1funcs.S or by optabs.c)
27026 guarantee no particular behavior for out-of-range counts. */
27028 static unsigned HOST_WIDE_INT
27029 arm_shift_truncation_mask (machine_mode mode)
27031 return mode == SImode ? 255 : 0;
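/* Illustration (not part of the hook itself): a mask of 255 tells the
   middle-end that a variable SImode shift by, say, 257 behaves exactly
   like a shift by 257 & 255 == 1, so an explicit "and" of the shift
   count can be omitted.  A mask of 0 makes no such promise for DImode
   shifts.  */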
27035 /* Map internal gcc register numbers to DWARF2 register numbers. */
27037 unsigned int
27038 arm_dbx_register_number (unsigned int regno)
27040 if (regno < 16)
27041 return regno;
27043 if (IS_VFP_REGNUM (regno))
27045 /* See comment in arm_dwarf_register_span. */
27046 if (VFP_REGNO_OK_FOR_SINGLE (regno))
27047 return 64 + regno - FIRST_VFP_REGNUM;
27048 else
27049 return 256 + (regno - FIRST_VFP_REGNUM) / 2;
27052 if (IS_IWMMXT_GR_REGNUM (regno))
27053 return 104 + regno - FIRST_IWMMXT_GR_REGNUM;
27055 if (IS_IWMMXT_REGNUM (regno))
27056 return 112 + regno - FIRST_IWMMXT_REGNUM;
27058 return DWARF_FRAME_REGISTERS;
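/* Worked example (illustration only): core registers map to themselves
   (r11 -> 11); single-precision-capable VFP registers use the legacy
   range starting at 64, so s5 (FIRST_VFP_REGNUM + 5) maps to 69; the
   upper double registers use the D0-D31 range starting at 256, so d20
   maps to 256 + 20 == 276.  */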
27061 /* Dwarf models VFPv3 registers as 32 64-bit registers.
27062 GCC models them as 64 32-bit registers, so we need to describe this to
27063 the DWARF generation code. Other registers can use the default. */
27064 static rtx
27065 arm_dwarf_register_span (rtx rtl)
27067 machine_mode mode;
27068 unsigned regno;
27069 rtx parts[16];
27070 int nregs;
27071 int i;
27073 regno = REGNO (rtl);
27074 if (!IS_VFP_REGNUM (regno))
27075 return NULL_RTX;
27077 /* XXX FIXME: The EABI defines two VFP register ranges:
27078 64-95: Legacy VFPv2 numbering for S0-S31 (obsolescent)
27079 256-287: D0-D31
27080 The recommended encoding for S0-S31 is a DW_OP_bit_piece of the
27081 corresponding D register. Until GDB supports this, we shall use the
27082 legacy encodings. We also use these encodings for D0-D15 for
27083 compatibility with older debuggers. */
27084 mode = GET_MODE (rtl);
27085 if (GET_MODE_SIZE (mode) < 8)
27086 return NULL_RTX;
27088 if (VFP_REGNO_OK_FOR_SINGLE (regno))
27090 nregs = GET_MODE_SIZE (mode) / 4;
27091 for (i = 0; i < nregs; i += 2)
27092 if (TARGET_BIG_END)
27094 parts[i] = gen_rtx_REG (SImode, regno + i + 1);
27095 parts[i + 1] = gen_rtx_REG (SImode, regno + i);
27097 else
27099 parts[i] = gen_rtx_REG (SImode, regno + i);
27100 parts[i + 1] = gen_rtx_REG (SImode, regno + i + 1);
27103 else
27105 nregs = GET_MODE_SIZE (mode) / 8;
27106 for (i = 0; i < nregs; i++)
27107 parts[i] = gen_rtx_REG (DImode, regno + i);
27110 return gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (nregs , parts));
27113 #if ARM_UNWIND_INFO
27114 /* Emit unwind directives for a store-multiple instruction or stack pointer
27115 push during alignment.
27116 These should only ever be generated by the function prologue code, so
27117 expect them to have a particular form.
27118 The store-multiple instruction sometimes pushes pc as the last register,
27119 although it should not be tracked in the unwind information; for -Os it
27120 sometimes pushes some dummy registers before the first register that needs
27121 to be tracked in the unwind information. Such dummy registers are there only
27122 to avoid a separate stack adjustment, and will not be restored in the
27123 epilogue. */
27125 static void
27126 arm_unwind_emit_sequence (FILE * asm_out_file, rtx p)
27128 int i;
27129 HOST_WIDE_INT offset;
27130 HOST_WIDE_INT nregs;
27131 int reg_size;
27132 unsigned reg;
27133 unsigned lastreg;
27134 unsigned padfirst = 0, padlast = 0;
27135 rtx e;
27137 e = XVECEXP (p, 0, 0);
27138 gcc_assert (GET_CODE (e) == SET);
27140 /* First insn will adjust the stack pointer. */
27141 gcc_assert (GET_CODE (e) == SET
27142 && REG_P (SET_DEST (e))
27143 && REGNO (SET_DEST (e)) == SP_REGNUM
27144 && GET_CODE (SET_SRC (e)) == PLUS);
27146 offset = -INTVAL (XEXP (SET_SRC (e), 1));
27147 nregs = XVECLEN (p, 0) - 1;
27148 gcc_assert (nregs);
27150 reg = REGNO (SET_SRC (XVECEXP (p, 0, 1)));
27151 if (reg < 16)
27153 /* For -Os dummy registers can be pushed at the beginning to
27154 avoid separate stack pointer adjustment. */
27155 e = XVECEXP (p, 0, 1);
27156 e = XEXP (SET_DEST (e), 0);
27157 if (GET_CODE (e) == PLUS)
27158 padfirst = INTVAL (XEXP (e, 1));
27159 gcc_assert (padfirst == 0 || optimize_size);
27160 /* The function prologue may also push pc, but not annotate it as it is
27161 never restored. We turn this into a stack pointer adjustment. */
27162 e = XVECEXP (p, 0, nregs);
27163 e = XEXP (SET_DEST (e), 0);
27164 if (GET_CODE (e) == PLUS)
27165 padlast = offset - INTVAL (XEXP (e, 1)) - 4;
27166 else
27167 padlast = offset - 4;
27168 gcc_assert (padlast == 0 || padlast == 4);
27169 if (padlast == 4)
27170 fprintf (asm_out_file, "\t.pad #4\n");
27171 reg_size = 4;
27172 fprintf (asm_out_file, "\t.save {");
27174 else if (IS_VFP_REGNUM (reg))
27176 reg_size = 8;
27177 fprintf (asm_out_file, "\t.vsave {");
27179 else
27180 /* Unknown register type. */
27181 gcc_unreachable ();
27183 /* If the stack increment doesn't match the size of the saved registers,
27184 something has gone horribly wrong. */
27185 gcc_assert (offset == padfirst + nregs * reg_size + padlast);
27187 offset = padfirst;
27188 lastreg = 0;
27189 /* The remaining insns will describe the stores. */
27190 for (i = 1; i <= nregs; i++)
27192 /* Expect (set (mem <addr>) (reg)).
27193 Where <addr> is (reg:SP) or (plus (reg:SP) (const_int)). */
27194 e = XVECEXP (p, 0, i);
27195 gcc_assert (GET_CODE (e) == SET
27196 && MEM_P (SET_DEST (e))
27197 && REG_P (SET_SRC (e)));
27199 reg = REGNO (SET_SRC (e));
27200 gcc_assert (reg >= lastreg);
27202 if (i != 1)
27203 fprintf (asm_out_file, ", ");
27204 /* We can't use %r for vfp because we need to use the
27205 double precision register names. */
27206 if (IS_VFP_REGNUM (reg))
27207 asm_fprintf (asm_out_file, "d%d", (reg - FIRST_VFP_REGNUM) / 2);
27208 else
27209 asm_fprintf (asm_out_file, "%r", reg);
27211 if (flag_checking)
27213 /* Check that the addresses are consecutive. */
27214 e = XEXP (SET_DEST (e), 0);
27215 if (GET_CODE (e) == PLUS)
27216 gcc_assert (REG_P (XEXP (e, 0))
27217 && REGNO (XEXP (e, 0)) == SP_REGNUM
27218 && CONST_INT_P (XEXP (e, 1))
27219 && offset == INTVAL (XEXP (e, 1)));
27220 else
27221 gcc_assert (i == 1
27222 && REG_P (e)
27223 && REGNO (e) == SP_REGNUM);
27224 offset += reg_size;
27227 fprintf (asm_out_file, "}\n");
27228 if (padfirst)
27229 fprintf (asm_out_file, "\t.pad #%d\n", padfirst);
27232 /* Emit unwind directives for a SET. */
27234 static void
27235 arm_unwind_emit_set (FILE * asm_out_file, rtx p)
27237 rtx e0;
27238 rtx e1;
27239 unsigned reg;
27241 e0 = XEXP (p, 0);
27242 e1 = XEXP (p, 1);
27243 switch (GET_CODE (e0))
27245 case MEM:
27246 /* Pushing a single register. */
27247 if (GET_CODE (XEXP (e0, 0)) != PRE_DEC
27248 || !REG_P (XEXP (XEXP (e0, 0), 0))
27249 || REGNO (XEXP (XEXP (e0, 0), 0)) != SP_REGNUM)
27250 abort ();
27252 asm_fprintf (asm_out_file, "\t.save ");
27253 if (IS_VFP_REGNUM (REGNO (e1)))
27254 asm_fprintf(asm_out_file, "{d%d}\n",
27255 (REGNO (e1) - FIRST_VFP_REGNUM) / 2);
27256 else
27257 asm_fprintf(asm_out_file, "{%r}\n", REGNO (e1));
27258 break;
27260 case REG:
27261 if (REGNO (e0) == SP_REGNUM)
27263 /* A stack increment. */
27264 if (GET_CODE (e1) != PLUS
27265 || !REG_P (XEXP (e1, 0))
27266 || REGNO (XEXP (e1, 0)) != SP_REGNUM
27267 || !CONST_INT_P (XEXP (e1, 1)))
27268 abort ();
27270 asm_fprintf (asm_out_file, "\t.pad #%wd\n",
27271 -INTVAL (XEXP (e1, 1)));
27273 else if (REGNO (e0) == HARD_FRAME_POINTER_REGNUM)
27275 HOST_WIDE_INT offset;
27277 if (GET_CODE (e1) == PLUS)
27279 if (!REG_P (XEXP (e1, 0))
27280 || !CONST_INT_P (XEXP (e1, 1)))
27281 abort ();
27282 reg = REGNO (XEXP (e1, 0));
27283 offset = INTVAL (XEXP (e1, 1));
27284 asm_fprintf (asm_out_file, "\t.setfp %r, %r, #%wd\n",
27285 HARD_FRAME_POINTER_REGNUM, reg,
27286 offset);
27288 else if (REG_P (e1))
27290 reg = REGNO (e1);
27291 asm_fprintf (asm_out_file, "\t.setfp %r, %r\n",
27292 HARD_FRAME_POINTER_REGNUM, reg);
27294 else
27295 abort ();
27297 else if (REG_P (e1) && REGNO (e1) == SP_REGNUM)
27299 /* Move from sp to reg. */
27300 asm_fprintf (asm_out_file, "\t.movsp %r\n", REGNO (e0));
27302 else if (GET_CODE (e1) == PLUS
27303 && REG_P (XEXP (e1, 0))
27304 && REGNO (XEXP (e1, 0)) == SP_REGNUM
27305 && CONST_INT_P (XEXP (e1, 1)))
27307 /* Set reg to offset from sp. */
27308 asm_fprintf (asm_out_file, "\t.movsp %r, #%d\n",
27309 REGNO (e0), (int)INTVAL(XEXP (e1, 1)));
27311 else
27312 abort ();
27313 break;
27315 default:
27316 abort ();
27321 /* Emit unwind directives for the given insn. */
27323 static void
27324 arm_unwind_emit (FILE * asm_out_file, rtx_insn *insn)
27326 rtx note, pat;
27327 bool handled_one = false;
27329 if (arm_except_unwind_info (&global_options) != UI_TARGET)
27330 return;
27332 if (!(flag_unwind_tables || crtl->uses_eh_lsda)
27333 && (TREE_NOTHROW (current_function_decl)
27334 || crtl->all_throwers_are_sibcalls))
27335 return;
27337 if (NOTE_P (insn) || !RTX_FRAME_RELATED_P (insn))
27338 return;
27340 for (note = REG_NOTES (insn); note ; note = XEXP (note, 1))
27342 switch (REG_NOTE_KIND (note))
27344 case REG_FRAME_RELATED_EXPR:
27345 pat = XEXP (note, 0);
27346 goto found;
27348 case REG_CFA_REGISTER:
27349 pat = XEXP (note, 0);
27350 if (pat == NULL)
27352 pat = PATTERN (insn);
27353 if (GET_CODE (pat) == PARALLEL)
27354 pat = XVECEXP (pat, 0, 0);
27357 /* Only emitted for IS_STACKALIGN re-alignment. */
27359 rtx dest, src;
27360 unsigned reg;
27362 src = SET_SRC (pat);
27363 dest = SET_DEST (pat);
27365 gcc_assert (src == stack_pointer_rtx);
27366 reg = REGNO (dest);
27367 asm_fprintf (asm_out_file, "\t.unwind_raw 0, 0x%x @ vsp = r%d\n",
27368 reg + 0x90, reg);
27370 handled_one = true;
27371 break;
27373 /* The INSN is generated in the epilogue. It is set as RTX_FRAME_RELATED_P
27374 to get correct dwarf information for shrink-wrapping. We should not
27375 emit unwind information for it because these notes are used either for
27376 pretend arguments or to adjust sp and restore registers from the
27377 stack. */
27378 case REG_CFA_DEF_CFA:
27379 case REG_CFA_ADJUST_CFA:
27380 case REG_CFA_RESTORE:
27381 return;
27383 case REG_CFA_EXPRESSION:
27384 case REG_CFA_OFFSET:
27385 /* ??? Only handling here what we actually emit. */
27386 gcc_unreachable ();
27388 default:
27389 break;
27392 if (handled_one)
27393 return;
27394 pat = PATTERN (insn);
27395 found:
27397 switch (GET_CODE (pat))
27399 case SET:
27400 arm_unwind_emit_set (asm_out_file, pat);
27401 break;
27403 case SEQUENCE:
27404 /* Store multiple. */
27405 arm_unwind_emit_sequence (asm_out_file, pat);
27406 break;
27408 default:
27409 abort();
27414 /* Output a reference from a function exception table to the type_info
27415 object X. The EABI specifies that the symbol should be relocated by
27416 an R_ARM_TARGET2 relocation. */
27418 static bool
27419 arm_output_ttype (rtx x)
27421 fputs ("\t.word\t", asm_out_file);
27422 output_addr_const (asm_out_file, x);
27423 /* Use special relocations for symbol references. */
27424 if (!CONST_INT_P (x))
27425 fputs ("(TARGET2)", asm_out_file);
27426 fputc ('\n', asm_out_file);
27428 return TRUE;
27431 /* Implement TARGET_ASM_EMIT_EXCEPT_PERSONALITY. */
27433 static void
27434 arm_asm_emit_except_personality (rtx personality)
27436 fputs ("\t.personality\t", asm_out_file);
27437 output_addr_const (asm_out_file, personality);
27438 fputc ('\n', asm_out_file);
27440 #endif /* ARM_UNWIND_INFO */
27442 /* Implement TARGET_ASM_INITIALIZE_SECTIONS. */
27444 static void
27445 arm_asm_init_sections (void)
27447 #if ARM_UNWIND_INFO
27448 exception_section = get_unnamed_section (0, output_section_asm_op,
27449 "\t.handlerdata");
27450 #endif /* ARM_UNWIND_INFO */
27452 #ifdef OBJECT_FORMAT_ELF
27453 if (target_pure_code)
27454 text_section->unnamed.data = "\t.section .text,\"0x20000006\",%progbits";
27455 #endif
27458 /* Output unwind directives for the start/end of a function. */
27460 void
27461 arm_output_fn_unwind (FILE * f, bool prologue)
27463 if (arm_except_unwind_info (&global_options) != UI_TARGET)
27464 return;
27466 if (prologue)
27467 fputs ("\t.fnstart\n", f);
27468 else
27470 /* If this function will never be unwound, then mark it as such.
27471 The same condition is used in arm_unwind_emit to suppress
27472 the frame annotations. */
27473 if (!(flag_unwind_tables || crtl->uses_eh_lsda)
27474 && (TREE_NOTHROW (current_function_decl)
27475 || crtl->all_throwers_are_sibcalls))
27476 fputs("\t.cantunwind\n", f);
27478 fputs ("\t.fnend\n", f);
27482 static bool
27483 arm_emit_tls_decoration (FILE *fp, rtx x)
27485 enum tls_reloc reloc;
27486 rtx val;
27488 val = XVECEXP (x, 0, 0);
27489 reloc = (enum tls_reloc) INTVAL (XVECEXP (x, 0, 1));
27491 output_addr_const (fp, val);
27493 switch (reloc)
27495 case TLS_GD32:
27496 fputs ("(tlsgd)", fp);
27497 break;
27498 case TLS_LDM32:
27499 fputs ("(tlsldm)", fp);
27500 break;
27501 case TLS_LDO32:
27502 fputs ("(tlsldo)", fp);
27503 break;
27504 case TLS_IE32:
27505 fputs ("(gottpoff)", fp);
27506 break;
27507 case TLS_LE32:
27508 fputs ("(tpoff)", fp);
27509 break;
27510 case TLS_DESCSEQ:
27511 fputs ("(tlsdesc)", fp);
27512 break;
27513 default:
27514 gcc_unreachable ();
27517 switch (reloc)
27519 case TLS_GD32:
27520 case TLS_LDM32:
27521 case TLS_IE32:
27522 case TLS_DESCSEQ:
27523 fputs (" + (. - ", fp);
27524 output_addr_const (fp, XVECEXP (x, 0, 2));
27525 /* For DESCSEQ the 3rd operand encodes thumbness, and is added */
27526 fputs (reloc == TLS_DESCSEQ ? " + " : " - ", fp);
27527 output_addr_const (fp, XVECEXP (x, 0, 3));
27528 fputc (')', fp);
27529 break;
27530 default:
27531 break;
27534 return TRUE;
27537 /* ARM implementation of TARGET_ASM_OUTPUT_DWARF_DTPREL. */
27539 static void
27540 arm_output_dwarf_dtprel (FILE *file, int size, rtx x)
27542 gcc_assert (size == 4);
27543 fputs ("\t.word\t", file);
27544 output_addr_const (file, x);
27545 fputs ("(tlsldo)", file);
27548 /* Implement TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
27550 static bool
27551 arm_output_addr_const_extra (FILE *fp, rtx x)
27553 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
27554 return arm_emit_tls_decoration (fp, x);
27555 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_PIC_LABEL)
27557 char label[256];
27558 int labelno = INTVAL (XVECEXP (x, 0, 0));
27560 ASM_GENERATE_INTERNAL_LABEL (label, "LPIC", labelno);
27561 assemble_name_raw (fp, label);
27563 return TRUE;
27565 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_GOTSYM_OFF)
27567 assemble_name (fp, "_GLOBAL_OFFSET_TABLE_");
27568 if (GOT_PCREL)
27569 fputs ("+.", fp);
27570 fputs ("-(", fp);
27571 output_addr_const (fp, XVECEXP (x, 0, 0));
27572 fputc (')', fp);
27573 return TRUE;
27575 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_SYMBOL_OFFSET)
27577 output_addr_const (fp, XVECEXP (x, 0, 0));
27578 if (GOT_PCREL)
27579 fputs ("+.", fp);
27580 fputs ("-(", fp);
27581 output_addr_const (fp, XVECEXP (x, 0, 1));
27582 fputc (')', fp);
27583 return TRUE;
27585 else if (GET_CODE (x) == CONST_VECTOR)
27586 return arm_emit_vector_const (fp, x);
27588 return FALSE;
27591 /* Output assembly for a shift instruction.
27592 SET_FLAGS determines how the instruction modifies the condition codes.
27593 0 - Do not set condition codes.
27594 1 - Set condition codes.
27595 2 - Use smallest instruction. */
27596 const char *
27597 arm_output_shift(rtx * operands, int set_flags)
27599 char pattern[100];
27600 static const char flag_chars[3] = {'?', '.', '!'};
27601 const char *shift;
27602 HOST_WIDE_INT val;
27603 char c;
27605 c = flag_chars[set_flags];
27606 shift = shift_op(operands[3], &val);
27607 if (shift)
27609 if (val != -1)
27610 operands[2] = GEN_INT(val);
27611 sprintf (pattern, "%s%%%c\t%%0, %%1, %%2", shift, c);
27613 else
27614 sprintf (pattern, "mov%%%c\t%%0, %%1", c);
27616 output_asm_insn (pattern, operands);
27617 return "";
27620 /* Output assembly for a WMMX immediate shift instruction. */
27621 const char *
27622 arm_output_iwmmxt_shift_immediate (const char *insn_name, rtx *operands, bool wror_or_wsra)
27624 int shift = INTVAL (operands[2]);
27625 char templ[50];
27626 machine_mode opmode = GET_MODE (operands[0]);
27628 gcc_assert (shift >= 0);
27630 /* Handle the case where the shift value is > 63 (for D qualifier),
27631 > 31 (for W qualifier) or > 15 (for H qualifier). */
27632 if (((opmode == V4HImode) && (shift > 15))
27633 || ((opmode == V2SImode) && (shift > 31))
27634 || ((opmode == DImode) && (shift > 63)))
27636 if (wror_or_wsra)
27638 sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, 32);
27639 output_asm_insn (templ, operands);
27640 if (opmode == DImode)
27642 sprintf (templ, "%s\t%%0, %%0, #%d", insn_name, 32);
27643 output_asm_insn (templ, operands);
27646 else
27648 /* The destination register will contain all zeros. */
27649 sprintf (templ, "wzero\t%%0");
27650 output_asm_insn (templ, operands);
27652 return "";
27655 if ((opmode == DImode) && (shift > 32))
27657 sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, 32);
27658 output_asm_insn (templ, operands);
27659 sprintf (templ, "%s\t%%0, %%0, #%d", insn_name, shift - 32);
27660 output_asm_insn (templ, operands);
27662 else
27664 sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, shift);
27665 output_asm_insn (templ, operands);
27667 return "";
27670 /* Output assembly for a WMMX tinsr instruction. */
27671 const char *
27672 arm_output_iwmmxt_tinsr (rtx *operands)
27674 int mask = INTVAL (operands[3]);
27675 int i;
27676 char templ[50];
27677 int units = mode_nunits[GET_MODE (operands[0])];
27678 gcc_assert ((mask & (mask - 1)) == 0);
27679 for (i = 0; i < units; ++i)
27681 if ((mask & 0x01) == 1)
27683 break;
27685 mask >>= 1;
27687 gcc_assert (i < units);
27689 switch (GET_MODE (operands[0]))
27691 case E_V8QImode:
27692 sprintf (templ, "tinsrb%%?\t%%0, %%2, #%d", i);
27693 break;
27694 case E_V4HImode:
27695 sprintf (templ, "tinsrh%%?\t%%0, %%2, #%d", i);
27696 break;
27697 case E_V2SImode:
27698 sprintf (templ, "tinsrw%%?\t%%0, %%2, #%d", i);
27699 break;
27700 default:
27701 gcc_unreachable ();
27702 break;
27704 output_asm_insn (templ, operands);
27706 return "";
27709 /* Output a Thumb-1 casesi dispatch sequence. */
27710 const char *
27711 thumb1_output_casesi (rtx *operands)
27713 rtx diff_vec = PATTERN (NEXT_INSN (as_a <rtx_insn *> (operands[0])));
27715 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
27717 switch (GET_MODE(diff_vec))
27719 case E_QImode:
27720 return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ?
27721 "bl\t%___gnu_thumb1_case_uqi" : "bl\t%___gnu_thumb1_case_sqi");
27722 case E_HImode:
27723 return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ?
27724 "bl\t%___gnu_thumb1_case_uhi" : "bl\t%___gnu_thumb1_case_shi");
27725 case E_SImode:
27726 return "bl\t%___gnu_thumb1_case_si";
27727 default:
27728 gcc_unreachable ();
27732 /* Output a Thumb-2 casesi instruction. */
27733 const char *
27734 thumb2_output_casesi (rtx *operands)
27736 rtx diff_vec = PATTERN (NEXT_INSN (as_a <rtx_insn *> (operands[2])));
27738 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
27740 output_asm_insn ("cmp\t%0, %1", operands);
27741 output_asm_insn ("bhi\t%l3", operands);
27742 switch (GET_MODE(diff_vec))
27744 case E_QImode:
27745 return "tbb\t[%|pc, %0]";
27746 case E_HImode:
27747 return "tbh\t[%|pc, %0, lsl #1]";
27748 case E_SImode:
27749 if (flag_pic)
27751 output_asm_insn ("adr\t%4, %l2", operands);
27752 output_asm_insn ("ldr\t%5, [%4, %0, lsl #2]", operands);
27753 output_asm_insn ("add\t%4, %4, %5", operands);
27754 return "bx\t%4";
27756 else
27758 output_asm_insn ("adr\t%4, %l2", operands);
27759 return "ldr\t%|pc, [%4, %0, lsl #2]";
27761 default:
27762 gcc_unreachable ();
27766 /* Implement TARGET_SCHED_ISSUE_RATE. Lookup the issue rate in the
27767 per-core tuning structs. */
27768 static int
27769 arm_issue_rate (void)
27771 return current_tune->issue_rate;
27774 /* Return how many instructions the scheduler should look ahead in order to
27775 choose the best one. */
27776 static int
27777 arm_first_cycle_multipass_dfa_lookahead (void)
27779 int issue_rate = arm_issue_rate ();
27781 return issue_rate > 1 && !sched_fusion ? issue_rate : 0;
27784 /* Enable modeling of L2 auto-prefetcher. */
27785 static int
27786 arm_first_cycle_multipass_dfa_lookahead_guard (rtx_insn *insn, int ready_index)
27788 return autopref_multipass_dfa_lookahead_guard (insn, ready_index);
27791 const char *
27792 arm_mangle_type (const_tree type)
27794 /* The ARM ABI documents (10th October 2008) say that "__va_list"
27795 has to be mangled as if it were in the "std" namespace. */
27796 if (TARGET_AAPCS_BASED
27797 && lang_hooks.types_compatible_p (CONST_CAST_TREE (type), va_list_type))
27798 return "St9__va_list";
27800 /* Half-precision float. */
27801 if (TREE_CODE (type) == REAL_TYPE && TYPE_PRECISION (type) == 16)
27802 return "Dh";
27804 /* Try mangling as a Neon type, TYPE_NAME is non-NULL if this is a
27805 builtin type. */
27806 if (TYPE_NAME (type) != NULL)
27807 return arm_mangle_builtin_type (type);
27809 /* Use the default mangling. */
27810 return NULL;
27813 /* Order of allocation of core registers for Thumb: this allocation is
27814 written over the corresponding initial entries of the array
27815 initialized with REG_ALLOC_ORDER. We allocate all low registers
27816 first. Saving and restoring a low register is usually cheaper than
27817 using a call-clobbered high register. */
27819 static const int thumb_core_reg_alloc_order[] =
27821 3, 2, 1, 0, 4, 5, 6, 7,
27822 12, 14, 8, 9, 10, 11
27825 /* Adjust register allocation order when compiling for Thumb. */
27827 void
27828 arm_order_regs_for_local_alloc (void)
27830 const int arm_reg_alloc_order[] = REG_ALLOC_ORDER;
27831 memcpy(reg_alloc_order, arm_reg_alloc_order, sizeof (reg_alloc_order));
27832 if (TARGET_THUMB)
27833 memcpy (reg_alloc_order, thumb_core_reg_alloc_order,
27834 sizeof (thumb_core_reg_alloc_order));
27837 /* Implement TARGET_FRAME_POINTER_REQUIRED. */
27839 bool
27840 arm_frame_pointer_required (void)
27842 if (SUBTARGET_FRAME_POINTER_REQUIRED)
27843 return true;
27845 /* If the function receives nonlocal gotos, it needs to save the frame
27846 pointer in the nonlocal_goto_save_area object. */
27847 if (cfun->has_nonlocal_label)
27848 return true;
27850 /* The frame pointer is required for non-leaf APCS frames. */
27851 if (TARGET_ARM && TARGET_APCS_FRAME && !crtl->is_leaf)
27852 return true;
27854 /* If we are probing the stack in the prologue, we will have a faulting
27855 instruction prior to the stack adjustment and this requires a frame
27856 pointer if we want to catch the exception using the EABI unwinder. */
27857 if (!IS_INTERRUPT (arm_current_func_type ())
27858 && flag_stack_check == STATIC_BUILTIN_STACK_CHECK
27859 && arm_except_unwind_info (&global_options) == UI_TARGET
27860 && cfun->can_throw_non_call_exceptions)
27862 HOST_WIDE_INT size = get_frame_size ();
27864 /* That's irrelevant if there is no stack adjustment. */
27865 if (size <= 0)
27866 return false;
27868 /* That's relevant only if there is a stack probe. */
27869 if (crtl->is_leaf && !cfun->calls_alloca)
27871 /* We don't have the final size of the frame so adjust. */
27872 size += 32 * UNITS_PER_WORD;
27873 if (size > PROBE_INTERVAL && size > STACK_CHECK_PROTECT)
27874 return true;
27876 else
27877 return true;
27880 return false;
27883 /* Only thumb1 can't support conditional execution, so return true if
27884 the target is not thumb1. */
27885 static bool
27886 arm_have_conditional_execution (void)
27888 return !TARGET_THUMB1;
27891 /* The AAPCS sets the maximum alignment of a vector to 64 bits. */
27892 static HOST_WIDE_INT
27893 arm_vector_alignment (const_tree type)
27895 HOST_WIDE_INT align = tree_to_shwi (TYPE_SIZE (type));
27897 if (TARGET_AAPCS_BASED)
27898 align = MIN (align, 64);
27900 return align;
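/* Worked example (illustration only): a 16-byte Neon vector type has
   TYPE_SIZE 128, so under AAPCS the alignment returned is
   MIN (128, 64) == 64 bits; such a type is only guaranteed 8-byte
   alignment even though it is 16 bytes wide.  */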
27903 static unsigned int
27904 arm_autovectorize_vector_sizes (void)
27906 return TARGET_NEON_VECTORIZE_DOUBLE ? 0 : (16 | 8);
27909 static bool
27910 arm_vector_alignment_reachable (const_tree type, bool is_packed)
27912 /* Vectors which aren't in packed structures will not be less aligned than
27913 the natural alignment of their element type, so this is safe. */
27914 if (TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access)
27915 return !is_packed;
27917 return default_builtin_vector_alignment_reachable (type, is_packed);
27920 static bool
27921 arm_builtin_support_vector_misalignment (machine_mode mode,
27922 const_tree type, int misalignment,
27923 bool is_packed)
27925 if (TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access)
27927 HOST_WIDE_INT align = TYPE_ALIGN_UNIT (type);
27929 if (is_packed)
27930 return align == 1;
27932 /* If the misalignment is unknown, we should be able to handle the access
27933 so long as it is not to a member of a packed data structure. */
27934 if (misalignment == -1)
27935 return true;
27937 /* Return true if the misalignment is a multiple of the natural alignment
27938 of the vector's element type. This is probably always going to be
27939 true in practice, since we've already established that this isn't a
27940 packed access. */
27941 return ((misalignment % align) == 0);
27944 return default_builtin_support_vector_misalignment (mode, type, misalignment,
27945 is_packed);
27948 static void
27949 arm_conditional_register_usage (void)
27951 int regno;
27953 if (TARGET_THUMB1 && optimize_size)
27955 /* When optimizing for size on Thumb-1, it's better not
27956 to use the HI regs, because of the overhead of
27957 stacking them. */
27958 for (regno = FIRST_HI_REGNUM; regno <= LAST_HI_REGNUM; ++regno)
27959 fixed_regs[regno] = call_used_regs[regno] = 1;
27962 /* The link register can be clobbered by any branch insn,
27963 but we have no way to track that at present, so mark
27964 it as unavailable. */
27965 if (TARGET_THUMB1)
27966 fixed_regs[LR_REGNUM] = call_used_regs[LR_REGNUM] = 1;
27968 if (TARGET_32BIT && TARGET_HARD_FLOAT)
27970 /* VFPv3 registers are disabled when earlier VFP
27971 versions are selected due to the definition of
27972 LAST_VFP_REGNUM. */
27973 for (regno = FIRST_VFP_REGNUM;
27974 regno <= LAST_VFP_REGNUM; ++ regno)
27976 fixed_regs[regno] = 0;
27977 call_used_regs[regno] = regno < FIRST_VFP_REGNUM + 16
27978 || regno >= FIRST_VFP_REGNUM + 32;
27982 if (TARGET_REALLY_IWMMXT)
27984 regno = FIRST_IWMMXT_GR_REGNUM;
27985 /* The 2002/10/09 revision of the XScale ABI has wCG0
27986 and wCG1 as call-preserved registers. The 2002/11/21
27987 revision changed this so that all wCG registers are
27988 scratch registers. */
27989 for (regno = FIRST_IWMMXT_GR_REGNUM;
27990 regno <= LAST_IWMMXT_GR_REGNUM; ++ regno)
27991 fixed_regs[regno] = 0;
27992 /* The XScale ABI has wR0 - wR9 as scratch registers,
27993 the rest as call-preserved registers. */
27994 for (regno = FIRST_IWMMXT_REGNUM;
27995 regno <= LAST_IWMMXT_REGNUM; ++ regno)
27997 fixed_regs[regno] = 0;
27998 call_used_regs[regno] = regno < FIRST_IWMMXT_REGNUM + 10;
28002 if ((unsigned) PIC_OFFSET_TABLE_REGNUM != INVALID_REGNUM)
28004 fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
28005 call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
28007 else if (TARGET_APCS_STACK)
28009 fixed_regs[10] = 1;
28010 call_used_regs[10] = 1;
28012 /* -mcaller-super-interworking reserves r11 for calls to
28013 _interwork_r11_call_via_rN(). Making the register global
28014 is an easy way of ensuring that it remains valid for all
28015 calls. */
28016 if (TARGET_APCS_FRAME || TARGET_CALLER_INTERWORKING
28017 || TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME)
28019 fixed_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
28020 call_used_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
28021 if (TARGET_CALLER_INTERWORKING)
28022 global_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
28024 SUBTARGET_CONDITIONAL_REGISTER_USAGE
28027 static reg_class_t
28028 arm_preferred_rename_class (reg_class_t rclass)
28030 /* Thumb-2 instructions using LO_REGS may be smaller than instructions
28031 using GENERAL_REGS. During the register rename pass, we prefer LO_REGS,
28032 which can reduce code size. */
28033 if (TARGET_THUMB2 && rclass == GENERAL_REGS)
28034 return LO_REGS;
28035 else
28036 return NO_REGS;
28039 /* Compute the attribute "length" of insn "*push_multi".
28040 So this function MUST be kept in sync with that insn pattern. */
28042 arm_attr_length_push_multi(rtx parallel_op, rtx first_op)
28044 int i, regno, hi_reg;
28045 int num_saves = XVECLEN (parallel_op, 0);
28047 /* ARM mode. */
28048 if (TARGET_ARM)
28049 return 4;
28050 /* Thumb1 mode. */
28051 if (TARGET_THUMB1)
28052 return 2;
28054 /* Thumb2 mode. */
28055 regno = REGNO (first_op);
28056 /* For PUSH/STM under Thumb-2, we can use a 16-bit encoding if the register
28057 list fits in 8 bits. Normally this means all registers in the list must be
28058 LO_REGS, that is (R0-R7). If any HI_REGS are used, then we must use a 32-bit
28059 encoding. The one exception is PUSH: LR, although in HI_REGS, can be used
28060 with the 16-bit encoding. */
28061 hi_reg = (REGNO_REG_CLASS (regno) == HI_REGS) && (regno != LR_REGNUM);
28062 for (i = 1; i < num_saves && !hi_reg; i++)
28064 regno = REGNO (XEXP (XVECEXP (parallel_op, 0, i), 0));
28065 hi_reg |= (REGNO_REG_CLASS (regno) == HI_REGS) && (regno != LR_REGNUM);
28068 if (!hi_reg)
28069 return 2;
28070 return 4;
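/* Worked example (illustration only): under Thumb-2, "push {r0-r7, lr}"
   gets the answer 2 (16-bit encoding) while "push {r0, r8}" gets 4,
   because r8 is a high register other than LR.  A minimal standalone
   sketch of the same rule (hypothetical helper, assuming register
   numbers 0-15 with LR == 14):  */
static int
example_push_multi_length (const int *regnos, int count)
{
  int i;
  for (i = 0; i < count; i++)
    if (regnos[i] > 7 && regnos[i] != 14)
      return 4;		/* A high register other than LR needs 32 bits.  */
  return 2;		/* Low registers (plus optionally LR) fit in 16 bits.  */
}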
28073 /* Compute the attribute "length" of an insn. Currently, this function is used
28074 for "*load_multiple_with_writeback", "*pop_multiple_with_return" and
28075 "*pop_multiple_with_writeback_and_return". OPERANDS is the toplevel PARALLEL
28076 rtx, RETURN_PC is true if OPERANDS contains a return insn. WRITE_BACK_P is
28077 true if OPERANDS contains an insn which explicitly updates the base register. */
28080 arm_attr_length_pop_multi (rtx *operands, bool return_pc, bool write_back_p)
28082 /* ARM mode. */
28083 if (TARGET_ARM)
28084 return 4;
28085 /* Thumb1 mode. */
28086 if (TARGET_THUMB1)
28087 return 2;
28089 rtx parallel_op = operands[0];
28090 /* Initialize to the number of elements in the PARALLEL. */
28091 unsigned indx = XVECLEN (parallel_op, 0) - 1;
28092 /* Initialize REGNO to the base register. */
28093 unsigned regno = REGNO (operands[1]);
28094 /* Skip the return and write-back patterns.
28095 We only need the register pop pattern for the analysis below. */
28096 unsigned first_indx = 0;
28097 first_indx += return_pc ? 1 : 0;
28098 first_indx += write_back_p ? 1 : 0;
28100 /* A pop operation can be done through LDM or POP. If the base register is SP
28101 and write-back is used, then the LDM is an alias of POP. */
28102 bool pop_p = (regno == SP_REGNUM && write_back_p);
28103 bool ldm_p = !pop_p;
28105 /* Check base register for LDM. */
28106 if (ldm_p && REGNO_REG_CLASS (regno) == HI_REGS)
28107 return 4;
28109 /* Check each register in the list. */
28110 for (; indx >= first_indx; indx--)
28112 regno = REGNO (XEXP (XVECEXP (parallel_op, 0, indx), 0));
28113 /* For POP, PC in HI_REGS can be used with 16-bit encoding. See similar
28114 comment in arm_attr_length_push_multi. */
28115 if (REGNO_REG_CLASS (regno) == HI_REGS
28116 && (regno != PC_REGNUM || ldm_p))
28117 return 4;
28120 return 2;
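/* Worked example (illustration only): "pop {r0-r3, pc}" (SP base with
   write-back) is a true POP and gets the answer 2, whereas
   "ldmia r8!, {r0, r1}" uses a high base register and therefore needs
   the 32-bit encoding, giving 4.  */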
28123 /* Compute the number of instructions emitted by output_move_double. */
28125 arm_count_output_move_double_insns (rtx *operands)
28127 int count;
28128 rtx ops[2];
28129 /* output_move_double may modify the operands array, so call it
28130 here on a copy of the array. */
28131 ops[0] = operands[0];
28132 ops[1] = operands[1];
28133 output_move_double (ops, false, &count);
28134 return count;
28138 vfp3_const_double_for_fract_bits (rtx operand)
28140 REAL_VALUE_TYPE r0;
28142 if (!CONST_DOUBLE_P (operand))
28143 return 0;
28145 r0 = *CONST_DOUBLE_REAL_VALUE (operand);
28146 if (exact_real_inverse (DFmode, &r0)
28147 && !REAL_VALUE_NEGATIVE (r0))
28149 if (exact_real_truncate (DFmode, &r0))
28151 HOST_WIDE_INT value = real_to_integer (&r0);
28152 value = value & 0xffffffff;
28153 if ((value != 0) && ( (value & (value - 1)) == 0))
28155 int ret = exact_log2 (value);
28156 gcc_assert (IN_RANGE (ret, 0, 31));
28157 return ret;
28161 return 0;
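/* Worked example (illustration only): for the constant 0.125 the exact
   inverse is 8.0 == 2^3, so the function returns 3; 0.3 has no exact
   power-of-two inverse and yields 0.  A standalone sketch of the same
   test (hypothetical helper, not used by GCC):  */
static int
example_fract_bits (double x)
{
  double inv;
  unsigned long long v;
  int n = 0;

  if (x <= 0.0)
    return 0;
  inv = 1.0 / x;
  if (inv > 2147483648.0)	/* 2^31, the largest accepted power.  */
    return 0;
  v = (unsigned long long) inv;
  /* Require an exact integral inverse that is a power of two.  */
  if ((double) v != inv || v == 0 || (v & (v - 1)) != 0)
    return 0;
  while ((v >>= 1) != 0)
    n++;
  return n;
}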
28164 /* If X is a CONST_DOUBLE with a value that is a power of 2 whose
28165 log2 is in [1, 32], return that log2. Otherwise return -1.
28166 This is used in the patterns for vcvt.s32.f32 floating-point to
28167 fixed-point conversions. */
28170 vfp3_const_double_for_bits (rtx x)
28172 const REAL_VALUE_TYPE *r;
28174 if (!CONST_DOUBLE_P (x))
28175 return -1;
28177 r = CONST_DOUBLE_REAL_VALUE (x);
28179 if (REAL_VALUE_NEGATIVE (*r)
28180 || REAL_VALUE_ISNAN (*r)
28181 || REAL_VALUE_ISINF (*r)
28182 || !real_isinteger (r, SFmode))
28183 return -1;
28185 HOST_WIDE_INT hwint = exact_log2 (real_to_integer (r));
28187 /* The exact_log2 above will have returned -1 if this is
28188 not an exact log2. */
28189 if (!IN_RANGE (hwint, 1, 32))
28190 return -1;
28192 return hwint;
28196 /* Emit a memory barrier around an atomic sequence according to MODEL. */
28198 static void
28199 arm_pre_atomic_barrier (enum memmodel model)
28201 if (need_atomic_barrier_p (model, true))
28202 emit_insn (gen_memory_barrier ());
28205 static void
28206 arm_post_atomic_barrier (enum memmodel model)
28208 if (need_atomic_barrier_p (model, false))
28209 emit_insn (gen_memory_barrier ());
28212 /* Emit the load-exclusive and store-exclusive instructions.
28213 Use acquire and release versions if necessary. */
28215 static void
28216 arm_emit_load_exclusive (machine_mode mode, rtx rval, rtx mem, bool acq)
28218 rtx (*gen) (rtx, rtx);
28220 if (acq)
28222 switch (mode)
28224 case E_QImode: gen = gen_arm_load_acquire_exclusiveqi; break;
28225 case E_HImode: gen = gen_arm_load_acquire_exclusivehi; break;
28226 case E_SImode: gen = gen_arm_load_acquire_exclusivesi; break;
28227 case E_DImode: gen = gen_arm_load_acquire_exclusivedi; break;
28228 default:
28229 gcc_unreachable ();
28232 else
28234 switch (mode)
28236 case E_QImode: gen = gen_arm_load_exclusiveqi; break;
28237 case E_HImode: gen = gen_arm_load_exclusivehi; break;
28238 case E_SImode: gen = gen_arm_load_exclusivesi; break;
28239 case E_DImode: gen = gen_arm_load_exclusivedi; break;
28240 default:
28241 gcc_unreachable ();
28245 emit_insn (gen (rval, mem));
28248 static void
28249 arm_emit_store_exclusive (machine_mode mode, rtx bval, rtx rval,
28250 rtx mem, bool rel)
28252 rtx (*gen) (rtx, rtx, rtx);
28254 if (rel)
28256 switch (mode)
28258 case E_QImode: gen = gen_arm_store_release_exclusiveqi; break;
28259 case E_HImode: gen = gen_arm_store_release_exclusivehi; break;
28260 case E_SImode: gen = gen_arm_store_release_exclusivesi; break;
28261 case E_DImode: gen = gen_arm_store_release_exclusivedi; break;
28262 default:
28263 gcc_unreachable ();
28266 else
28268 switch (mode)
28270 case E_QImode: gen = gen_arm_store_exclusiveqi; break;
28271 case E_HImode: gen = gen_arm_store_exclusivehi; break;
28272 case E_SImode: gen = gen_arm_store_exclusivesi; break;
28273 case E_DImode: gen = gen_arm_store_exclusivedi; break;
28274 default:
28275 gcc_unreachable ();
28279 emit_insn (gen (bval, rval, mem));
28282 /* Mark the previous jump instruction as unlikely. */
28284 static void
28285 emit_unlikely_jump (rtx insn)
28287 rtx_insn *jump = emit_jump_insn (insn);
28288 add_reg_br_prob_note (jump, profile_probability::very_unlikely ());
28291 /* Expand a compare and swap pattern. */
28293 void
28294 arm_expand_compare_and_swap (rtx operands[])
28296 rtx bval, bdst, rval, mem, oldval, newval, is_weak, mod_s, mod_f, x;
28297 machine_mode mode;
28298 rtx (*gen) (rtx, rtx, rtx, rtx, rtx, rtx, rtx, rtx);
28300 bval = operands[0];
28301 rval = operands[1];
28302 mem = operands[2];
28303 oldval = operands[3];
28304 newval = operands[4];
28305 is_weak = operands[5];
28306 mod_s = operands[6];
28307 mod_f = operands[7];
28308 mode = GET_MODE (mem);
28310 /* Normally the succ memory model must be stronger than fail, but in the
28311 unlikely event of fail being ACQUIRE and succ being RELEASE we need to
28312 promote succ to ACQ_REL so that we don't lose the acquire semantics. */
28314 if (TARGET_HAVE_LDACQ
28315 && is_mm_acquire (memmodel_from_int (INTVAL (mod_f)))
28316 && is_mm_release (memmodel_from_int (INTVAL (mod_s))))
28317 mod_s = GEN_INT (MEMMODEL_ACQ_REL);
28319 switch (mode)
28321 case E_QImode:
28322 case E_HImode:
28323 /* For narrow modes, we're going to perform the comparison in SImode,
28324 so do the zero-extension now. */
28325 rval = gen_reg_rtx (SImode);
28326 oldval = convert_modes (SImode, mode, oldval, true);
28327 /* FALLTHRU */
28329 case E_SImode:
28330 /* Force the value into a register if needed. We waited until after
28331 the zero-extension above to do this properly. */
28332 if (!arm_add_operand (oldval, SImode))
28333 oldval = force_reg (SImode, oldval);
28334 break;
28336 case E_DImode:
28337 if (!cmpdi_operand (oldval, mode))
28338 oldval = force_reg (mode, oldval);
28339 break;
28341 default:
28342 gcc_unreachable ();
28345 if (TARGET_THUMB1)
28347 switch (mode)
28349 case E_QImode: gen = gen_atomic_compare_and_swapt1qi_1; break;
28350 case E_HImode: gen = gen_atomic_compare_and_swapt1hi_1; break;
28351 case E_SImode: gen = gen_atomic_compare_and_swapt1si_1; break;
28352 case E_DImode: gen = gen_atomic_compare_and_swapt1di_1; break;
28353 default:
28354 gcc_unreachable ();
28357 else
28359 switch (mode)
28361 case E_QImode: gen = gen_atomic_compare_and_swap32qi_1; break;
28362 case E_HImode: gen = gen_atomic_compare_and_swap32hi_1; break;
28363 case E_SImode: gen = gen_atomic_compare_and_swap32si_1; break;
28364 case E_DImode: gen = gen_atomic_compare_and_swap32di_1; break;
28365 default:
28366 gcc_unreachable ();
28370 bdst = TARGET_THUMB1 ? bval : gen_rtx_REG (CC_Zmode, CC_REGNUM);
28371 emit_insn (gen (bdst, rval, mem, oldval, newval, is_weak, mod_s, mod_f));
28373 if (mode == QImode || mode == HImode)
28374 emit_move_insn (operands[1], gen_lowpart (mode, rval));
28376 /* In all cases, we arrange for success to be signaled by Z set.
28377 This arrangement allows for the boolean result to be used directly
28378 in a subsequent branch, post optimization. For Thumb-1 targets, the
28379 boolean negation of the result is also stored in bval because the Thumb-1
28380 backend lacks dependency tracking for the CC flag, flag-setting not
28381 being represented at the RTL level. */
28382 if (TARGET_THUMB1)
28383 emit_insn (gen_cstoresi_eq0_thumb1 (bval, bdst));
28384 else
28386 x = gen_rtx_EQ (SImode, bdst, const0_rtx);
28387 emit_insn (gen_rtx_SET (bval, x));
28391 /* Split a compare and swap pattern. It is IMPLEMENTATION DEFINED whether
28392 another memory store between the load-exclusive and store-exclusive can
28393 reset the monitor from Exclusive to Open state. This means we must wait
28394 until after reload to split the pattern, lest we get a register spill in
28395 the middle of the atomic sequence. Success of the compare and swap is
28396 indicated by the Z flag being set for 32-bit targets and by neg_bval being zero
28397 for Thumb-1 targets (i.e. the negation of the boolean value returned by the
28398 atomic_compare_and_swapmode standard pattern in operand 0). */
28400 void
28401 arm_split_compare_and_swap (rtx operands[])
28403 rtx rval, mem, oldval, newval, neg_bval;
28404 machine_mode mode;
28405 enum memmodel mod_s, mod_f;
28406 bool is_weak;
28407 rtx_code_label *label1, *label2;
28408 rtx x, cond;
28410 rval = operands[1];
28411 mem = operands[2];
28412 oldval = operands[3];
28413 newval = operands[4];
28414 is_weak = (operands[5] != const0_rtx);
28415 mod_s = memmodel_from_int (INTVAL (operands[6]));
28416 mod_f = memmodel_from_int (INTVAL (operands[7]));
28417 neg_bval = TARGET_THUMB1 ? operands[0] : operands[8];
28418 mode = GET_MODE (mem);
28420 bool is_armv8_sync = arm_arch8 && is_mm_sync (mod_s);
28422 bool use_acquire = TARGET_HAVE_LDACQ
28423 && !(is_mm_relaxed (mod_s) || is_mm_consume (mod_s)
28424 || is_mm_release (mod_s));
28426 bool use_release = TARGET_HAVE_LDACQ
28427 && !(is_mm_relaxed (mod_s) || is_mm_consume (mod_s)
28428 || is_mm_acquire (mod_s));
28430 /* For ARMv8, the load-acquire is too weak for __sync memory orders. Instead,
28431 a full barrier is emitted after the store-release. */
28432 if (is_armv8_sync)
28433 use_acquire = false;
28435 /* Checks whether a barrier is needed and emits one accordingly. */
28436 if (!(use_acquire || use_release))
28437 arm_pre_atomic_barrier (mod_s);
28439 label1 = NULL;
28440 if (!is_weak)
28442 label1 = gen_label_rtx ();
28443 emit_label (label1);
28445 label2 = gen_label_rtx ();
28447 arm_emit_load_exclusive (mode, rval, mem, use_acquire);
28449 /* Z is set to 0 for 32-bit targets (resp. rval set to 1) if oldval != rval,
28450 as required to communicate with arm_expand_compare_and_swap. */
28451 if (TARGET_32BIT)
28453 cond = arm_gen_compare_reg (NE, rval, oldval, neg_bval);
28454 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
28455 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
28456 gen_rtx_LABEL_REF (Pmode, label2), pc_rtx);
28457 emit_unlikely_jump (gen_rtx_SET (pc_rtx, x));
28459 else
28461 emit_move_insn (neg_bval, const1_rtx);
28462 cond = gen_rtx_NE (VOIDmode, rval, oldval);
28463 if (thumb1_cmpneg_operand (oldval, SImode))
28464 emit_unlikely_jump (gen_cbranchsi4_scratch (neg_bval, rval, oldval,
28465 label2, cond));
28466 else
28467 emit_unlikely_jump (gen_cbranchsi4_insn (cond, rval, oldval, label2));
28470 arm_emit_store_exclusive (mode, neg_bval, mem, newval, use_release);
28472 /* Weak or strong, we want EQ to be true for success, so that we
28473 match the flags that we got from the compare above. */
28474 if (TARGET_32BIT)
28476 cond = gen_rtx_REG (CCmode, CC_REGNUM);
28477 x = gen_rtx_COMPARE (CCmode, neg_bval, const0_rtx);
28478 emit_insn (gen_rtx_SET (cond, x));
28481 if (!is_weak)
28483 /* Z is set to boolean value of !neg_bval, as required to communicate
28484 with arm_expand_compare_and_swap. */
28485 x = gen_rtx_NE (VOIDmode, neg_bval, const0_rtx);
28486 emit_unlikely_jump (gen_cbranchsi4 (x, neg_bval, const0_rtx, label1));
28489 if (!is_mm_relaxed (mod_f))
28490 emit_label (label2);
28492 /* Checks whether a barrier is needed and emits one accordingly. */
28493 if (is_armv8_sync
28494 || !(use_acquire || use_release))
28495 arm_post_atomic_barrier (mod_s);
28497 if (is_mm_relaxed (mod_f))
28498 emit_label (label2);
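/* Illustrative sketch (not emitted verbatim): for a strong SImode
   compare-and-swap on a 32-bit target the split above produces a loop of
   roughly this shape, with barriers or acquire/release variants added
   according to the memory model:

	.Lretry: ldrex	rval, [mem]
		 cmp	rval, oldval
		 bne	.Ldone			@ Z clear signals failure
		 strex	neg_bval, newval, [mem]
		 cmp	neg_bval, #0
		 bne	.Lretry			@ retry if the monitor was lost
	.Ldone:
*/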
28501 /* Split an atomic operation pattern. Operation is given by CODE and is one
28502 of PLUS, MINUS, IOR, XOR, SET (for an exchange operation) or NOT (for a nand
28503 operation). Operation is performed on the content at MEM and on VALUE
28504 following the memory model MODEL_RTX. The content at MEM before and after
28505 the operation is returned in OLD_OUT and NEW_OUT respectively while the
28506 success of the operation is returned in COND. Using a scratch register or
28507 an operand register for these determines what result is returned for that
28508 pattern. */
28510 void
28511 arm_split_atomic_op (enum rtx_code code, rtx old_out, rtx new_out, rtx mem,
28512 rtx value, rtx model_rtx, rtx cond)
28514 enum memmodel model = memmodel_from_int (INTVAL (model_rtx));
28515 machine_mode mode = GET_MODE (mem);
28516 machine_mode wmode = (mode == DImode ? DImode : SImode);
28517 rtx_code_label *label;
28518 bool all_low_regs, bind_old_new;
28519 rtx x;
28521 bool is_armv8_sync = arm_arch8 && is_mm_sync (model);
28523 bool use_acquire = TARGET_HAVE_LDACQ
28524 && !(is_mm_relaxed (model) || is_mm_consume (model)
28525 || is_mm_release (model));
28527 bool use_release = TARGET_HAVE_LDACQ
28528 && !(is_mm_relaxed (model) || is_mm_consume (model)
28529 || is_mm_acquire (model));
28531 /* For ARMv8, a load-acquire is too weak for __sync memory orders. Instead,
28532 a full barrier is emitted after the store-release. */
28533 if (is_armv8_sync)
28534 use_acquire = false;
28536 /* Checks whether a barrier is needed and emits one accordingly. */
28537 if (!(use_acquire || use_release))
28538 arm_pre_atomic_barrier (model);
28540 label = gen_label_rtx ();
28541 emit_label (label);
28543 if (new_out)
28544 new_out = gen_lowpart (wmode, new_out);
28545 if (old_out)
28546 old_out = gen_lowpart (wmode, old_out);
28547 else
28548 old_out = new_out;
28549 value = simplify_gen_subreg (wmode, value, mode, 0);
28551 arm_emit_load_exclusive (mode, old_out, mem, use_acquire);
28553 /* Does the operation require the destination and the first operand to use
28554 the same register? This is decided by the register constraints of the
28555 relevant insn patterns in thumb1.md. */
28556 gcc_assert (!new_out || REG_P (new_out));
28557 all_low_regs = REG_P (value) && REGNO_REG_CLASS (REGNO (value)) == LO_REGS
28558 && new_out && REGNO_REG_CLASS (REGNO (new_out)) == LO_REGS
28559 && REGNO_REG_CLASS (REGNO (old_out)) == LO_REGS;
28560 bind_old_new =
28561 (TARGET_THUMB1
28562 && code != SET
28563 && code != MINUS
28564 && (code != PLUS || (!all_low_regs && !satisfies_constraint_L (value))));
28566 /* We want to return the old value while putting the result of the operation
28567 in the same register as the old value so copy the old value over to the
28568 destination register and use that register for the operation. */
28569 if (old_out && bind_old_new)
28571 emit_move_insn (new_out, old_out);
28572 old_out = new_out;
28575 switch (code)
28577 case SET:
28578 new_out = value;
28579 break;
28581 case NOT:
28582 x = gen_rtx_AND (wmode, old_out, value);
28583 emit_insn (gen_rtx_SET (new_out, x));
28584 x = gen_rtx_NOT (wmode, new_out);
28585 emit_insn (gen_rtx_SET (new_out, x));
28586 break;
28588 case MINUS:
28589 if (CONST_INT_P (value))
28591 value = GEN_INT (-INTVAL (value));
28592 code = PLUS;
28594 /* FALLTHRU */
28596 case PLUS:
28597 if (mode == DImode)
28599 /* DImode plus/minus need to clobber flags. */
28600 /* The adddi3 and subdi3 patterns are incorrectly written so that
28601 they require matching operands, even when we could easily support
28602 three operands. Thankfully, this can be fixed up post-splitting,
28603 as the individual add+adc patterns do accept three operands and
28604 post-reload cprop can make these moves go away. */
28605 emit_move_insn (new_out, old_out);
28606 if (code == PLUS)
28607 x = gen_adddi3 (new_out, new_out, value);
28608 else
28609 x = gen_subdi3 (new_out, new_out, value);
28610 emit_insn (x);
28611 break;
28613 /* FALLTHRU */
28615 default:
28616 x = gen_rtx_fmt_ee (code, wmode, old_out, value);
28617 emit_insn (gen_rtx_SET (new_out, x));
28618 break;
28621 arm_emit_store_exclusive (mode, cond, mem, gen_lowpart (mode, new_out),
28622 use_release);
28624 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
28625 emit_unlikely_jump (gen_cbranchsi4 (x, cond, const0_rtx, label));
28627 /* Checks whether a barrier is needed and emits one accordingly. */
28628 if (is_armv8_sync
28629 || !(use_acquire || use_release))
28630 arm_post_atomic_barrier (model);
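/* As a rough sketch (assuming a word-sized operand and relaxed ordering),
   the loop built above corresponds to:

       1:  ldrex   old, [mem]
           <op>    new, old, value
           strex   cond, new, [mem]
           cmp     cond, #0
           bne     1b

   with ldrex/strex replaced by ldaex/stlex (and the explicit barriers
   omitted) when acquire/release forms are available. */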
28633 #define MAX_VECT_LEN 16
28635 struct expand_vec_perm_d
28637 rtx target, op0, op1;
28638 unsigned char perm[MAX_VECT_LEN];
28639 machine_mode vmode;
28640 unsigned char nelt;
28641 bool one_vector_p;
28642 bool testing_p;
28645 /* Generate a variable permutation. */
28647 static void
28648 arm_expand_vec_perm_1 (rtx target, rtx op0, rtx op1, rtx sel)
28650 machine_mode vmode = GET_MODE (target);
28651 bool one_vector_p = rtx_equal_p (op0, op1);
28653 gcc_checking_assert (vmode == V8QImode || vmode == V16QImode);
28654 gcc_checking_assert (GET_MODE (op0) == vmode);
28655 gcc_checking_assert (GET_MODE (op1) == vmode);
28656 gcc_checking_assert (GET_MODE (sel) == vmode);
28657 gcc_checking_assert (TARGET_NEON);
28659 if (one_vector_p)
28661 if (vmode == V8QImode)
28662 emit_insn (gen_neon_vtbl1v8qi (target, op0, sel));
28663 else
28664 emit_insn (gen_neon_vtbl1v16qi (target, op0, sel));
28666 else
28668 rtx pair;
28670 if (vmode == V8QImode)
28672 pair = gen_reg_rtx (V16QImode);
28673 emit_insn (gen_neon_vcombinev8qi (pair, op0, op1));
28674 pair = gen_lowpart (TImode, pair);
28675 emit_insn (gen_neon_vtbl2v8qi (target, pair, sel));
28677 else
28679 pair = gen_reg_rtx (OImode);
28680 emit_insn (gen_neon_vcombinev16qi (pair, op0, op1));
28681 emit_insn (gen_neon_vtbl2v16qi (target, pair, sel));
28686 void
28687 arm_expand_vec_perm (rtx target, rtx op0, rtx op1, rtx sel)
28689 machine_mode vmode = GET_MODE (target);
28690 unsigned int i, nelt = GET_MODE_NUNITS (vmode);
28691 bool one_vector_p = rtx_equal_p (op0, op1);
28692 rtx rmask[MAX_VECT_LEN], mask;
28694 /* TODO: ARM's VTBL indexing is little-endian. In order to handle GCC's
28695 numbering of elements for big-endian, we must reverse the order. */
28696 gcc_checking_assert (!BYTES_BIG_ENDIAN);
28698 /* The VTBL instruction does not use a modulo index, so we must take care
28699 of that ourselves. */
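/* For example, with a single V8QImode operand the mask is 7, so a selector
   element of 9 picks lane 1 (9 & 7), giving the modulo behaviour that
   VEC_PERM_EXPR requires, whereas VTBL itself would return 0 for an
   out-of-range index. */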
28700 mask = GEN_INT (one_vector_p ? nelt - 1 : 2 * nelt - 1);
28701 for (i = 0; i < nelt; ++i)
28702 rmask[i] = mask;
28703 mask = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rmask));
28704 sel = expand_simple_binop (vmode, AND, sel, mask, NULL, 0, OPTAB_LIB_WIDEN);
28706 arm_expand_vec_perm_1 (target, op0, op1, sel);
28709 /* Map lane ordering between the architectural lane order and GCC's lane
28710 order, taking the ABI into account. See comment above output_move_neon for details. */
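/* For example, on a big-endian target architectural lane 0 of a V4SImode
   value maps to GCC lane 1: reversing the four lanes gives 3, and the
   D-register swap (3 ^ 2) gives 1. On little-endian targets the mapping
   is the identity. */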
28712 static int
28713 neon_endian_lane_map (machine_mode mode, int lane)
28715 if (BYTES_BIG_ENDIAN)
28717 int nelems = GET_MODE_NUNITS (mode);
28718 /* Reverse lane order. */
28719 lane = (nelems - 1 - lane);
28720 /* Reverse D register order, to match ABI. */
28721 if (GET_MODE_SIZE (mode) == 16)
28722 lane = lane ^ (nelems / 2);
28724 return lane;
28727 /* Some permutations index into pairs of vectors; this is a helper function
28728 to map indexes into those pairs of vectors. */
28730 static int
28731 neon_pair_endian_lane_map (machine_mode mode, int lane)
28733 int nelem = GET_MODE_NUNITS (mode);
28734 if (BYTES_BIG_ENDIAN)
28735 lane =
28736 neon_endian_lane_map (mode, lane & (nelem - 1)) + (lane & nelem);
28737 return lane;
28740 /* Generate or test for an insn that supports a constant permutation. */
28742 /* Recognize patterns for the VUZP insns. */
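/* For example, on a little-endian target a V8QImode selector of
   { 0, 2, 4, 6, 8, 10, 12, 14 } (the even lanes of the two operands
   concatenated) matches the first VUZP output, and
   { 1, 3, 5, 7, 9, 11, 13, 15 } matches the second. */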
28744 static bool
28745 arm_evpc_neon_vuzp (struct expand_vec_perm_d *d)
28747 unsigned int i, odd, mask, nelt = d->nelt;
28748 rtx out0, out1, in0, in1;
28749 rtx (*gen)(rtx, rtx, rtx, rtx);
28750 int first_elem;
28751 int swap_nelt;
28753 if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
28754 return false;
28756 /* arm_expand_vec_perm_const_1 () helpfully swaps the operands for the
28757 big-endian pattern on 64-bit vectors, so we correct for that. */
28758 swap_nelt = BYTES_BIG_ENDIAN && !d->one_vector_p
28759 && GET_MODE_SIZE (d->vmode) == 8 ? d->nelt : 0;
28761 first_elem = d->perm[neon_endian_lane_map (d->vmode, 0)] ^ swap_nelt;
28763 if (first_elem == neon_endian_lane_map (d->vmode, 0))
28764 odd = 0;
28765 else if (first_elem == neon_endian_lane_map (d->vmode, 1))
28766 odd = 1;
28767 else
28768 return false;
28769 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
28771 for (i = 0; i < nelt; i++)
28773 unsigned elt =
28774 (neon_pair_endian_lane_map (d->vmode, i) * 2 + odd) & mask;
28775 if ((d->perm[i] ^ swap_nelt) != neon_pair_endian_lane_map (d->vmode, elt))
28776 return false;
28779 /* Success! */
28780 if (d->testing_p)
28781 return true;
28783 switch (d->vmode)
28785 case E_V16QImode: gen = gen_neon_vuzpv16qi_internal; break;
28786 case E_V8QImode: gen = gen_neon_vuzpv8qi_internal; break;
28787 case E_V8HImode: gen = gen_neon_vuzpv8hi_internal; break;
28788 case E_V4HImode: gen = gen_neon_vuzpv4hi_internal; break;
28789 case E_V8HFmode: gen = gen_neon_vuzpv8hf_internal; break;
28790 case E_V4HFmode: gen = gen_neon_vuzpv4hf_internal; break;
28791 case E_V4SImode: gen = gen_neon_vuzpv4si_internal; break;
28792 case E_V2SImode: gen = gen_neon_vuzpv2si_internal; break;
28793 case E_V2SFmode: gen = gen_neon_vuzpv2sf_internal; break;
28794 case E_V4SFmode: gen = gen_neon_vuzpv4sf_internal; break;
28795 default:
28796 gcc_unreachable ();
28799 in0 = d->op0;
28800 in1 = d->op1;
28801 if (swap_nelt != 0)
28802 std::swap (in0, in1);
28804 out0 = d->target;
28805 out1 = gen_reg_rtx (d->vmode);
28806 if (odd)
28807 std::swap (out0, out1);
28809 emit_insn (gen (out0, in0, in1, out1));
28810 return true;
28813 /* Recognize patterns for the VZIP insns. */
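/* For example, on a little-endian target a V8QImode selector of
   { 0, 8, 1, 9, 2, 10, 3, 11 } interleaves the low halves of the two
   operands, and { 4, 12, 5, 13, 6, 14, 7, 15 } interleaves the high
   halves. */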
28815 static bool
28816 arm_evpc_neon_vzip (struct expand_vec_perm_d *d)
28818 unsigned int i, high, mask, nelt = d->nelt;
28819 rtx out0, out1, in0, in1;
28820 rtx (*gen)(rtx, rtx, rtx, rtx);
28821 int first_elem;
28822 bool is_swapped;
28824 if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
28825 return false;
28827 is_swapped = BYTES_BIG_ENDIAN;
28829 first_elem = d->perm[neon_endian_lane_map (d->vmode, 0) ^ is_swapped];
28831 high = nelt / 2;
28832 if (first_elem == neon_endian_lane_map (d->vmode, high))
28834 else if (first_elem == neon_endian_lane_map (d->vmode, 0))
28835 high = 0;
28836 else
28837 return false;
28838 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
28840 for (i = 0; i < nelt / 2; i++)
28842 unsigned elt =
28843 neon_pair_endian_lane_map (d->vmode, i + high) & mask;
28844 if (d->perm[neon_pair_endian_lane_map (d->vmode, 2 * i + is_swapped)]
28845 != elt)
28846 return false;
28847 elt =
28848 neon_pair_endian_lane_map (d->vmode, i + nelt + high) & mask;
28849 if (d->perm[neon_pair_endian_lane_map (d->vmode, 2 * i + !is_swapped)]
28850 != elt)
28851 return false;
28854 /* Success! */
28855 if (d->testing_p)
28856 return true;
28858 switch (d->vmode)
28860 case E_V16QImode: gen = gen_neon_vzipv16qi_internal; break;
28861 case E_V8QImode: gen = gen_neon_vzipv8qi_internal; break;
28862 case E_V8HImode: gen = gen_neon_vzipv8hi_internal; break;
28863 case E_V4HImode: gen = gen_neon_vzipv4hi_internal; break;
28864 case E_V8HFmode: gen = gen_neon_vzipv8hf_internal; break;
28865 case E_V4HFmode: gen = gen_neon_vzipv4hf_internal; break;
28866 case E_V4SImode: gen = gen_neon_vzipv4si_internal; break;
28867 case E_V2SImode: gen = gen_neon_vzipv2si_internal; break;
28868 case E_V2SFmode: gen = gen_neon_vzipv2sf_internal; break;
28869 case E_V4SFmode: gen = gen_neon_vzipv4sf_internal; break;
28870 default:
28871 gcc_unreachable ();
28874 in0 = d->op0;
28875 in1 = d->op1;
28876 if (is_swapped)
28877 std::swap (in0, in1);
28879 out0 = d->target;
28880 out1 = gen_reg_rtx (d->vmode);
28881 if (high)
28882 std::swap (out0, out1);
28884 emit_insn (gen (out0, in0, in1, out1));
28885 return true;
28888 /* Recognize patterns for the VREV insns. */
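/* For example, a V8QImode selector of { 7, 6, 5, 4, 3, 2, 1, 0 }
   (diff == 7) matches VREV64.8, while { 1, 0, 3, 2, 5, 4, 7, 6 }
   (diff == 1) matches VREV16.8. */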
28890 static bool
28891 arm_evpc_neon_vrev (struct expand_vec_perm_d *d)
28893 unsigned int i, j, diff, nelt = d->nelt;
28894 rtx (*gen)(rtx, rtx);
28896 if (!d->one_vector_p)
28897 return false;
28899 diff = d->perm[0];
28900 switch (diff)
28902 case 7:
28903 switch (d->vmode)
28905 case E_V16QImode: gen = gen_neon_vrev64v16qi; break;
28906 case E_V8QImode: gen = gen_neon_vrev64v8qi; break;
28907 default:
28908 return false;
28910 break;
28911 case 3:
28912 switch (d->vmode)
28914 case E_V16QImode: gen = gen_neon_vrev32v16qi; break;
28915 case E_V8QImode: gen = gen_neon_vrev32v8qi; break;
28916 case E_V8HImode: gen = gen_neon_vrev64v8hi; break;
28917 case E_V4HImode: gen = gen_neon_vrev64v4hi; break;
28918 case E_V8HFmode: gen = gen_neon_vrev64v8hf; break;
28919 case E_V4HFmode: gen = gen_neon_vrev64v4hf; break;
28920 default:
28921 return false;
28923 break;
28924 case 1:
28925 switch (d->vmode)
28927 case E_V16QImode: gen = gen_neon_vrev16v16qi; break;
28928 case E_V8QImode: gen = gen_neon_vrev16v8qi; break;
28929 case E_V8HImode: gen = gen_neon_vrev32v8hi; break;
28930 case E_V4HImode: gen = gen_neon_vrev32v4hi; break;
28931 case E_V4SImode: gen = gen_neon_vrev64v4si; break;
28932 case E_V2SImode: gen = gen_neon_vrev64v2si; break;
28933 case E_V4SFmode: gen = gen_neon_vrev64v4sf; break;
28934 case E_V2SFmode: gen = gen_neon_vrev64v2sf; break;
28935 default:
28936 return false;
28938 break;
28939 default:
28940 return false;
28943 for (i = 0; i < nelt ; i += diff + 1)
28944 for (j = 0; j <= diff; j += 1)
28946 /* This is guaranteed to hold: the value of diff
28947 is 7, 3 or 1 here, and nelt is a multiple of diff + 1,
28948 so we never index past the end of the selector. A
28949 selector implying any other value of diff means that
28950 something has gone wrong by the time we get here. */
28951 gcc_assert (i + j < nelt);
28952 if (d->perm[i + j] != i + diff - j)
28953 return false;
28956 /* Success! */
28957 if (d->testing_p)
28958 return true;
28960 emit_insn (gen (d->target, d->op0));
28961 return true;
28964 /* Recognize patterns for the VTRN insns. */
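/* For example, on a little-endian target a V8QImode selector of
   { 0, 8, 2, 10, 4, 12, 6, 14 } takes the even lanes of both operands
   (the first VTRN result) and { 1, 9, 3, 11, 5, 13, 7, 15 } the odd
   lanes (the second). */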
28966 static bool
28967 arm_evpc_neon_vtrn (struct expand_vec_perm_d *d)
28969 unsigned int i, odd, mask, nelt = d->nelt;
28970 rtx out0, out1, in0, in1;
28971 rtx (*gen)(rtx, rtx, rtx, rtx);
28973 if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
28974 return false;
28976 /* Note that these are little-endian tests. Adjust for big-endian later. */
28977 if (d->perm[0] == 0)
28978 odd = 0;
28979 else if (d->perm[0] == 1)
28980 odd = 1;
28981 else
28982 return false;
28983 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
28985 for (i = 0; i < nelt; i += 2)
28987 if (d->perm[i] != i + odd)
28988 return false;
28989 if (d->perm[i + 1] != ((i + nelt + odd) & mask))
28990 return false;
28993 /* Success! */
28994 if (d->testing_p)
28995 return true;
28997 switch (d->vmode)
28999 case E_V16QImode: gen = gen_neon_vtrnv16qi_internal; break;
29000 case E_V8QImode: gen = gen_neon_vtrnv8qi_internal; break;
29001 case E_V8HImode: gen = gen_neon_vtrnv8hi_internal; break;
29002 case E_V4HImode: gen = gen_neon_vtrnv4hi_internal; break;
29003 case E_V8HFmode: gen = gen_neon_vtrnv8hf_internal; break;
29004 case E_V4HFmode: gen = gen_neon_vtrnv4hf_internal; break;
29005 case E_V4SImode: gen = gen_neon_vtrnv4si_internal; break;
29006 case E_V2SImode: gen = gen_neon_vtrnv2si_internal; break;
29007 case E_V2SFmode: gen = gen_neon_vtrnv2sf_internal; break;
29008 case E_V4SFmode: gen = gen_neon_vtrnv4sf_internal; break;
29009 default:
29010 gcc_unreachable ();
29013 in0 = d->op0;
29014 in1 = d->op1;
29015 if (BYTES_BIG_ENDIAN)
29017 std::swap (in0, in1);
29018 odd = !odd;
29021 out0 = d->target;
29022 out1 = gen_reg_rtx (d->vmode);
29023 if (odd)
29024 std::swap (out0, out1);
29026 emit_insn (gen (out0, in0, in1, out1));
29027 return true;
29030 /* Recognize patterns for the VEXT insns. */
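/* For example, a V8QImode selector of { 3, 4, 5, 6, 7, 8, 9, 10 } is a
   contiguous window into the concatenated operands starting at element 3,
   and so matches VEXT.8 with an immediate of #3. */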
29032 static bool
29033 arm_evpc_neon_vext (struct expand_vec_perm_d *d)
29035 unsigned int i, nelt = d->nelt;
29036 rtx (*gen) (rtx, rtx, rtx, rtx);
29037 rtx offset;
29039 unsigned int location;
29041 unsigned int next = d->perm[0] + 1;
29043 /* TODO: Handle GCC's numbering of elements for big-endian. */
29044 if (BYTES_BIG_ENDIAN)
29045 return false;
29047 /* Check if the extracted indexes are increasing by one. */
29048 for (i = 1; i < nelt; next++, i++)
29050 /* If we hit the most significant element of the 2nd vector in
29051 the previous iteration, no need to test further. */
29052 if (next == 2 * nelt)
29053 return false;
29055 /* If we are operating on only one vector: it could be a
29056 rotation. If there are only two elements of size < 64, let
29057 arm_evpc_neon_vrev catch it. */
29058 if (d->one_vector_p && (next == nelt))
29060 if ((nelt == 2) && (d->vmode != V2DImode))
29061 return false;
29062 else
29063 next = 0;
29066 if (d->perm[i] != next)
29067 return false;
29070 location = d->perm[0];
29072 switch (d->vmode)
29074 case E_V16QImode: gen = gen_neon_vextv16qi; break;
29075 case E_V8QImode: gen = gen_neon_vextv8qi; break;
29076 case E_V4HImode: gen = gen_neon_vextv4hi; break;
29077 case E_V8HImode: gen = gen_neon_vextv8hi; break;
29078 case E_V2SImode: gen = gen_neon_vextv2si; break;
29079 case E_V4SImode: gen = gen_neon_vextv4si; break;
29080 case E_V4HFmode: gen = gen_neon_vextv4hf; break;
29081 case E_V8HFmode: gen = gen_neon_vextv8hf; break;
29082 case E_V2SFmode: gen = gen_neon_vextv2sf; break;
29083 case E_V4SFmode: gen = gen_neon_vextv4sf; break;
29084 case E_V2DImode: gen = gen_neon_vextv2di; break;
29085 default:
29086 return false;
29089 /* Success! */
29090 if (d->testing_p)
29091 return true;
29093 offset = GEN_INT (location);
29094 emit_insn (gen (d->target, d->op0, d->op1, offset));
29095 return true;
29098 /* The NEON VTBL instruction is a fully variable permutation that's even
29099 stronger than what we expose via VEC_PERM_EXPR. What it doesn't do
29100 is mask the index operand as VEC_PERM_EXPR requires. Therefore we
29101 can do slightly better by expanding this as a constant where we don't
29102 have to apply a mask. */
29104 static bool
29105 arm_evpc_neon_vtbl (struct expand_vec_perm_d *d)
29107 rtx rperm[MAX_VECT_LEN], sel;
29108 machine_mode vmode = d->vmode;
29109 unsigned int i, nelt = d->nelt;
29111 /* TODO: ARM's VTBL indexing is little-endian. In order to handle GCC's
29112 numbering of elements for big-endian, we must reverse the order. */
29113 if (BYTES_BIG_ENDIAN)
29114 return false;
29116 if (d->testing_p)
29117 return true;
29119 /* Generic code will try constant permutation twice. Once with the
29120 original mode and again with the elements lowered to QImode.
29121 So wait and don't do the selector expansion ourselves. */
29122 if (vmode != V8QImode && vmode != V16QImode)
29123 return false;
29125 for (i = 0; i < nelt; ++i)
29126 rperm[i] = GEN_INT (d->perm[i]);
29127 sel = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rperm));
29128 sel = force_reg (vmode, sel);
29130 arm_expand_vec_perm_1 (d->target, d->op0, d->op1, sel);
29131 return true;
29134 static bool
29135 arm_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
29137 /* Check if the input mask matches vext before reordering the
29138 operands. */
29139 if (TARGET_NEON)
29140 if (arm_evpc_neon_vext (d))
29141 return true;
29143 /* The pattern matching functions above are written to look for a small
29144 number to begin the sequence (0, 1, N/2). If we begin with an index
29145 from the second operand, we can swap the operands. */
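/* For example, a V8QImode selector of { 8, 10, 12, 14, 0, 2, 4, 6 }
   becomes { 0, 2, 4, 6, 8, 10, 12, 14 } once op0 and op1 are swapped. */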
29146 if (d->perm[0] >= d->nelt)
29148 unsigned i, nelt = d->nelt;
29150 for (i = 0; i < nelt; ++i)
29151 d->perm[i] = (d->perm[i] + nelt) & (2 * nelt - 1);
29153 std::swap (d->op0, d->op1);
29156 if (TARGET_NEON)
29158 if (arm_evpc_neon_vuzp (d))
29159 return true;
29160 if (arm_evpc_neon_vzip (d))
29161 return true;
29162 if (arm_evpc_neon_vrev (d))
29163 return true;
29164 if (arm_evpc_neon_vtrn (d))
29165 return true;
29166 return arm_evpc_neon_vtbl (d);
29168 return false;
29171 /* Expand a vec_perm_const pattern. */
29173 bool
29174 arm_expand_vec_perm_const (rtx target, rtx op0, rtx op1, rtx sel)
29176 struct expand_vec_perm_d d;
29177 int i, nelt, which;
29179 d.target = target;
29180 d.op0 = op0;
29181 d.op1 = op1;
29183 d.vmode = GET_MODE (target);
29184 gcc_assert (VECTOR_MODE_P (d.vmode));
29185 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
29186 d.testing_p = false;
29188 for (i = which = 0; i < nelt; ++i)
29190 rtx e = XVECEXP (sel, 0, i);
29191 int ei = INTVAL (e) & (2 * nelt - 1);
29192 which |= (ei < nelt ? 1 : 2);
29193 d.perm[i] = ei;
29196 switch (which)
29198 default:
29199 gcc_unreachable();
29201 case 3:
29202 d.one_vector_p = false;
29203 if (!rtx_equal_p (op0, op1))
29204 break;
29206 /* The elements of PERM do not suggest that only the first operand
29207 is used, but both operands are identical. Allow easier matching
29208 of the permutation by folding the permutation into the single
29209 input vector. */
29210 /* FALLTHRU */
29211 case 2:
29212 for (i = 0; i < nelt; ++i)
29213 d.perm[i] &= nelt - 1;
29214 d.op0 = op1;
29215 d.one_vector_p = true;
29216 break;
29218 case 1:
29219 d.op1 = op0;
29220 d.one_vector_p = true;
29221 break;
29224 return arm_expand_vec_perm_const_1 (&d);
29227 /* Implement TARGET_VECTORIZE_VEC_PERM_CONST_OK. */
29229 static bool
29230 arm_vectorize_vec_perm_const_ok (machine_mode vmode,
29231 const unsigned char *sel)
29233 struct expand_vec_perm_d d;
29234 unsigned int i, nelt, which;
29235 bool ret;
29237 d.vmode = vmode;
29238 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
29239 d.testing_p = true;
29240 memcpy (d.perm, sel, nelt);
29242 /* Categorize the set of elements in the selector. */
29243 for (i = which = 0; i < nelt; ++i)
29245 unsigned char e = d.perm[i];
29246 gcc_assert (e < 2 * nelt);
29247 which |= (e < nelt ? 1 : 2);
29250 /* If all elements come from the second vector, fold them into the first. */
29251 if (which == 2)
29252 for (i = 0; i < nelt; ++i)
29253 d.perm[i] -= nelt;
29255 /* Check whether the mask can be applied to the vector type. */
29256 d.one_vector_p = (which != 3);
29258 d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
29259 d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
29260 if (!d.one_vector_p)
29261 d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);
29263 start_sequence ();
29264 ret = arm_expand_vec_perm_const_1 (&d);
29265 end_sequence ();
29267 return ret;
29270 bool
29271 arm_autoinc_modes_ok_p (machine_mode mode, enum arm_auto_incmodes code)
29273 /* If we are soft-float and either have ldrd or the mode fits in a
29274 single word, then all auto-increment forms are OK. */
29275 if (TARGET_SOFT_FLOAT && (TARGET_LDRD || GET_MODE_SIZE (mode) <= 4))
29276 return true;
29278 switch (code)
29280 /* Post-increment and pre-decrement are supported for all
29281 instruction forms except for vector forms. */
29282 case ARM_POST_INC:
29283 case ARM_PRE_DEC:
29284 if (VECTOR_MODE_P (mode))
29286 if (code != ARM_PRE_DEC)
29287 return true;
29288 else
29289 return false;
29292 return true;
29294 case ARM_POST_DEC:
29295 case ARM_PRE_INC:
29296 /* Without LDRD, and with a mode size greater than
29297 the word size, there is no point in auto-incrementing
29298 because ldm and stm do not have these forms. */
29299 if (!TARGET_LDRD && GET_MODE_SIZE (mode) > 4)
29300 return false;
29302 /* Vector and floating point modes do not support
29303 these auto increment forms. */
29304 if (FLOAT_MODE_P (mode) || VECTOR_MODE_P (mode))
29305 return false;
29307 return true;
29309 default:
29310 return false;
29314 return false;
29317 /* The default expansion of general 64-bit shifts in core-regs is suboptimal
29318 on ARM, since we know that shifts by negative amounts are no-ops.
29319 Additionally, the default expansion code is not available or suitable
29320 for post-reload insn splits (this can occur when the register allocator
29321 chooses not to do a shift in NEON).
29323 This function is used in both initial expand and post-reload splits, and
29324 handles all kinds of 64-bit shifts.
29326 Input requirements:
29327 - It is safe for the input and output to be the same register, but
29328 early-clobber rules apply for the shift amount and scratch registers.
29329 - Shift by register requires both scratch registers. In all other cases
29330 the scratch registers may be NULL.
29331 - Ashiftrt by a register also clobbers the CC register. */
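/* For instance, a constant logical right shift by an AMOUNT below 32 is
   emitted as (a sketch in C):

       out_low  = (in_low >> amount) | (in_high << (32 - amount));
       out_high = in_high >> amount;

   while for 32 <= AMOUNT < 64 the low word is taken from in_high shifted
   by AMOUNT - 32 and the high word becomes zero (or a copy of the sign
   bit for an arithmetic shift). */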
29332 void
29333 arm_emit_coreregs_64bit_shift (enum rtx_code code, rtx out, rtx in,
29334 rtx amount, rtx scratch1, rtx scratch2)
29336 rtx out_high = gen_highpart (SImode, out);
29337 rtx out_low = gen_lowpart (SImode, out);
29338 rtx in_high = gen_highpart (SImode, in);
29339 rtx in_low = gen_lowpart (SImode, in);
29341 /* Terminology:
29342 in = the register pair containing the input value.
29343 out = the destination register pair.
29344 up = the high- or low-part of each pair.
29345 down = the opposite part to "up".
29346 In a shift, we can consider bits to shift from "up"-stream to
29347 "down"-stream, so in a left-shift "up" is the low-part and "down"
29348 is the high-part of each register pair. */
29350 rtx out_up = code == ASHIFT ? out_low : out_high;
29351 rtx out_down = code == ASHIFT ? out_high : out_low;
29352 rtx in_up = code == ASHIFT ? in_low : in_high;
29353 rtx in_down = code == ASHIFT ? in_high : in_low;
29355 gcc_assert (code == ASHIFT || code == ASHIFTRT || code == LSHIFTRT);
29356 gcc_assert (out
29357 && (REG_P (out) || GET_CODE (out) == SUBREG)
29358 && GET_MODE (out) == DImode);
29359 gcc_assert (in
29360 && (REG_P (in) || GET_CODE (in) == SUBREG)
29361 && GET_MODE (in) == DImode);
29362 gcc_assert (amount
29363 && (((REG_P (amount) || GET_CODE (amount) == SUBREG)
29364 && GET_MODE (amount) == SImode)
29365 || CONST_INT_P (amount)));
29366 gcc_assert (scratch1 == NULL
29367 || (GET_CODE (scratch1) == SCRATCH)
29368 || (GET_MODE (scratch1) == SImode
29369 && REG_P (scratch1)));
29370 gcc_assert (scratch2 == NULL
29371 || (GET_CODE (scratch2) == SCRATCH)
29372 || (GET_MODE (scratch2) == SImode
29373 && REG_P (scratch2)));
29374 gcc_assert (!REG_P (out) || !REG_P (amount)
29375 || !HARD_REGISTER_P (out)
29376 || (REGNO (out) != REGNO (amount)
29377 && REGNO (out) + 1 != REGNO (amount)));
29379 /* Macros to make following code more readable. */
29380 #define SUB_32(DEST,SRC) \
29381 gen_addsi3 ((DEST), (SRC), GEN_INT (-32))
29382 #define RSB_32(DEST,SRC) \
29383 gen_subsi3 ((DEST), GEN_INT (32), (SRC))
29384 #define SUB_S_32(DEST,SRC) \
29385 gen_addsi3_compare0 ((DEST), (SRC), \
29386 GEN_INT (-32))
29387 #define SET(DEST,SRC) \
29388 gen_rtx_SET ((DEST), (SRC))
29389 #define SHIFT(CODE,SRC,AMOUNT) \
29390 gen_rtx_fmt_ee ((CODE), SImode, (SRC), (AMOUNT))
29391 #define LSHIFT(CODE,SRC,AMOUNT) \
29392 gen_rtx_fmt_ee ((CODE) == ASHIFT ? ASHIFT : LSHIFTRT, \
29393 SImode, (SRC), (AMOUNT))
29394 #define REV_LSHIFT(CODE,SRC,AMOUNT) \
29395 gen_rtx_fmt_ee ((CODE) == ASHIFT ? LSHIFTRT : ASHIFT, \
29396 SImode, (SRC), (AMOUNT))
29397 #define ORR(A,B) \
29398 gen_rtx_IOR (SImode, (A), (B))
29399 #define BRANCH(COND,LABEL) \
29400 gen_arm_cond_branch ((LABEL), \
29401 gen_rtx_ ## COND (CCmode, cc_reg, \
29402 const0_rtx), \
29403 cc_reg)
29405 /* Shifts by register and shifts by constant are handled separately. */
29406 if (CONST_INT_P (amount))
29408 /* We have a shift-by-constant. */
29410 /* First, handle out-of-range shift amounts.
29411 In both cases we try to match the result that an ARM shift-by-register
29412 instruction would give. This helps reduce execution
29413 differences between optimization levels, but it won't stop other
29414 parts of the compiler doing different things. This is "undefined
29415 behavior", in any case. */
29416 if (INTVAL (amount) <= 0)
29417 emit_insn (gen_movdi (out, in));
29418 else if (INTVAL (amount) >= 64)
29420 if (code == ASHIFTRT)
29422 rtx const31_rtx = GEN_INT (31);
29423 emit_insn (SET (out_down, SHIFT (code, in_up, const31_rtx)));
29424 emit_insn (SET (out_up, SHIFT (code, in_up, const31_rtx)));
29426 else
29427 emit_insn (gen_movdi (out, const0_rtx));
29430 /* Now handle valid shifts. */
29431 else if (INTVAL (amount) < 32)
29433 /* Shifts by a constant less than 32. */
29434 rtx reverse_amount = GEN_INT (32 - INTVAL (amount));
29436 /* Clearing the out register in DImode first avoids lots
29437 of spilling and results in less stack usage.
29438 Later this redundant insn is completely removed.
29439 Do that only if "in" and "out" are different registers. */
29440 if (REG_P (out) && REG_P (in) && REGNO (out) != REGNO (in))
29441 emit_insn (SET (out, const0_rtx));
29442 emit_insn (SET (out_down, LSHIFT (code, in_down, amount)));
29443 emit_insn (SET (out_down,
29444 ORR (REV_LSHIFT (code, in_up, reverse_amount),
29445 out_down)));
29446 emit_insn (SET (out_up, SHIFT (code, in_up, amount)));
29448 else
29450 /* Shifts by a constant greater than 31. */
29451 rtx adj_amount = GEN_INT (INTVAL (amount) - 32);
29453 if (REG_P (out) && REG_P (in) && REGNO (out) != REGNO (in))
29454 emit_insn (SET (out, const0_rtx));
29455 emit_insn (SET (out_down, SHIFT (code, in_up, adj_amount)));
29456 if (code == ASHIFTRT)
29457 emit_insn (gen_ashrsi3 (out_up, in_up,
29458 GEN_INT (31)));
29459 else
29460 emit_insn (SET (out_up, const0_rtx));
29463 else
29465 /* We have a shift-by-register. */
29466 rtx cc_reg = gen_rtx_REG (CC_NOOVmode, CC_REGNUM);
29468 /* This alternative requires the scratch registers. */
29469 gcc_assert (scratch1 && REG_P (scratch1));
29470 gcc_assert (scratch2 && REG_P (scratch2));
29472 /* We will need the values "amount-32" and "32-amount" later.
29473 Swapping them around now allows the later code to be more general. */
29474 switch (code)
29476 case ASHIFT:
29477 emit_insn (SUB_32 (scratch1, amount));
29478 emit_insn (RSB_32 (scratch2, amount));
29479 break;
29480 case ASHIFTRT:
29481 emit_insn (RSB_32 (scratch1, amount));
29482 /* Also set CC = amount > 32. */
29483 emit_insn (SUB_S_32 (scratch2, amount));
29484 break;
29485 case LSHIFTRT:
29486 emit_insn (RSB_32 (scratch1, amount));
29487 emit_insn (SUB_32 (scratch2, amount));
29488 break;
29489 default:
29490 gcc_unreachable ();
29493 /* Emit code like this:
29495 arithmetic-left:
29496 out_down = in_down << amount;
29497 out_down = (in_up << (amount - 32)) | out_down;
29498 out_down = ((unsigned)in_up >> (32 - amount)) | out_down;
29499 out_up = in_up << amount;
29501 arithmetic-right:
29502 out_down = in_down >> amount;
29503 out_down = (in_up << (32 - amount)) | out_down;
29504 if (amount < 32)
29505 out_down = ((signed)in_up >> (amount - 32)) | out_down;
29506 out_up = in_up << amount;
29508 logical-right:
29509 out_down = in_down >> amount;
29510 out_down = (in_up << (32 - amount)) | out_down;
29511 if (amount < 32)
29512 out_down = ((unsigned)in_up >> (amount - 32)) | out_down;
29513 out_up = in_up << amount;
29515 The ARM and Thumb2 variants are the same but implemented slightly
29516 differently. If this were only called during expand we could just
29517 use the Thumb2 case and let combine do the right thing, but this
29518 can also be called from post-reload splitters. */
29520 emit_insn (SET (out_down, LSHIFT (code, in_down, amount)));
29522 if (!TARGET_THUMB2)
29524 /* Emit code for ARM mode. */
29525 emit_insn (SET (out_down,
29526 ORR (SHIFT (ASHIFT, in_up, scratch1), out_down)));
29527 if (code == ASHIFTRT)
29529 rtx_code_label *done_label = gen_label_rtx ();
29530 emit_jump_insn (BRANCH (LT, done_label));
29531 emit_insn (SET (out_down, ORR (SHIFT (ASHIFTRT, in_up, scratch2),
29532 out_down)));
29533 emit_label (done_label);
29535 else
29536 emit_insn (SET (out_down, ORR (SHIFT (LSHIFTRT, in_up, scratch2),
29537 out_down)));
29539 else
29541 /* Emit code for Thumb2 mode.
29542 Thumb2 can't do shift and or in one insn. */
29543 emit_insn (SET (scratch1, SHIFT (ASHIFT, in_up, scratch1)));
29544 emit_insn (gen_iorsi3 (out_down, out_down, scratch1));
29546 if (code == ASHIFTRT)
29548 rtx_code_label *done_label = gen_label_rtx ();
29549 emit_jump_insn (BRANCH (LT, done_label));
29550 emit_insn (SET (scratch2, SHIFT (ASHIFTRT, in_up, scratch2)));
29551 emit_insn (SET (out_down, ORR (out_down, scratch2)));
29552 emit_label (done_label);
29554 else
29556 emit_insn (SET (scratch2, SHIFT (LSHIFTRT, in_up, scratch2)));
29557 emit_insn (gen_iorsi3 (out_down, out_down, scratch2));
29561 emit_insn (SET (out_up, SHIFT (code, in_up, amount)));
29564 #undef SUB_32
29565 #undef RSB_32
29566 #undef SUB_S_32
29567 #undef SET
29568 #undef SHIFT
29569 #undef LSHIFT
29570 #undef REV_LSHIFT
29571 #undef ORR
29572 #undef BRANCH
29575 /* Returns true if the pattern is a valid symbolic address, which is either a
29576 symbol_ref or (symbol_ref + addend).
29578 According to the ARM ELF ABI, the initial addend of REL-type relocations
29579 processing MOVW and MOVT instructions is formed by interpreting the 16-bit
29580 literal field of the instruction as a 16-bit signed value in the range
29581 -32768 <= A < 32768. */
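/* For example, (const (plus (symbol_ref "x") (const_int 32767))) is
   accepted below, whereas an addend of 32768 is rejected because it does
   not fit the signed 16-bit literal field. */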
29583 bool
29584 arm_valid_symbolic_address_p (rtx addr)
29586 rtx xop0, xop1 = NULL_RTX;
29587 rtx tmp = addr;
29589 if (GET_CODE (tmp) == SYMBOL_REF || GET_CODE (tmp) == LABEL_REF)
29590 return true;
29592 /* (const (plus: symbol_ref const_int)) */
29593 if (GET_CODE (addr) == CONST)
29594 tmp = XEXP (addr, 0);
29596 if (GET_CODE (tmp) == PLUS)
29598 xop0 = XEXP (tmp, 0);
29599 xop1 = XEXP (tmp, 1);
29601 if (GET_CODE (xop0) == SYMBOL_REF && CONST_INT_P (xop1))
29602 return IN_RANGE (INTVAL (xop1), -0x8000, 0x7fff);
29605 return false;
29608 /* Returns true if *COMPARISON is a valid comparison operation, and
29609 puts the operands into a form that is valid for it. */
29610 bool
29611 arm_validize_comparison (rtx *comparison, rtx * op1, rtx * op2)
29613 enum rtx_code code = GET_CODE (*comparison);
29614 int code_int;
29615 machine_mode mode = (GET_MODE (*op1) == VOIDmode)
29616 ? GET_MODE (*op2) : GET_MODE (*op1);
29618 gcc_assert (GET_MODE (*op1) != VOIDmode || GET_MODE (*op2) != VOIDmode);
29620 if (code == UNEQ || code == LTGT)
29621 return false;
29623 code_int = (int)code;
29624 arm_canonicalize_comparison (&code_int, op1, op2, 0);
29625 PUT_CODE (*comparison, (enum rtx_code)code_int);
29627 switch (mode)
29629 case E_SImode:
29630 if (!arm_add_operand (*op1, mode))
29631 *op1 = force_reg (mode, *op1);
29632 if (!arm_add_operand (*op2, mode))
29633 *op2 = force_reg (mode, *op2);
29634 return true;
29636 case E_DImode:
29637 if (!cmpdi_operand (*op1, mode))
29638 *op1 = force_reg (mode, *op1);
29639 if (!cmpdi_operand (*op2, mode))
29640 *op2 = force_reg (mode, *op2);
29641 return true;
29643 case E_HFmode:
29644 if (!TARGET_VFP_FP16INST)
29645 break;
29646 /* FP16 comparisons are done in SF mode. */
29647 mode = SFmode;
29648 *op1 = convert_to_mode (mode, *op1, 1);
29649 *op2 = convert_to_mode (mode, *op2, 1);
29650 /* Fall through. */
29651 case E_SFmode:
29652 case E_DFmode:
29653 if (!vfp_compare_operand (*op1, mode))
29654 *op1 = force_reg (mode, *op1);
29655 if (!vfp_compare_operand (*op2, mode))
29656 *op2 = force_reg (mode, *op2);
29657 return true;
29658 default:
29659 break;
29662 return false;
29666 /* Maximum number of instructions to set block of memory. */
29667 static int
29668 arm_block_set_max_insns (void)
29670 if (optimize_function_for_size_p (cfun))
29671 return 4;
29672 else
29673 return current_tune->max_insns_inline_memset;
29676 /* Return TRUE if it's profitable to set block of memory for
29677 non-vectorized case. VAL is the value to set the memory
29678 with. LENGTH is the number of bytes to set. ALIGN is the
29679 alignment of the destination memory in bytes. UNALIGNED_P
29680 is TRUE if we can only set the memory with instructions
29681 meeting alignment requirements. USE_STRD_P is TRUE if we
29682 can use strd to set the memory. */
29683 static bool
29684 arm_block_set_non_vect_profit_p (rtx val,
29685 unsigned HOST_WIDE_INT length,
29686 unsigned HOST_WIDE_INT align,
29687 bool unaligned_p, bool use_strd_p)
29689 int num = 0;
29690 /* For a leftover of 0-7 bytes, we can set the memory block using
29691 strb/strh/str with the minimum number of instructions. */
29692 const int leftover[8] = {0, 1, 1, 2, 1, 2, 2, 3};
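/* For example, 7 leftover bytes need leftover[7] == 3 stores
   (str + strh + strb), while 4 leftover bytes need just one str. */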
29694 if (unaligned_p)
29696 num = arm_const_inline_cost (SET, val);
29697 num += length / align + length % align;
29699 else if (use_strd_p)
29701 num = arm_const_double_inline_cost (val);
29702 num += (length >> 3) + leftover[length & 7];
29704 else
29706 num = arm_const_inline_cost (SET, val);
29707 num += (length >> 2) + leftover[length & 3];
29710 /* We may be able to combine last pair STRH/STRB into a single STR
29711 by shifting one byte back. */
29712 if (unaligned_access && length > 3 && (length & 3) == 3)
29713 num--;
29715 return (num <= arm_block_set_max_insns ());
29718 /* Return TRUE if it's profitable to set block of memory for
29719 vectorized case. LENGTH is the number of bytes to set.
29720 ALIGN is the alignment of destination memory in bytes.
29721 MODE is the vector mode used to set the memory. */
29722 static bool
29723 arm_block_set_vect_profit_p (unsigned HOST_WIDE_INT length,
29724 unsigned HOST_WIDE_INT align,
29725 machine_mode mode)
29727 int num;
29728 bool unaligned_p = ((align & 3) != 0);
29729 unsigned int nelt = GET_MODE_NUNITS (mode);
29731 /* Instruction loading constant value. */
29732 num = 1;
29733 /* Instructions storing the memory. */
29734 num += (length + nelt - 1) / nelt;
29735 /* Instructions adjusting the address expression. We only need to
29736 adjust the address expression if the destination is 4-byte aligned
29737 and the leftover bytes can only be stored by a misaligned store. */
29738 if (!unaligned_p && (length & 3) != 0)
29739 num++;
29741 /* Store the first 16 bytes using vst1:v16qi for the aligned case. */
29742 if (!unaligned_p && mode == V16QImode)
29743 num--;
29745 return (num <= arm_block_set_max_insns ());
29748 /* Set a block of memory using vectorization instructions for the
29749 unaligned case. We fill the first LENGTH bytes of the memory
29750 area starting from DSTBASE with byte constant VALUE. ALIGN is
29751 the alignment requirement of memory. Return TRUE if succeeded. */
29752 static bool
29753 arm_block_set_unaligned_vect (rtx dstbase,
29754 unsigned HOST_WIDE_INT length,
29755 unsigned HOST_WIDE_INT value,
29756 unsigned HOST_WIDE_INT align)
29758 unsigned int i, j, nelt_v16, nelt_v8, nelt_mode;
29759 rtx dst, mem;
29760 rtx val_elt, val_vec, reg;
29761 rtx rval[MAX_VECT_LEN];
29762 rtx (*gen_func) (rtx, rtx);
29763 machine_mode mode;
29764 unsigned HOST_WIDE_INT v = value;
29765 unsigned int offset = 0;
29766 gcc_assert ((align & 0x3) != 0);
29767 nelt_v8 = GET_MODE_NUNITS (V8QImode);
29768 nelt_v16 = GET_MODE_NUNITS (V16QImode);
29769 if (length >= nelt_v16)
29771 mode = V16QImode;
29772 gen_func = gen_movmisalignv16qi;
29774 else
29776 mode = V8QImode;
29777 gen_func = gen_movmisalignv8qi;
29779 nelt_mode = GET_MODE_NUNITS (mode);
29780 gcc_assert (length >= nelt_mode);
29781 /* Skip if it isn't profitable. */
29782 if (!arm_block_set_vect_profit_p (length, align, mode))
29783 return false;
29785 dst = copy_addr_to_reg (XEXP (dstbase, 0));
29786 mem = adjust_automodify_address (dstbase, mode, dst, offset);
29788 v = sext_hwi (v, BITS_PER_WORD);
29789 val_elt = GEN_INT (v);
29790 for (j = 0; j < nelt_mode; j++)
29791 rval[j] = val_elt;
29793 reg = gen_reg_rtx (mode);
29794 val_vec = gen_rtx_CONST_VECTOR (mode, gen_rtvec_v (nelt_mode, rval));
29795 /* Emit instruction loading the constant value. */
29796 emit_move_insn (reg, val_vec);
29798 /* Handle nelt_mode bytes in a vector. */
29799 for (i = 0; (i + nelt_mode <= length); i += nelt_mode)
29801 emit_insn ((*gen_func) (mem, reg));
29802 if (i + 2 * nelt_mode <= length)
29804 emit_insn (gen_add2_insn (dst, GEN_INT (nelt_mode)));
29805 offset += nelt_mode;
29806 mem = adjust_automodify_address (dstbase, mode, dst, offset);
29810 /* If at least nelt_v8 bytes are left over, we must be in
29811 V16QImode. */
29812 gcc_assert ((i + nelt_v8) > length || mode == V16QImode);
29814 /* Handle (8, 16) bytes leftover. */
29815 if (i + nelt_v8 < length)
29817 emit_insn (gen_add2_insn (dst, GEN_INT (length - i)));
29818 offset += length - i;
29819 mem = adjust_automodify_address (dstbase, mode, dst, offset);
29821 /* We are shifting bytes back, set the alignment accordingly. */
29822 if ((length & 1) != 0 && align >= 2)
29823 set_mem_align (mem, BITS_PER_UNIT);
29825 emit_insn (gen_movmisalignv16qi (mem, reg));
29827 /* Handle (0, 8] bytes leftover. */
29828 else if (i < length && i + nelt_v8 >= length)
29830 if (mode == V16QImode)
29831 reg = gen_lowpart (V8QImode, reg);
29833 emit_insn (gen_add2_insn (dst, GEN_INT ((length - i)
29834 + (nelt_mode - nelt_v8))));
29835 offset += (length - i) + (nelt_mode - nelt_v8);
29836 mem = adjust_automodify_address (dstbase, V8QImode, dst, offset);
29838 /* We are shifting bytes back, set the alignment accordingly. */
29839 if ((length & 1) != 0 && align >= 2)
29840 set_mem_align (mem, BITS_PER_UNIT);
29842 emit_insn (gen_movmisalignv8qi (mem, reg));
29845 return true;
29848 /* Set a block of memory using vectorization instructions for the
29849 aligned case. We fill the first LENGTH bytes of the memory area
29850 starting from DSTBASE with byte constant VALUE. ALIGN is the
29851 alignment requirement of memory. Return TRUE if succeeded. */
29852 static bool
29853 arm_block_set_aligned_vect (rtx dstbase,
29854 unsigned HOST_WIDE_INT length,
29855 unsigned HOST_WIDE_INT value,
29856 unsigned HOST_WIDE_INT align)
29858 unsigned int i, j, nelt_v8, nelt_v16, nelt_mode;
29859 rtx dst, addr, mem;
29860 rtx val_elt, val_vec, reg;
29861 rtx rval[MAX_VECT_LEN];
29862 machine_mode mode;
29863 unsigned HOST_WIDE_INT v = value;
29864 unsigned int offset = 0;
29866 gcc_assert ((align & 0x3) == 0);
29867 nelt_v8 = GET_MODE_NUNITS (V8QImode);
29868 nelt_v16 = GET_MODE_NUNITS (V16QImode);
29869 if (length >= nelt_v16 && unaligned_access && !BYTES_BIG_ENDIAN)
29870 mode = V16QImode;
29871 else
29872 mode = V8QImode;
29874 nelt_mode = GET_MODE_NUNITS (mode);
29875 gcc_assert (length >= nelt_mode);
29876 /* Skip if it isn't profitable. */
29877 if (!arm_block_set_vect_profit_p (length, align, mode))
29878 return false;
29880 dst = copy_addr_to_reg (XEXP (dstbase, 0));
29882 v = sext_hwi (v, BITS_PER_WORD);
29883 val_elt = GEN_INT (v);
29884 for (j = 0; j < nelt_mode; j++)
29885 rval[j] = val_elt;
29887 reg = gen_reg_rtx (mode);
29888 val_vec = gen_rtx_CONST_VECTOR (mode, gen_rtvec_v (nelt_mode, rval));
29889 /* Emit instruction loading the constant value. */
29890 emit_move_insn (reg, val_vec);
29892 i = 0;
29893 /* Handle first 16 bytes specially using vst1:v16qi instruction. */
29894 if (mode == V16QImode)
29896 mem = adjust_automodify_address (dstbase, mode, dst, offset);
29897 emit_insn (gen_movmisalignv16qi (mem, reg));
29898 i += nelt_mode;
29899 /* Handle (8, 16) bytes leftover using vst1:v16qi again. */
29900 if (i + nelt_v8 < length && i + nelt_v16 > length)
29902 emit_insn (gen_add2_insn (dst, GEN_INT (length - nelt_mode)));
29903 offset += length - nelt_mode;
29904 mem = adjust_automodify_address (dstbase, mode, dst, offset);
29905 /* We are shifting bytes back, set the alignment accordingly. */
29906 if ((length & 0x3) == 0)
29907 set_mem_align (mem, BITS_PER_UNIT * 4);
29908 else if ((length & 0x1) == 0)
29909 set_mem_align (mem, BITS_PER_UNIT * 2);
29910 else
29911 set_mem_align (mem, BITS_PER_UNIT);
29913 emit_insn (gen_movmisalignv16qi (mem, reg));
29914 return true;
29916 /* Fall through for bytes leftover. */
29917 mode = V8QImode;
29918 nelt_mode = GET_MODE_NUNITS (mode);
29919 reg = gen_lowpart (V8QImode, reg);
29922 /* Handle 8 bytes in a vector. */
29923 for (; (i + nelt_mode <= length); i += nelt_mode)
29925 addr = plus_constant (Pmode, dst, i);
29926 mem = adjust_automodify_address (dstbase, mode, addr, offset + i);
29927 emit_move_insn (mem, reg);
29930 /* Handle single word leftover by shifting 4 bytes back. We can
29931 use aligned access for this case. */
29932 if (i + UNITS_PER_WORD == length)
29934 addr = plus_constant (Pmode, dst, i - UNITS_PER_WORD);
29935 offset += i - UNITS_PER_WORD;
29936 mem = adjust_automodify_address (dstbase, mode, addr, offset);
29937 /* We are shifting 4 bytes back, set the alignment accordingly. */
29938 if (align > UNITS_PER_WORD)
29939 set_mem_align (mem, BITS_PER_UNIT * UNITS_PER_WORD);
29941 emit_move_insn (mem, reg);
29943 /* Handle (0, 4), (4, 8) bytes leftover by shifting bytes back.
29944 We have to use unaligned access for this case. */
29945 else if (i < length)
29947 emit_insn (gen_add2_insn (dst, GEN_INT (length - nelt_mode)));
29948 offset += length - nelt_mode;
29949 mem = adjust_automodify_address (dstbase, mode, dst, offset);
29950 /* We are shifting bytes back, set the alignment accordingly. */
29951 if ((length & 1) == 0)
29952 set_mem_align (mem, BITS_PER_UNIT * 2);
29953 else
29954 set_mem_align (mem, BITS_PER_UNIT);
29956 emit_insn (gen_movmisalignv8qi (mem, reg));
29959 return true;
29962 /* Set a block of memory using plain strh/strb instructions, using only
29963 the instructions permitted by ALIGN on the processor. We fill the
29964 first LENGTH bytes of the memory area starting from DSTBASE
29965 with byte constant VALUE. ALIGN is the alignment requirement
29966 of the memory. */
29967 static bool
29968 arm_block_set_unaligned_non_vect (rtx dstbase,
29969 unsigned HOST_WIDE_INT length,
29970 unsigned HOST_WIDE_INT value,
29971 unsigned HOST_WIDE_INT align)
29973 unsigned int i;
29974 rtx dst, addr, mem;
29975 rtx val_exp, val_reg, reg;
29976 machine_mode mode;
29977 HOST_WIDE_INT v = value;
29979 gcc_assert (align == 1 || align == 2);
29981 if (align == 2)
29982 v |= (value << BITS_PER_UNIT);
29984 v = sext_hwi (v, BITS_PER_WORD);
29985 val_exp = GEN_INT (v);
29986 /* Skip if it isn't profitable. */
29987 if (!arm_block_set_non_vect_profit_p (val_exp, length,
29988 align, true, false))
29989 return false;
29991 dst = copy_addr_to_reg (XEXP (dstbase, 0));
29992 mode = (align == 2 ? HImode : QImode);
29993 val_reg = force_reg (SImode, val_exp);
29994 reg = gen_lowpart (mode, val_reg);
29996 for (i = 0; (i + GET_MODE_SIZE (mode) <= length); i += GET_MODE_SIZE (mode))
29998 addr = plus_constant (Pmode, dst, i);
29999 mem = adjust_automodify_address (dstbase, mode, addr, i);
30000 emit_move_insn (mem, reg);
30003 /* Handle single byte leftover. */
30004 if (i + 1 == length)
30006 reg = gen_lowpart (QImode, val_reg);
30007 addr = plus_constant (Pmode, dst, i);
30008 mem = adjust_automodify_address (dstbase, QImode, addr, i);
30009 emit_move_insn (mem, reg);
30010 i++;
30013 gcc_assert (i == length);
30014 return true;
30017 /* Set a block of memory using plain strd/str/strh/strb instructions,
30018 to permit unaligned copies on processors which support unaligned
30019 semantics for those instructions. We fill the first LENGTH bytes
30020 of the memory area starting from DSTBASE with byte constant VALUE.
30021 ALIGN is the alignment requirement of memory. */
30022 static bool
30023 arm_block_set_aligned_non_vect (rtx dstbase,
30024 unsigned HOST_WIDE_INT length,
30025 unsigned HOST_WIDE_INT value,
30026 unsigned HOST_WIDE_INT align)
30028 unsigned int i;
30029 rtx dst, addr, mem;
30030 rtx val_exp, val_reg, reg;
30031 unsigned HOST_WIDE_INT v;
30032 bool use_strd_p;
30034 use_strd_p = (length >= 2 * UNITS_PER_WORD && (align & 3) == 0
30035 && TARGET_LDRD && current_tune->prefer_ldrd_strd);
30037 v = (value | (value << 8) | (value << 16) | (value << 24));
30038 if (length < UNITS_PER_WORD)
30039 v &= (0xFFFFFFFF >> (UNITS_PER_WORD - length) * BITS_PER_UNIT);
30041 if (use_strd_p)
30042 v |= (v << BITS_PER_WORD);
30043 else
30044 v = sext_hwi (v, BITS_PER_WORD);
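/* For example, VALUE == 0xab gives 0xabababab for word-sized stores, and
   0xabababababababab when the strd path is used. */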
30046 val_exp = GEN_INT (v);
30047 /* Skip if it isn't profitable. */
30048 if (!arm_block_set_non_vect_profit_p (val_exp, length,
30049 align, false, use_strd_p))
30051 if (!use_strd_p)
30052 return false;
30054 /* Try without strd. */
30055 v = (v >> BITS_PER_WORD);
30056 v = sext_hwi (v, BITS_PER_WORD);
30057 val_exp = GEN_INT (v);
30058 use_strd_p = false;
30059 if (!arm_block_set_non_vect_profit_p (val_exp, length,
30060 align, false, use_strd_p))
30061 return false;
30064 i = 0;
30065 dst = copy_addr_to_reg (XEXP (dstbase, 0));
30066 /* Handle double words using strd if possible. */
30067 if (use_strd_p)
30069 val_reg = force_reg (DImode, val_exp);
30070 reg = val_reg;
30071 for (; (i + 8 <= length); i += 8)
30073 addr = plus_constant (Pmode, dst, i);
30074 mem = adjust_automodify_address (dstbase, DImode, addr, i);
30075 emit_move_insn (mem, reg);
30078 else
30079 val_reg = force_reg (SImode, val_exp);
30081 /* Handle words. */
30082 reg = (use_strd_p ? gen_lowpart (SImode, val_reg) : val_reg);
30083 for (; (i + 4 <= length); i += 4)
30085 addr = plus_constant (Pmode, dst, i);
30086 mem = adjust_automodify_address (dstbase, SImode, addr, i);
30087 if ((align & 3) == 0)
30088 emit_move_insn (mem, reg);
30089 else
30090 emit_insn (gen_unaligned_storesi (mem, reg));
30093 /* Merge last pair of STRH and STRB into a STR if possible. */
30094 if (unaligned_access && i > 0 && (i + 3) == length)
30096 addr = plus_constant (Pmode, dst, i - 1);
30097 mem = adjust_automodify_address (dstbase, SImode, addr, i - 1);
30098 /* We are shifting one byte back, set the alignment accordingly. */
30099 if ((align & 1) == 0)
30100 set_mem_align (mem, BITS_PER_UNIT);
30102 /* Most likely this is an unaligned access, and we can't tell at
30103 compilation time. */
30104 emit_insn (gen_unaligned_storesi (mem, reg));
30105 return true;
30108 /* Handle half word leftover. */
30109 if (i + 2 <= length)
30111 reg = gen_lowpart (HImode, val_reg);
30112 addr = plus_constant (Pmode, dst, i);
30113 mem = adjust_automodify_address (dstbase, HImode, addr, i);
30114 if ((align & 1) == 0)
30115 emit_move_insn (mem, reg);
30116 else
30117 emit_insn (gen_unaligned_storehi (mem, reg));
30119 i += 2;
30122 /* Handle single byte leftover. */
30123 if (i + 1 == length)
30125 reg = gen_lowpart (QImode, val_reg);
30126 addr = plus_constant (Pmode, dst, i);
30127 mem = adjust_automodify_address (dstbase, QImode, addr, i);
30128 emit_move_insn (mem, reg);
30131 return true;
30134 /* Set a block of memory using vectorization instructions for both
30135 aligned and unaligned cases. We fill the first LENGTH bytes of
30136 the memory area starting from DSTBASE with byte constant VALUE.
30137 ALIGN is the alignment requirement of memory. */
30138 static bool
30139 arm_block_set_vect (rtx dstbase,
30140 unsigned HOST_WIDE_INT length,
30141 unsigned HOST_WIDE_INT value,
30142 unsigned HOST_WIDE_INT align)
30144 /* Check whether we need to use unaligned store instruction. */
30145 if (((align & 3) != 0 || (length & 3) != 0)
30146 /* Check whether unaligned store instruction is available. */
30147 && (!unaligned_access || BYTES_BIG_ENDIAN))
30148 return false;
30150 if ((align & 3) == 0)
30151 return arm_block_set_aligned_vect (dstbase, length, value, align);
30152 else
30153 return arm_block_set_unaligned_vect (dstbase, length, value, align);
30156 /* Expand a string store operation. First we try to do it using
30157 vectorization instructions, then fall back to ARM unaligned access and
30158 double-word stores if profitable. OPERANDS[0] is the destination,
30159 OPERANDS[1] is the number of bytes, OPERANDS[2] is the value to
30160 initialize the memory with, OPERANDS[3] is the known alignment of the
30161 destination. */
30162 bool
30163 arm_gen_setmem (rtx *operands)
30165 rtx dstbase = operands[0];
30166 unsigned HOST_WIDE_INT length;
30167 unsigned HOST_WIDE_INT value;
30168 unsigned HOST_WIDE_INT align;
30170 if (!CONST_INT_P (operands[2]) || !CONST_INT_P (operands[1]))
30171 return false;
30173 length = UINTVAL (operands[1]);
30174 if (length > 64)
30175 return false;
30177 value = (UINTVAL (operands[2]) & 0xFF);
30178 align = UINTVAL (operands[3]);
30179 if (TARGET_NEON && length >= 8
30180 && current_tune->string_ops_prefer_neon
30181 && arm_block_set_vect (dstbase, length, value, align))
30182 return true;
30184 if (!unaligned_access && (align & 3) != 0)
30185 return arm_block_set_unaligned_non_vect (dstbase, length, value, align);
30187 return arm_block_set_aligned_non_vect (dstbase, length, value, align);
30191 static bool
30192 arm_macro_fusion_p (void)
30194 return current_tune->fusible_ops != tune_params::FUSE_NOTHING;
30197 /* Return true if the two back-to-back sets PREV_SET, CURR_SET are suitable
30198 for MOVW / MOVT macro fusion. */
30200 static bool
30201 arm_sets_movw_movt_fusible_p (rtx prev_set, rtx curr_set)
30203 /* We are trying to fuse
30204 movw imm / movt imm
30205 instructions as a group that gets scheduled together. */
30207 rtx set_dest = SET_DEST (curr_set);
30209 if (GET_MODE (set_dest) != SImode)
30210 return false;
30212 /* We are trying to match:
30213 prev (movw) == (set (reg r0) (const_int imm16))
30214 curr (movt) == (set (zero_extract (reg r0)
30215 (const_int 16)
30216 (const_int 16))
30217 (const_int imm16_1))
30219 prev (movw) == (set (reg r1)
30220 (high (symbol_ref ("SYM"))))
30221 curr (movt) == (set (reg r0)
30222 (lo_sum (reg r1)
30223 (symbol_ref ("SYM")))) */
30225 if (GET_CODE (set_dest) == ZERO_EXTRACT)
30227 if (CONST_INT_P (SET_SRC (curr_set))
30228 && CONST_INT_P (SET_SRC (prev_set))
30229 && REG_P (XEXP (set_dest, 0))
30230 && REG_P (SET_DEST (prev_set))
30231 && REGNO (XEXP (set_dest, 0)) == REGNO (SET_DEST (prev_set)))
30232 return true;
30235 else if (GET_CODE (SET_SRC (curr_set)) == LO_SUM
30236 && REG_P (SET_DEST (curr_set))
30237 && REG_P (SET_DEST (prev_set))
30238 && GET_CODE (SET_SRC (prev_set)) == HIGH
30239 && REGNO (SET_DEST (curr_set)) == REGNO (SET_DEST (prev_set)))
30240 return true;
30242 return false;
30245 static bool
30246 aarch_macro_fusion_pair_p (rtx_insn* prev, rtx_insn* curr)
30248 rtx prev_set = single_set (prev);
30249 rtx curr_set = single_set (curr);
30251 if (!prev_set
30252 || !curr_set)
30253 return false;
30255 if (any_condjump_p (curr))
30256 return false;
30258 if (!arm_macro_fusion_p ())
30259 return false;
30261 if (current_tune->fusible_ops & tune_params::FUSE_AES_AESMC
30262 && aarch_crypto_can_dual_issue (prev, curr))
30263 return true;
30265 if (current_tune->fusible_ops & tune_params::FUSE_MOVW_MOVT
30266 && arm_sets_movw_movt_fusible_p (prev_set, curr_set))
30267 return true;
30269 return false;
30272 /* Return true iff the instruction fusion described by OP is enabled. */
30273 bool
30274 arm_fusion_enabled_p (tune_params::fuse_ops op)
30276 return current_tune->fusible_ops & op;
30279 /* Implement TARGET_SCHED_CAN_SPECULATE_INSN. Return true if INSN can be
30280 scheduled for speculative execution. Reject the long-running division
30281 and square-root instructions. */
30283 static bool
30284 arm_sched_can_speculate_insn (rtx_insn *insn)
30286 switch (get_attr_type (insn))
30288 case TYPE_SDIV:
30289 case TYPE_UDIV:
30290 case TYPE_FDIVS:
30291 case TYPE_FDIVD:
30292 case TYPE_FSQRTS:
30293 case TYPE_FSQRTD:
30294 case TYPE_NEON_FP_SQRT_S:
30295 case TYPE_NEON_FP_SQRT_D:
30296 case TYPE_NEON_FP_SQRT_S_Q:
30297 case TYPE_NEON_FP_SQRT_D_Q:
30298 case TYPE_NEON_FP_DIV_S:
30299 case TYPE_NEON_FP_DIV_D:
30300 case TYPE_NEON_FP_DIV_S_Q:
30301 case TYPE_NEON_FP_DIV_D_Q:
30302 return false;
30303 default:
30304 return true;
30308 /* Implement the TARGET_ASAN_SHADOW_OFFSET hook. */
30310 static unsigned HOST_WIDE_INT
30311 arm_asan_shadow_offset (void)
30313 return HOST_WIDE_INT_1U << 29;
30317 /* This is a temporary fix for PR60655. Ideally we need
30318 to handle most of these cases in the generic part but
30319 currently we reject minus (..) (sym_ref). We try to
30320 ameliorate the case with minus (sym_ref1) (sym_ref2)
30321 where they are in the same section. */
30323 static bool
30324 arm_const_not_ok_for_debug_p (rtx p)
30326 tree decl_op0 = NULL;
30327 tree decl_op1 = NULL;
30329 if (GET_CODE (p) == MINUS)
30331 if (GET_CODE (XEXP (p, 1)) == SYMBOL_REF)
30333 decl_op1 = SYMBOL_REF_DECL (XEXP (p, 1));
30334 if (decl_op1
30335 && GET_CODE (XEXP (p, 0)) == SYMBOL_REF
30336 && (decl_op0 = SYMBOL_REF_DECL (XEXP (p, 0))))
30338 if ((VAR_P (decl_op1)
30339 || TREE_CODE (decl_op1) == CONST_DECL)
30340 && (VAR_P (decl_op0)
30341 || TREE_CODE (decl_op0) == CONST_DECL))
30342 return (get_variable_section (decl_op1, false)
30343 != get_variable_section (decl_op0, false));
30345 if (TREE_CODE (decl_op1) == LABEL_DECL
30346 && TREE_CODE (decl_op0) == LABEL_DECL)
30347 return (DECL_CONTEXT (decl_op1)
30348 != DECL_CONTEXT (decl_op0));
30351 return true;
30355 return false;
30358 /* Return TRUE if X is a reference to a value in a constant pool. */
30359 extern bool
30360 arm_is_constant_pool_ref (rtx x)
30362 return (MEM_P (x)
30363 && GET_CODE (XEXP (x, 0)) == SYMBOL_REF
30364 && CONSTANT_POOL_ADDRESS_P (XEXP (x, 0)));
30367 /* Remember the last target of arm_set_current_function. */
30368 static GTY(()) tree arm_previous_fndecl;
30370 /* Restore or save the TREE_TARGET_GLOBALS from or to NEW_TREE. */
30372 void
30373 save_restore_target_globals (tree new_tree)
30375 /* If we have a previous state, use it. */
30376 if (TREE_TARGET_GLOBALS (new_tree))
30377 restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
30378 else if (new_tree == target_option_default_node)
30379 restore_target_globals (&default_target_globals);
30380 else
30382 /* Call target_reinit and save the state for TARGET_GLOBALS. */
30383 TREE_TARGET_GLOBALS (new_tree) = save_target_globals_default_opts ();
30386 arm_option_params_internal ();
30389 /* Invalidate arm_previous_fndecl. */
30391 void
30392 arm_reset_previous_fndecl (void)
30394 arm_previous_fndecl = NULL_TREE;
30397 /* Establish appropriate back-end context for processing the function
30398 FNDECL. The argument might be NULL to indicate processing at top
30399 level, outside of any function scope. */
30401 static void
30402 arm_set_current_function (tree fndecl)
30404 if (!fndecl || fndecl == arm_previous_fndecl)
30405 return;
30407 tree old_tree = (arm_previous_fndecl
30408 ? DECL_FUNCTION_SPECIFIC_TARGET (arm_previous_fndecl)
30409 : NULL_TREE);
30411 tree new_tree = DECL_FUNCTION_SPECIFIC_TARGET (fndecl);
30413 /* If current function has no attributes but previous one did,
30414 use the default node. */
30415 if (! new_tree && old_tree)
30416 new_tree = target_option_default_node;
30418 /* If nothing to do return. #pragma GCC reset or #pragma GCC pop to
30419 the default have been handled by save_restore_target_globals from
30420 arm_pragma_target_parse. */
30421 if (old_tree == new_tree)
30422 return;
30424 arm_previous_fndecl = fndecl;
30426 /* First set the target options. */
30427 cl_target_option_restore (&global_options, TREE_TARGET_OPTION (new_tree));
30429 save_restore_target_globals (new_tree);
30432 /* Implement TARGET_OPTION_PRINT. */
30434 static void
30435 arm_option_print (FILE *file, int indent, struct cl_target_option *ptr)
30437 int flags = ptr->x_target_flags;
30438 const char *fpu_name;
30440 fpu_name = (ptr->x_arm_fpu_index == TARGET_FPU_auto
30441 ? "auto" : all_fpus[ptr->x_arm_fpu_index].name);
30443 fprintf (file, "%*sselected isa %s\n", indent, "",
30444 TARGET_THUMB2_P (flags) ? "thumb2" :
30445 TARGET_THUMB_P (flags) ? "thumb1" :
30446 "arm");
30448 if (ptr->x_arm_arch_string)
30449 fprintf (file, "%*sselected architecture %s\n", indent, "",
30450 ptr->x_arm_arch_string);
30452 if (ptr->x_arm_cpu_string)
30453 fprintf (file, "%*sselected CPU %s\n", indent, "",
30454 ptr->x_arm_cpu_string);
30456 if (ptr->x_arm_tune_string)
30457 fprintf (file, "%*sselected tune %s\n", indent, "",
30458 ptr->x_arm_tune_string);
30460 fprintf (file, "%*sselected fpu %s\n", indent, "", fpu_name);
30463 /* Hook to determine if one function can safely inline another. */
30465 static bool
30466 arm_can_inline_p (tree caller, tree callee)
30468 tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
30469 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);
30470 bool can_inline = true;
30472 struct cl_target_option *caller_opts
30473 = TREE_TARGET_OPTION (caller_tree ? caller_tree
30474 : target_option_default_node);
30476 struct cl_target_option *callee_opts
30477 = TREE_TARGET_OPTION (callee_tree ? callee_tree
30478 : target_option_default_node);
30480 if (callee_opts == caller_opts)
30481 return true;
30483 /* Callee's ISA features should be a subset of the caller's. */
30484 struct arm_build_target caller_target;
30485 struct arm_build_target callee_target;
30486 caller_target.isa = sbitmap_alloc (isa_num_bits);
30487 callee_target.isa = sbitmap_alloc (isa_num_bits);
30489 arm_configure_build_target (&caller_target, caller_opts, &global_options_set,
30490 false);
30491 arm_configure_build_target (&callee_target, callee_opts, &global_options_set,
30492 false);
30493 if (!bitmap_subset_p (callee_target.isa, caller_target.isa))
30494 can_inline = false;
30496 sbitmap_free (caller_target.isa);
30497 sbitmap_free (callee_target.isa);
30499 /* It is OK to inline between different modes.
30500 A function with mode-specific instructions, e.g. using asm,
30501 must be explicitly protected with noinline. */
30502 return can_inline;
30505 /* Hook to fix a function's alignment when it is affected by the target attribute. */
30507 static void
30508 arm_relayout_function (tree fndecl)
30510 if (DECL_USER_ALIGN (fndecl))
30511 return;
30513 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (fndecl);
30515 if (!callee_tree)
30516 callee_tree = target_option_default_node;
30518 struct cl_target_option *opts = TREE_TARGET_OPTION (callee_tree);
30519 SET_DECL_ALIGN
30520 (fndecl,
30521 FUNCTION_ALIGNMENT (FUNCTION_BOUNDARY_P (opts->x_target_flags)));
30524 /* Inner function to process attribute((target(...))): take an argument and
30525 set the current options from that argument. If we have a list, recursively
30526 go over the list. */
30528 static bool
30529 arm_valid_target_attribute_rec (tree args, struct gcc_options *opts)
30531 if (TREE_CODE (args) == TREE_LIST)
30533 bool ret = true;
30535 for (; args; args = TREE_CHAIN (args))
30536 if (TREE_VALUE (args)
30537 && !arm_valid_target_attribute_rec (TREE_VALUE (args), opts))
30538 ret = false;
30539 return ret;
30542 else if (TREE_CODE (args) != STRING_CST)
30544 error ("attribute %<target%> argument not a string");
30545 return false;
30548 char *argstr = ASTRDUP (TREE_STRING_POINTER (args));
30549 char *q;
30551 while ((q = strtok (argstr, ",")) != NULL)
30553 while (ISSPACE (*q)) ++q;
30555 argstr = NULL;
30556 if (!strncmp (q, "thumb", 5))
30557 opts->x_target_flags |= MASK_THUMB;
30559 else if (!strncmp (q, "arm", 3))
30560 opts->x_target_flags &= ~MASK_THUMB;
30562 else if (!strncmp (q, "fpu=", 4))
30564 int fpu_index;
30565 if (! opt_enum_arg_to_value (OPT_mfpu_, q+4,
30566 &fpu_index, CL_TARGET))
30568 error ("invalid fpu for attribute(target(\"%s\"))", q);
30569 return false;
30571 if (fpu_index == TARGET_FPU_auto)
30573 /* This doesn't really make sense until we support
30574 general dynamic selection of the architecture and all
30575 sub-features. */
30576 sorry ("auto fpu selection not currently permitted here");
30577 return false;
30579 opts->x_arm_fpu_index = (enum fpu_type) fpu_index;
30581 else
30583 error ("attribute(target(\"%s\")) is unknown", q);
30584 return false;
30588 return true;
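/* As a hedged illustration (not part of this file), the strings accepted
   above allow a declaration along the lines of:

     int fast_path (int x) __attribute__ ((target ("thumb,fpu=vfpv4")));

   The comma-separated list is split by the strtok loop, and each token
   selects Thumb state, ARM state, or a specific FPU.  "vfpv4" is only an
   example; any value accepted by -mfpu= is looked up via
   opt_enum_arg_to_value.  */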
30591 /* Return a TARGET_OPTION_NODE tree of the target options listed or NULL. */
30593 tree
30594 arm_valid_target_attribute_tree (tree args, struct gcc_options *opts,
30595 struct gcc_options *opts_set)
30597 struct cl_target_option cl_opts;
30599 if (!arm_valid_target_attribute_rec (args, opts))
30600 return NULL_TREE;
30602 cl_target_option_save (&cl_opts, opts);
30603 arm_configure_build_target (&arm_active_target, &cl_opts, opts_set, false);
30604 arm_option_check_internal (opts);
30605 /* Do any overrides, such as global options arch=xxx. */
30606 arm_option_override_internal (opts, opts_set);
30608 return build_target_option_node (opts);
30611 static void
30612 add_attribute (const char * mode, tree *attributes)
30614 size_t len = strlen (mode);
30615 tree value = build_string (len, mode);
30617 TREE_TYPE (value) = build_array_type (char_type_node,
30618 build_index_type (size_int (len)));
30620 *attributes = tree_cons (get_identifier ("target"),
30621 build_tree_list (NULL_TREE, value),
30622 *attributes);
30625 /* For testing. Alternately insert thumb or arm mode attributes on functions. */
30627 static void
30628 arm_insert_attributes (tree fndecl, tree * attributes)
30630 const char *mode;
30632 if (! TARGET_FLIP_THUMB)
30633 return;
30635 if (TREE_CODE (fndecl) != FUNCTION_DECL || DECL_EXTERNAL(fndecl)
30636 || DECL_BUILT_IN (fndecl) || DECL_ARTIFICIAL (fndecl))
30637 return;
30639 /* Nested definitions must inherit mode. */
30640 if (current_function_decl)
30642 mode = TARGET_THUMB ? "thumb" : "arm";
30643 add_attribute (mode, attributes);
30644 return;
30647 /* If there is already a setting don't change it. */
30648 if (lookup_attribute ("target", *attributes) != NULL)
30649 return;
30651 mode = thumb_flipper ? "thumb" : "arm";
30652 add_attribute (mode, attributes);
30654 thumb_flipper = !thumb_flipper;
30657 /* Hook to validate attribute((target("string"))). */
30659 static bool
30660 arm_valid_target_attribute_p (tree fndecl, tree ARG_UNUSED (name),
30661 tree args, int ARG_UNUSED (flags))
30663 bool ret = true;
30664 struct gcc_options func_options;
30665 tree cur_tree, new_optimize;
30666 gcc_assert ((fndecl != NULL_TREE) && (args != NULL_TREE));
30668 /* Get the optimization options of the current function. */
30669 tree func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);
30671 /* If the function changed the optimization levels as well as setting target
30672 options, start with the optimizations specified. */
30673 if (!func_optimize)
30674 func_optimize = optimization_default_node;
30676 /* Init func_options. */
30677 memset (&func_options, 0, sizeof (func_options));
30678 init_options_struct (&func_options, NULL);
30679 lang_hooks.init_options_struct (&func_options);
30681 /* Initialize func_options to the defaults. */
30682 cl_optimization_restore (&func_options,
30683 TREE_OPTIMIZATION (func_optimize));
30685 cl_target_option_restore (&func_options,
30686 TREE_TARGET_OPTION (target_option_default_node));
30688 /* Set func_options flags with new target mode. */
30689 cur_tree = arm_valid_target_attribute_tree (args, &func_options,
30690 &global_options_set);
30692 if (cur_tree == NULL_TREE)
30693 ret = false;
30695 new_optimize = build_optimization_node (&func_options);
30697 DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = cur_tree;
30699 DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize;
30701 finalize_options_struct (&func_options);
30703 return ret;
30706 /* Match an ISA feature bitmap to a named FPU. We always use the
30707 first entry that exactly matches the feature set, so that we
30708 effectively canonicalize the FPU name for the assembler. */
30709 static const char*
30710 arm_identify_fpu_from_isa (sbitmap isa)
30712 auto_sbitmap fpubits (isa_num_bits);
30713 auto_sbitmap cand_fpubits (isa_num_bits);
30715 bitmap_and (fpubits, isa, isa_all_fpubits);
30717 /* If there are no ISA feature bits relating to the FPU, we must be
30718 doing soft-float. */
30719 if (bitmap_empty_p (fpubits))
30720 return "softvfp";
30722 for (unsigned int i = 0; i < TARGET_FPU_auto; i++)
30724 arm_initialize_isa (cand_fpubits, all_fpus[i].isa_bits);
30725 if (bitmap_equal_p (fpubits, cand_fpubits))
30726 return all_fpus[i].name;
30728 /* We must find an entry, or things have gone wrong. */
30729 gcc_unreachable ();
30732 void
30733 arm_declare_function_name (FILE *stream, const char *name, tree decl)
30736 fprintf (stream, "\t.syntax unified\n");
30738 if (TARGET_THUMB)
30740 if (is_called_in_ARM_mode (decl)
30741 || (TARGET_THUMB1 && !TARGET_THUMB1_ONLY
30742 && cfun->is_thunk))
30743 fprintf (stream, "\t.code 32\n");
30744 else if (TARGET_THUMB1)
30745 fprintf (stream, "\t.code\t16\n\t.thumb_func\n");
30746 else
30747 fprintf (stream, "\t.thumb\n\t.thumb_func\n");
30749 else
30750 fprintf (stream, "\t.arm\n");
30752 asm_fprintf (asm_out_file, "\t.fpu %s\n",
30753 (TARGET_SOFT_FLOAT
30754 ? "softvfp"
30755 : arm_identify_fpu_from_isa (arm_active_target.isa)));
30757 if (TARGET_POKE_FUNCTION_NAME)
30758 arm_poke_function_name (stream, (const char *) name);
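/* For example, for a Thumb-2 function compiled without an FPU the
   directives emitted above are typically:

	.syntax unified
	.thumb
	.thumb_func
	.fpu softvfp

   (an illustrative sketch of the output, not a literal dump).  */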
30761 /* If MEM is in the form of [base+offset], extract the two parts
30762 of the address and store them in BASE and OFFSET; otherwise return false
30763 after clearing BASE and OFFSET. */
30765 static bool
30766 extract_base_offset_in_addr (rtx mem, rtx *base, rtx *offset)
30768 rtx addr;
30770 gcc_assert (MEM_P (mem));
30772 addr = XEXP (mem, 0);
30774 /* Strip off const from addresses like (const (addr)). */
30775 if (GET_CODE (addr) == CONST)
30776 addr = XEXP (addr, 0);
30778 if (GET_CODE (addr) == REG)
30780 *base = addr;
30781 *offset = const0_rtx;
30782 return true;
30785 if (GET_CODE (addr) == PLUS
30786 && GET_CODE (XEXP (addr, 0)) == REG
30787 && CONST_INT_P (XEXP (addr, 1)))
30789 *base = XEXP (addr, 0);
30790 *offset = XEXP (addr, 1);
30791 return true;
30794 *base = NULL_RTX;
30795 *offset = NULL_RTX;
30797 return false;
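/* For example, (mem (reg r1)) yields BASE = r1, OFFSET = 0, and
   (mem (plus (reg r1) (const_int 8))) yields BASE = r1, OFFSET = 8;
   anything else (e.g. a reg+reg address) clears both and returns false.  */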
30800 /* If INSN is a load or store whose address is in the form [base+offset],
30801 extract the two parts and store them in BASE and OFFSET. IS_LOAD is set
30802 to TRUE if it is a load. Return TRUE if INSN is such an instruction,
30803 otherwise return FALSE. */
30805 static bool
30806 fusion_load_store (rtx_insn *insn, rtx *base, rtx *offset, bool *is_load)
30808 rtx x, dest, src;
30810 gcc_assert (INSN_P (insn));
30811 x = PATTERN (insn);
30812 if (GET_CODE (x) != SET)
30813 return false;
30815 src = SET_SRC (x);
30816 dest = SET_DEST (x);
30817 if (GET_CODE (src) == REG && GET_CODE (dest) == MEM)
30819 *is_load = false;
30820 extract_base_offset_in_addr (dest, base, offset);
30822 else if (GET_CODE (src) == MEM && GET_CODE (dest) == REG)
30824 *is_load = true;
30825 extract_base_offset_in_addr (src, base, offset);
30827 else
30828 return false;
30830 return (*base != NULL_RTX && *offset != NULL_RTX);
30833 /* Implement the TARGET_SCHED_FUSION_PRIORITY hook.
30835 Currently we only support fusing ldr and str instructions, so FUSION_PRI
30836 and PRI are only calculated for these instructions. For other instructions,
30837 FUSION_PRI and PRI are simply set to MAX_PRI. In the future, other kinds of
30838 instruction fusion can be supported by returning different priorities.
30840 It's important that irrelevant instructions get the largest FUSION_PRI. */
30842 static void
30843 arm_sched_fusion_priority (rtx_insn *insn, int max_pri,
30844 int *fusion_pri, int *pri)
30846 int tmp, off_val;
30847 bool is_load;
30848 rtx base, offset;
30850 gcc_assert (INSN_P (insn));
30852 tmp = max_pri - 1;
30853 if (!fusion_load_store (insn, &base, &offset, &is_load))
30855 *pri = tmp;
30856 *fusion_pri = tmp;
30857 return;
30860 /* Load goes first. */
30861 if (is_load)
30862 *fusion_pri = tmp - 1;
30863 else
30864 *fusion_pri = tmp - 2;
30866 tmp /= 2;
30868 /* INSN with smaller base register goes first. */
30869 tmp -= ((REGNO (base) & 0xff) << 20);
30871 /* INSN with smaller offset goes first. */
30872 off_val = (int)(INTVAL (offset));
30873 if (off_val >= 0)
30874 tmp -= (off_val & 0xfffff);
30875 else
30876 tmp += ((- off_val) & 0xfffff);
30878 *pri = tmp;
30879 return;
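/* Illustrative ordering implied by the code above (an informal sketch):
   a load such as ldr r0, [r1, #4] receives a higher FUSION_PRI than a
   store, and among accesses to the same base register the one with the
   smaller offset receives the larger PRI value, so it goes first.  */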
30883 /* Construct and return a PARALLEL RTX vector with elements numbering the
30884 lanes of either the high (HIGH == TRUE) or low (HIGH == FALSE) half of
30885 the vector - from the perspective of the architecture. This does not
30886 line up with GCC's perspective on lane numbers, so we end up with
30887 different masks depending on our target endian-ness. The diagram
30888 below may help. We must draw the distinction when building masks
30889 which select one half of the vector. An instruction selecting
30890 architectural low-lanes for a big-endian target must be described using
30891 a mask selecting GCC high-lanes.
30893 Big-Endian Little-Endian
30895 GCC 0 1 2 3 3 2 1 0
30896 | x | x | x | x | | x | x | x | x |
30897 Architecture 3 2 1 0 3 2 1 0
30899 Low Mask: { 2, 3 } { 0, 1 }
30900 High Mask: { 0, 1 } { 2, 3 }
30904 arm_simd_vect_par_cnst_half (machine_mode mode, bool high)
30906 int nunits = GET_MODE_NUNITS (mode);
30907 rtvec v = rtvec_alloc (nunits / 2);
30908 int high_base = nunits / 2;
30909 int low_base = 0;
30910 int base;
30911 rtx t1;
30912 int i;
30914 if (BYTES_BIG_ENDIAN)
30915 base = high ? low_base : high_base;
30916 else
30917 base = high ? high_base : low_base;
30919 for (i = 0; i < nunits / 2; i++)
30920 RTVEC_ELT (v, i) = GEN_INT (base + i);
30922 t1 = gen_rtx_PARALLEL (mode, v);
30923 return t1;
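/* For example, for V4SImode with HIGH == true this returns
   (parallel [(const_int 2) (const_int 3)]) on a little-endian target and
   (parallel [(const_int 0) (const_int 1)]) on a big-endian target,
   matching the "High Mask" row of the diagram above.  */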
30926 /* Check OP for validity as a PARALLEL RTX vector with elements
30927 numbering the lanes of either the high (HIGH == TRUE) or low half,
30928 from the perspective of the architecture. See the diagram above
30929 arm_simd_vect_par_cnst_half for more details. */
30931 bool
30932 arm_simd_check_vect_par_cnst_half_p (rtx op, machine_mode mode,
30933 bool high)
30935 rtx ideal = arm_simd_vect_par_cnst_half (mode, high);
30936 HOST_WIDE_INT count_op = XVECLEN (op, 0);
30937 HOST_WIDE_INT count_ideal = XVECLEN (ideal, 0);
30938 int i = 0;
30940 if (!VECTOR_MODE_P (mode))
30941 return false;
30943 if (count_op != count_ideal)
30944 return false;
30946 for (i = 0; i < count_ideal; i++)
30948 rtx elt_op = XVECEXP (op, 0, i);
30949 rtx elt_ideal = XVECEXP (ideal, 0, i);
30951 if (!CONST_INT_P (elt_op)
30952 || INTVAL (elt_ideal) != INTVAL (elt_op))
30953 return false;
30955 return true;
30958 /* Can output mi_thunk for all cases except for non-zero vcall_offset
30959 in Thumb1. */
30960 static bool
30961 arm_can_output_mi_thunk (const_tree, HOST_WIDE_INT, HOST_WIDE_INT vcall_offset,
30962 const_tree)
30964 /* For now, we punt and do not handle this for TARGET_THUMB1. */
30965 if (vcall_offset && TARGET_THUMB1)
30966 return false;
30968 /* Otherwise ok. */
30969 return true;
30972 /* Generate RTL for a conditional branch with rtx comparison CODE in
30973 mode CC_MODE. The destination of the unlikely conditional branch
30974 is LABEL_REF. */
30976 void
30977 arm_gen_unlikely_cbranch (enum rtx_code code, machine_mode cc_mode,
30978 rtx label_ref)
30980 rtx x;
30981 x = gen_rtx_fmt_ee (code, VOIDmode,
30982 gen_rtx_REG (cc_mode, CC_REGNUM),
30983 const0_rtx);
30985 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
30986 gen_rtx_LABEL_REF (VOIDmode, label_ref),
30987 pc_rtx);
30988 emit_unlikely_jump (gen_rtx_SET (pc_rtx, x));
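/* The emitted jump has the canonical conditional-branch form, roughly:

     (set (pc) (if_then_else (CODE (reg:CC_MODE CC_REGNUM) (const_int 0))
			     (label_ref LABEL_REF)
			     (pc)))

   with a low branch probability attached via emit_unlikely_jump.  */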
30991 /* Implement the TARGET_ASM_ELF_FLAGS_NUMERIC hook.
30993 For pure-code sections there is no letter code for this attribute, so
30994 output all the section flags numerically when this is needed. */
30996 static bool
30997 arm_asm_elf_flags_numeric (unsigned int flags, unsigned int *num)
31000 if (flags & SECTION_ARM_PURECODE)
31002 *num = 0x20000000;
31004 if (!(flags & SECTION_DEBUG))
31005 *num |= 0x2;
31006 if (flags & SECTION_EXCLUDE)
31007 *num |= 0x80000000;
31008 if (flags & SECTION_WRITE)
31009 *num |= 0x1;
31010 if (flags & SECTION_CODE)
31011 *num |= 0x4;
31012 if (flags & SECTION_MERGE)
31013 *num |= 0x10;
31014 if (flags & SECTION_STRINGS)
31015 *num |= 0x20;
31016 if (flags & SECTION_TLS)
31017 *num |= 0x400;
31018 if (HAVE_COMDAT_GROUP && (flags & SECTION_LINKONCE))
31019 *num |= 0x200;
31021 return true;
31024 return false;
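/* Worked example: for an allocatable, executable pure-code text section
   the numeric value built above is 0x20000000 (SHF_ARM_PURECODE)
   | 0x2 (SHF_ALLOC) | 0x4 (SHF_EXECINSTR) = 0x20000006.  */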
31027 /* Implement the TARGET_ASM_FUNCTION_SECTION hook.
31029 If pure-code is passed as an option, make sure all functions are in
31030 sections that have the SHF_ARM_PURECODE attribute. */
31032 static section *
31033 arm_function_section (tree decl, enum node_frequency freq,
31034 bool startup, bool exit)
31036 const char * section_name;
31037 section * sec;
31039 if (!decl || TREE_CODE (decl) != FUNCTION_DECL)
31040 return default_function_section (decl, freq, startup, exit);
31042 if (!target_pure_code)
31043 return default_function_section (decl, freq, startup, exit);
31046 section_name = DECL_SECTION_NAME (decl);
31048 /* If a function is not in a named section then it falls under the 'default'
31049 text section, also known as '.text'. We can preserve previous behavior as
31050 the default text section already has the SHF_ARM_PURECODE section
31051 attribute. */
31052 if (!section_name)
31054 section *default_sec = default_function_section (decl, freq, startup,
31055 exit);
31057 /* If default_sec is not null, then it must be a special section like for
31058 example .text.startup. We set the pure-code attribute and return the
31059 same section to preserve existing behavior. */
31060 if (default_sec)
31061 default_sec->common.flags |= SECTION_ARM_PURECODE;
31062 return default_sec;
31065 /* Otherwise look whether a section has already been created with
31066 'section_name'. */
31067 sec = get_named_section (decl, section_name, 0);
31068 if (!sec)
31069 /* If that is not the case passing NULL as the section's name to
31070 'get_named_section' will create a section with the declaration's
31071 section name. */
31072 sec = get_named_section (decl, NULL, 0);
31074 /* Set the SHF_ARM_PURECODE attribute. */
31075 sec->common.flags |= SECTION_ARM_PURECODE;
31077 return sec;
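/* For instance, with -mpure-code a function declared with
   __attribute__ ((section ("my_code"))) still goes into "my_code", but
   that section now carries SECTION_ARM_PURECODE and so is emitted with
   the SHF_ARM_PURECODE flag.  ("my_code" is just an example name.)  */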
31080 /* Implement the TARGET_SECTION_TYPE_FLAGS hook.
31082 If DECL is a function declaration and pure-code is passed as an option
31083 then add the SHF_ARM_PURECODE attribute to the section flags. NAME is the
31084 section's name and RELOC indicates whether the declaration's initializer may
31085 contain runtime relocations. */
31087 static unsigned int
31088 arm_elf_section_type_flags (tree decl, const char *name, int reloc)
31090 unsigned int flags = default_section_type_flags (decl, name, reloc);
31092 if (decl && TREE_CODE (decl) == FUNCTION_DECL && target_pure_code)
31093 flags |= SECTION_ARM_PURECODE;
31095 return flags;
31098 /* Generate call to __aeabi_[mode]divmod (op0, op1). */
31100 static void
31101 arm_expand_divmod_libfunc (rtx libfunc, machine_mode mode,
31102 rtx op0, rtx op1,
31103 rtx *quot_p, rtx *rem_p)
31105 if (mode == SImode)
31106 gcc_assert (!TARGET_IDIV);
31108 scalar_int_mode libval_mode
31109 = smallest_int_mode_for_size (2 * GET_MODE_BITSIZE (mode));
31111 rtx libval = emit_library_call_value (libfunc, NULL_RTX, LCT_CONST,
31112 libval_mode,
31113 op0, GET_MODE (op0),
31114 op1, GET_MODE (op1));
31116 rtx quotient = simplify_gen_subreg (mode, libval, libval_mode, 0);
31117 rtx remainder = simplify_gen_subreg (mode, libval, libval_mode,
31118 GET_MODE_SIZE (mode));
31120 gcc_assert (quotient);
31121 gcc_assert (remainder);
31123 *quot_p = quotient;
31124 *rem_p = remainder;
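/* Sketch of the effect for SImode (assuming the usual AEABI library
   names): the call is to __aeabi_idivmod or __aeabi_uidivmod, the DImode
   libcall value holds both results, and the quotient and remainder are
   recovered as the subregs at offsets 0 and GET_MODE_SIZE (SImode).  */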
31127 /* This function checks for the availability of the coprocessor builtin passed
31128 in BUILTIN for the current target. Returns true if it is available and
31129 false otherwise. If a BUILTIN is passed for which this function has not
31130 been implemented, it will cause an internal compiler error (gcc_unreachable). */
31132 bool
31133 arm_coproc_builtin_available (enum unspecv builtin)
31135 /* None of these builtins are available in Thumb mode if the target only
31136 supports Thumb-1. */
31137 if (TARGET_THUMB1)
31138 return false;
31140 switch (builtin)
31142 case VUNSPEC_CDP:
31143 case VUNSPEC_LDC:
31144 case VUNSPEC_LDCL:
31145 case VUNSPEC_STC:
31146 case VUNSPEC_STCL:
31147 case VUNSPEC_MCR:
31148 case VUNSPEC_MRC:
31149 if (arm_arch4)
31150 return true;
31151 break;
31152 case VUNSPEC_CDP2:
31153 case VUNSPEC_LDC2:
31154 case VUNSPEC_LDC2L:
31155 case VUNSPEC_STC2:
31156 case VUNSPEC_STC2L:
31157 case VUNSPEC_MCR2:
31158 case VUNSPEC_MRC2:
31159 /* Only present in ARMv5*, ARMv6 (but not ARMv6-M), ARMv7* and
31160 ARMv8-{A,M}. */
31161 if (arm_arch5)
31162 return true;
31163 break;
31164 case VUNSPEC_MCRR:
31165 case VUNSPEC_MRRC:
31166 /* Only present in ARMv5TE, ARMv6 (but not ARMv6-M), ARMv7* and
31167 ARMv8-{A,M}. */
31168 if (arm_arch6 || arm_arch5te)
31169 return true;
31170 break;
31171 case VUNSPEC_MCRR2:
31172 case VUNSPEC_MRRC2:
31173 if (arm_arch6)
31174 return true;
31175 break;
31176 default:
31177 gcc_unreachable ();
31179 return false;
31182 /* This function returns true if OP is a valid memory operand for the ldc and
31183 stc coprocessor instructions and false otherwise. */
31185 bool
31186 arm_coproc_ldc_stc_legitimate_address (rtx op)
31188 HOST_WIDE_INT range;
31189 /* Has to be a memory operand. */
31190 if (!MEM_P (op))
31191 return false;
31193 op = XEXP (op, 0);
31195 /* We accept registers. */
31196 if (REG_P (op))
31197 return true;
31199 switch GET_CODE (op)
31201 case PLUS:
31203 /* Or registers with an offset. */
31204 if (!REG_P (XEXP (op, 0)))
31205 return false;
31207 op = XEXP (op, 1);
31209 /* The offset must be an immediate though. */
31210 if (!CONST_INT_P (op))
31211 return false;
31213 range = INTVAL (op);
31215 /* Within the range of [-1020,1020]. */
31216 if (!IN_RANGE (range, -1020, 1020))
31217 return false;
31219 /* And a multiple of 4. */
31220 return (range % 4) == 0;
31222 case PRE_INC:
31223 case POST_INC:
31224 case PRE_DEC:
31225 case POST_DEC:
31226 return REG_P (XEXP (op, 0));
31227 default:
31228 gcc_unreachable ();
31230 return false;
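/* Examples of the rules above: [r2], [r2, #8] and [r2, #-1020] are
   accepted; [r2, #6] is rejected (offset not a multiple of 4) and
   [r2, #1024] is rejected (outside [-1020, 1020]).  Pre/post increment
   and decrement forms are accepted when they update a register.  */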
31233 #if CHECKING_P
31234 namespace selftest {
31236 /* Scan the static data tables generated by parsecpu.awk looking for
31237 potential issues with the data. We primarily check for
31238 inconsistencies in the option extensions at present (extensions
31239 that duplicate others but aren't marked as aliases). Furthermore,
31240 for correct canonicalization later options must never be a subset
31241 of an earlier option. Any extension should also only specify other
31242 feature bits and never an architecture bit. The architecture is inferred
31243 from the declaration of the extension. */
31244 static void
31245 arm_test_cpu_arch_data (void)
31247 const arch_option *arch;
31248 const cpu_option *cpu;
31249 auto_sbitmap target_isa (isa_num_bits);
31250 auto_sbitmap isa1 (isa_num_bits);
31251 auto_sbitmap isa2 (isa_num_bits);
31253 for (arch = all_architectures; arch->common.name != NULL; ++arch)
31255 const cpu_arch_extension *ext1, *ext2;
31257 if (arch->common.extensions == NULL)
31258 continue;
31260 arm_initialize_isa (target_isa, arch->common.isa_bits);
31262 for (ext1 = arch->common.extensions; ext1->name != NULL; ++ext1)
31264 if (ext1->alias)
31265 continue;
31267 arm_initialize_isa (isa1, ext1->isa_bits);
31268 for (ext2 = ext1 + 1; ext2->name != NULL; ++ext2)
31270 if (ext2->alias || ext1->remove != ext2->remove)
31271 continue;
31273 arm_initialize_isa (isa2, ext2->isa_bits);
31274 /* If the option is a subset of the parent option, it doesn't
31275 add anything and so isn't useful. */
31276 ASSERT_TRUE (!bitmap_subset_p (isa2, isa1));
31278 /* If the extension specifies any architectural bits then
31279 disallow it. Extensions should only specify feature bits. */
31280 ASSERT_TRUE (!bitmap_intersect_p (isa2, target_isa));
31285 for (cpu = all_cores; cpu->common.name != NULL; ++cpu)
31287 const cpu_arch_extension *ext1, *ext2;
31289 if (cpu->common.extensions == NULL)
31290 continue;
31292 arm_initialize_isa (target_isa, cpu->common.isa_bits);
31294 for (ext1 = cpu->common.extensions; ext1->name != NULL; ++ext1)
31296 if (ext1->alias)
31297 continue;
31299 arm_initialize_isa (isa1, ext1->isa_bits);
31300 for (ext2 = ext1 + 1; ext2->name != NULL; ++ext2)
31302 if (ext2->alias || ext1->remove != ext2->remove)
31303 continue;
31305 arm_initialize_isa (isa2, ext2->isa_bits);
31306 /* If the option is a subset of the parent option, it doesn't
31307 add anything and so isn't useful. */
31308 ASSERT_TRUE (!bitmap_subset_p (isa2, isa1));
31310 /* If the extension specifies any architectural bits then
31311 disallow it. Extensions should only specify feature bits. */
31312 ASSERT_TRUE (!bitmap_intersect_p (isa2, target_isa));
31318 static void
31319 arm_run_selftests (void)
31321 arm_test_cpu_arch_data ();
31323 } /* Namespace selftest. */
31325 #undef TARGET_RUN_TARGET_SELFTESTS
31326 #define TARGET_RUN_TARGET_SELFTESTS selftest::arm_run_selftests
31327 #endif /* CHECKING_P */
31329 struct gcc_target targetm = TARGET_INITIALIZER;
31331 #include "gt-arm.h"