Fix numerous typos in comments
gcc/config/arm/arm.c
1 /* Output routines for GCC for ARM.
2 Copyright (C) 1991-2017 Free Software Foundation, Inc.
3 Contributed by Pieter `Tiggr' Schoenmakers (rcpieter@win.tue.nl)
4 and Martin Simmons (@harleqn.co.uk).
5 More major hacks by Richard Earnshaw (rearnsha@arm.com).
7 This file is part of GCC.
9 GCC is free software; you can redistribute it and/or modify it
10 under the terms of the GNU General Public License as published
11 by the Free Software Foundation; either version 3, or (at your
12 option) any later version.
14 GCC is distributed in the hope that it will be useful, but WITHOUT
15 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
16 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
17 License for more details.
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3. If not see
21 <http://www.gnu.org/licenses/>. */
23 #include "config.h"
24 #include "system.h"
25 #include "coretypes.h"
26 #include "backend.h"
27 #include "target.h"
28 #include "rtl.h"
29 #include "tree.h"
30 #include "memmodel.h"
31 #include "cfghooks.h"
32 #include "df.h"
33 #include "tm_p.h"
34 #include "stringpool.h"
35 #include "optabs.h"
36 #include "regs.h"
37 #include "emit-rtl.h"
38 #include "recog.h"
39 #include "cgraph.h"
40 #include "diagnostic-core.h"
41 #include "alias.h"
42 #include "fold-const.h"
43 #include "stor-layout.h"
44 #include "calls.h"
45 #include "varasm.h"
46 #include "output.h"
47 #include "insn-attr.h"
48 #include "flags.h"
49 #include "reload.h"
50 #include "explow.h"
51 #include "expr.h"
52 #include "cfgrtl.h"
53 #include "sched-int.h"
54 #include "common/common-target.h"
55 #include "langhooks.h"
56 #include "intl.h"
57 #include "libfuncs.h"
58 #include "params.h"
59 #include "opts.h"
60 #include "dumpfile.h"
61 #include "target-globals.h"
62 #include "builtins.h"
63 #include "tm-constrs.h"
64 #include "rtl-iter.h"
65 #include "optabs-libfuncs.h"
66 #include "gimplify.h"
68 /* This file should be included last. */
69 #include "target-def.h"
71 /* Forward definitions of types. */
72 typedef struct minipool_node Mnode;
73 typedef struct minipool_fixup Mfix;
75 void (*arm_lang_output_object_attributes_hook)(void);
77 struct four_ints
79 int i[4];
82 /* Forward function declarations. */
83 static bool arm_const_not_ok_for_debug_p (rtx);
84 static bool arm_needs_doubleword_align (machine_mode, const_tree);
85 static int arm_compute_static_chain_stack_bytes (void);
86 static arm_stack_offsets *arm_get_frame_offsets (void);
87 static void arm_add_gc_roots (void);
88 static int arm_gen_constant (enum rtx_code, machine_mode, rtx,
89 unsigned HOST_WIDE_INT, rtx, rtx, int, int);
90 static unsigned bit_count (unsigned long);
91 static unsigned bitmap_popcount (const sbitmap);
92 static int arm_address_register_rtx_p (rtx, int);
93 static int arm_legitimate_index_p (machine_mode, rtx, RTX_CODE, int);
94 static bool is_called_in_ARM_mode (tree);
95 static int thumb2_legitimate_index_p (machine_mode, rtx, int);
96 static int thumb1_base_register_rtx_p (rtx, machine_mode, int);
97 static rtx arm_legitimize_address (rtx, rtx, machine_mode);
98 static reg_class_t arm_preferred_reload_class (rtx, reg_class_t);
99 static rtx thumb_legitimize_address (rtx, rtx, machine_mode);
100 inline static int thumb1_index_register_rtx_p (rtx, int);
101 static int thumb_far_jump_used_p (void);
102 static bool thumb_force_lr_save (void);
103 static unsigned arm_size_return_regs (void);
104 static bool arm_assemble_integer (rtx, unsigned int, int);
105 static void arm_print_operand (FILE *, rtx, int);
106 static void arm_print_operand_address (FILE *, machine_mode, rtx);
107 static bool arm_print_operand_punct_valid_p (unsigned char code);
108 static const char *fp_const_from_val (REAL_VALUE_TYPE *);
109 static arm_cc get_arm_condition_code (rtx);
110 static const char *output_multi_immediate (rtx *, const char *, const char *,
111 int, HOST_WIDE_INT);
112 static const char *shift_op (rtx, HOST_WIDE_INT *);
113 static struct machine_function *arm_init_machine_status (void);
114 static void thumb_exit (FILE *, int);
115 static HOST_WIDE_INT get_jump_table_size (rtx_jump_table_data *);
116 static Mnode *move_minipool_fix_forward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
117 static Mnode *add_minipool_forward_ref (Mfix *);
118 static Mnode *move_minipool_fix_backward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
119 static Mnode *add_minipool_backward_ref (Mfix *);
120 static void assign_minipool_offsets (Mfix *);
121 static void arm_print_value (FILE *, rtx);
122 static void dump_minipool (rtx_insn *);
123 static int arm_barrier_cost (rtx_insn *);
124 static Mfix *create_fix_barrier (Mfix *, HOST_WIDE_INT);
125 static void push_minipool_barrier (rtx_insn *, HOST_WIDE_INT);
126 static void push_minipool_fix (rtx_insn *, HOST_WIDE_INT, rtx *,
127 machine_mode, rtx);
128 static void arm_reorg (void);
129 static void note_invalid_constants (rtx_insn *, HOST_WIDE_INT, int);
130 static unsigned long arm_compute_save_reg0_reg12_mask (void);
131 static unsigned long arm_compute_save_reg_mask (void);
132 static unsigned long arm_isr_value (tree);
133 static unsigned long arm_compute_func_type (void);
134 static tree arm_handle_fndecl_attribute (tree *, tree, tree, int, bool *);
135 static tree arm_handle_pcs_attribute (tree *, tree, tree, int, bool *);
136 static tree arm_handle_isr_attribute (tree *, tree, tree, int, bool *);
137 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
138 static tree arm_handle_notshared_attribute (tree *, tree, tree, int, bool *);
139 #endif
140 static tree arm_handle_cmse_nonsecure_entry (tree *, tree, tree, int, bool *);
141 static tree arm_handle_cmse_nonsecure_call (tree *, tree, tree, int, bool *);
142 static void arm_output_function_epilogue (FILE *, HOST_WIDE_INT);
143 static void arm_output_function_prologue (FILE *, HOST_WIDE_INT);
144 static int arm_comp_type_attributes (const_tree, const_tree);
145 static void arm_set_default_type_attributes (tree);
146 static int arm_adjust_cost (rtx_insn *, int, rtx_insn *, int, unsigned int);
147 static int arm_sched_reorder (FILE *, int, rtx_insn **, int *, int);
148 static int optimal_immediate_sequence (enum rtx_code code,
149 unsigned HOST_WIDE_INT val,
150 struct four_ints *return_sequence);
151 static int optimal_immediate_sequence_1 (enum rtx_code code,
152 unsigned HOST_WIDE_INT val,
153 struct four_ints *return_sequence,
154 int i);
155 static int arm_get_strip_length (int);
156 static bool arm_function_ok_for_sibcall (tree, tree);
157 static machine_mode arm_promote_function_mode (const_tree,
158 machine_mode, int *,
159 const_tree, int);
160 static bool arm_return_in_memory (const_tree, const_tree);
161 static rtx arm_function_value (const_tree, const_tree, bool);
162 static rtx arm_libcall_value_1 (machine_mode);
163 static rtx arm_libcall_value (machine_mode, const_rtx);
164 static bool arm_function_value_regno_p (const unsigned int);
165 static void arm_internal_label (FILE *, const char *, unsigned long);
166 static void arm_output_mi_thunk (FILE *, tree, HOST_WIDE_INT, HOST_WIDE_INT,
167 tree);
168 static bool arm_have_conditional_execution (void);
169 static bool arm_cannot_force_const_mem (machine_mode, rtx);
170 static bool arm_legitimate_constant_p (machine_mode, rtx);
171 static bool arm_rtx_costs (rtx, machine_mode, int, int, int *, bool);
172 static int arm_address_cost (rtx, machine_mode, addr_space_t, bool);
173 static int arm_register_move_cost (machine_mode, reg_class_t, reg_class_t);
174 static int arm_memory_move_cost (machine_mode, reg_class_t, bool);
175 static void emit_constant_insn (rtx cond, rtx pattern);
176 static rtx_insn *emit_set_insn (rtx, rtx);
177 static rtx emit_multi_reg_push (unsigned long, unsigned long);
178 static int arm_arg_partial_bytes (cumulative_args_t, machine_mode,
179 tree, bool);
180 static rtx arm_function_arg (cumulative_args_t, machine_mode,
181 const_tree, bool);
182 static void arm_function_arg_advance (cumulative_args_t, machine_mode,
183 const_tree, bool);
184 static unsigned int arm_function_arg_boundary (machine_mode, const_tree);
185 static rtx aapcs_allocate_return_reg (machine_mode, const_tree,
186 const_tree);
187 static rtx aapcs_libcall_value (machine_mode);
188 static int aapcs_select_return_coproc (const_tree, const_tree);
190 #ifdef OBJECT_FORMAT_ELF
191 static void arm_elf_asm_constructor (rtx, int) ATTRIBUTE_UNUSED;
192 static void arm_elf_asm_destructor (rtx, int) ATTRIBUTE_UNUSED;
193 #endif
194 #ifndef ARM_PE
195 static void arm_encode_section_info (tree, rtx, int);
196 #endif
198 static void arm_file_end (void);
199 static void arm_file_start (void);
200 static void arm_insert_attributes (tree, tree *);
202 static void arm_setup_incoming_varargs (cumulative_args_t, machine_mode,
203 tree, int *, int);
204 static bool arm_pass_by_reference (cumulative_args_t,
205 machine_mode, const_tree, bool);
206 static bool arm_promote_prototypes (const_tree);
207 static bool arm_default_short_enums (void);
208 static bool arm_align_anon_bitfield (void);
209 static bool arm_return_in_msb (const_tree);
210 static bool arm_must_pass_in_stack (machine_mode, const_tree);
211 static bool arm_return_in_memory (const_tree, const_tree);
212 #if ARM_UNWIND_INFO
213 static void arm_unwind_emit (FILE *, rtx_insn *);
214 static bool arm_output_ttype (rtx);
215 static void arm_asm_emit_except_personality (rtx);
216 #endif
217 static void arm_asm_init_sections (void);
218 static rtx arm_dwarf_register_span (rtx);
220 static tree arm_cxx_guard_type (void);
221 static bool arm_cxx_guard_mask_bit (void);
222 static tree arm_get_cookie_size (tree);
223 static bool arm_cookie_has_size (void);
224 static bool arm_cxx_cdtor_returns_this (void);
225 static bool arm_cxx_key_method_may_be_inline (void);
226 static void arm_cxx_determine_class_data_visibility (tree);
227 static bool arm_cxx_class_data_always_comdat (void);
228 static bool arm_cxx_use_aeabi_atexit (void);
229 static void arm_init_libfuncs (void);
230 static tree arm_build_builtin_va_list (void);
231 static void arm_expand_builtin_va_start (tree, rtx);
232 static tree arm_gimplify_va_arg_expr (tree, tree, gimple_seq *, gimple_seq *);
233 static void arm_option_override (void);
234 static void arm_option_restore (struct gcc_options *,
235 struct cl_target_option *);
236 static void arm_override_options_after_change (void);
237 static void arm_option_print (FILE *, int, struct cl_target_option *);
238 static void arm_set_current_function (tree);
239 static bool arm_can_inline_p (tree, tree);
240 static void arm_relayout_function (tree);
241 static bool arm_valid_target_attribute_p (tree, tree, tree, int);
242 static unsigned HOST_WIDE_INT arm_shift_truncation_mask (machine_mode);
243 static bool arm_sched_can_speculate_insn (rtx_insn *);
244 static bool arm_macro_fusion_p (void);
245 static bool arm_cannot_copy_insn_p (rtx_insn *);
246 static int arm_issue_rate (void);
247 static int arm_first_cycle_multipass_dfa_lookahead (void);
248 static int arm_first_cycle_multipass_dfa_lookahead_guard (rtx_insn *, int);
249 static void arm_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
250 static bool arm_output_addr_const_extra (FILE *, rtx);
251 static bool arm_allocate_stack_slots_for_args (void);
252 static bool arm_warn_func_return (tree);
253 static tree arm_promoted_type (const_tree t);
254 static bool arm_scalar_mode_supported_p (machine_mode);
255 static bool arm_frame_pointer_required (void);
256 static bool arm_can_eliminate (const int, const int);
257 static void arm_asm_trampoline_template (FILE *);
258 static void arm_trampoline_init (rtx, tree, rtx);
259 static rtx arm_trampoline_adjust_address (rtx);
260 static rtx_insn *arm_pic_static_addr (rtx orig, rtx reg);
261 static bool cortex_a9_sched_adjust_cost (rtx_insn *, int, rtx_insn *, int *);
262 static bool xscale_sched_adjust_cost (rtx_insn *, int, rtx_insn *, int *);
263 static bool fa726te_sched_adjust_cost (rtx_insn *, int, rtx_insn *, int *);
264 static bool arm_array_mode_supported_p (machine_mode,
265 unsigned HOST_WIDE_INT);
266 static machine_mode arm_preferred_simd_mode (machine_mode);
267 static bool arm_class_likely_spilled_p (reg_class_t);
268 static HOST_WIDE_INT arm_vector_alignment (const_tree type);
269 static bool arm_vector_alignment_reachable (const_tree type, bool is_packed);
270 static bool arm_builtin_support_vector_misalignment (machine_mode mode,
271 const_tree type,
272 int misalignment,
273 bool is_packed);
274 static void arm_conditional_register_usage (void);
275 static enum flt_eval_method arm_excess_precision (enum excess_precision_type);
276 static reg_class_t arm_preferred_rename_class (reg_class_t rclass);
277 static unsigned int arm_autovectorize_vector_sizes (void);
278 static int arm_default_branch_cost (bool, bool);
279 static int arm_cortex_a5_branch_cost (bool, bool);
280 static int arm_cortex_m_branch_cost (bool, bool);
281 static int arm_cortex_m7_branch_cost (bool, bool);
283 static bool arm_vectorize_vec_perm_const_ok (machine_mode vmode,
284 const unsigned char *sel);
286 static bool aarch_macro_fusion_pair_p (rtx_insn*, rtx_insn*);
288 static int arm_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
289 tree vectype,
290 int misalign ATTRIBUTE_UNUSED);
291 static unsigned arm_add_stmt_cost (void *data, int count,
292 enum vect_cost_for_stmt kind,
293 struct _stmt_vec_info *stmt_info,
294 int misalign,
295 enum vect_cost_model_location where);
297 static void arm_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
298 bool op0_preserve_value);
299 static unsigned HOST_WIDE_INT arm_asan_shadow_offset (void);
301 static void arm_sched_fusion_priority (rtx_insn *, int, int *, int*);
302 static bool arm_can_output_mi_thunk (const_tree, HOST_WIDE_INT, HOST_WIDE_INT,
303 const_tree);
304 static section *arm_function_section (tree, enum node_frequency, bool, bool);
305 static bool arm_asm_elf_flags_numeric (unsigned int flags, unsigned int *num);
306 static unsigned int arm_elf_section_type_flags (tree decl, const char *name,
307 int reloc);
308 static void arm_expand_divmod_libfunc (rtx, machine_mode, rtx, rtx, rtx *, rtx *);
309 static machine_mode arm_floatn_mode (int, bool);
311 /* Table of machine attributes. */
312 static const struct attribute_spec arm_attribute_table[] =
314 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
315 affects_type_identity } */
316 /* Function calls made to this symbol must be done indirectly, because
317 it may lie outside of the 26 bit addressing range of a normal function
318 call. */
319 { "long_call", 0, 0, false, true, true, NULL, false },
320 /* Whereas these functions are always known to reside within the 26 bit
321 addressing range. */
322 { "short_call", 0, 0, false, true, true, NULL, false },
323 /* Specify the procedure call conventions for a function. */
324 { "pcs", 1, 1, false, true, true, arm_handle_pcs_attribute,
325 false },
326 /* Interrupt Service Routines have special prologue and epilogue requirements. */
327 { "isr", 0, 1, false, false, false, arm_handle_isr_attribute,
328 false },
329 { "interrupt", 0, 1, false, false, false, arm_handle_isr_attribute,
330 false },
331 { "naked", 0, 0, true, false, false, arm_handle_fndecl_attribute,
332 false },
333 #ifdef ARM_PE
334 /* ARM/PE has three new attributes:
335 interfacearm - ?
336 dllexport - for exporting a function/variable that will live in a dll
337 dllimport - for importing a function/variable from a dll
339 Microsoft allows multiple declspecs in one __declspec, separating
340 them with spaces. We do NOT support this. Instead, use __declspec
341 multiple times.
343 { "dllimport", 0, 0, true, false, false, NULL, false },
344 { "dllexport", 0, 0, true, false, false, NULL, false },
345 { "interfacearm", 0, 0, true, false, false, arm_handle_fndecl_attribute,
346 false },
347 #elif TARGET_DLLIMPORT_DECL_ATTRIBUTES
348 { "dllimport", 0, 0, false, false, false, handle_dll_attribute, false },
349 { "dllexport", 0, 0, false, false, false, handle_dll_attribute, false },
350 { "notshared", 0, 0, false, true, false, arm_handle_notshared_attribute,
351 false },
352 #endif
353 /* ARMv8-M Security Extensions support. */
354 { "cmse_nonsecure_entry", 0, 0, true, false, false,
355 arm_handle_cmse_nonsecure_entry, false },
356 { "cmse_nonsecure_call", 0, 0, true, false, false,
357 arm_handle_cmse_nonsecure_call, true },
358 { NULL, 0, 0, false, false, false, NULL, false }
361 /* Initialize the GCC target structure. */
362 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
363 #undef TARGET_MERGE_DECL_ATTRIBUTES
364 #define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
365 #endif
367 #undef TARGET_LEGITIMIZE_ADDRESS
368 #define TARGET_LEGITIMIZE_ADDRESS arm_legitimize_address
370 #undef TARGET_ATTRIBUTE_TABLE
371 #define TARGET_ATTRIBUTE_TABLE arm_attribute_table
373 #undef TARGET_INSERT_ATTRIBUTES
374 #define TARGET_INSERT_ATTRIBUTES arm_insert_attributes
376 #undef TARGET_ASM_FILE_START
377 #define TARGET_ASM_FILE_START arm_file_start
378 #undef TARGET_ASM_FILE_END
379 #define TARGET_ASM_FILE_END arm_file_end
381 #undef TARGET_ASM_ALIGNED_SI_OP
382 #define TARGET_ASM_ALIGNED_SI_OP NULL
383 #undef TARGET_ASM_INTEGER
384 #define TARGET_ASM_INTEGER arm_assemble_integer
386 #undef TARGET_PRINT_OPERAND
387 #define TARGET_PRINT_OPERAND arm_print_operand
388 #undef TARGET_PRINT_OPERAND_ADDRESS
389 #define TARGET_PRINT_OPERAND_ADDRESS arm_print_operand_address
390 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
391 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P arm_print_operand_punct_valid_p
393 #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
394 #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA arm_output_addr_const_extra
396 #undef TARGET_ASM_FUNCTION_PROLOGUE
397 #define TARGET_ASM_FUNCTION_PROLOGUE arm_output_function_prologue
399 #undef TARGET_ASM_FUNCTION_EPILOGUE
400 #define TARGET_ASM_FUNCTION_EPILOGUE arm_output_function_epilogue
402 #undef TARGET_CAN_INLINE_P
403 #define TARGET_CAN_INLINE_P arm_can_inline_p
405 #undef TARGET_RELAYOUT_FUNCTION
406 #define TARGET_RELAYOUT_FUNCTION arm_relayout_function
408 #undef TARGET_OPTION_OVERRIDE
409 #define TARGET_OPTION_OVERRIDE arm_option_override
411 #undef TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE
412 #define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE arm_override_options_after_change
414 #undef TARGET_OPTION_RESTORE
415 #define TARGET_OPTION_RESTORE arm_option_restore
417 #undef TARGET_OPTION_PRINT
418 #define TARGET_OPTION_PRINT arm_option_print
420 #undef TARGET_COMP_TYPE_ATTRIBUTES
421 #define TARGET_COMP_TYPE_ATTRIBUTES arm_comp_type_attributes
423 #undef TARGET_SCHED_CAN_SPECULATE_INSN
424 #define TARGET_SCHED_CAN_SPECULATE_INSN arm_sched_can_speculate_insn
426 #undef TARGET_SCHED_MACRO_FUSION_P
427 #define TARGET_SCHED_MACRO_FUSION_P arm_macro_fusion_p
429 #undef TARGET_SCHED_MACRO_FUSION_PAIR_P
430 #define TARGET_SCHED_MACRO_FUSION_PAIR_P aarch_macro_fusion_pair_p
432 #undef TARGET_SET_DEFAULT_TYPE_ATTRIBUTES
433 #define TARGET_SET_DEFAULT_TYPE_ATTRIBUTES arm_set_default_type_attributes
435 #undef TARGET_SCHED_ADJUST_COST
436 #define TARGET_SCHED_ADJUST_COST arm_adjust_cost
438 #undef TARGET_SET_CURRENT_FUNCTION
439 #define TARGET_SET_CURRENT_FUNCTION arm_set_current_function
441 #undef TARGET_OPTION_VALID_ATTRIBUTE_P
442 #define TARGET_OPTION_VALID_ATTRIBUTE_P arm_valid_target_attribute_p
444 #undef TARGET_SCHED_REORDER
445 #define TARGET_SCHED_REORDER arm_sched_reorder
447 #undef TARGET_REGISTER_MOVE_COST
448 #define TARGET_REGISTER_MOVE_COST arm_register_move_cost
450 #undef TARGET_MEMORY_MOVE_COST
451 #define TARGET_MEMORY_MOVE_COST arm_memory_move_cost
453 #undef TARGET_ENCODE_SECTION_INFO
454 #ifdef ARM_PE
455 #define TARGET_ENCODE_SECTION_INFO arm_pe_encode_section_info
456 #else
457 #define TARGET_ENCODE_SECTION_INFO arm_encode_section_info
458 #endif
460 #undef TARGET_STRIP_NAME_ENCODING
461 #define TARGET_STRIP_NAME_ENCODING arm_strip_name_encoding
463 #undef TARGET_ASM_INTERNAL_LABEL
464 #define TARGET_ASM_INTERNAL_LABEL arm_internal_label
466 #undef TARGET_FLOATN_MODE
467 #define TARGET_FLOATN_MODE arm_floatn_mode
469 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
470 #define TARGET_FUNCTION_OK_FOR_SIBCALL arm_function_ok_for_sibcall
472 #undef TARGET_FUNCTION_VALUE
473 #define TARGET_FUNCTION_VALUE arm_function_value
475 #undef TARGET_LIBCALL_VALUE
476 #define TARGET_LIBCALL_VALUE arm_libcall_value
478 #undef TARGET_FUNCTION_VALUE_REGNO_P
479 #define TARGET_FUNCTION_VALUE_REGNO_P arm_function_value_regno_p
481 #undef TARGET_ASM_OUTPUT_MI_THUNK
482 #define TARGET_ASM_OUTPUT_MI_THUNK arm_output_mi_thunk
483 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
484 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK arm_can_output_mi_thunk
486 #undef TARGET_RTX_COSTS
487 #define TARGET_RTX_COSTS arm_rtx_costs
488 #undef TARGET_ADDRESS_COST
489 #define TARGET_ADDRESS_COST arm_address_cost
491 #undef TARGET_SHIFT_TRUNCATION_MASK
492 #define TARGET_SHIFT_TRUNCATION_MASK arm_shift_truncation_mask
493 #undef TARGET_VECTOR_MODE_SUPPORTED_P
494 #define TARGET_VECTOR_MODE_SUPPORTED_P arm_vector_mode_supported_p
495 #undef TARGET_ARRAY_MODE_SUPPORTED_P
496 #define TARGET_ARRAY_MODE_SUPPORTED_P arm_array_mode_supported_p
497 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
498 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE arm_preferred_simd_mode
499 #undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
500 #define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
501 arm_autovectorize_vector_sizes
503 #undef TARGET_MACHINE_DEPENDENT_REORG
504 #define TARGET_MACHINE_DEPENDENT_REORG arm_reorg
506 #undef TARGET_INIT_BUILTINS
507 #define TARGET_INIT_BUILTINS arm_init_builtins
508 #undef TARGET_EXPAND_BUILTIN
509 #define TARGET_EXPAND_BUILTIN arm_expand_builtin
510 #undef TARGET_BUILTIN_DECL
511 #define TARGET_BUILTIN_DECL arm_builtin_decl
513 #undef TARGET_INIT_LIBFUNCS
514 #define TARGET_INIT_LIBFUNCS arm_init_libfuncs
516 #undef TARGET_PROMOTE_FUNCTION_MODE
517 #define TARGET_PROMOTE_FUNCTION_MODE arm_promote_function_mode
518 #undef TARGET_PROMOTE_PROTOTYPES
519 #define TARGET_PROMOTE_PROTOTYPES arm_promote_prototypes
520 #undef TARGET_PASS_BY_REFERENCE
521 #define TARGET_PASS_BY_REFERENCE arm_pass_by_reference
522 #undef TARGET_ARG_PARTIAL_BYTES
523 #define TARGET_ARG_PARTIAL_BYTES arm_arg_partial_bytes
524 #undef TARGET_FUNCTION_ARG
525 #define TARGET_FUNCTION_ARG arm_function_arg
526 #undef TARGET_FUNCTION_ARG_ADVANCE
527 #define TARGET_FUNCTION_ARG_ADVANCE arm_function_arg_advance
528 #undef TARGET_FUNCTION_ARG_BOUNDARY
529 #define TARGET_FUNCTION_ARG_BOUNDARY arm_function_arg_boundary
531 #undef TARGET_SETUP_INCOMING_VARARGS
532 #define TARGET_SETUP_INCOMING_VARARGS arm_setup_incoming_varargs
534 #undef TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS
535 #define TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS arm_allocate_stack_slots_for_args
537 #undef TARGET_ASM_TRAMPOLINE_TEMPLATE
538 #define TARGET_ASM_TRAMPOLINE_TEMPLATE arm_asm_trampoline_template
539 #undef TARGET_TRAMPOLINE_INIT
540 #define TARGET_TRAMPOLINE_INIT arm_trampoline_init
541 #undef TARGET_TRAMPOLINE_ADJUST_ADDRESS
542 #define TARGET_TRAMPOLINE_ADJUST_ADDRESS arm_trampoline_adjust_address
544 #undef TARGET_WARN_FUNC_RETURN
545 #define TARGET_WARN_FUNC_RETURN arm_warn_func_return
547 #undef TARGET_DEFAULT_SHORT_ENUMS
548 #define TARGET_DEFAULT_SHORT_ENUMS arm_default_short_enums
550 #undef TARGET_ALIGN_ANON_BITFIELD
551 #define TARGET_ALIGN_ANON_BITFIELD arm_align_anon_bitfield
553 #undef TARGET_NARROW_VOLATILE_BITFIELD
554 #define TARGET_NARROW_VOLATILE_BITFIELD hook_bool_void_false
556 #undef TARGET_CXX_GUARD_TYPE
557 #define TARGET_CXX_GUARD_TYPE arm_cxx_guard_type
559 #undef TARGET_CXX_GUARD_MASK_BIT
560 #define TARGET_CXX_GUARD_MASK_BIT arm_cxx_guard_mask_bit
562 #undef TARGET_CXX_GET_COOKIE_SIZE
563 #define TARGET_CXX_GET_COOKIE_SIZE arm_get_cookie_size
565 #undef TARGET_CXX_COOKIE_HAS_SIZE
566 #define TARGET_CXX_COOKIE_HAS_SIZE arm_cookie_has_size
568 #undef TARGET_CXX_CDTOR_RETURNS_THIS
569 #define TARGET_CXX_CDTOR_RETURNS_THIS arm_cxx_cdtor_returns_this
571 #undef TARGET_CXX_KEY_METHOD_MAY_BE_INLINE
572 #define TARGET_CXX_KEY_METHOD_MAY_BE_INLINE arm_cxx_key_method_may_be_inline
574 #undef TARGET_CXX_USE_AEABI_ATEXIT
575 #define TARGET_CXX_USE_AEABI_ATEXIT arm_cxx_use_aeabi_atexit
577 #undef TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY
578 #define TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY \
579 arm_cxx_determine_class_data_visibility
581 #undef TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT
582 #define TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT arm_cxx_class_data_always_comdat
584 #undef TARGET_RETURN_IN_MSB
585 #define TARGET_RETURN_IN_MSB arm_return_in_msb
587 #undef TARGET_RETURN_IN_MEMORY
588 #define TARGET_RETURN_IN_MEMORY arm_return_in_memory
590 #undef TARGET_MUST_PASS_IN_STACK
591 #define TARGET_MUST_PASS_IN_STACK arm_must_pass_in_stack
593 #if ARM_UNWIND_INFO
594 #undef TARGET_ASM_UNWIND_EMIT
595 #define TARGET_ASM_UNWIND_EMIT arm_unwind_emit
597 /* EABI unwinding tables use a different format for the typeinfo tables. */
598 #undef TARGET_ASM_TTYPE
599 #define TARGET_ASM_TTYPE arm_output_ttype
601 #undef TARGET_ARM_EABI_UNWINDER
602 #define TARGET_ARM_EABI_UNWINDER true
604 #undef TARGET_ASM_EMIT_EXCEPT_PERSONALITY
605 #define TARGET_ASM_EMIT_EXCEPT_PERSONALITY arm_asm_emit_except_personality
607 #endif /* ARM_UNWIND_INFO */
609 #undef TARGET_ASM_INIT_SECTIONS
610 #define TARGET_ASM_INIT_SECTIONS arm_asm_init_sections
612 #undef TARGET_DWARF_REGISTER_SPAN
613 #define TARGET_DWARF_REGISTER_SPAN arm_dwarf_register_span
615 #undef TARGET_CANNOT_COPY_INSN_P
616 #define TARGET_CANNOT_COPY_INSN_P arm_cannot_copy_insn_p
618 #ifdef HAVE_AS_TLS
619 #undef TARGET_HAVE_TLS
620 #define TARGET_HAVE_TLS true
621 #endif
623 #undef TARGET_HAVE_CONDITIONAL_EXECUTION
624 #define TARGET_HAVE_CONDITIONAL_EXECUTION arm_have_conditional_execution
626 #undef TARGET_LEGITIMATE_CONSTANT_P
627 #define TARGET_LEGITIMATE_CONSTANT_P arm_legitimate_constant_p
629 #undef TARGET_CANNOT_FORCE_CONST_MEM
630 #define TARGET_CANNOT_FORCE_CONST_MEM arm_cannot_force_const_mem
632 #undef TARGET_MAX_ANCHOR_OFFSET
633 #define TARGET_MAX_ANCHOR_OFFSET 4095
635 /* The minimum is set such that the total size of the block
636 for a particular anchor is 4088 + 1 + 4095 = 8184 bytes, which is
637 divisible by eight, ensuring natural spacing of anchors. */
638 #undef TARGET_MIN_ANCHOR_OFFSET
639 #define TARGET_MIN_ANCHOR_OFFSET -4088
641 #undef TARGET_SCHED_ISSUE_RATE
642 #define TARGET_SCHED_ISSUE_RATE arm_issue_rate
644 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
645 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
646 arm_first_cycle_multipass_dfa_lookahead
648 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD
649 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD \
650 arm_first_cycle_multipass_dfa_lookahead_guard
652 #undef TARGET_MANGLE_TYPE
653 #define TARGET_MANGLE_TYPE arm_mangle_type
655 #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
656 #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV arm_atomic_assign_expand_fenv
658 #undef TARGET_BUILD_BUILTIN_VA_LIST
659 #define TARGET_BUILD_BUILTIN_VA_LIST arm_build_builtin_va_list
660 #undef TARGET_EXPAND_BUILTIN_VA_START
661 #define TARGET_EXPAND_BUILTIN_VA_START arm_expand_builtin_va_start
662 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
663 #define TARGET_GIMPLIFY_VA_ARG_EXPR arm_gimplify_va_arg_expr
665 #ifdef HAVE_AS_TLS
666 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
667 #define TARGET_ASM_OUTPUT_DWARF_DTPREL arm_output_dwarf_dtprel
668 #endif
670 #undef TARGET_LEGITIMATE_ADDRESS_P
671 #define TARGET_LEGITIMATE_ADDRESS_P arm_legitimate_address_p
673 #undef TARGET_PREFERRED_RELOAD_CLASS
674 #define TARGET_PREFERRED_RELOAD_CLASS arm_preferred_reload_class
676 #undef TARGET_PROMOTED_TYPE
677 #define TARGET_PROMOTED_TYPE arm_promoted_type
679 #undef TARGET_SCALAR_MODE_SUPPORTED_P
680 #define TARGET_SCALAR_MODE_SUPPORTED_P arm_scalar_mode_supported_p
682 #undef TARGET_FRAME_POINTER_REQUIRED
683 #define TARGET_FRAME_POINTER_REQUIRED arm_frame_pointer_required
685 #undef TARGET_CAN_ELIMINATE
686 #define TARGET_CAN_ELIMINATE arm_can_eliminate
688 #undef TARGET_CONDITIONAL_REGISTER_USAGE
689 #define TARGET_CONDITIONAL_REGISTER_USAGE arm_conditional_register_usage
691 #undef TARGET_CLASS_LIKELY_SPILLED_P
692 #define TARGET_CLASS_LIKELY_SPILLED_P arm_class_likely_spilled_p
694 #undef TARGET_VECTORIZE_BUILTINS
695 #define TARGET_VECTORIZE_BUILTINS
697 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
698 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
699 arm_builtin_vectorized_function
701 #undef TARGET_VECTOR_ALIGNMENT
702 #define TARGET_VECTOR_ALIGNMENT arm_vector_alignment
704 #undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
705 #define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE \
706 arm_vector_alignment_reachable
708 #undef TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT
709 #define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT \
710 arm_builtin_support_vector_misalignment
712 #undef TARGET_PREFERRED_RENAME_CLASS
713 #define TARGET_PREFERRED_RENAME_CLASS \
714 arm_preferred_rename_class
716 #undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
717 #define TARGET_VECTORIZE_VEC_PERM_CONST_OK \
718 arm_vectorize_vec_perm_const_ok
720 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
721 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
722 arm_builtin_vectorization_cost
723 #undef TARGET_VECTORIZE_ADD_STMT_COST
724 #define TARGET_VECTORIZE_ADD_STMT_COST arm_add_stmt_cost
726 #undef TARGET_CANONICALIZE_COMPARISON
727 #define TARGET_CANONICALIZE_COMPARISON \
728 arm_canonicalize_comparison
730 #undef TARGET_ASAN_SHADOW_OFFSET
731 #define TARGET_ASAN_SHADOW_OFFSET arm_asan_shadow_offset
733 #undef MAX_INSN_PER_IT_BLOCK
734 #define MAX_INSN_PER_IT_BLOCK (arm_restrict_it ? 1 : 4)
736 #undef TARGET_CAN_USE_DOLOOP_P
737 #define TARGET_CAN_USE_DOLOOP_P can_use_doloop_if_innermost
739 #undef TARGET_CONST_NOT_OK_FOR_DEBUG_P
740 #define TARGET_CONST_NOT_OK_FOR_DEBUG_P arm_const_not_ok_for_debug_p
742 #undef TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS
743 #define TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS true
745 #undef TARGET_SCHED_FUSION_PRIORITY
746 #define TARGET_SCHED_FUSION_PRIORITY arm_sched_fusion_priority
748 #undef TARGET_ASM_FUNCTION_SECTION
749 #define TARGET_ASM_FUNCTION_SECTION arm_function_section
751 #undef TARGET_ASM_ELF_FLAGS_NUMERIC
752 #define TARGET_ASM_ELF_FLAGS_NUMERIC arm_asm_elf_flags_numeric
754 #undef TARGET_SECTION_TYPE_FLAGS
755 #define TARGET_SECTION_TYPE_FLAGS arm_elf_section_type_flags
757 #undef TARGET_EXPAND_DIVMOD_LIBFUNC
758 #define TARGET_EXPAND_DIVMOD_LIBFUNC arm_expand_divmod_libfunc
760 #undef TARGET_C_EXCESS_PRECISION
761 #define TARGET_C_EXCESS_PRECISION arm_excess_precision
763 /* Although the architecture reserves bits 0 and 1, only the former is
764 used for ARM/Thumb ISA selection in v7 and earlier versions. */
765 #undef TARGET_CUSTOM_FUNCTION_DESCRIPTORS
766 #define TARGET_CUSTOM_FUNCTION_DESCRIPTORS 2
768 struct gcc_target targetm = TARGET_INITIALIZER;
770 /* Obstack for minipool constant handling. */
771 static struct obstack minipool_obstack;
772 static char * minipool_startobj;
774 /* The maximum number of insns skipped which
775 will be conditionalised if possible. */
776 static int max_insns_skipped = 5;
778 extern FILE * asm_out_file;
780 /* True if we are currently building a constant table. */
781 int making_const_table;
783 /* The processor for which instructions should be scheduled. */
784 enum processor_type arm_tune = TARGET_CPU_arm_none;
786 /* The current tuning set. */
787 const struct tune_params *current_tune;
789 /* Which floating point hardware to schedule for. */
790 int arm_fpu_attr;
792 /* Used for Thumb call_via trampolines. */
793 rtx thumb_call_via_label[14];
794 static int thumb_call_reg_needed;
796 /* The bits in this mask specify which instruction scheduling options should
797 be used. */
798 unsigned int tune_flags = 0;
800 /* The highest ARM architecture version supported by the
801 target. */
802 enum base_architecture arm_base_arch = BASE_ARCH_0;
804 /* Active target architecture and tuning. */
806 struct arm_build_target arm_active_target;
808 /* The following are used in the arm.md file as equivalents to bits
809 in the above two flag variables. */
811 /* Nonzero if this chip supports the ARM Architecture 3M extensions. */
812 int arm_arch3m = 0;
814 /* Nonzero if this chip supports the ARM Architecture 4 extensions. */
815 int arm_arch4 = 0;
817 /* Nonzero if this chip supports the ARM Architecture 4t extensions. */
818 int arm_arch4t = 0;
820 /* Nonzero if this chip supports the ARM Architecture 5 extensions. */
821 int arm_arch5 = 0;
823 /* Nonzero if this chip supports the ARM Architecture 5E extensions. */
824 int arm_arch5e = 0;
826 /* Nonzero if this chip supports the ARM Architecture 5TE extensions. */
827 int arm_arch5te = 0;
829 /* Nonzero if this chip supports the ARM Architecture 6 extensions. */
830 int arm_arch6 = 0;
832 /* Nonzero if this chip supports the ARM 6K extensions. */
833 int arm_arch6k = 0;
835 /* Nonzero if this chip supports the ARM 6KZ extensions. */
836 int arm_arch6kz = 0;
838 /* Nonzero if instructions present in ARMv6-M can be used. */
839 int arm_arch6m = 0;
841 /* Nonzero if this chip supports the ARM 7 extensions. */
842 int arm_arch7 = 0;
844 /* Nonzero if this chip supports the Large Physical Address Extension. */
845 int arm_arch_lpae = 0;
847 /* Nonzero if instructions not present in the 'M' profile can be used. */
848 int arm_arch_notm = 0;
850 /* Nonzero if instructions present in ARMv7E-M can be used. */
851 int arm_arch7em = 0;
853 /* Nonzero if instructions present in ARMv8 can be used. */
854 int arm_arch8 = 0;
856 /* Nonzero if this chip supports the ARMv8.1 extensions. */
857 int arm_arch8_1 = 0;
859 /* Nonzero if this chip supports the ARM Architecture 8.2 extensions. */
860 int arm_arch8_2 = 0;
862 /* Nonzero if this chip supports the FP16 instructions extension of ARM
863 Architecture 8.2. */
864 int arm_fp16_inst = 0;
866 /* Nonzero if this chip can benefit from load scheduling. */
867 int arm_ld_sched = 0;
869 /* Nonzero if this chip is a StrongARM. */
870 int arm_tune_strongarm = 0;
872 /* Nonzero if this chip supports Intel Wireless MMX technology. */
873 int arm_arch_iwmmxt = 0;
875 /* Nonzero if this chip supports Intel Wireless MMX2 technology. */
876 int arm_arch_iwmmxt2 = 0;
878 /* Nonzero if this chip is an XScale. */
879 int arm_arch_xscale = 0;
881 /* Nonzero if tuning for XScale. */
882 int arm_tune_xscale = 0;
884 /* Nonzero if we want to tune for stores that access the write-buffer.
885 This typically means an ARM6 or ARM7 with MMU or MPU. */
886 int arm_tune_wbuf = 0;
888 /* Nonzero if tuning for Cortex-A9. */
889 int arm_tune_cortex_a9 = 0;
891 /* Nonzero if we should define __THUMB_INTERWORK__ in the
892 preprocessor.
893 XXX This is a bit of a hack; it's intended to help work around
894 problems in GLD which doesn't understand that armv5t code is
895 interworking clean. */
896 int arm_cpp_interwork = 0;
898 /* Nonzero if chip supports Thumb 1. */
899 int arm_arch_thumb1;
901 /* Nonzero if chip supports Thumb 2. */
902 int arm_arch_thumb2;
904 /* Nonzero if chip supports integer division instruction. */
905 int arm_arch_arm_hwdiv;
906 int arm_arch_thumb_hwdiv;
908 /* Nonzero if chip disallows volatile memory access in IT block. */
909 int arm_arch_no_volatile_ce;
911 /* Nonzero if we should use Neon to handle 64-bit operations rather
912 than core registers. */
913 int prefer_neon_for_64bits = 0;
915 /* Nonzero if we shouldn't use literal pools. */
916 bool arm_disable_literal_pool = false;
918 /* The register number to be used for the PIC offset register. */
919 unsigned arm_pic_register = INVALID_REGNUM;
921 enum arm_pcs arm_pcs_default;
923 /* For an explanation of these variables, see final_prescan_insn below. */
924 int arm_ccfsm_state;
925 /* arm_current_cc is also used for Thumb-2 cond_exec blocks. */
926 enum arm_cond_code arm_current_cc;
928 rtx arm_target_insn;
929 int arm_target_label;
930 /* The number of conditionally executed insns, including the current insn. */
931 int arm_condexec_count = 0;
932 /* A bitmask specifying the patterns for the IT block.
933 Zero means do not output an IT block before this insn. */
934 int arm_condexec_mask = 0;
935 /* The number of bits used in arm_condexec_mask. */
936 int arm_condexec_masklen = 0;
938 /* Nonzero if chip supports the ARMv8 CRC instructions. */
939 int arm_arch_crc = 0;
941 /* Nonzero if chip supports the ARMv8-M security extensions. */
942 int arm_arch_cmse = 0;
945 /* Nonzero if the core has a very small, high-latency multiply unit. */
945 int arm_m_profile_small_mul = 0;
947 /* The condition codes of the ARM, and the inverse function. */
948 static const char * const arm_condition_codes[] =
950 "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
951 "hi", "ls", "ge", "lt", "gt", "le", "al", "nv"
954 /* The register numbers in sequence, for passing to arm_gen_load_multiple. */
955 int arm_regs_in_sequence[] =
957 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
960 #define ARM_LSL_NAME "lsl"
961 #define streq(string1, string2) (strcmp (string1, string2) == 0)
963 #define THUMB2_WORK_REGS (0xff & ~( (1 << THUMB_HARD_FRAME_POINTER_REGNUM) \
964 | (1 << SP_REGNUM) | (1 << PC_REGNUM) \
965 | (1 << PIC_OFFSET_TABLE_REGNUM)))
967 /* Initialization code. */
969 struct processors
971 const char *const name;
972 enum processor_type core;
973 unsigned int tune_flags;
974 const char *arch;
975 enum base_architecture base_arch;
976 enum isa_feature isa_bits[isa_num_bits];
977 const struct tune_params *const tune;
981 #define ARM_PREFETCH_NOT_BENEFICIAL { 0, -1, -1 }
982 #define ARM_PREFETCH_BENEFICIAL(num_slots,l1_size,l1_line_size) \
984 num_slots, \
985 l1_size, \
986 l1_line_size \
989 /* arm generic vectorizer costs. */
990 static const
991 struct cpu_vec_costs arm_default_vec_cost = {
992 1, /* scalar_stmt_cost. */
993 1, /* scalar load_cost. */
994 1, /* scalar_store_cost. */
995 1, /* vec_stmt_cost. */
996 1, /* vec_to_scalar_cost. */
997 1, /* scalar_to_vec_cost. */
998 1, /* vec_align_load_cost. */
999 1, /* vec_unalign_load_cost. */
1000 1, /* vec_unalign_store_cost. */
1001 1, /* vec_store_cost. */
1002 3, /* cond_taken_branch_cost. */
1003 1, /* cond_not_taken_branch_cost. */
1006 /* Cost tables for AArch32 + AArch64 cores should go in aarch-cost-tables.h */
1007 #include "aarch-cost-tables.h"
1011 const struct cpu_cost_table cortexa9_extra_costs =
1013 /* ALU */
1015 0, /* arith. */
1016 0, /* logical. */
1017 0, /* shift. */
1018 COSTS_N_INSNS (1), /* shift_reg. */
1019 COSTS_N_INSNS (1), /* arith_shift. */
1020 COSTS_N_INSNS (2), /* arith_shift_reg. */
1021 0, /* log_shift. */
1022 COSTS_N_INSNS (1), /* log_shift_reg. */
1023 COSTS_N_INSNS (1), /* extend. */
1024 COSTS_N_INSNS (2), /* extend_arith. */
1025 COSTS_N_INSNS (1), /* bfi. */
1026 COSTS_N_INSNS (1), /* bfx. */
1027 0, /* clz. */
1028 0, /* rev. */
1029 0, /* non_exec. */
1030 true /* non_exec_costs_exec. */
1033 /* MULT SImode */
1035 COSTS_N_INSNS (3), /* simple. */
1036 COSTS_N_INSNS (3), /* flag_setting. */
1037 COSTS_N_INSNS (2), /* extend. */
1038 COSTS_N_INSNS (3), /* add. */
1039 COSTS_N_INSNS (2), /* extend_add. */
1040 COSTS_N_INSNS (30) /* idiv. No HW div on Cortex A9. */
1042 /* MULT DImode */
1044 0, /* simple (N/A). */
1045 0, /* flag_setting (N/A). */
1046 COSTS_N_INSNS (4), /* extend. */
1047 0, /* add (N/A). */
1048 COSTS_N_INSNS (4), /* extend_add. */
1049 0 /* idiv (N/A). */
1052 /* LD/ST */
1054 COSTS_N_INSNS (2), /* load. */
1055 COSTS_N_INSNS (2), /* load_sign_extend. */
1056 COSTS_N_INSNS (2), /* ldrd. */
1057 COSTS_N_INSNS (2), /* ldm_1st. */
1058 1, /* ldm_regs_per_insn_1st. */
1059 2, /* ldm_regs_per_insn_subsequent. */
1060 COSTS_N_INSNS (5), /* loadf. */
1061 COSTS_N_INSNS (5), /* loadd. */
1062 COSTS_N_INSNS (1), /* load_unaligned. */
1063 COSTS_N_INSNS (2), /* store. */
1064 COSTS_N_INSNS (2), /* strd. */
1065 COSTS_N_INSNS (2), /* stm_1st. */
1066 1, /* stm_regs_per_insn_1st. */
1067 2, /* stm_regs_per_insn_subsequent. */
1068 COSTS_N_INSNS (1), /* storef. */
1069 COSTS_N_INSNS (1), /* stored. */
1070 COSTS_N_INSNS (1), /* store_unaligned. */
1071 COSTS_N_INSNS (1), /* loadv. */
1072 COSTS_N_INSNS (1) /* storev. */
1075 /* FP SFmode */
1077 COSTS_N_INSNS (14), /* div. */
1078 COSTS_N_INSNS (4), /* mult. */
1079 COSTS_N_INSNS (7), /* mult_addsub. */
1080 COSTS_N_INSNS (30), /* fma. */
1081 COSTS_N_INSNS (3), /* addsub. */
1082 COSTS_N_INSNS (1), /* fpconst. */
1083 COSTS_N_INSNS (1), /* neg. */
1084 COSTS_N_INSNS (3), /* compare. */
1085 COSTS_N_INSNS (3), /* widen. */
1086 COSTS_N_INSNS (3), /* narrow. */
1087 COSTS_N_INSNS (3), /* toint. */
1088 COSTS_N_INSNS (3), /* fromint. */
1089 COSTS_N_INSNS (3) /* roundint. */
1091 /* FP DFmode */
1093 COSTS_N_INSNS (24), /* div. */
1094 COSTS_N_INSNS (5), /* mult. */
1095 COSTS_N_INSNS (8), /* mult_addsub. */
1096 COSTS_N_INSNS (30), /* fma. */
1097 COSTS_N_INSNS (3), /* addsub. */
1098 COSTS_N_INSNS (1), /* fpconst. */
1099 COSTS_N_INSNS (1), /* neg. */
1100 COSTS_N_INSNS (3), /* compare. */
1101 COSTS_N_INSNS (3), /* widen. */
1102 COSTS_N_INSNS (3), /* narrow. */
1103 COSTS_N_INSNS (3), /* toint. */
1104 COSTS_N_INSNS (3), /* fromint. */
1105 COSTS_N_INSNS (3) /* roundint. */
1108 /* Vector */
1110 COSTS_N_INSNS (1) /* alu. */
1114 const struct cpu_cost_table cortexa8_extra_costs =
1116 /* ALU */
1118 0, /* arith. */
1119 0, /* logical. */
1120 COSTS_N_INSNS (1), /* shift. */
1121 0, /* shift_reg. */
1122 COSTS_N_INSNS (1), /* arith_shift. */
1123 0, /* arith_shift_reg. */
1124 COSTS_N_INSNS (1), /* log_shift. */
1125 0, /* log_shift_reg. */
1126 0, /* extend. */
1127 0, /* extend_arith. */
1128 0, /* bfi. */
1129 0, /* bfx. */
1130 0, /* clz. */
1131 0, /* rev. */
1132 0, /* non_exec. */
1133 true /* non_exec_costs_exec. */
1136 /* MULT SImode */
1138 COSTS_N_INSNS (1), /* simple. */
1139 COSTS_N_INSNS (1), /* flag_setting. */
1140 COSTS_N_INSNS (1), /* extend. */
1141 COSTS_N_INSNS (1), /* add. */
1142 COSTS_N_INSNS (1), /* extend_add. */
1143 COSTS_N_INSNS (30) /* idiv. No HW div on Cortex A8. */
1145 /* MULT DImode */
1147 0, /* simple (N/A). */
1148 0, /* flag_setting (N/A). */
1149 COSTS_N_INSNS (2), /* extend. */
1150 0, /* add (N/A). */
1151 COSTS_N_INSNS (2), /* extend_add. */
1152 0 /* idiv (N/A). */
1155 /* LD/ST */
1157 COSTS_N_INSNS (1), /* load. */
1158 COSTS_N_INSNS (1), /* load_sign_extend. */
1159 COSTS_N_INSNS (1), /* ldrd. */
1160 COSTS_N_INSNS (1), /* ldm_1st. */
1161 1, /* ldm_regs_per_insn_1st. */
1162 2, /* ldm_regs_per_insn_subsequent. */
1163 COSTS_N_INSNS (1), /* loadf. */
1164 COSTS_N_INSNS (1), /* loadd. */
1165 COSTS_N_INSNS (1), /* load_unaligned. */
1166 COSTS_N_INSNS (1), /* store. */
1167 COSTS_N_INSNS (1), /* strd. */
1168 COSTS_N_INSNS (1), /* stm_1st. */
1169 1, /* stm_regs_per_insn_1st. */
1170 2, /* stm_regs_per_insn_subsequent. */
1171 COSTS_N_INSNS (1), /* storef. */
1172 COSTS_N_INSNS (1), /* stored. */
1173 COSTS_N_INSNS (1), /* store_unaligned. */
1174 COSTS_N_INSNS (1), /* loadv. */
1175 COSTS_N_INSNS (1) /* storev. */
1178 /* FP SFmode */
1180 COSTS_N_INSNS (36), /* div. */
1181 COSTS_N_INSNS (11), /* mult. */
1182 COSTS_N_INSNS (20), /* mult_addsub. */
1183 COSTS_N_INSNS (30), /* fma. */
1184 COSTS_N_INSNS (9), /* addsub. */
1185 COSTS_N_INSNS (3), /* fpconst. */
1186 COSTS_N_INSNS (3), /* neg. */
1187 COSTS_N_INSNS (6), /* compare. */
1188 COSTS_N_INSNS (4), /* widen. */
1189 COSTS_N_INSNS (4), /* narrow. */
1190 COSTS_N_INSNS (8), /* toint. */
1191 COSTS_N_INSNS (8), /* fromint. */
1192 COSTS_N_INSNS (8) /* roundint. */
1194 /* FP DFmode */
1196 COSTS_N_INSNS (64), /* div. */
1197 COSTS_N_INSNS (16), /* mult. */
1198 COSTS_N_INSNS (25), /* mult_addsub. */
1199 COSTS_N_INSNS (30), /* fma. */
1200 COSTS_N_INSNS (9), /* addsub. */
1201 COSTS_N_INSNS (3), /* fpconst. */
1202 COSTS_N_INSNS (3), /* neg. */
1203 COSTS_N_INSNS (6), /* compare. */
1204 COSTS_N_INSNS (6), /* widen. */
1205 COSTS_N_INSNS (6), /* narrow. */
1206 COSTS_N_INSNS (8), /* toint. */
1207 COSTS_N_INSNS (8), /* fromint. */
1208 COSTS_N_INSNS (8) /* roundint. */
1211 /* Vector */
1213 COSTS_N_INSNS (1) /* alu. */
1217 const struct cpu_cost_table cortexa5_extra_costs =
1219 /* ALU */
1221 0, /* arith. */
1222 0, /* logical. */
1223 COSTS_N_INSNS (1), /* shift. */
1224 COSTS_N_INSNS (1), /* shift_reg. */
1225 COSTS_N_INSNS (1), /* arith_shift. */
1226 COSTS_N_INSNS (1), /* arith_shift_reg. */
1227 COSTS_N_INSNS (1), /* log_shift. */
1228 COSTS_N_INSNS (1), /* log_shift_reg. */
1229 COSTS_N_INSNS (1), /* extend. */
1230 COSTS_N_INSNS (1), /* extend_arith. */
1231 COSTS_N_INSNS (1), /* bfi. */
1232 COSTS_N_INSNS (1), /* bfx. */
1233 COSTS_N_INSNS (1), /* clz. */
1234 COSTS_N_INSNS (1), /* rev. */
1235 0, /* non_exec. */
1236 true /* non_exec_costs_exec. */
1240 /* MULT SImode */
1242 0, /* simple. */
1243 COSTS_N_INSNS (1), /* flag_setting. */
1244 COSTS_N_INSNS (1), /* extend. */
1245 COSTS_N_INSNS (1), /* add. */
1246 COSTS_N_INSNS (1), /* extend_add. */
1247 COSTS_N_INSNS (7) /* idiv. */
1249 /* MULT DImode */
1251 0, /* simple (N/A). */
1252 0, /* flag_setting (N/A). */
1253 COSTS_N_INSNS (1), /* extend. */
1254 0, /* add. */
1255 COSTS_N_INSNS (2), /* extend_add. */
1256 0 /* idiv (N/A). */
1259 /* LD/ST */
1261 COSTS_N_INSNS (1), /* load. */
1262 COSTS_N_INSNS (1), /* load_sign_extend. */
1263 COSTS_N_INSNS (6), /* ldrd. */
1264 COSTS_N_INSNS (1), /* ldm_1st. */
1265 1, /* ldm_regs_per_insn_1st. */
1266 2, /* ldm_regs_per_insn_subsequent. */
1267 COSTS_N_INSNS (2), /* loadf. */
1268 COSTS_N_INSNS (4), /* loadd. */
1269 COSTS_N_INSNS (1), /* load_unaligned. */
1270 COSTS_N_INSNS (1), /* store. */
1271 COSTS_N_INSNS (3), /* strd. */
1272 COSTS_N_INSNS (1), /* stm_1st. */
1273 1, /* stm_regs_per_insn_1st. */
1274 2, /* stm_regs_per_insn_subsequent. */
1275 COSTS_N_INSNS (2), /* storef. */
1276 COSTS_N_INSNS (2), /* stored. */
1277 COSTS_N_INSNS (1), /* store_unaligned. */
1278 COSTS_N_INSNS (1), /* loadv. */
1279 COSTS_N_INSNS (1) /* storev. */
1282 /* FP SFmode */
1284 COSTS_N_INSNS (15), /* div. */
1285 COSTS_N_INSNS (3), /* mult. */
1286 COSTS_N_INSNS (7), /* mult_addsub. */
1287 COSTS_N_INSNS (7), /* fma. */
1288 COSTS_N_INSNS (3), /* addsub. */
1289 COSTS_N_INSNS (3), /* fpconst. */
1290 COSTS_N_INSNS (3), /* neg. */
1291 COSTS_N_INSNS (3), /* compare. */
1292 COSTS_N_INSNS (3), /* widen. */
1293 COSTS_N_INSNS (3), /* narrow. */
1294 COSTS_N_INSNS (3), /* toint. */
1295 COSTS_N_INSNS (3), /* fromint. */
1296 COSTS_N_INSNS (3) /* roundint. */
1298 /* FP DFmode */
1300 COSTS_N_INSNS (30), /* div. */
1301 COSTS_N_INSNS (6), /* mult. */
1302 COSTS_N_INSNS (10), /* mult_addsub. */
1303 COSTS_N_INSNS (7), /* fma. */
1304 COSTS_N_INSNS (3), /* addsub. */
1305 COSTS_N_INSNS (3), /* fpconst. */
1306 COSTS_N_INSNS (3), /* neg. */
1307 COSTS_N_INSNS (3), /* compare. */
1308 COSTS_N_INSNS (3), /* widen. */
1309 COSTS_N_INSNS (3), /* narrow. */
1310 COSTS_N_INSNS (3), /* toint. */
1311 COSTS_N_INSNS (3), /* fromint. */
1312 COSTS_N_INSNS (3) /* roundint. */
1315 /* Vector */
1317 COSTS_N_INSNS (1) /* alu. */
1322 const struct cpu_cost_table cortexa7_extra_costs =
1324 /* ALU */
1326 0, /* arith. */
1327 0, /* logical. */
1328 COSTS_N_INSNS (1), /* shift. */
1329 COSTS_N_INSNS (1), /* shift_reg. */
1330 COSTS_N_INSNS (1), /* arith_shift. */
1331 COSTS_N_INSNS (1), /* arith_shift_reg. */
1332 COSTS_N_INSNS (1), /* log_shift. */
1333 COSTS_N_INSNS (1), /* log_shift_reg. */
1334 COSTS_N_INSNS (1), /* extend. */
1335 COSTS_N_INSNS (1), /* extend_arith. */
1336 COSTS_N_INSNS (1), /* bfi. */
1337 COSTS_N_INSNS (1), /* bfx. */
1338 COSTS_N_INSNS (1), /* clz. */
1339 COSTS_N_INSNS (1), /* rev. */
1340 0, /* non_exec. */
1341 true /* non_exec_costs_exec. */
1345 /* MULT SImode */
1347 0, /* simple. */
1348 COSTS_N_INSNS (1), /* flag_setting. */
1349 COSTS_N_INSNS (1), /* extend. */
1350 COSTS_N_INSNS (1), /* add. */
1351 COSTS_N_INSNS (1), /* extend_add. */
1352 COSTS_N_INSNS (7) /* idiv. */
1354 /* MULT DImode */
1356 0, /* simple (N/A). */
1357 0, /* flag_setting (N/A). */
1358 COSTS_N_INSNS (1), /* extend. */
1359 0, /* add. */
1360 COSTS_N_INSNS (2), /* extend_add. */
1361 0 /* idiv (N/A). */
1364 /* LD/ST */
1366 COSTS_N_INSNS (1), /* load. */
1367 COSTS_N_INSNS (1), /* load_sign_extend. */
1368 COSTS_N_INSNS (3), /* ldrd. */
1369 COSTS_N_INSNS (1), /* ldm_1st. */
1370 1, /* ldm_regs_per_insn_1st. */
1371 2, /* ldm_regs_per_insn_subsequent. */
1372 COSTS_N_INSNS (2), /* loadf. */
1373 COSTS_N_INSNS (2), /* loadd. */
1374 COSTS_N_INSNS (1), /* load_unaligned. */
1375 COSTS_N_INSNS (1), /* store. */
1376 COSTS_N_INSNS (3), /* strd. */
1377 COSTS_N_INSNS (1), /* stm_1st. */
1378 1, /* stm_regs_per_insn_1st. */
1379 2, /* stm_regs_per_insn_subsequent. */
1380 COSTS_N_INSNS (2), /* storef. */
1381 COSTS_N_INSNS (2), /* stored. */
1382 COSTS_N_INSNS (1), /* store_unaligned. */
1383 COSTS_N_INSNS (1), /* loadv. */
1384 COSTS_N_INSNS (1) /* storev. */
1387 /* FP SFmode */
1389 COSTS_N_INSNS (15), /* div. */
1390 COSTS_N_INSNS (3), /* mult. */
1391 COSTS_N_INSNS (7), /* mult_addsub. */
1392 COSTS_N_INSNS (7), /* fma. */
1393 COSTS_N_INSNS (3), /* addsub. */
1394 COSTS_N_INSNS (3), /* fpconst. */
1395 COSTS_N_INSNS (3), /* neg. */
1396 COSTS_N_INSNS (3), /* compare. */
1397 COSTS_N_INSNS (3), /* widen. */
1398 COSTS_N_INSNS (3), /* narrow. */
1399 COSTS_N_INSNS (3), /* toint. */
1400 COSTS_N_INSNS (3), /* fromint. */
1401 COSTS_N_INSNS (3) /* roundint. */
1403 /* FP DFmode */
1405 COSTS_N_INSNS (30), /* div. */
1406 COSTS_N_INSNS (6), /* mult. */
1407 COSTS_N_INSNS (10), /* mult_addsub. */
1408 COSTS_N_INSNS (7), /* fma. */
1409 COSTS_N_INSNS (3), /* addsub. */
1410 COSTS_N_INSNS (3), /* fpconst. */
1411 COSTS_N_INSNS (3), /* neg. */
1412 COSTS_N_INSNS (3), /* compare. */
1413 COSTS_N_INSNS (3), /* widen. */
1414 COSTS_N_INSNS (3), /* narrow. */
1415 COSTS_N_INSNS (3), /* toint. */
1416 COSTS_N_INSNS (3), /* fromint. */
1417 COSTS_N_INSNS (3) /* roundint. */
1420 /* Vector */
1422 COSTS_N_INSNS (1) /* alu. */
1426 const struct cpu_cost_table cortexa12_extra_costs =
1428 /* ALU */
1430 0, /* arith. */
1431 0, /* logical. */
1432 0, /* shift. */
1433 COSTS_N_INSNS (1), /* shift_reg. */
1434 COSTS_N_INSNS (1), /* arith_shift. */
1435 COSTS_N_INSNS (1), /* arith_shift_reg. */
1436 COSTS_N_INSNS (1), /* log_shift. */
1437 COSTS_N_INSNS (1), /* log_shift_reg. */
1438 0, /* extend. */
1439 COSTS_N_INSNS (1), /* extend_arith. */
1440 0, /* bfi. */
1441 COSTS_N_INSNS (1), /* bfx. */
1442 COSTS_N_INSNS (1), /* clz. */
1443 COSTS_N_INSNS (1), /* rev. */
1444 0, /* non_exec. */
1445 true /* non_exec_costs_exec. */
1447 /* MULT SImode */
1450 COSTS_N_INSNS (2), /* simple. */
1451 COSTS_N_INSNS (3), /* flag_setting. */
1452 COSTS_N_INSNS (2), /* extend. */
1453 COSTS_N_INSNS (3), /* add. */
1454 COSTS_N_INSNS (2), /* extend_add. */
1455 COSTS_N_INSNS (18) /* idiv. */
1457 /* MULT DImode */
1459 0, /* simple (N/A). */
1460 0, /* flag_setting (N/A). */
1461 COSTS_N_INSNS (3), /* extend. */
1462 0, /* add (N/A). */
1463 COSTS_N_INSNS (3), /* extend_add. */
1464 0 /* idiv (N/A). */
1467 /* LD/ST */
1469 COSTS_N_INSNS (3), /* load. */
1470 COSTS_N_INSNS (3), /* load_sign_extend. */
1471 COSTS_N_INSNS (3), /* ldrd. */
1472 COSTS_N_INSNS (3), /* ldm_1st. */
1473 1, /* ldm_regs_per_insn_1st. */
1474 2, /* ldm_regs_per_insn_subsequent. */
1475 COSTS_N_INSNS (3), /* loadf. */
1476 COSTS_N_INSNS (3), /* loadd. */
1477 0, /* load_unaligned. */
1478 0, /* store. */
1479 0, /* strd. */
1480 0, /* stm_1st. */
1481 1, /* stm_regs_per_insn_1st. */
1482 2, /* stm_regs_per_insn_subsequent. */
1483 COSTS_N_INSNS (2), /* storef. */
1484 COSTS_N_INSNS (2), /* stored. */
1485 0, /* store_unaligned. */
1486 COSTS_N_INSNS (1), /* loadv. */
1487 COSTS_N_INSNS (1) /* storev. */
1490 /* FP SFmode */
1492 COSTS_N_INSNS (17), /* div. */
1493 COSTS_N_INSNS (4), /* mult. */
1494 COSTS_N_INSNS (8), /* mult_addsub. */
1495 COSTS_N_INSNS (8), /* fma. */
1496 COSTS_N_INSNS (4), /* addsub. */
1497 COSTS_N_INSNS (2), /* fpconst. */
1498 COSTS_N_INSNS (2), /* neg. */
1499 COSTS_N_INSNS (2), /* compare. */
1500 COSTS_N_INSNS (4), /* widen. */
1501 COSTS_N_INSNS (4), /* narrow. */
1502 COSTS_N_INSNS (4), /* toint. */
1503 COSTS_N_INSNS (4), /* fromint. */
1504 COSTS_N_INSNS (4) /* roundint. */
1506 /* FP DFmode */
1508 COSTS_N_INSNS (31), /* div. */
1509 COSTS_N_INSNS (4), /* mult. */
1510 COSTS_N_INSNS (8), /* mult_addsub. */
1511 COSTS_N_INSNS (8), /* fma. */
1512 COSTS_N_INSNS (4), /* addsub. */
1513 COSTS_N_INSNS (2), /* fpconst. */
1514 COSTS_N_INSNS (2), /* neg. */
1515 COSTS_N_INSNS (2), /* compare. */
1516 COSTS_N_INSNS (4), /* widen. */
1517 COSTS_N_INSNS (4), /* narrow. */
1518 COSTS_N_INSNS (4), /* toint. */
1519 COSTS_N_INSNS (4), /* fromint. */
1520 COSTS_N_INSNS (4) /* roundint. */
1523 /* Vector */
1525 COSTS_N_INSNS (1) /* alu. */
1529 const struct cpu_cost_table cortexa15_extra_costs =
1531 /* ALU */
1533 0, /* arith. */
1534 0, /* logical. */
1535 0, /* shift. */
1536 0, /* shift_reg. */
1537 COSTS_N_INSNS (1), /* arith_shift. */
1538 COSTS_N_INSNS (1), /* arith_shift_reg. */
1539 COSTS_N_INSNS (1), /* log_shift. */
1540 COSTS_N_INSNS (1), /* log_shift_reg. */
1541 0, /* extend. */
1542 COSTS_N_INSNS (1), /* extend_arith. */
1543 COSTS_N_INSNS (1), /* bfi. */
1544 0, /* bfx. */
1545 0, /* clz. */
1546 0, /* rev. */
1547 0, /* non_exec. */
1548 true /* non_exec_costs_exec. */
1550 /* MULT SImode */
1553 COSTS_N_INSNS (2), /* simple. */
1554 COSTS_N_INSNS (3), /* flag_setting. */
1555 COSTS_N_INSNS (2), /* extend. */
1556 COSTS_N_INSNS (2), /* add. */
1557 COSTS_N_INSNS (2), /* extend_add. */
1558 COSTS_N_INSNS (18) /* idiv. */
1560 /* MULT DImode */
1562 0, /* simple (N/A). */
1563 0, /* flag_setting (N/A). */
1564 COSTS_N_INSNS (3), /* extend. */
1565 0, /* add (N/A). */
1566 COSTS_N_INSNS (3), /* extend_add. */
1567 0 /* idiv (N/A). */
1570 /* LD/ST */
1572 COSTS_N_INSNS (3), /* load. */
1573 COSTS_N_INSNS (3), /* load_sign_extend. */
1574 COSTS_N_INSNS (3), /* ldrd. */
1575 COSTS_N_INSNS (4), /* ldm_1st. */
1576 1, /* ldm_regs_per_insn_1st. */
1577 2, /* ldm_regs_per_insn_subsequent. */
1578 COSTS_N_INSNS (4), /* loadf. */
1579 COSTS_N_INSNS (4), /* loadd. */
1580 0, /* load_unaligned. */
1581 0, /* store. */
1582 0, /* strd. */
1583 COSTS_N_INSNS (1), /* stm_1st. */
1584 1, /* stm_regs_per_insn_1st. */
1585 2, /* stm_regs_per_insn_subsequent. */
1586 0, /* storef. */
1587 0, /* stored. */
1588 0, /* store_unaligned. */
1589 COSTS_N_INSNS (1), /* loadv. */
1590 COSTS_N_INSNS (1) /* storev. */
1593 /* FP SFmode */
1595 COSTS_N_INSNS (17), /* div. */
1596 COSTS_N_INSNS (4), /* mult. */
1597 COSTS_N_INSNS (8), /* mult_addsub. */
1598 COSTS_N_INSNS (8), /* fma. */
1599 COSTS_N_INSNS (4), /* addsub. */
1600 COSTS_N_INSNS (2), /* fpconst. */
1601 COSTS_N_INSNS (2), /* neg. */
1602 COSTS_N_INSNS (5), /* compare. */
1603 COSTS_N_INSNS (4), /* widen. */
1604 COSTS_N_INSNS (4), /* narrow. */
1605 COSTS_N_INSNS (4), /* toint. */
1606 COSTS_N_INSNS (4), /* fromint. */
1607 COSTS_N_INSNS (4) /* roundint. */
1609 /* FP DFmode */
1611 COSTS_N_INSNS (31), /* div. */
1612 COSTS_N_INSNS (4), /* mult. */
1613 COSTS_N_INSNS (8), /* mult_addsub. */
1614 COSTS_N_INSNS (8), /* fma. */
1615 COSTS_N_INSNS (4), /* addsub. */
1616 COSTS_N_INSNS (2), /* fpconst. */
1617 COSTS_N_INSNS (2), /* neg. */
1618 COSTS_N_INSNS (2), /* compare. */
1619 COSTS_N_INSNS (4), /* widen. */
1620 COSTS_N_INSNS (4), /* narrow. */
1621 COSTS_N_INSNS (4), /* toint. */
1622 COSTS_N_INSNS (4), /* fromint. */
1623 COSTS_N_INSNS (4) /* roundint. */
1626 /* Vector */
1628 COSTS_N_INSNS (1) /* alu. */
1632 const struct cpu_cost_table v7m_extra_costs =
1634 /* ALU */
1636 0, /* arith. */
1637 0, /* logical. */
1638 0, /* shift. */
1639 0, /* shift_reg. */
1640 0, /* arith_shift. */
1641 COSTS_N_INSNS (1), /* arith_shift_reg. */
1642 0, /* log_shift. */
1643 COSTS_N_INSNS (1), /* log_shift_reg. */
1644 0, /* extend. */
1645 COSTS_N_INSNS (1), /* extend_arith. */
1646 0, /* bfi. */
1647 0, /* bfx. */
1648 0, /* clz. */
1649 0, /* rev. */
1650 COSTS_N_INSNS (1), /* non_exec. */
1651 false /* non_exec_costs_exec. */
1654 /* MULT SImode */
1656 COSTS_N_INSNS (1), /* simple. */
1657 COSTS_N_INSNS (1), /* flag_setting. */
1658 COSTS_N_INSNS (2), /* extend. */
1659 COSTS_N_INSNS (1), /* add. */
1660 COSTS_N_INSNS (3), /* extend_add. */
1661 COSTS_N_INSNS (8) /* idiv. */
1663 /* MULT DImode */
1665 0, /* simple (N/A). */
1666 0, /* flag_setting (N/A). */
1667 COSTS_N_INSNS (2), /* extend. */
1668 0, /* add (N/A). */
1669 COSTS_N_INSNS (3), /* extend_add. */
1670 0 /* idiv (N/A). */
1673 /* LD/ST */
1675 COSTS_N_INSNS (2), /* load. */
1676 0, /* load_sign_extend. */
1677 COSTS_N_INSNS (3), /* ldrd. */
1678 COSTS_N_INSNS (2), /* ldm_1st. */
1679 1, /* ldm_regs_per_insn_1st. */
1680 1, /* ldm_regs_per_insn_subsequent. */
1681 COSTS_N_INSNS (2), /* loadf. */
1682 COSTS_N_INSNS (3), /* loadd. */
1683 COSTS_N_INSNS (1), /* load_unaligned. */
1684 COSTS_N_INSNS (2), /* store. */
1685 COSTS_N_INSNS (3), /* strd. */
1686 COSTS_N_INSNS (2), /* stm_1st. */
1687 1, /* stm_regs_per_insn_1st. */
1688 1, /* stm_regs_per_insn_subsequent. */
1689 COSTS_N_INSNS (2), /* storef. */
1690 COSTS_N_INSNS (3), /* stored. */
1691 COSTS_N_INSNS (1), /* store_unaligned. */
1692 COSTS_N_INSNS (1), /* loadv. */
1693 COSTS_N_INSNS (1) /* storev. */
1696 /* FP SFmode */
1698 COSTS_N_INSNS (7), /* div. */
1699 COSTS_N_INSNS (2), /* mult. */
1700 COSTS_N_INSNS (5), /* mult_addsub. */
1701 COSTS_N_INSNS (3), /* fma. */
1702 COSTS_N_INSNS (1), /* addsub. */
1703 0, /* fpconst. */
1704 0, /* neg. */
1705 0, /* compare. */
1706 0, /* widen. */
1707 0, /* narrow. */
1708 0, /* toint. */
1709 0, /* fromint. */
1710 0 /* roundint. */
1712 /* FP DFmode */
1714 COSTS_N_INSNS (15), /* div. */
1715 COSTS_N_INSNS (5), /* mult. */
1716 COSTS_N_INSNS (7), /* mult_addsub. */
1717 COSTS_N_INSNS (7), /* fma. */
1718 COSTS_N_INSNS (3), /* addsub. */
1719 0, /* fpconst. */
1720 0, /* neg. */
1721 0, /* compare. */
1722 0, /* widen. */
1723 0, /* narrow. */
1724 0, /* toint. */
1725 0, /* fromint. */
1726 0 /* roundint. */
1729 /* Vector */
1731 COSTS_N_INSNS (1) /* alu. */
1735 const struct tune_params arm_slowmul_tune =
1737 &generic_extra_costs, /* Insn extra costs. */
1738 NULL, /* Sched adj cost. */
1739 arm_default_branch_cost,
1740 &arm_default_vec_cost,
1741 3, /* Constant limit. */
1742 5, /* Max cond insns. */
1743 8, /* Memset max inline. */
1744 1, /* Issue rate. */
1745 ARM_PREFETCH_NOT_BENEFICIAL,
1746 tune_params::PREF_CONST_POOL_TRUE,
1747 tune_params::PREF_LDRD_FALSE,
1748 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1749 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1750 tune_params::DISPARAGE_FLAGS_NEITHER,
1751 tune_params::PREF_NEON_64_FALSE,
1752 tune_params::PREF_NEON_STRINGOPS_FALSE,
1753 tune_params::FUSE_NOTHING,
1754 tune_params::SCHED_AUTOPREF_OFF
1757 const struct tune_params arm_fastmul_tune =
1759 &generic_extra_costs, /* Insn extra costs. */
1760 NULL, /* Sched adj cost. */
1761 arm_default_branch_cost,
1762 &arm_default_vec_cost,
1763 1, /* Constant limit. */
1764 5, /* Max cond insns. */
1765 8, /* Memset max inline. */
1766 1, /* Issue rate. */
1767 ARM_PREFETCH_NOT_BENEFICIAL,
1768 tune_params::PREF_CONST_POOL_TRUE,
1769 tune_params::PREF_LDRD_FALSE,
1770 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1771 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1772 tune_params::DISPARAGE_FLAGS_NEITHER,
1773 tune_params::PREF_NEON_64_FALSE,
1774 tune_params::PREF_NEON_STRINGOPS_FALSE,
1775 tune_params::FUSE_NOTHING,
1776 tune_params::SCHED_AUTOPREF_OFF
1779 /* StrongARM has early execution of branches, so a sequence that is worth
1780 skipping is shorter. Set max_insns_skipped to a lower value. */
1782 const struct tune_params arm_strongarm_tune =
1784 &generic_extra_costs, /* Insn extra costs. */
1785 NULL, /* Sched adj cost. */
1786 arm_default_branch_cost,
1787 &arm_default_vec_cost,
1788 1, /* Constant limit. */
1789 3, /* Max cond insns. */
1790 8, /* Memset max inline. */
1791 1, /* Issue rate. */
1792 ARM_PREFETCH_NOT_BENEFICIAL,
1793 tune_params::PREF_CONST_POOL_TRUE,
1794 tune_params::PREF_LDRD_FALSE,
1795 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1796 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1797 tune_params::DISPARAGE_FLAGS_NEITHER,
1798 tune_params::PREF_NEON_64_FALSE,
1799 tune_params::PREF_NEON_STRINGOPS_FALSE,
1800 tune_params::FUSE_NOTHING,
1801 tune_params::SCHED_AUTOPREF_OFF
1804 const struct tune_params arm_xscale_tune =
1806 &generic_extra_costs, /* Insn extra costs. */
1807 xscale_sched_adjust_cost,
1808 arm_default_branch_cost,
1809 &arm_default_vec_cost,
1810 2, /* Constant limit. */
1811 3, /* Max cond insns. */
1812 8, /* Memset max inline. */
1813 1, /* Issue rate. */
1814 ARM_PREFETCH_NOT_BENEFICIAL,
1815 tune_params::PREF_CONST_POOL_TRUE,
1816 tune_params::PREF_LDRD_FALSE,
1817 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1818 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1819 tune_params::DISPARAGE_FLAGS_NEITHER,
1820 tune_params::PREF_NEON_64_FALSE,
1821 tune_params::PREF_NEON_STRINGOPS_FALSE,
1822 tune_params::FUSE_NOTHING,
1823 tune_params::SCHED_AUTOPREF_OFF
1826 const struct tune_params arm_9e_tune =
1828 &generic_extra_costs, /* Insn extra costs. */
1829 NULL, /* Sched adj cost. */
1830 arm_default_branch_cost,
1831 &arm_default_vec_cost,
1832 1, /* Constant limit. */
1833 5, /* Max cond insns. */
1834 8, /* Memset max inline. */
1835 1, /* Issue rate. */
1836 ARM_PREFETCH_NOT_BENEFICIAL,
1837 tune_params::PREF_CONST_POOL_TRUE,
1838 tune_params::PREF_LDRD_FALSE,
1839 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1840 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1841 tune_params::DISPARAGE_FLAGS_NEITHER,
1842 tune_params::PREF_NEON_64_FALSE,
1843 tune_params::PREF_NEON_STRINGOPS_FALSE,
1844 tune_params::FUSE_NOTHING,
1845 tune_params::SCHED_AUTOPREF_OFF
1848 const struct tune_params arm_marvell_pj4_tune =
1850 &generic_extra_costs, /* Insn extra costs. */
1851 NULL, /* Sched adj cost. */
1852 arm_default_branch_cost,
1853 &arm_default_vec_cost,
1854 1, /* Constant limit. */
1855 5, /* Max cond insns. */
1856 8, /* Memset max inline. */
1857 2, /* Issue rate. */
1858 ARM_PREFETCH_NOT_BENEFICIAL,
1859 tune_params::PREF_CONST_POOL_TRUE,
1860 tune_params::PREF_LDRD_FALSE,
1861 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1862 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1863 tune_params::DISPARAGE_FLAGS_NEITHER,
1864 tune_params::PREF_NEON_64_FALSE,
1865 tune_params::PREF_NEON_STRINGOPS_FALSE,
1866 tune_params::FUSE_NOTHING,
1867 tune_params::SCHED_AUTOPREF_OFF
1870 const struct tune_params arm_v6t2_tune =
1872 &generic_extra_costs, /* Insn extra costs. */
1873 NULL, /* Sched adj cost. */
1874 arm_default_branch_cost,
1875 &arm_default_vec_cost,
1876 1, /* Constant limit. */
1877 5, /* Max cond insns. */
1878 8, /* Memset max inline. */
1879 1, /* Issue rate. */
1880 ARM_PREFETCH_NOT_BENEFICIAL,
1881 tune_params::PREF_CONST_POOL_FALSE,
1882 tune_params::PREF_LDRD_FALSE,
1883 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1884 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1885 tune_params::DISPARAGE_FLAGS_NEITHER,
1886 tune_params::PREF_NEON_64_FALSE,
1887 tune_params::PREF_NEON_STRINGOPS_FALSE,
1888 tune_params::FUSE_NOTHING,
1889 tune_params::SCHED_AUTOPREF_OFF
1893 /* Generic Cortex tuning. Use more specific tunings if appropriate. */
1894 const struct tune_params arm_cortex_tune =
1896 &generic_extra_costs,
1897 NULL, /* Sched adj cost. */
1898 arm_default_branch_cost,
1899 &arm_default_vec_cost,
1900 1, /* Constant limit. */
1901 5, /* Max cond insns. */
1902 8, /* Memset max inline. */
1903 2, /* Issue rate. */
1904 ARM_PREFETCH_NOT_BENEFICIAL,
1905 tune_params::PREF_CONST_POOL_FALSE,
1906 tune_params::PREF_LDRD_FALSE,
1907 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1908 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1909 tune_params::DISPARAGE_FLAGS_NEITHER,
1910 tune_params::PREF_NEON_64_FALSE,
1911 tune_params::PREF_NEON_STRINGOPS_FALSE,
1912 tune_params::FUSE_NOTHING,
1913 tune_params::SCHED_AUTOPREF_OFF
1916 const struct tune_params arm_cortex_a8_tune =
1918 &cortexa8_extra_costs,
1919 NULL, /* Sched adj cost. */
1920 arm_default_branch_cost,
1921 &arm_default_vec_cost,
1922 1, /* Constant limit. */
1923 5, /* Max cond insns. */
1924 8, /* Memset max inline. */
1925 2, /* Issue rate. */
1926 ARM_PREFETCH_NOT_BENEFICIAL,
1927 tune_params::PREF_CONST_POOL_FALSE,
1928 tune_params::PREF_LDRD_FALSE,
1929 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1930 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1931 tune_params::DISPARAGE_FLAGS_NEITHER,
1932 tune_params::PREF_NEON_64_FALSE,
1933 tune_params::PREF_NEON_STRINGOPS_TRUE,
1934 tune_params::FUSE_NOTHING,
1935 tune_params::SCHED_AUTOPREF_OFF
1938 const struct tune_params arm_cortex_a7_tune =
1940 &cortexa7_extra_costs,
1941 NULL, /* Sched adj cost. */
1942 arm_default_branch_cost,
1943 &arm_default_vec_cost,
1944 1, /* Constant limit. */
1945 5, /* Max cond insns. */
1946 8, /* Memset max inline. */
1947 2, /* Issue rate. */
1948 ARM_PREFETCH_NOT_BENEFICIAL,
1949 tune_params::PREF_CONST_POOL_FALSE,
1950 tune_params::PREF_LDRD_FALSE,
1951 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1952 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1953 tune_params::DISPARAGE_FLAGS_NEITHER,
1954 tune_params::PREF_NEON_64_FALSE,
1955 tune_params::PREF_NEON_STRINGOPS_TRUE,
1956 tune_params::FUSE_NOTHING,
1957 tune_params::SCHED_AUTOPREF_OFF
1960 const struct tune_params arm_cortex_a15_tune =
1962 &cortexa15_extra_costs,
1963 NULL, /* Sched adj cost. */
1964 arm_default_branch_cost,
1965 &arm_default_vec_cost,
1966 1, /* Constant limit. */
1967 2, /* Max cond insns. */
1968 8, /* Memset max inline. */
1969 3, /* Issue rate. */
1970 ARM_PREFETCH_NOT_BENEFICIAL,
1971 tune_params::PREF_CONST_POOL_FALSE,
1972 tune_params::PREF_LDRD_TRUE,
1973 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1974 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1975 tune_params::DISPARAGE_FLAGS_ALL,
1976 tune_params::PREF_NEON_64_FALSE,
1977 tune_params::PREF_NEON_STRINGOPS_TRUE,
1978 tune_params::FUSE_NOTHING,
1979 tune_params::SCHED_AUTOPREF_FULL
1982 const struct tune_params arm_cortex_a35_tune =
1984 &cortexa53_extra_costs,
1985 NULL, /* Sched adj cost. */
1986 arm_default_branch_cost,
1987 &arm_default_vec_cost,
1988 1, /* Constant limit. */
1989 5, /* Max cond insns. */
1990 8, /* Memset max inline. */
1991 1, /* Issue rate. */
1992 ARM_PREFETCH_NOT_BENEFICIAL,
1993 tune_params::PREF_CONST_POOL_FALSE,
1994 tune_params::PREF_LDRD_FALSE,
1995 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1996 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1997 tune_params::DISPARAGE_FLAGS_NEITHER,
1998 tune_params::PREF_NEON_64_FALSE,
1999 tune_params::PREF_NEON_STRINGOPS_TRUE,
2000 FUSE_OPS (tune_params::FUSE_MOVW_MOVT),
2001 tune_params::SCHED_AUTOPREF_OFF
2004 const struct tune_params arm_cortex_a53_tune =
2006 &cortexa53_extra_costs,
2007 NULL, /* Sched adj cost. */
2008 arm_default_branch_cost,
2009 &arm_default_vec_cost,
2010 1, /* Constant limit. */
2011 5, /* Max cond insns. */
2012 8, /* Memset max inline. */
2013 2, /* Issue rate. */
2014 ARM_PREFETCH_NOT_BENEFICIAL,
2015 tune_params::PREF_CONST_POOL_FALSE,
2016 tune_params::PREF_LDRD_FALSE,
2017 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2018 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2019 tune_params::DISPARAGE_FLAGS_NEITHER,
2020 tune_params::PREF_NEON_64_FALSE,
2021 tune_params::PREF_NEON_STRINGOPS_TRUE,
2022 FUSE_OPS (tune_params::FUSE_MOVW_MOVT | tune_params::FUSE_AES_AESMC),
2023 tune_params::SCHED_AUTOPREF_OFF
2026 const struct tune_params arm_cortex_a57_tune =
2028 &cortexa57_extra_costs,
2029 NULL, /* Sched adj cost. */
2030 arm_default_branch_cost,
2031 &arm_default_vec_cost,
2032 1, /* Constant limit. */
2033 2, /* Max cond insns. */
2034 8, /* Memset max inline. */
2035 3, /* Issue rate. */
2036 ARM_PREFETCH_NOT_BENEFICIAL,
2037 tune_params::PREF_CONST_POOL_FALSE,
2038 tune_params::PREF_LDRD_TRUE,
2039 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2040 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2041 tune_params::DISPARAGE_FLAGS_ALL,
2042 tune_params::PREF_NEON_64_FALSE,
2043 tune_params::PREF_NEON_STRINGOPS_TRUE,
2044 FUSE_OPS (tune_params::FUSE_MOVW_MOVT | tune_params::FUSE_AES_AESMC),
2045 tune_params::SCHED_AUTOPREF_FULL
2048 const struct tune_params arm_exynosm1_tune =
2050 &exynosm1_extra_costs,
2051 NULL, /* Sched adj cost. */
2052 arm_default_branch_cost,
2053 &arm_default_vec_cost,
2054 1, /* Constant limit. */
2055 2, /* Max cond insns. */
2056 8, /* Memset max inline. */
2057 3, /* Issue rate. */
2058 ARM_PREFETCH_NOT_BENEFICIAL,
2059 tune_params::PREF_CONST_POOL_FALSE,
2060 tune_params::PREF_LDRD_TRUE,
2061 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* Thumb. */
2062 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* ARM. */
2063 tune_params::DISPARAGE_FLAGS_ALL,
2064 tune_params::PREF_NEON_64_FALSE,
2065 tune_params::PREF_NEON_STRINGOPS_TRUE,
2066 tune_params::FUSE_NOTHING,
2067 tune_params::SCHED_AUTOPREF_OFF
2070 const struct tune_params arm_xgene1_tune =
2072 &xgene1_extra_costs,
2073 NULL, /* Sched adj cost. */
2074 arm_default_branch_cost,
2075 &arm_default_vec_cost,
2076 1, /* Constant limit. */
2077 2, /* Max cond insns. */
2078 32, /* Memset max inline. */
2079 4, /* Issue rate. */
2080 ARM_PREFETCH_NOT_BENEFICIAL,
2081 tune_params::PREF_CONST_POOL_FALSE,
2082 tune_params::PREF_LDRD_TRUE,
2083 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2084 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2085 tune_params::DISPARAGE_FLAGS_ALL,
2086 tune_params::PREF_NEON_64_FALSE,
2087 tune_params::PREF_NEON_STRINGOPS_FALSE,
2088 tune_params::FUSE_NOTHING,
2089 tune_params::SCHED_AUTOPREF_OFF
2092 const struct tune_params arm_qdf24xx_tune =
2094 &qdf24xx_extra_costs,
2095 NULL, /* Scheduler cost adjustment. */
2096 arm_default_branch_cost,
2097 &arm_default_vec_cost, /* Vectorizer costs. */
2098 1, /* Constant limit. */
2099 2, /* Max cond insns. */
2100 8, /* Memset max inline. */
2101 4, /* Issue rate. */
2102 ARM_PREFETCH_BENEFICIAL (0, -1, 64),
2103 tune_params::PREF_CONST_POOL_FALSE,
2104 tune_params::PREF_LDRD_TRUE,
2105 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2106 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2107 tune_params::DISPARAGE_FLAGS_ALL,
2108 tune_params::PREF_NEON_64_FALSE,
2109 tune_params::PREF_NEON_STRINGOPS_TRUE,
2110 FUSE_OPS (tune_params::FUSE_MOVW_MOVT),
2111 tune_params::SCHED_AUTOPREF_FULL
2114 /* Branches can be dual-issued on Cortex-A5, so conditional execution is
2115 less appealing. Set max_insns_skipped to a low value. */
2117 const struct tune_params arm_cortex_a5_tune =
2119 &cortexa5_extra_costs,
2120 NULL, /* Sched adj cost. */
2121 arm_cortex_a5_branch_cost,
2122 &arm_default_vec_cost,
2123 1, /* Constant limit. */
2124 1, /* Max cond insns. */
2125 8, /* Memset max inline. */
2126 2, /* Issue rate. */
2127 ARM_PREFETCH_NOT_BENEFICIAL,
2128 tune_params::PREF_CONST_POOL_FALSE,
2129 tune_params::PREF_LDRD_FALSE,
2130 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* Thumb. */
2131 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* ARM. */
2132 tune_params::DISPARAGE_FLAGS_NEITHER,
2133 tune_params::PREF_NEON_64_FALSE,
2134 tune_params::PREF_NEON_STRINGOPS_TRUE,
2135 tune_params::FUSE_NOTHING,
2136 tune_params::SCHED_AUTOPREF_OFF
2139 const struct tune_params arm_cortex_a9_tune =
2141 &cortexa9_extra_costs,
2142 cortex_a9_sched_adjust_cost,
2143 arm_default_branch_cost,
2144 &arm_default_vec_cost,
2145 1, /* Constant limit. */
2146 5, /* Max cond insns. */
2147 8, /* Memset max inline. */
2148 2, /* Issue rate. */
2149 ARM_PREFETCH_BENEFICIAL(4,32,32),
2150 tune_params::PREF_CONST_POOL_FALSE,
2151 tune_params::PREF_LDRD_FALSE,
2152 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2153 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2154 tune_params::DISPARAGE_FLAGS_NEITHER,
2155 tune_params::PREF_NEON_64_FALSE,
2156 tune_params::PREF_NEON_STRINGOPS_FALSE,
2157 tune_params::FUSE_NOTHING,
2158 tune_params::SCHED_AUTOPREF_OFF
2161 const struct tune_params arm_cortex_a12_tune =
2163 &cortexa12_extra_costs,
2164 NULL, /* Sched adj cost. */
2165 arm_default_branch_cost,
2166 &arm_default_vec_cost, /* Vectorizer costs. */
2167 1, /* Constant limit. */
2168 2, /* Max cond insns. */
2169 8, /* Memset max inline. */
2170 2, /* Issue rate. */
2171 ARM_PREFETCH_NOT_BENEFICIAL,
2172 tune_params::PREF_CONST_POOL_FALSE,
2173 tune_params::PREF_LDRD_TRUE,
2174 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2175 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2176 tune_params::DISPARAGE_FLAGS_ALL,
2177 tune_params::PREF_NEON_64_FALSE,
2178 tune_params::PREF_NEON_STRINGOPS_TRUE,
2179 FUSE_OPS (tune_params::FUSE_MOVW_MOVT),
2180 tune_params::SCHED_AUTOPREF_OFF
2183 const struct tune_params arm_cortex_a73_tune =
2185 &cortexa57_extra_costs,
2186 NULL, /* Sched adj cost. */
2187 arm_default_branch_cost,
2188 &arm_default_vec_cost, /* Vectorizer costs. */
2189 1, /* Constant limit. */
2190 2, /* Max cond insns. */
2191 8, /* Memset max inline. */
2192 2, /* Issue rate. */
2193 ARM_PREFETCH_NOT_BENEFICIAL,
2194 tune_params::PREF_CONST_POOL_FALSE,
2195 tune_params::PREF_LDRD_TRUE,
2196 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2197 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2198 tune_params::DISPARAGE_FLAGS_ALL,
2199 tune_params::PREF_NEON_64_FALSE,
2200 tune_params::PREF_NEON_STRINGOPS_TRUE,
2201 FUSE_OPS (tune_params::FUSE_AES_AESMC | tune_params::FUSE_MOVW_MOVT),
2202 tune_params::SCHED_AUTOPREF_FULL
2205 /* armv7m tuning. On Cortex-M4 cores for example, MOVW/MOVT take a single
2206 cycle to execute each. An LDR from the constant pool also takes two cycles
2207 to execute, but mildly increases pipelining opportunity (consecutive
2208 loads/stores can be pipelined together, saving one cycle), and may also
2209 improve icache utilisation. Hence we prefer the constant pool for such
2210 processors. */
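/* Illustrative comparison of the two sequences discussed above (cycle
   figures are the nominal Cortex-M4 numbers from the comment, not
   measurements):

     movw r0, #:lower16:sym   @ 1 cycle
     movt r0, #:upper16:sym   @ 1 cycle      -> 2 cycles total

     ldr  r0, .LC0            @ 2 cycles, but may pipeline with a
                              @ neighbouring load or store

   The costs are comparable, and the literal load keeps the constant out
   of the instruction stream; hence PREF_CONST_POOL_TRUE below.  */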
2212 const struct tune_params arm_v7m_tune =
2214 &v7m_extra_costs,
2215 NULL, /* Sched adj cost. */
2216 arm_cortex_m_branch_cost,
2217 &arm_default_vec_cost,
2218 1, /* Constant limit. */
2219 2, /* Max cond insns. */
2220 8, /* Memset max inline. */
2221 1, /* Issue rate. */
2222 ARM_PREFETCH_NOT_BENEFICIAL,
2223 tune_params::PREF_CONST_POOL_TRUE,
2224 tune_params::PREF_LDRD_FALSE,
2225 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* Thumb. */
2226 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* ARM. */
2227 tune_params::DISPARAGE_FLAGS_NEITHER,
2228 tune_params::PREF_NEON_64_FALSE,
2229 tune_params::PREF_NEON_STRINGOPS_FALSE,
2230 tune_params::FUSE_NOTHING,
2231 tune_params::SCHED_AUTOPREF_OFF
2234 /* Cortex-M7 tuning. */
2236 const struct tune_params arm_cortex_m7_tune =
2238 &v7m_extra_costs,
2239 NULL, /* Sched adj cost. */
2240 arm_cortex_m7_branch_cost,
2241 &arm_default_vec_cost,
2242 0, /* Constant limit. */
2243 1, /* Max cond insns. */
2244 8, /* Memset max inline. */
2245 2, /* Issue rate. */
2246 ARM_PREFETCH_NOT_BENEFICIAL,
2247 tune_params::PREF_CONST_POOL_TRUE,
2248 tune_params::PREF_LDRD_FALSE,
2249 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2250 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2251 tune_params::DISPARAGE_FLAGS_NEITHER,
2252 tune_params::PREF_NEON_64_FALSE,
2253 tune_params::PREF_NEON_STRINGOPS_FALSE,
2254 tune_params::FUSE_NOTHING,
2255 tune_params::SCHED_AUTOPREF_OFF
2258 /* The arm_v6m_tune is duplicated from arm_cortex_tune, rather than
2259 arm_v6t2_tune. It is used for cortex-m0, cortex-m1, cortex-m0plus and
2260 cortex-m23. */
2261 const struct tune_params arm_v6m_tune =
2263 &generic_extra_costs, /* Insn extra costs. */
2264 NULL, /* Sched adj cost. */
2265 arm_default_branch_cost,
2266 &arm_default_vec_cost, /* Vectorizer costs. */
2267 1, /* Constant limit. */
2268 5, /* Max cond insns. */
2269 8, /* Memset max inline. */
2270 1, /* Issue rate. */
2271 ARM_PREFETCH_NOT_BENEFICIAL,
2272 tune_params::PREF_CONST_POOL_FALSE,
2273 tune_params::PREF_LDRD_FALSE,
2274 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* Thumb. */
2275 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* ARM. */
2276 tune_params::DISPARAGE_FLAGS_NEITHER,
2277 tune_params::PREF_NEON_64_FALSE,
2278 tune_params::PREF_NEON_STRINGOPS_FALSE,
2279 tune_params::FUSE_NOTHING,
2280 tune_params::SCHED_AUTOPREF_OFF
2283 const struct tune_params arm_fa726te_tune =
2285 &generic_extra_costs, /* Insn extra costs. */
2286 fa726te_sched_adjust_cost,
2287 arm_default_branch_cost,
2288 &arm_default_vec_cost,
2289 1, /* Constant limit. */
2290 5, /* Max cond insns. */
2291 8, /* Memset max inline. */
2292 2, /* Issue rate. */
2293 ARM_PREFETCH_NOT_BENEFICIAL,
2294 tune_params::PREF_CONST_POOL_TRUE,
2295 tune_params::PREF_LDRD_FALSE,
2296 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2297 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2298 tune_params::DISPARAGE_FLAGS_NEITHER,
2299 tune_params::PREF_NEON_64_FALSE,
2300 tune_params::PREF_NEON_STRINGOPS_FALSE,
2301 tune_params::FUSE_NOTHING,
2302 tune_params::SCHED_AUTOPREF_OFF
2305 /* Auto-generated CPU, FPU and architecture tables. */
2306 #include "arm-cpu-data.h"
2308 /* The name of the preprocessor macro to define for this architecture. PROFILE
2309 is replaced by the architecture name (e.g. 8A) in arm_option_override () and
2310 is thus chosen to be big enough to hold the longest architecture name. */
2312 char arm_arch_name[] = "__ARM_ARCH_PROFILE__";
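/* For example, when the selected architecture reports "8A" as its
   preprocessor name, the sprintf in arm_option_override () below rewrites
   this buffer to "__ARM_ARCH_8A__"; the placeholder string merely reserves
   enough space for the longest such name.  */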
2314 /* Supported TLS relocations. */
2316 enum tls_reloc {
2317 TLS_GD32,
2318 TLS_LDM32,
2319 TLS_LDO32,
2320 TLS_IE32,
2321 TLS_LE32,
2322 TLS_DESCSEQ /* GNU scheme */
2325 /* The maximum number of insns to be used when loading a constant. */
2326 inline static int
2327 arm_constant_limit (bool size_p)
2329 return size_p ? 1 : current_tune->constant_limit;
2332 /* Emit an insn that's a simple single-set. Both the operands must be known
2333 to be valid. */
2334 inline static rtx_insn *
2335 emit_set_insn (rtx x, rtx y)
2337 return emit_insn (gen_rtx_SET (x, y));
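/* For illustration: emit_set_insn (reg, const0_rtx) emits a single
   (set (reg ...) (const_int 0)) insn; no validation of the operands is
   performed here.  */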
2340 /* Return the number of bits set in VALUE. */
2341 static unsigned
2342 bit_count (unsigned long value)
2344 unsigned long count = 0;
2346 while (value)
2348 count++;
2349 value &= value - 1; /* Clear the least-significant set bit. */
2352 return count;
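/* Worked example for the loop above: value = 0x58 (binary 1011000).
   Each iteration of value &= value - 1 clears the lowest set bit:
   0x58 -> 0x50 -> 0x40 -> 0x00, so count = 3.  */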
2355 /* Return the number of bits set in BMAP. */
2356 static unsigned
2357 bitmap_popcount (const sbitmap bmap)
2359 unsigned int count = 0;
2360 unsigned int n = 0;
2361 sbitmap_iterator sbi;
2363 EXECUTE_IF_SET_IN_BITMAP (bmap, 0, n, sbi)
2364 count++;
2365 return count;
2368 typedef struct
2370 machine_mode mode;
2371 const char *name;
2372 } arm_fixed_mode_set;
2374 /* A small helper for setting fixed-point library libfuncs. */
2376 static void
2377 arm_set_fixed_optab_libfunc (optab optable, machine_mode mode,
2378 const char *funcname, const char *modename,
2379 int num_suffix)
2381 char buffer[50];
2383 if (num_suffix == 0)
2384 sprintf (buffer, "__gnu_%s%s", funcname, modename);
2385 else
2386 sprintf (buffer, "__gnu_%s%s%d", funcname, modename, num_suffix);
2388 set_optab_libfunc (optable, mode, buffer);
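/* For example, the call arm_set_fixed_optab_libfunc (add_optab, QQmode,
   "add", "qq", 3) made from arm_init_libfuncs below registers the helper
   name "__gnu_addqq3" for QQmode addition.  */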
2391 static void
2392 arm_set_fixed_conv_libfunc (convert_optab optable, machine_mode to,
2393 machine_mode from, const char *funcname,
2394 const char *toname, const char *fromname)
2396 char buffer[50];
2397 const char *maybe_suffix_2 = "";
2399 /* Follow the logic for selecting a "2" suffix in fixed-bit.h. */
2400 if (ALL_FIXED_POINT_MODE_P (from) && ALL_FIXED_POINT_MODE_P (to)
2401 && UNSIGNED_FIXED_POINT_MODE_P (from) == UNSIGNED_FIXED_POINT_MODE_P (to)
2402 && ALL_FRACT_MODE_P (from) == ALL_FRACT_MODE_P (to))
2403 maybe_suffix_2 = "2";
2405 sprintf (buffer, "__gnu_%s%s%s%s", funcname, fromname, toname,
2406 maybe_suffix_2);
2408 set_conv_libfunc (optable, to, from, buffer);
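/* For example, a conversion from QQmode to HQmode (both signed fract
   modes) selects the "2" suffix and registers "__gnu_fractqqhq2",
   matching the naming logic in fixed-bit.h.  */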
2411 /* Set up library functions unique to ARM. */
2413 static void
2414 arm_init_libfuncs (void)
2416 /* For Linux, we have access to kernel support for atomic operations. */
2417 if (arm_abi == ARM_ABI_AAPCS_LINUX)
2418 init_sync_libfuncs (MAX_SYNC_LIBFUNC_SIZE);
2420 /* There are no special library functions unless we are using the
2421 ARM BPABI. */
2422 if (!TARGET_BPABI)
2423 return;
2425 /* The functions below are described in Section 4 of the "Run-Time
2426 ABI for the ARM architecture", Version 1.0. */
2428 /* Double-precision floating-point arithmetic. Table 2. */
2429 set_optab_libfunc (add_optab, DFmode, "__aeabi_dadd");
2430 set_optab_libfunc (sdiv_optab, DFmode, "__aeabi_ddiv");
2431 set_optab_libfunc (smul_optab, DFmode, "__aeabi_dmul");
2432 set_optab_libfunc (neg_optab, DFmode, "__aeabi_dneg");
2433 set_optab_libfunc (sub_optab, DFmode, "__aeabi_dsub");
2435 /* Double-precision comparisons. Table 3. */
2436 set_optab_libfunc (eq_optab, DFmode, "__aeabi_dcmpeq");
2437 set_optab_libfunc (ne_optab, DFmode, NULL);
2438 set_optab_libfunc (lt_optab, DFmode, "__aeabi_dcmplt");
2439 set_optab_libfunc (le_optab, DFmode, "__aeabi_dcmple");
2440 set_optab_libfunc (ge_optab, DFmode, "__aeabi_dcmpge");
2441 set_optab_libfunc (gt_optab, DFmode, "__aeabi_dcmpgt");
2442 set_optab_libfunc (unord_optab, DFmode, "__aeabi_dcmpun");
2444 /* Single-precision floating-point arithmetic. Table 4. */
2445 set_optab_libfunc (add_optab, SFmode, "__aeabi_fadd");
2446 set_optab_libfunc (sdiv_optab, SFmode, "__aeabi_fdiv");
2447 set_optab_libfunc (smul_optab, SFmode, "__aeabi_fmul");
2448 set_optab_libfunc (neg_optab, SFmode, "__aeabi_fneg");
2449 set_optab_libfunc (sub_optab, SFmode, "__aeabi_fsub");
2451 /* Single-precision comparisons. Table 5. */
2452 set_optab_libfunc (eq_optab, SFmode, "__aeabi_fcmpeq");
2453 set_optab_libfunc (ne_optab, SFmode, NULL);
2454 set_optab_libfunc (lt_optab, SFmode, "__aeabi_fcmplt");
2455 set_optab_libfunc (le_optab, SFmode, "__aeabi_fcmple");
2456 set_optab_libfunc (ge_optab, SFmode, "__aeabi_fcmpge");
2457 set_optab_libfunc (gt_optab, SFmode, "__aeabi_fcmpgt");
2458 set_optab_libfunc (unord_optab, SFmode, "__aeabi_fcmpun");
2460 /* Floating-point to integer conversions. Table 6. */
2461 set_conv_libfunc (sfix_optab, SImode, DFmode, "__aeabi_d2iz");
2462 set_conv_libfunc (ufix_optab, SImode, DFmode, "__aeabi_d2uiz");
2463 set_conv_libfunc (sfix_optab, DImode, DFmode, "__aeabi_d2lz");
2464 set_conv_libfunc (ufix_optab, DImode, DFmode, "__aeabi_d2ulz");
2465 set_conv_libfunc (sfix_optab, SImode, SFmode, "__aeabi_f2iz");
2466 set_conv_libfunc (ufix_optab, SImode, SFmode, "__aeabi_f2uiz");
2467 set_conv_libfunc (sfix_optab, DImode, SFmode, "__aeabi_f2lz");
2468 set_conv_libfunc (ufix_optab, DImode, SFmode, "__aeabi_f2ulz");
2470 /* Conversions between floating types. Table 7. */
2471 set_conv_libfunc (trunc_optab, SFmode, DFmode, "__aeabi_d2f");
2472 set_conv_libfunc (sext_optab, DFmode, SFmode, "__aeabi_f2d");
2474 /* Integer to floating-point conversions. Table 8. */
2475 set_conv_libfunc (sfloat_optab, DFmode, SImode, "__aeabi_i2d");
2476 set_conv_libfunc (ufloat_optab, DFmode, SImode, "__aeabi_ui2d");
2477 set_conv_libfunc (sfloat_optab, DFmode, DImode, "__aeabi_l2d");
2478 set_conv_libfunc (ufloat_optab, DFmode, DImode, "__aeabi_ul2d");
2479 set_conv_libfunc (sfloat_optab, SFmode, SImode, "__aeabi_i2f");
2480 set_conv_libfunc (ufloat_optab, SFmode, SImode, "__aeabi_ui2f");
2481 set_conv_libfunc (sfloat_optab, SFmode, DImode, "__aeabi_l2f");
2482 set_conv_libfunc (ufloat_optab, SFmode, DImode, "__aeabi_ul2f");
2484 /* Long long. Table 9. */
2485 set_optab_libfunc (smul_optab, DImode, "__aeabi_lmul");
2486 set_optab_libfunc (sdivmod_optab, DImode, "__aeabi_ldivmod");
2487 set_optab_libfunc (udivmod_optab, DImode, "__aeabi_uldivmod");
2488 set_optab_libfunc (ashl_optab, DImode, "__aeabi_llsl");
2489 set_optab_libfunc (lshr_optab, DImode, "__aeabi_llsr");
2490 set_optab_libfunc (ashr_optab, DImode, "__aeabi_lasr");
2491 set_optab_libfunc (cmp_optab, DImode, "__aeabi_lcmp");
2492 set_optab_libfunc (ucmp_optab, DImode, "__aeabi_ulcmp");
2494 /* Integer (32/32->32) division. \S 4.3.1. */
2495 set_optab_libfunc (sdivmod_optab, SImode, "__aeabi_idivmod");
2496 set_optab_libfunc (udivmod_optab, SImode, "__aeabi_uidivmod");
2498 /* The divmod functions are designed so that they can be used for
2499 plain division, even though they return both the quotient and the
2500 remainder. The quotient is returned in the usual location (i.e.,
2501 r0 for SImode, {r0, r1} for DImode), just as would be expected
2502 for an ordinary division routine. Because the AAPCS calling
2503 conventions specify that all of { r0, r1, r2, r3 } are
2504 call-clobbered registers, there is no need to tell the compiler
2505 explicitly that those registers are clobbered by these
2506 routines. */
2507 set_optab_libfunc (sdiv_optab, DImode, "__aeabi_ldivmod");
2508 set_optab_libfunc (udiv_optab, DImode, "__aeabi_uldivmod");
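/* So, for illustration, a plain C division such as
     long long q = a / b;
   ends up calling __aeabi_ldivmod; the quotient arrives in {r0, r1} just
   as a division-only routine would return it, and the remainder left in
   {r2, r3} is simply ignored.  */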
2510 /* For SImode division the ABI provides div-without-mod routines,
2511 which are faster. */
2512 set_optab_libfunc (sdiv_optab, SImode, "__aeabi_idiv");
2513 set_optab_libfunc (udiv_optab, SImode, "__aeabi_uidiv");
2515 /* We don't have mod libcalls. Fortunately gcc knows how to use the
2516 divmod libcalls instead. */
2517 set_optab_libfunc (smod_optab, DImode, NULL);
2518 set_optab_libfunc (umod_optab, DImode, NULL);
2519 set_optab_libfunc (smod_optab, SImode, NULL);
2520 set_optab_libfunc (umod_optab, SImode, NULL);
2522 /* Half-precision float operations. The compiler handles all operations
2523 with NULL libfuncs by converting to SFmode.
2524 switch (arm_fp16_format)
2526 case ARM_FP16_FORMAT_IEEE:
2527 case ARM_FP16_FORMAT_ALTERNATIVE:
2529 /* Conversions. */
2530 set_conv_libfunc (trunc_optab, HFmode, SFmode,
2531 (arm_fp16_format == ARM_FP16_FORMAT_IEEE
2532 ? "__gnu_f2h_ieee"
2533 : "__gnu_f2h_alternative"));
2534 set_conv_libfunc (sext_optab, SFmode, HFmode,
2535 (arm_fp16_format == ARM_FP16_FORMAT_IEEE
2536 ? "__gnu_h2f_ieee"
2537 : "__gnu_h2f_alternative"));
2539 set_conv_libfunc (trunc_optab, HFmode, DFmode,
2540 (arm_fp16_format == ARM_FP16_FORMAT_IEEE
2541 ? "__gnu_d2h_ieee"
2542 : "__gnu_d2h_alternative"));
2544 /* Arithmetic. */
2545 set_optab_libfunc (add_optab, HFmode, NULL);
2546 set_optab_libfunc (sdiv_optab, HFmode, NULL);
2547 set_optab_libfunc (smul_optab, HFmode, NULL);
2548 set_optab_libfunc (neg_optab, HFmode, NULL);
2549 set_optab_libfunc (sub_optab, HFmode, NULL);
2551 /* Comparisons. */
2552 set_optab_libfunc (eq_optab, HFmode, NULL);
2553 set_optab_libfunc (ne_optab, HFmode, NULL);
2554 set_optab_libfunc (lt_optab, HFmode, NULL);
2555 set_optab_libfunc (le_optab, HFmode, NULL);
2556 set_optab_libfunc (ge_optab, HFmode, NULL);
2557 set_optab_libfunc (gt_optab, HFmode, NULL);
2558 set_optab_libfunc (unord_optab, HFmode, NULL);
2559 break;
2561 default:
2562 break;
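/* Illustrative consequence of the NULL arithmetic and comparison libfuncs
   above: an HFmode addition is expanded by widening both operands with
   __gnu_h2f_ieee (or the _alternative variant), adding in SFmode, and
   narrowing the result back with __gnu_f2h_ieee.  */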
2565 /* Use names prefixed with __gnu_ for fixed-point helper functions. */
2567 const arm_fixed_mode_set fixed_arith_modes[] =
2569 { QQmode, "qq" },
2570 { UQQmode, "uqq" },
2571 { HQmode, "hq" },
2572 { UHQmode, "uhq" },
2573 { SQmode, "sq" },
2574 { USQmode, "usq" },
2575 { DQmode, "dq" },
2576 { UDQmode, "udq" },
2577 { TQmode, "tq" },
2578 { UTQmode, "utq" },
2579 { HAmode, "ha" },
2580 { UHAmode, "uha" },
2581 { SAmode, "sa" },
2582 { USAmode, "usa" },
2583 { DAmode, "da" },
2584 { UDAmode, "uda" },
2585 { TAmode, "ta" },
2586 { UTAmode, "uta" }
2588 const arm_fixed_mode_set fixed_conv_modes[] =
2590 { QQmode, "qq" },
2591 { UQQmode, "uqq" },
2592 { HQmode, "hq" },
2593 { UHQmode, "uhq" },
2594 { SQmode, "sq" },
2595 { USQmode, "usq" },
2596 { DQmode, "dq" },
2597 { UDQmode, "udq" },
2598 { TQmode, "tq" },
2599 { UTQmode, "utq" },
2600 { HAmode, "ha" },
2601 { UHAmode, "uha" },
2602 { SAmode, "sa" },
2603 { USAmode, "usa" },
2604 { DAmode, "da" },
2605 { UDAmode, "uda" },
2606 { TAmode, "ta" },
2607 { UTAmode, "uta" },
2608 { QImode, "qi" },
2609 { HImode, "hi" },
2610 { SImode, "si" },
2611 { DImode, "di" },
2612 { TImode, "ti" },
2613 { SFmode, "sf" },
2614 { DFmode, "df" }
2616 unsigned int i, j;
2618 for (i = 0; i < ARRAY_SIZE (fixed_arith_modes); i++)
2620 arm_set_fixed_optab_libfunc (add_optab, fixed_arith_modes[i].mode,
2621 "add", fixed_arith_modes[i].name, 3);
2622 arm_set_fixed_optab_libfunc (ssadd_optab, fixed_arith_modes[i].mode,
2623 "ssadd", fixed_arith_modes[i].name, 3);
2624 arm_set_fixed_optab_libfunc (usadd_optab, fixed_arith_modes[i].mode,
2625 "usadd", fixed_arith_modes[i].name, 3);
2626 arm_set_fixed_optab_libfunc (sub_optab, fixed_arith_modes[i].mode,
2627 "sub", fixed_arith_modes[i].name, 3);
2628 arm_set_fixed_optab_libfunc (sssub_optab, fixed_arith_modes[i].mode,
2629 "sssub", fixed_arith_modes[i].name, 3);
2630 arm_set_fixed_optab_libfunc (ussub_optab, fixed_arith_modes[i].mode,
2631 "ussub", fixed_arith_modes[i].name, 3);
2632 arm_set_fixed_optab_libfunc (smul_optab, fixed_arith_modes[i].mode,
2633 "mul", fixed_arith_modes[i].name, 3);
2634 arm_set_fixed_optab_libfunc (ssmul_optab, fixed_arith_modes[i].mode,
2635 "ssmul", fixed_arith_modes[i].name, 3);
2636 arm_set_fixed_optab_libfunc (usmul_optab, fixed_arith_modes[i].mode,
2637 "usmul", fixed_arith_modes[i].name, 3);
2638 arm_set_fixed_optab_libfunc (sdiv_optab, fixed_arith_modes[i].mode,
2639 "div", fixed_arith_modes[i].name, 3);
2640 arm_set_fixed_optab_libfunc (udiv_optab, fixed_arith_modes[i].mode,
2641 "udiv", fixed_arith_modes[i].name, 3);
2642 arm_set_fixed_optab_libfunc (ssdiv_optab, fixed_arith_modes[i].mode,
2643 "ssdiv", fixed_arith_modes[i].name, 3);
2644 arm_set_fixed_optab_libfunc (usdiv_optab, fixed_arith_modes[i].mode,
2645 "usdiv", fixed_arith_modes[i].name, 3);
2646 arm_set_fixed_optab_libfunc (neg_optab, fixed_arith_modes[i].mode,
2647 "neg", fixed_arith_modes[i].name, 2);
2648 arm_set_fixed_optab_libfunc (ssneg_optab, fixed_arith_modes[i].mode,
2649 "ssneg", fixed_arith_modes[i].name, 2);
2650 arm_set_fixed_optab_libfunc (usneg_optab, fixed_arith_modes[i].mode,
2651 "usneg", fixed_arith_modes[i].name, 2);
2652 arm_set_fixed_optab_libfunc (ashl_optab, fixed_arith_modes[i].mode,
2653 "ashl", fixed_arith_modes[i].name, 3);
2654 arm_set_fixed_optab_libfunc (ashr_optab, fixed_arith_modes[i].mode,
2655 "ashr", fixed_arith_modes[i].name, 3);
2656 arm_set_fixed_optab_libfunc (lshr_optab, fixed_arith_modes[i].mode,
2657 "lshr", fixed_arith_modes[i].name, 3);
2658 arm_set_fixed_optab_libfunc (ssashl_optab, fixed_arith_modes[i].mode,
2659 "ssashl", fixed_arith_modes[i].name, 3);
2660 arm_set_fixed_optab_libfunc (usashl_optab, fixed_arith_modes[i].mode,
2661 "usashl", fixed_arith_modes[i].name, 3);
2662 arm_set_fixed_optab_libfunc (cmp_optab, fixed_arith_modes[i].mode,
2663 "cmp", fixed_arith_modes[i].name, 2);
2666 for (i = 0; i < ARRAY_SIZE (fixed_conv_modes); i++)
2667 for (j = 0; j < ARRAY_SIZE (fixed_conv_modes); j++)
2669 if (i == j
2670 || (!ALL_FIXED_POINT_MODE_P (fixed_conv_modes[i].mode)
2671 && !ALL_FIXED_POINT_MODE_P (fixed_conv_modes[j].mode)))
2672 continue;
2674 arm_set_fixed_conv_libfunc (fract_optab, fixed_conv_modes[i].mode,
2675 fixed_conv_modes[j].mode, "fract",
2676 fixed_conv_modes[i].name,
2677 fixed_conv_modes[j].name);
2678 arm_set_fixed_conv_libfunc (satfract_optab,
2679 fixed_conv_modes[i].mode,
2680 fixed_conv_modes[j].mode, "satfract",
2681 fixed_conv_modes[i].name,
2682 fixed_conv_modes[j].name);
2683 arm_set_fixed_conv_libfunc (fractuns_optab,
2684 fixed_conv_modes[i].mode,
2685 fixed_conv_modes[j].mode, "fractuns",
2686 fixed_conv_modes[i].name,
2687 fixed_conv_modes[j].name);
2688 arm_set_fixed_conv_libfunc (satfractuns_optab,
2689 fixed_conv_modes[i].mode,
2690 fixed_conv_modes[j].mode, "satfractuns",
2691 fixed_conv_modes[i].name,
2692 fixed_conv_modes[j].name);
2696 if (TARGET_AAPCS_BASED)
2697 synchronize_libfunc = init_one_libfunc ("__sync_synchronize");
2700 /* On AAPCS systems, this is the "struct __va_list". */
2701 static GTY(()) tree va_list_type;
2703 /* Return the type to use as __builtin_va_list. */
2704 static tree
2705 arm_build_builtin_va_list (void)
2707 tree va_list_name;
2708 tree ap_field;
2710 if (!TARGET_AAPCS_BASED)
2711 return std_build_builtin_va_list ();
2713 /* AAPCS \S 7.1.4 requires that va_list be a typedef for a type
2714 defined as:
2716 struct __va_list
2718 void *__ap;
2721 The C Library ABI further reinforces this definition in \S
2722 4.1.
2724 We must follow this definition exactly. The structure tag
2725 name is visible in C++ mangled names, and thus forms a part
2726 of the ABI. The field name may be used by people who
2727 #include <stdarg.h>. */
2728 /* Create the type. */
2729 va_list_type = lang_hooks.types.make_type (RECORD_TYPE);
2730 /* Give it the required name. */
2731 va_list_name = build_decl (BUILTINS_LOCATION,
2732 TYPE_DECL,
2733 get_identifier ("__va_list"),
2734 va_list_type);
2735 DECL_ARTIFICIAL (va_list_name) = 1;
2736 TYPE_NAME (va_list_type) = va_list_name;
2737 TYPE_STUB_DECL (va_list_type) = va_list_name;
2738 /* Create the __ap field. */
2739 ap_field = build_decl (BUILTINS_LOCATION,
2740 FIELD_DECL,
2741 get_identifier ("__ap"),
2742 ptr_type_node);
2743 DECL_ARTIFICIAL (ap_field) = 1;
2744 DECL_FIELD_CONTEXT (ap_field) = va_list_type;
2745 TYPE_FIELDS (va_list_type) = ap_field;
2746 /* Compute its layout. */
2747 layout_type (va_list_type);
2749 return va_list_type;
2752 /* Return an expression of type "void *" pointing to the next
2753 available argument in a variable-argument list. VALIST is the
2754 user-level va_list object, of type __builtin_va_list. */
2755 static tree
2756 arm_extract_valist_ptr (tree valist)
2758 if (TREE_TYPE (valist) == error_mark_node)
2759 return error_mark_node;
2761 /* On an AAPCS target, the pointer is stored within "struct
2762 va_list". */
2763 if (TARGET_AAPCS_BASED)
2765 tree ap_field = TYPE_FIELDS (TREE_TYPE (valist));
2766 valist = build3 (COMPONENT_REF, TREE_TYPE (ap_field),
2767 valist, ap_field, NULL_TREE);
2770 return valist;
2773 /* Implement TARGET_EXPAND_BUILTIN_VA_START. */
2774 static void
2775 arm_expand_builtin_va_start (tree valist, rtx nextarg)
2777 valist = arm_extract_valist_ptr (valist);
2778 std_expand_builtin_va_start (valist, nextarg);
2781 /* Implement TARGET_GIMPLIFY_VA_ARG_EXPR. */
2782 static tree
2783 arm_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
2784 gimple_seq *post_p)
2786 valist = arm_extract_valist_ptr (valist);
2787 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
2790 /* Check any incompatible options that the user has specified. */
2791 static void
2792 arm_option_check_internal (struct gcc_options *opts)
2794 int flags = opts->x_target_flags;
2796 /* iWMMXt and NEON are incompatible. */
2797 if (TARGET_IWMMXT
2798 && bitmap_bit_p (arm_active_target.isa, isa_bit_neon))
2799 error ("iWMMXt and NEON are incompatible");
2801 /* Make sure that the processor choice does not conflict with any of the
2802 other command line choices. */
2803 if (TARGET_ARM_P (flags)
2804 && !bitmap_bit_p (arm_active_target.isa, isa_bit_notm))
2805 error ("target CPU does not support ARM mode");
2807 /* TARGET_BACKTRACE cannot be used here as crtl->is_leaf is not set yet. */
2808 if ((TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME) && TARGET_ARM_P (flags))
2809 warning (0, "enabling backtrace support is only meaningful when compiling for the Thumb");
2811 if (TARGET_ARM_P (flags) && TARGET_CALLEE_INTERWORKING)
2812 warning (0, "enabling callee interworking support is only meaningful when compiling for the Thumb");
2814 /* If this target is normally configured to use APCS frames, warn if they
2815 are turned off and debugging is turned on. */
2816 if (TARGET_ARM_P (flags)
2817 && write_symbols != NO_DEBUG
2818 && !TARGET_APCS_FRAME
2819 && (TARGET_DEFAULT & MASK_APCS_FRAME))
2820 warning (0, "-g with -mno-apcs-frame may not give sensible debugging");
2822 /* iWMMXt unsupported under Thumb mode. */
2823 if (TARGET_THUMB_P (flags) && TARGET_IWMMXT)
2824 error ("iWMMXt unsupported under Thumb mode");
2826 if (TARGET_HARD_TP && TARGET_THUMB1_P (flags))
2827 error ("can not use -mtp=cp15 with 16-bit Thumb");
2829 if (TARGET_THUMB_P (flags) && TARGET_VXWORKS_RTP && flag_pic)
2831 error ("RTP PIC is incompatible with Thumb");
2832 flag_pic = 0;
2835 /* We only support -mslow-flash-data on armv7-m targets. */
2836 if (target_slow_flash_data
2837 && ((!(arm_arch7 && !arm_arch_notm) && !arm_arch7em)
2838 || (TARGET_THUMB1_P (flags) || flag_pic || TARGET_NEON)))
2839 error ("-mslow-flash-data only supports non-pic code on armv7-m targets");
2841 /* We only support pure-code on Thumb-2 M-profile targets. */
2842 if (target_pure_code
2843 && (!arm_arch_thumb2 || arm_arch_notm || flag_pic || TARGET_NEON))
2844 error ("-mpure-code only supports non-pic code on armv7-m targets");
2848 /* Recompute the global settings depending on target attribute options. */
2850 static void
2851 arm_option_params_internal (void)
2853 /* If we are not using the default (ARM mode) section anchor offset
2854 ranges, then set the correct ranges now. */
2855 if (TARGET_THUMB1)
2857 /* Thumb-1 LDR instructions cannot have negative offsets.
2858 Permissible positive offset ranges are 5-bit (for byte loads),
2859 6-bit (for halfword loads), or 7-bit (for word loads).
2860 Empirical results suggest a 7-bit anchor range gives the best
2861 overall code size. */
2862 targetm.min_anchor_offset = 0;
2863 targetm.max_anchor_offset = 127;
2865 else if (TARGET_THUMB2)
2867 /* The minimum is set such that the total size of the block
2868 for a particular anchor is 248 + 1 + 4095 bytes, which is
2869 divisible by eight, ensuring natural spacing of anchors. */
2870 targetm.min_anchor_offset = -248;
2871 targetm.max_anchor_offset = 4095;
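/* Sanity check of the arithmetic above: the block spans offsets
   -248 .. 4095, i.e. 248 + 1 + 4095 = 4344 bytes, and 4344 = 8 * 543.  */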
2873 else
2875 targetm.min_anchor_offset = TARGET_MIN_ANCHOR_OFFSET;
2876 targetm.max_anchor_offset = TARGET_MAX_ANCHOR_OFFSET;
2879 if (optimize_size)
2881 /* If optimizing for size, bump the number of instructions that we
2882 are prepared to conditionally execute (even on a StrongARM). */
2883 max_insns_skipped = 6;
2885 /* For THUMB2, we limit the conditional sequence to one IT block. */
2886 if (TARGET_THUMB2)
2887 max_insns_skipped = arm_restrict_it ? 1 : 4;
2889 else
2890 /* When -mrestrict-it is in use, tone down the if-conversion. */
2891 max_insns_skipped = (TARGET_THUMB2 && arm_restrict_it)
2892 ? 1 : current_tune->max_insns_skipped;
2895 /* True if -mflip-thumb should next add an attribute for the default
2896 mode, false if it should next add an attribute for the opposite mode. */
2897 static GTY(()) bool thumb_flipper;
2899 /* Options after initial target override. */
2900 static GTY(()) tree init_optimize;
2902 static void
2903 arm_override_options_after_change_1 (struct gcc_options *opts)
2905 if (opts->x_align_functions <= 0)
2906 opts->x_align_functions = TARGET_THUMB_P (opts->x_target_flags)
2907 && opts->x_optimize_size ? 2 : 4;
2910 /* Implement targetm.override_options_after_change. */
2912 static void
2913 arm_override_options_after_change (void)
2915 arm_configure_build_target (&arm_active_target,
2916 TREE_TARGET_OPTION (target_option_default_node),
2917 &global_options_set, false);
2919 arm_override_options_after_change_1 (&global_options);
2922 static void
2923 arm_option_restore (struct gcc_options *, struct cl_target_option *ptr)
2925 arm_configure_build_target (&arm_active_target, ptr, &global_options_set,
2926 false);
2929 /* Reset options between modes that the user has specified. */
2930 static void
2931 arm_option_override_internal (struct gcc_options *opts,
2932 struct gcc_options *opts_set)
2934 arm_override_options_after_change_1 (opts);
2936 if (TARGET_INTERWORK && !bitmap_bit_p (arm_active_target.isa, isa_bit_thumb))
2938 /* The default is to enable interworking, so this warning message would
2939 be confusing to users who have just compiled with, e.g., -march=armv3. */
2940 /* warning (0, "ignoring -minterwork because target CPU does not support THUMB"); */
2941 opts->x_target_flags &= ~MASK_INTERWORK;
2944 if (TARGET_THUMB_P (opts->x_target_flags)
2945 && !bitmap_bit_p (arm_active_target.isa, isa_bit_thumb))
2947 warning (0, "target CPU does not support THUMB instructions");
2948 opts->x_target_flags &= ~MASK_THUMB;
2951 if (TARGET_APCS_FRAME && TARGET_THUMB_P (opts->x_target_flags))
2953 /* warning (0, "ignoring -mapcs-frame because -mthumb was used"); */
2954 opts->x_target_flags &= ~MASK_APCS_FRAME;
2957 /* Callee super interworking implies thumb interworking. Adding
2958 this to the flags here simplifies the logic elsewhere. */
2959 if (TARGET_THUMB_P (opts->x_target_flags) && TARGET_CALLEE_INTERWORKING)
2960 opts->x_target_flags |= MASK_INTERWORK;
2962 /* Need to remember initial values so combinations of options like
2963 -mflip-thumb -mthumb -fno-schedule-insns work for any attribute. */
2964 cl_optimization *to = TREE_OPTIMIZATION (init_optimize);
2966 if (! opts_set->x_arm_restrict_it)
2967 opts->x_arm_restrict_it = arm_arch8;
2969 /* ARM execution state and M profile don't have [restrict] IT. */
2970 if (!TARGET_THUMB2_P (opts->x_target_flags) || !arm_arch_notm)
2971 opts->x_arm_restrict_it = 0;
2973 /* Enable -munaligned-access by default for
2974 - all ARMv6 architecture-based processors when compiling for a 32-bit ISA
2975 i.e. Thumb2 and ARM state only.
2976 - ARMv7-A, ARMv7-R, and ARMv7-M architecture-based processors.
2977 - ARMv8 architecture-based processors.
2979 Disable -munaligned-access by default for
2980 - all pre-ARMv6 architecture-based processors
2981 - ARMv6-M architecture-based processors
2982 - ARMv8-M Baseline processors. */
2984 if (! opts_set->x_unaligned_access)
2986 opts->x_unaligned_access = (TARGET_32BIT_P (opts->x_target_flags)
2987 && arm_arch6 && (arm_arch_notm || arm_arch7));
2989 else if (opts->x_unaligned_access == 1
2990 && !(arm_arch6 && (arm_arch_notm || arm_arch7)))
2992 warning (0, "target CPU does not support unaligned accesses");
2993 opts->x_unaligned_access = 0;
2996 /* Don't warn since it's on by default in -O2. */
2997 if (TARGET_THUMB1_P (opts->x_target_flags))
2998 opts->x_flag_schedule_insns = 0;
2999 else
3000 opts->x_flag_schedule_insns = to->x_flag_schedule_insns;
3002 /* Disable shrink-wrap when optimizing function for size, since it tends to
3003 generate additional returns. */
3004 if (optimize_function_for_size_p (cfun)
3005 && TARGET_THUMB2_P (opts->x_target_flags))
3006 opts->x_flag_shrink_wrap = false;
3007 else
3008 opts->x_flag_shrink_wrap = to->x_flag_shrink_wrap;
3010 /* In Thumb1 mode, we emit the epilogue in RTL, but the last insn
3011 - epilogue_insns - does not accurately model the corresponding insns
3012 emitted in the asm file. In particular, see the comment in thumb_exit
3013 'Find out how many of the (return) argument registers we can corrupt'.
3014 As a consequence, the epilogue may clobber registers without fipa-ra
3015 finding out about it. Therefore, disable fipa-ra in Thumb1 mode.
3016 TODO: Accurately model clobbers for epilogue_insns and reenable
3017 fipa-ra. */
3018 if (TARGET_THUMB1_P (opts->x_target_flags))
3019 opts->x_flag_ipa_ra = 0;
3020 else
3021 opts->x_flag_ipa_ra = to->x_flag_ipa_ra;
3023 /* Thumb2 inline assembly code should always use unified syntax.
3024 This will apply to ARM and Thumb1 eventually. */
3025 opts->x_inline_asm_unified = TARGET_THUMB2_P (opts->x_target_flags);
3027 #ifdef SUBTARGET_OVERRIDE_INTERNAL_OPTIONS
3028 SUBTARGET_OVERRIDE_INTERNAL_OPTIONS;
3029 #endif
3032 /* Convert a static initializer array of feature bits to sbitmap
3033 representation. */
3034 static void
3035 arm_initialize_isa (sbitmap isa, const enum isa_feature *isa_bits)
3037 bitmap_clear (isa);
3038 while (*isa_bits != isa_nobit)
3039 bitmap_set_bit (isa, *(isa_bits++));
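/* A typical caller builds the terminated bit list statically, e.g. the
   quirk_bitlist and fpu_bitlist arrays in arm_option_override () below,
   each of which ends with isa_nobit, the sentinel this loop stops on.  */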
3042 static sbitmap isa_all_fpubits;
3043 static sbitmap isa_quirkbits;
3045 /* Configure a build target TARGET from the user-specified options OPTS and
3046 OPTS_SET. If WARN_COMPATIBLE, emit a diagnostic if both the CPU and
3047 architecture have been specified, but the two are not identical. */
3048 void
3049 arm_configure_build_target (struct arm_build_target *target,
3050 struct cl_target_option *opts,
3051 struct gcc_options *opts_set,
3052 bool warn_compatible)
3054 const struct processors *arm_selected_tune = NULL;
3055 const struct processors *arm_selected_arch = NULL;
3056 const struct processors *arm_selected_cpu = NULL;
3057 const struct arm_fpu_desc *arm_selected_fpu = NULL;
3059 bitmap_clear (target->isa);
3060 target->core_name = NULL;
3061 target->arch_name = NULL;
3063 if (opts_set->x_arm_arch_option)
3064 arm_selected_arch = &all_architectures[opts->x_arm_arch_option];
3066 if (opts_set->x_arm_cpu_option)
3068 arm_selected_cpu = &all_cores[(int) opts->x_arm_cpu_option];
3069 arm_selected_tune = &all_cores[(int) opts->x_arm_cpu_option];
3072 if (opts_set->x_arm_tune_option)
3073 arm_selected_tune = &all_cores[(int) opts->x_arm_tune_option];
3075 if (arm_selected_arch)
3077 arm_initialize_isa (target->isa, arm_selected_arch->isa_bits);
3079 if (arm_selected_cpu)
3081 auto_sbitmap cpu_isa (isa_num_bits);
3083 arm_initialize_isa (cpu_isa, arm_selected_cpu->isa_bits);
3084 bitmap_xor (cpu_isa, cpu_isa, target->isa);
3085 /* Ignore any bits that are quirk bits. */
3086 bitmap_and_compl (cpu_isa, cpu_isa, isa_quirkbits);
3087 /* Ignore (for now) any bits that might be set by -mfpu. */
3088 bitmap_and_compl (cpu_isa, cpu_isa, isa_all_fpubits);
3090 if (!bitmap_empty_p (cpu_isa))
3092 if (warn_compatible)
3093 warning (0, "switch -mcpu=%s conflicts with -march=%s switch",
3094 arm_selected_cpu->name, arm_selected_arch->name);
3095 /* -march wins for code generation.
3096 -mcpu wins for default tuning. */
3097 if (!arm_selected_tune)
3098 arm_selected_tune = arm_selected_cpu;
3100 arm_selected_cpu = arm_selected_arch;
3102 else
3104 /* Architecture and CPU are essentially the same.
3105 Prefer the CPU setting. */
3106 arm_selected_arch = NULL;
3109 target->core_name = arm_selected_cpu->name;
3111 else
3113 /* Pick a CPU based on the architecture. */
3114 arm_selected_cpu = arm_selected_arch;
3115 target->arch_name = arm_selected_arch->name;
3116 /* Note: target->core_name is left unset in this path. */
3119 else if (arm_selected_cpu)
3121 target->core_name = arm_selected_cpu->name;
3122 arm_initialize_isa (target->isa, arm_selected_cpu->isa_bits);
3124 /* If the user did not specify a processor, choose one for them. */
3125 else
3127 const struct processors * sel;
3128 auto_sbitmap sought_isa (isa_num_bits);
3129 bitmap_clear (sought_isa);
3130 auto_sbitmap default_isa (isa_num_bits);
3132 arm_selected_cpu = &all_cores[TARGET_CPU_DEFAULT];
3133 gcc_assert (arm_selected_cpu->name);
3135 /* RWE: All of the selection logic below (to the end of this
3136 'if' clause) looks somewhat suspect. It appears to be mostly
3137 there to support forcing thumb support when the default CPU
3138 does not have thumb (somewhat dubious in terms of what the
3139 user might be expecting). I think it should be removed once
3140 support for the pre-thumb era cores is removed. */
3141 sel = arm_selected_cpu;
3142 arm_initialize_isa (default_isa, sel->isa_bits);
3144 /* Now check to see if the user has specified any command line
3145 switches that require certain abilities from the cpu. */
3147 if (TARGET_INTERWORK || TARGET_THUMB)
3149 bitmap_set_bit (sought_isa, isa_bit_thumb);
3150 bitmap_set_bit (sought_isa, isa_bit_mode32);
3152 /* There are no ARM processors that support both APCS-26 and
3153 interworking. Therefore we forcibly remove MODE26 from
3154 the isa features here (if it was set), so that the
3155 search below will always be able to find a compatible
3156 processor. */
3157 bitmap_clear_bit (default_isa, isa_bit_mode26);
3160 /* If there are such requirements and the default CPU does not
3161 satisfy them, we need to run over the complete list of
3162 cores looking for one that is satisfactory. */
3163 if (!bitmap_empty_p (sought_isa)
3164 && !bitmap_subset_p (sought_isa, default_isa))
3166 auto_sbitmap candidate_isa (isa_num_bits);
3167 /* We're only interested in a CPU with at least the
3168 capabilities of the default CPU and the required
3169 additional features. */
3170 bitmap_ior (default_isa, default_isa, sought_isa);
3172 /* Try to locate a CPU type that supports all of the abilities
3173 of the default CPU, plus the extra abilities requested by
3174 the user. */
3175 for (sel = all_cores; sel->name != NULL; sel++)
3177 arm_initialize_isa (candidate_isa, sel->isa_bits);
3178 /* An exact match? */
3179 if (bitmap_equal_p (default_isa, candidate_isa))
3180 break;
3183 if (sel->name == NULL)
3185 unsigned current_bit_count = isa_num_bits;
3186 const struct processors * best_fit = NULL;
3188 /* Ideally we would like to issue an error message here
3189 saying that it was not possible to find a CPU compatible
3190 with the default CPU, but which also supports the command
3191 line options specified by the programmer, and so they
3192 ought to use the -mcpu=<name> command line option to
3193 override the default CPU type.
3195 If we cannot find a CPU that has exactly the
3196 characteristics of the default CPU and the given
3197 command line options we scan the array again looking
3198 for a best match. The best match must have at least
3199 the capabilities of the perfect match. */
3200 for (sel = all_cores; sel->name != NULL; sel++)
3202 arm_initialize_isa (candidate_isa, sel->isa_bits);
3204 if (bitmap_subset_p (default_isa, candidate_isa))
3206 unsigned count;
3208 bitmap_and_compl (candidate_isa, candidate_isa,
3209 default_isa);
3210 count = bitmap_popcount (candidate_isa);
3212 if (count < current_bit_count)
3214 best_fit = sel;
3215 current_bit_count = count;
3219 gcc_assert (best_fit);
3220 sel = best_fit;
3223 arm_selected_cpu = sel;
3226 /* Now we know the CPU, we can finally initialize the target
3227 structure. */
3228 target->core_name = arm_selected_cpu->name;
3229 arm_initialize_isa (target->isa, arm_selected_cpu->isa_bits);
3232 gcc_assert (arm_selected_cpu);
3234 if (opts->x_arm_fpu_index != TARGET_FPU_auto)
3236 arm_selected_fpu = &all_fpus[opts->x_arm_fpu_index];
3237 auto_sbitmap fpu_bits (isa_num_bits);
3239 arm_initialize_isa (fpu_bits, arm_selected_fpu->isa_bits);
3240 bitmap_and_compl (target->isa, target->isa, isa_all_fpubits);
3241 bitmap_ior (target->isa, target->isa, fpu_bits);
3243 else if (target->core_name == NULL)
3244 /* To support this we need to be able to parse FPU feature options
3245 from the architecture string. */
3246 sorry ("-mfpu=auto not currently supported without an explicit CPU.");
3248 /* The selected cpu may be an architecture, so look up tuning by core ID. */
3249 if (!arm_selected_tune)
3250 arm_selected_tune = &all_cores[arm_selected_cpu->core];
3252 /* Finish initializing the target structure. */
3253 target->arch_pp_name = arm_selected_cpu->arch;
3254 target->base_arch = arm_selected_cpu->base_arch;
3255 target->arch_core = arm_selected_cpu->core;
3257 target->tune_flags = arm_selected_tune->tune_flags;
3258 target->tune = arm_selected_tune->tune;
3259 target->tune_core = arm_selected_tune->core;
3262 /* Fix up any incompatible options that the user has specified. */
3263 static void
3264 arm_option_override (void)
3266 static const enum isa_feature fpu_bitlist[] = { ISA_ALL_FPU, isa_nobit };
3267 static const enum isa_feature quirk_bitlist[] = { ISA_ALL_QUIRKS, isa_nobit};
3268 cl_target_option opts;
3270 isa_quirkbits = sbitmap_alloc (isa_num_bits);
3271 arm_initialize_isa (isa_quirkbits, quirk_bitlist);
3273 isa_all_fpubits = sbitmap_alloc (isa_num_bits);
3274 arm_initialize_isa (isa_all_fpubits, fpu_bitlist);
3276 arm_active_target.isa = sbitmap_alloc (isa_num_bits);
3278 if (!global_options_set.x_arm_fpu_index)
3280 const char *target_fpu_name;
3281 bool ok;
3282 int fpu_index;
3284 #ifdef FPUTYPE_DEFAULT
3285 target_fpu_name = FPUTYPE_DEFAULT;
3286 #else
3287 target_fpu_name = "vfp";
3288 #endif
3290 ok = opt_enum_arg_to_value (OPT_mfpu_, target_fpu_name, &fpu_index,
3291 CL_TARGET);
3292 gcc_assert (ok);
3293 arm_fpu_index = (enum fpu_type) fpu_index;
3296 cl_target_option_save (&opts, &global_options);
3297 arm_configure_build_target (&arm_active_target, &opts, &global_options_set,
3298 true);
3300 #ifdef SUBTARGET_OVERRIDE_OPTIONS
3301 SUBTARGET_OVERRIDE_OPTIONS;
3302 #endif
3304 sprintf (arm_arch_name, "__ARM_ARCH_%s__", arm_active_target.arch_pp_name);
3305 arm_base_arch = arm_active_target.base_arch;
3307 arm_tune = arm_active_target.tune_core;
3308 tune_flags = arm_active_target.tune_flags;
3309 current_tune = arm_active_target.tune;
3311 /* TBD: Dwarf info for apcs frame is not handled yet. */
3312 if (TARGET_APCS_FRAME)
3313 flag_shrink_wrap = false;
3315 /* BPABI targets use linker tricks to allow interworking on cores
3316 without thumb support. */
3317 if (TARGET_INTERWORK
3318 && !TARGET_BPABI
3319 && !bitmap_bit_p (arm_active_target.isa, isa_bit_thumb))
3321 warning (0, "target CPU does not support interworking" );
3322 target_flags &= ~MASK_INTERWORK;
3325 if (TARGET_APCS_STACK && !TARGET_APCS_FRAME)
3327 warning (0, "-mapcs-stack-check incompatible with -mno-apcs-frame");
3328 target_flags |= MASK_APCS_FRAME;
3331 if (TARGET_POKE_FUNCTION_NAME)
3332 target_flags |= MASK_APCS_FRAME;
3334 if (TARGET_APCS_REENT && flag_pic)
3335 error ("-fpic and -mapcs-reent are incompatible");
3337 if (TARGET_APCS_REENT)
3338 warning (0, "APCS reentrant code not supported. Ignored");
3340 /* Initialize boolean versions of the architectural flags, for use
3341 in the arm.md file. */
3342 arm_arch3m = bitmap_bit_p (arm_active_target.isa, isa_bit_ARMv3m);
3343 arm_arch4 = bitmap_bit_p (arm_active_target.isa, isa_bit_ARMv4);
3344 arm_arch4t = arm_arch4 && bitmap_bit_p (arm_active_target.isa, isa_bit_thumb);
3345 arm_arch5 = bitmap_bit_p (arm_active_target.isa, isa_bit_ARMv5);
3346 arm_arch5e = bitmap_bit_p (arm_active_target.isa, isa_bit_ARMv5e);
3347 arm_arch5te = arm_arch5e
3348 && bitmap_bit_p (arm_active_target.isa, isa_bit_thumb);
3349 arm_arch6 = bitmap_bit_p (arm_active_target.isa, isa_bit_ARMv6);
3350 arm_arch6k = bitmap_bit_p (arm_active_target.isa, isa_bit_ARMv6k);
3351 arm_arch_notm = bitmap_bit_p (arm_active_target.isa, isa_bit_notm);
3352 arm_arch6m = arm_arch6 && !arm_arch_notm;
3353 arm_arch7 = bitmap_bit_p (arm_active_target.isa, isa_bit_ARMv7);
3354 arm_arch7em = bitmap_bit_p (arm_active_target.isa, isa_bit_ARMv7em);
3355 arm_arch8 = bitmap_bit_p (arm_active_target.isa, isa_bit_ARMv8);
3356 arm_arch8_1 = bitmap_bit_p (arm_active_target.isa, isa_bit_ARMv8_1);
3357 arm_arch8_2 = bitmap_bit_p (arm_active_target.isa, isa_bit_ARMv8_2);
3358 arm_arch_thumb1 = bitmap_bit_p (arm_active_target.isa, isa_bit_thumb);
3359 arm_arch_thumb2 = bitmap_bit_p (arm_active_target.isa, isa_bit_thumb2);
3360 arm_arch_xscale = bitmap_bit_p (arm_active_target.isa, isa_bit_xscale);
3361 arm_arch_iwmmxt = bitmap_bit_p (arm_active_target.isa, isa_bit_iwmmxt);
3362 arm_arch_iwmmxt2 = bitmap_bit_p (arm_active_target.isa, isa_bit_iwmmxt2);
3363 arm_arch_thumb_hwdiv = bitmap_bit_p (arm_active_target.isa, isa_bit_tdiv);
3364 arm_arch_arm_hwdiv = bitmap_bit_p (arm_active_target.isa, isa_bit_adiv);
3365 arm_arch_crc = bitmap_bit_p (arm_active_target.isa, isa_bit_crc32);
3366 arm_arch_cmse = bitmap_bit_p (arm_active_target.isa, isa_bit_cmse);
3367 arm_fp16_inst = bitmap_bit_p (arm_active_target.isa, isa_bit_fp16);
3368 arm_arch_lpae = bitmap_bit_p (arm_active_target.isa, isa_bit_lpae);
3369 if (arm_fp16_inst)
3371 if (arm_fp16_format == ARM_FP16_FORMAT_ALTERNATIVE)
3372 error ("selected fp16 options are incompatible");
3373 arm_fp16_format = ARM_FP16_FORMAT_IEEE;
3377 /* Set up some tuning parameters. */
3378 arm_ld_sched = (tune_flags & TF_LDSCHED) != 0;
3379 arm_tune_strongarm = (tune_flags & TF_STRONG) != 0;
3380 arm_tune_wbuf = (tune_flags & TF_WBUF) != 0;
3381 arm_tune_xscale = (tune_flags & TF_XSCALE) != 0;
3382 arm_tune_cortex_a9 = (arm_tune == TARGET_CPU_cortexa9) != 0;
3383 arm_m_profile_small_mul = (tune_flags & TF_SMALLMUL) != 0;
3385 /* And finally, set up some quirks. */
3386 arm_arch_no_volatile_ce
3387 = bitmap_bit_p (arm_active_target.isa, isa_quirk_no_volatile_ce);
3388 arm_arch6kz
3389 = arm_arch6k && bitmap_bit_p (arm_active_target.isa, isa_quirk_ARMv6kz);
3391 /* V5 code we generate is completely interworking capable, so we turn off
3392 TARGET_INTERWORK here to avoid many tests later on. */
3394 /* XXX However, we must pass the right pre-processor defines to CPP
3395 or GLD can get confused. This is a hack. */
3396 if (TARGET_INTERWORK)
3397 arm_cpp_interwork = 1;
3399 if (arm_arch5)
3400 target_flags &= ~MASK_INTERWORK;
3402 if (TARGET_IWMMXT && !ARM_DOUBLEWORD_ALIGN)
3403 error ("iwmmxt requires an AAPCS compatible ABI for proper operation");
3405 if (TARGET_IWMMXT_ABI && !TARGET_IWMMXT)
3406 error ("iwmmxt abi requires an iwmmxt capable cpu");
3408 /* If soft-float is specified then don't use FPU. */
3409 if (TARGET_SOFT_FLOAT)
3410 arm_fpu_attr = FPU_NONE;
3411 else
3412 arm_fpu_attr = FPU_VFP;
3414 if (TARGET_AAPCS_BASED)
3416 if (TARGET_CALLER_INTERWORKING)
3417 error ("AAPCS does not support -mcaller-super-interworking");
3418 else
3419 if (TARGET_CALLEE_INTERWORKING)
3420 error ("AAPCS does not support -mcallee-super-interworking");
3423 /* __fp16 support currently assumes the core has ldrh. */
3424 if (!arm_arch4 && arm_fp16_format != ARM_FP16_FORMAT_NONE)
3425 sorry ("__fp16 and no ldrh");
3427 if (TARGET_AAPCS_BASED)
3429 if (arm_abi == ARM_ABI_IWMMXT)
3430 arm_pcs_default = ARM_PCS_AAPCS_IWMMXT;
3431 else if (arm_float_abi == ARM_FLOAT_ABI_HARD
3432 && TARGET_HARD_FLOAT)
3434 arm_pcs_default = ARM_PCS_AAPCS_VFP;
3435 if (!bitmap_bit_p (arm_active_target.isa, isa_bit_VFPv2))
3436 error ("-mfloat-abi=hard: selected processor lacks an FPU");
3438 else
3439 arm_pcs_default = ARM_PCS_AAPCS;
3441 else
3443 if (arm_float_abi == ARM_FLOAT_ABI_HARD)
3444 sorry ("-mfloat-abi=hard and VFP");
3446 if (arm_abi == ARM_ABI_APCS)
3447 arm_pcs_default = ARM_PCS_APCS;
3448 else
3449 arm_pcs_default = ARM_PCS_ATPCS;
3452 /* For arm2/3 there is no need to do any scheduling if we are doing
3453 software floating-point. */
3454 if (TARGET_SOFT_FLOAT && (tune_flags & TF_NO_MODE32))
3455 flag_schedule_insns = flag_schedule_insns_after_reload = 0;
3457 /* Use the cp15 method if it is available. */
3458 if (target_thread_pointer == TP_AUTO)
3460 if (arm_arch6k && !TARGET_THUMB1)
3461 target_thread_pointer = TP_CP15;
3462 else
3463 target_thread_pointer = TP_SOFT;
3466 /* Override the default structure alignment for AAPCS ABI. */
3467 if (!global_options_set.x_arm_structure_size_boundary)
3469 if (TARGET_AAPCS_BASED)
3470 arm_structure_size_boundary = 8;
3472 else
3474 if (arm_structure_size_boundary != 8
3475 && arm_structure_size_boundary != 32
3476 && !(ARM_DOUBLEWORD_ALIGN && arm_structure_size_boundary == 64))
3478 if (ARM_DOUBLEWORD_ALIGN)
3479 warning (0,
3480 "structure size boundary can only be set to 8, 32 or 64");
3481 else
3482 warning (0, "structure size boundary can only be set to 8 or 32");
3483 arm_structure_size_boundary
3484 = (TARGET_AAPCS_BASED ? 8 : DEFAULT_STRUCTURE_SIZE_BOUNDARY);
3488 if (TARGET_VXWORKS_RTP)
3490 if (!global_options_set.x_arm_pic_data_is_text_relative)
3491 arm_pic_data_is_text_relative = 0;
3493 else if (flag_pic
3494 && !arm_pic_data_is_text_relative
3495 && !(global_options_set.x_target_flags & MASK_SINGLE_PIC_BASE))
3496 /* When text & data segments don't have a fixed displacement, the
3497 intended use is with a single, read only, pic base register.
3498 Unless the user explicitly requested not to do that, set
3499 it. */
3500 target_flags |= MASK_SINGLE_PIC_BASE;
3502 /* If stack checking is disabled, we can use r10 as the PIC register,
3503 which keeps r9 available. The EABI specifies r9 as the PIC register. */
3504 if (flag_pic && TARGET_SINGLE_PIC_BASE)
3506 if (TARGET_VXWORKS_RTP)
3507 warning (0, "RTP PIC is incompatible with -msingle-pic-base");
3508 arm_pic_register = (TARGET_APCS_STACK || TARGET_AAPCS_BASED) ? 9 : 10;
3511 if (flag_pic && TARGET_VXWORKS_RTP)
3512 arm_pic_register = 9;
3514 if (arm_pic_register_string != NULL)
3516 int pic_register = decode_reg_name (arm_pic_register_string);
3518 if (!flag_pic)
3519 warning (0, "-mpic-register= is useless without -fpic");
3521 /* Prevent the user from choosing an obviously stupid PIC register. */
3522 else if (pic_register < 0 || call_used_regs[pic_register]
3523 || pic_register == HARD_FRAME_POINTER_REGNUM
3524 || pic_register == STACK_POINTER_REGNUM
3525 || pic_register >= PC_REGNUM
3526 || (TARGET_VXWORKS_RTP
3527 && (unsigned int) pic_register != arm_pic_register))
3528 error ("unable to use '%s' for PIC register", arm_pic_register_string);
3529 else
3530 arm_pic_register = pic_register;
3533 /* Enable -mfix-cortex-m3-ldrd by default for Cortex-M3 cores. */
3534 if (fix_cm3_ldrd == 2)
3536 if (bitmap_bit_p (arm_active_target.isa, isa_quirk_cm3_ldrd))
3537 fix_cm3_ldrd = 1;
3538 else
3539 fix_cm3_ldrd = 0;
3542 /* Hot/Cold partitioning is not currently supported, since we can't
3543 handle literal pool placement in that case. */
3544 if (flag_reorder_blocks_and_partition)
3546 inform (input_location,
3547 "-freorder-blocks-and-partition not supported on this architecture");
3548 flag_reorder_blocks_and_partition = 0;
3549 flag_reorder_blocks = 1;
3552 if (flag_pic)
3553 /* Hoisting PIC address calculations more aggressively provides a small,
3554 but measurable, size reduction for PIC code. Therefore, we decrease
3555 the bar for unrestricted expression hoisting to the cost of PIC address
3556 calculation, which is 2 instructions. */
3557 maybe_set_param_value (PARAM_GCSE_UNRESTRICTED_COST, 2,
3558 global_options.x_param_values,
3559 global_options_set.x_param_values);
3561 /* ARM EABI defaults to strict volatile bitfields. */
3562 if (TARGET_AAPCS_BASED && flag_strict_volatile_bitfields < 0
3563 && abi_version_at_least(2))
3564 flag_strict_volatile_bitfields = 1;
3566 /* Enable sw prefetching at -O3 for CPUs that have prefetch, and we
3567 have deemed it beneficial (signified by setting
3568 prefetch.num_slots to 1 or more). */
3569 if (flag_prefetch_loop_arrays < 0
3570 && HAVE_prefetch
3571 && optimize >= 3
3572 && current_tune->prefetch.num_slots > 0)
3573 flag_prefetch_loop_arrays = 1;
3575 /* Set up parameters to be used in prefetching algorithm. Do not
3576 override the defaults unless we are tuning for a core we have
3577 researched values for. */
3578 if (current_tune->prefetch.num_slots > 0)
3579 maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES,
3580 current_tune->prefetch.num_slots,
3581 global_options.x_param_values,
3582 global_options_set.x_param_values);
3583 if (current_tune->prefetch.l1_cache_line_size >= 0)
3584 maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE,
3585 current_tune->prefetch.l1_cache_line_size,
3586 global_options.x_param_values,
3587 global_options_set.x_param_values);
3588 if (current_tune->prefetch.l1_cache_size >= 0)
3589 maybe_set_param_value (PARAM_L1_CACHE_SIZE,
3590 current_tune->prefetch.l1_cache_size,
3591 global_options.x_param_values,
3592 global_options_set.x_param_values);
3594 /* Use Neon to perform 64-bit operations rather than core
3595 registers. */
3596 prefer_neon_for_64bits = current_tune->prefer_neon_for_64bits;
3597 if (use_neon_for_64bits == 1)
3598 prefer_neon_for_64bits = true;
3600 /* Use the alternative scheduling-pressure algorithm by default. */
3601 maybe_set_param_value (PARAM_SCHED_PRESSURE_ALGORITHM, SCHED_PRESSURE_MODEL,
3602 global_options.x_param_values,
3603 global_options_set.x_param_values);
3605 /* Look through ready list and all of queue for instructions
3606 relevant for L2 auto-prefetcher. */
3607 int param_sched_autopref_queue_depth;
3609 switch (current_tune->sched_autopref)
3611 case tune_params::SCHED_AUTOPREF_OFF:
3612 param_sched_autopref_queue_depth = -1;
3613 break;
3615 case tune_params::SCHED_AUTOPREF_RANK:
3616 param_sched_autopref_queue_depth = 0;
3617 break;
3619 case tune_params::SCHED_AUTOPREF_FULL:
3620 param_sched_autopref_queue_depth = max_insn_queue_index + 1;
3621 break;
3623 default:
3624 gcc_unreachable ();
3627 maybe_set_param_value (PARAM_SCHED_AUTOPREF_QUEUE_DEPTH,
3628 param_sched_autopref_queue_depth,
3629 global_options.x_param_values,
3630 global_options_set.x_param_values);
3632 /* Currently, for slow flash data, we just disable literal pools. We also
3633 disable them for pure-code. */
3634 if (target_slow_flash_data || target_pure_code)
3635 arm_disable_literal_pool = true;
3637 if (use_cmse && !arm_arch_cmse)
3638 error ("target CPU does not support ARMv8-M Security Extensions");
3640 /* Disable scheduling fusion by default if it's not an armv7 processor
3641 or doesn't prefer ldrd/strd. */
3642 if (flag_schedule_fusion == 2
3643 && (!arm_arch7 || !current_tune->prefer_ldrd_strd))
3644 flag_schedule_fusion = 0;
3646 /* Need to remember initial options before they are overridden. */
3647 init_optimize = build_optimization_node (&global_options);
3649 arm_option_override_internal (&global_options, &global_options_set);
3650 arm_option_check_internal (&global_options);
3651 arm_option_params_internal ();
3653 /* Create the default target_options structure. */
3654 target_option_default_node = target_option_current_node
3655 = build_target_option_node (&global_options);
3657 /* Register global variables with the garbage collector. */
3658 arm_add_gc_roots ();
3660 /* Set the initial mode for testing. */
3661 thumb_flipper = TARGET_THUMB;
3664 static void
3665 arm_add_gc_roots (void)
3667 gcc_obstack_init(&minipool_obstack);
3668 minipool_startobj = (char *) obstack_alloc (&minipool_obstack, 0);
3671 /* A table of known ARM exception types.
3672 For use with the interrupt function attribute. */
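/* As an illustrative (hypothetical) example of how these strings are used,
   a handler might be declared as
     void __attribute__ ((interrupt ("IRQ"))) irq_handler (void);
   arm_isr_value below then matches the string "IRQ" against this table
   and selects ARM_FT_ISR.  */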
3674 typedef struct
3676 const char *const arg;
3677 const unsigned long return_value;
3679 isr_attribute_arg;
3681 static const isr_attribute_arg isr_attribute_args [] =
3683 { "IRQ", ARM_FT_ISR },
3684 { "irq", ARM_FT_ISR },
3685 { "FIQ", ARM_FT_FIQ },
3686 { "fiq", ARM_FT_FIQ },
3687 { "ABORT", ARM_FT_ISR },
3688 { "abort", ARM_FT_ISR },
3689 { "ABORT", ARM_FT_ISR },
3690 { "abort", ARM_FT_ISR },
3691 { "UNDEF", ARM_FT_EXCEPTION },
3692 { "undef", ARM_FT_EXCEPTION },
3693 { "SWI", ARM_FT_EXCEPTION },
3694 { "swi", ARM_FT_EXCEPTION },
3695 { NULL, ARM_FT_NORMAL }
3698 /* Returns the (interrupt) function type of the current
3699 function, or ARM_FT_UNKNOWN if the type cannot be determined. */
3701 static unsigned long
3702 arm_isr_value (tree argument)
3704 const isr_attribute_arg * ptr;
3705 const char * arg;
3707 if (!arm_arch_notm)
3708 return ARM_FT_NORMAL | ARM_FT_STACKALIGN;
3710 /* No argument - default to IRQ. */
3711 if (argument == NULL_TREE)
3712 return ARM_FT_ISR;
3714 /* Get the value of the argument. */
3715 if (TREE_VALUE (argument) == NULL_TREE
3716 || TREE_CODE (TREE_VALUE (argument)) != STRING_CST)
3717 return ARM_FT_UNKNOWN;
3719 arg = TREE_STRING_POINTER (TREE_VALUE (argument));
3721 /* Check it against the list of known arguments. */
3722 for (ptr = isr_attribute_args; ptr->arg != NULL; ptr++)
3723 if (streq (arg, ptr->arg))
3724 return ptr->return_value;
3726 /* An unrecognized interrupt type. */
3727 return ARM_FT_UNKNOWN;
3730 /* Computes the type of the current function. */
3732 static unsigned long
3733 arm_compute_func_type (void)
3735 unsigned long type = ARM_FT_UNKNOWN;
3736 tree a;
3737 tree attr;
3739 gcc_assert (TREE_CODE (current_function_decl) == FUNCTION_DECL);
3741 /* Decide if the current function is volatile. Such functions
3742 never return, and many memory cycles can be saved by not storing
3743 register values that will never be needed again. This optimization
3744 was added to speed up context switching in a kernel application. */
3745 if (optimize > 0
3746 && (TREE_NOTHROW (current_function_decl)
3747 || !(flag_unwind_tables
3748 || (flag_exceptions
3749 && arm_except_unwind_info (&global_options) != UI_SJLJ)))
3750 && TREE_THIS_VOLATILE (current_function_decl))
3751 type |= ARM_FT_VOLATILE;
3753 if (cfun->static_chain_decl != NULL)
3754 type |= ARM_FT_NESTED;
3756 attr = DECL_ATTRIBUTES (current_function_decl);
3758 a = lookup_attribute ("naked", attr);
3759 if (a != NULL_TREE)
3760 type |= ARM_FT_NAKED;
3762 a = lookup_attribute ("isr", attr);
3763 if (a == NULL_TREE)
3764 a = lookup_attribute ("interrupt", attr);
3766 if (a == NULL_TREE)
3767 type |= TARGET_INTERWORK ? ARM_FT_INTERWORKED : ARM_FT_NORMAL;
3768 else
3769 type |= arm_isr_value (TREE_VALUE (a));
3771 if (lookup_attribute ("cmse_nonsecure_entry", attr))
3772 type |= ARM_FT_CMSE_ENTRY;
3774 return type;
3777 /* Returns the type of the current function. */
3779 unsigned long
3780 arm_current_func_type (void)
3782 if (ARM_FUNC_TYPE (cfun->machine->func_type) == ARM_FT_UNKNOWN)
3783 cfun->machine->func_type = arm_compute_func_type ();
3785 return cfun->machine->func_type;
3788 bool
3789 arm_allocate_stack_slots_for_args (void)
3791 /* Naked functions should not allocate stack slots for arguments. */
3792 return !IS_NAKED (arm_current_func_type ());
3795 static bool
3796 arm_warn_func_return (tree decl)
3798 /* Naked functions are implemented entirely in assembly, including the
3799 return sequence, so suppress warnings about this. */
3800 return lookup_attribute ("naked", DECL_ATTRIBUTES (decl)) == NULL_TREE;
3804 /* Output assembler code for a block containing the constant parts
3805 of a trampoline, leaving space for the variable parts.
3807 On the ARM, (if r8 is the static chain regnum, and remembering that
3808 referencing pc adds an offset of 8) the trampoline looks like:
3809 ldr r8, [pc, #0]
3810 ldr pc, [pc]
3811 .word static chain value
3812 .word function's address
3813 XXX FIXME: When the trampoline returns, r8 will be clobbered. */
3815 static void
3816 arm_asm_trampoline_template (FILE *f)
3818 fprintf (f, "\t.syntax unified\n");
3820 if (TARGET_ARM)
3822 fprintf (f, "\t.arm\n");
3823 asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", STATIC_CHAIN_REGNUM, PC_REGNUM);
3824 asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", PC_REGNUM, PC_REGNUM);
3826 else if (TARGET_THUMB2)
3828 fprintf (f, "\t.thumb\n");
3829 /* The Thumb-2 trampoline is similar to the arm implementation.
3830 Unlike 16-bit Thumb, we enter the stub in thumb mode. */
3831 asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n",
3832 STATIC_CHAIN_REGNUM, PC_REGNUM);
3833 asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n", PC_REGNUM, PC_REGNUM);
3835 else
3837 ASM_OUTPUT_ALIGN (f, 2);
3838 fprintf (f, "\t.code\t16\n");
3839 fprintf (f, ".Ltrampoline_start:\n");
3840 asm_fprintf (f, "\tpush\t{r0, r1}\n");
3841 asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
3842 asm_fprintf (f, "\tmov\t%r, r0\n", STATIC_CHAIN_REGNUM);
3843 asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
3844 asm_fprintf (f, "\tstr\tr0, [%r, #4]\n", SP_REGNUM);
3845 asm_fprintf (f, "\tpop\t{r0, %r}\n", PC_REGNUM);
3847 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
3848 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
3851 /* Emit RTL insns to initialize the variable parts of a trampoline. */
3853 static void
3854 arm_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
3856 rtx fnaddr, mem, a_tramp;
3858 emit_block_move (m_tramp, assemble_trampoline_template (),
3859 GEN_INT (TRAMPOLINE_SIZE), BLOCK_OP_NORMAL);
3861 mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 8 : 12);
3862 emit_move_insn (mem, chain_value);
3864 mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 12 : 16);
3865 fnaddr = XEXP (DECL_RTL (fndecl), 0);
3866 emit_move_insn (mem, fnaddr);
3868 a_tramp = XEXP (m_tramp, 0);
3869 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__clear_cache"),
3870 LCT_NORMAL, VOIDmode, 2, a_tramp, Pmode,
3871 plus_constant (Pmode, a_tramp, TRAMPOLINE_SIZE), Pmode);
3874 /* Thumb trampolines should be entered in thumb mode, so set
3875 the bottom bit of the address. */
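/* For instance (illustration only), a trampoline placed at 0x20000 is
   materialized as the value 0x20001, so that an indirect BX/BLX through
   it enters the stub in Thumb state.  */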
3877 static rtx
3878 arm_trampoline_adjust_address (rtx addr)
3880 if (TARGET_THUMB)
3881 addr = expand_simple_binop (Pmode, IOR, addr, const1_rtx,
3882 NULL, 0, OPTAB_LIB_WIDEN);
3883 return addr;
3886 /* Return 1 if it is possible to return using a single instruction.
3887 If SIBLING is non-null, this is a test for a return before a sibling
3888 call. SIBLING is the call insn, so we can examine its register usage. */
3891 use_return_insn (int iscond, rtx sibling)
3893 int regno;
3894 unsigned int func_type;
3895 unsigned long saved_int_regs;
3896 unsigned HOST_WIDE_INT stack_adjust;
3897 arm_stack_offsets *offsets;
3899 /* Never use a return instruction before reload has run. */
3900 if (!reload_completed)
3901 return 0;
3903 func_type = arm_current_func_type ();
3905 /* Naked, volatile and stack alignment functions need special
3906 consideration. */
3907 if (func_type & (ARM_FT_VOLATILE | ARM_FT_NAKED | ARM_FT_STACKALIGN))
3908 return 0;
3910 /* So do interrupt functions that use the frame pointer and Thumb
3911 interrupt functions. */
3912 if (IS_INTERRUPT (func_type) && (frame_pointer_needed || TARGET_THUMB))
3913 return 0;
3915 if (TARGET_LDRD && current_tune->prefer_ldrd_strd
3916 && !optimize_function_for_size_p (cfun))
3917 return 0;
3919 offsets = arm_get_frame_offsets ();
3920 stack_adjust = offsets->outgoing_args - offsets->saved_regs;
3922 /* As do variadic functions. */
3923 if (crtl->args.pretend_args_size
3924 || cfun->machine->uses_anonymous_args
3925 /* Or if the function calls __builtin_eh_return () */
3926 || crtl->calls_eh_return
3927 /* Or if the function calls alloca */
3928 || cfun->calls_alloca
3929 /* Or if there is a stack adjustment. However, if the stack pointer
3930 is saved on the stack, we can use a pre-incrementing stack load. */
3931 || !(stack_adjust == 0 || (TARGET_APCS_FRAME && frame_pointer_needed
3932 && stack_adjust == 4))
3933 /* Or if the static chain register was saved above the frame, under the
3934 assumption that the stack pointer isn't saved on the stack. */
3935 || (!(TARGET_APCS_FRAME && frame_pointer_needed)
3936 && arm_compute_static_chain_stack_bytes() != 0))
3937 return 0;
3939 saved_int_regs = offsets->saved_regs_mask;
3941 /* Unfortunately, the insn
3943 ldmib sp, {..., sp, ...}
3945 triggers a bug on most SA-110 based devices, such that the stack
3946 pointer won't be correctly restored if the instruction takes a
3947 page fault. We work around this problem by popping r3 along with
3948 the other registers, since that is never slower than executing
3949 another instruction.
3951 We test for !arm_arch5 here, because code for any architecture
3952 less than this could potentially be run on one of the buggy
3953 chips. */
3954 if (stack_adjust == 4 && !arm_arch5 && TARGET_ARM)
3956 /* Validate that r3 is a call-clobbered register (always true in
3957 the default abi) ... */
3958 if (!call_used_regs[3])
3959 return 0;
3961 /* ... that it isn't being used for a return value ... */
3962 if (arm_size_return_regs () >= (4 * UNITS_PER_WORD))
3963 return 0;
3965 /* ... or for a tail-call argument ... */
3966 if (sibling)
3968 gcc_assert (CALL_P (sibling));
3970 if (find_regno_fusage (sibling, USE, 3))
3971 return 0;
3974 /* ... and that there are no call-saved registers in r0-r2
3975 (always true in the default ABI). */
3976 if (saved_int_regs & 0x7)
3977 return 0;
3980 /* Can't be done if interworking with Thumb, and any registers have been
3981 stacked. */
3982 if (TARGET_INTERWORK && saved_int_regs != 0 && !IS_INTERRUPT(func_type))
3983 return 0;
3985 /* On StrongARM, conditional returns are expensive if they aren't
3986 taken and multiple registers have been stacked. */
3987 if (iscond && arm_tune_strongarm)
3989 /* Conditional return when just the LR is stored is a simple
3990 conditional-load instruction, that's not expensive. */
3991 if (saved_int_regs != 0 && saved_int_regs != (1 << LR_REGNUM))
3992 return 0;
3994 if (flag_pic
3995 && arm_pic_register != INVALID_REGNUM
3996 && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM))
3997 return 0;
4000 /* ARMv8-M nonsecure entry functions need to use bxns to return and thus need
4001 several instructions if anything needs to be popped. */
4002 if (saved_int_regs && IS_CMSE_ENTRY (func_type))
4003 return 0;
4005 /* If there are saved registers but the LR isn't saved, then we need
4006 two instructions for the return. */
4007 if (saved_int_regs && !(saved_int_regs & (1 << LR_REGNUM)))
4008 return 0;
4010 /* Can't be done if any of the VFP regs are pushed,
4011 since this also requires an insn. */
4012 if (TARGET_HARD_FLOAT)
4013 for (regno = FIRST_VFP_REGNUM; regno <= LAST_VFP_REGNUM; regno++)
4014 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
4015 return 0;
4017 if (TARGET_REALLY_IWMMXT)
4018 for (regno = FIRST_IWMMXT_REGNUM; regno <= LAST_IWMMXT_REGNUM; regno++)
4019 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
4020 return 0;
4022 return 1;
4025 /* Return TRUE if we should try to use a simple_return insn, i.e. perform
4026 shrink-wrapping if possible. This is the case if we need to emit a
4027 prologue, which we can test by looking at the offsets. */
4028 bool
4029 use_simple_return_p (void)
4031 arm_stack_offsets *offsets;
4033 offsets = arm_get_frame_offsets ();
4034 return offsets->outgoing_args != 0;
4037 /* Return TRUE if int I is a valid immediate ARM constant. */
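/* A couple of hand-worked values, for exposition only: 0x0000ff00 is
   accepted everywhere (0xff shifted left by 8), while 0x00ff00ff is
   rejected in ARM mode but accepted for Thumb-2 via the replicated
   0x00XY00XY pattern handled below.  */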
4040 const_ok_for_arm (HOST_WIDE_INT i)
4042 int lowbit;
4044 /* For machines with >32 bit HOST_WIDE_INT, the bits above bit 31 must
4045 be all zero, or all one. */
4046 if ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff) != 0
4047 && ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff)
4048 != ((~(unsigned HOST_WIDE_INT) 0)
4049 & ~(unsigned HOST_WIDE_INT) 0xffffffff)))
4050 return FALSE;
4052 i &= (unsigned HOST_WIDE_INT) 0xffffffff;
4054 /* Fast return for 0 and small values. We must do this for zero, since
4055 the code below can't handle that one case. */
4056 if ((i & ~(unsigned HOST_WIDE_INT) 0xff) == 0)
4057 return TRUE;
4059 /* Get the number of trailing zeros. */
4060 lowbit = ffs((int) i) - 1;
4062 /* Only even shifts are allowed in ARM mode so round down to the
4063 nearest even number. */
4064 if (TARGET_ARM)
4065 lowbit &= ~1;
4067 if ((i & ~(((unsigned HOST_WIDE_INT) 0xff) << lowbit)) == 0)
4068 return TRUE;
4070 if (TARGET_ARM)
4072 /* Allow rotated constants in ARM mode. */
4073 if (lowbit <= 4
4074 && ((i & ~0xc000003f) == 0
4075 || (i & ~0xf000000f) == 0
4076 || (i & ~0xfc000003) == 0))
4077 return TRUE;
4079 else
4081 HOST_WIDE_INT v;
4083 /* Allow repeated patterns 0x00XY00XY or 0xXYXYXYXY. */
4084 v = i & 0xff;
4085 v |= v << 16;
4086 if (i == v || i == (v | (v << 8)))
4087 return TRUE;
4089 /* Allow repeated pattern 0xXY00XY00. */
4090 v = i & 0xff00;
4091 v |= v << 16;
4092 if (i == v)
4093 return TRUE;
4096 return FALSE;
4099 /* Return true if I is a valid constant for the operation CODE. */
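/* For example (illustration only): for SET, 0xffffff00 is not itself a
   valid immediate, but its complement 0x000000ff is, so the value can be
   loaded with a single MVN and this function returns nonzero for it.  */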
4101 const_ok_for_op (HOST_WIDE_INT i, enum rtx_code code)
4103 if (const_ok_for_arm (i))
4104 return 1;
4106 switch (code)
4108 case SET:
4109 /* See if we can use movw. */
4110 if (TARGET_HAVE_MOVT && (i & 0xffff0000) == 0)
4111 return 1;
4112 else
4113 /* Otherwise, try mvn. */
4114 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
4116 case PLUS:
4117 /* See if we can use addw or subw. */
4118 if (TARGET_THUMB2
4119 && ((i & 0xfffff000) == 0
4120 || ((-i) & 0xfffff000) == 0))
4121 return 1;
4122 /* Fall through. */
4123 case COMPARE:
4124 case EQ:
4125 case NE:
4126 case GT:
4127 case LE:
4128 case LT:
4129 case GE:
4130 case GEU:
4131 case LTU:
4132 case GTU:
4133 case LEU:
4134 case UNORDERED:
4135 case ORDERED:
4136 case UNEQ:
4137 case UNGE:
4138 case UNLT:
4139 case UNGT:
4140 case UNLE:
4141 return const_ok_for_arm (ARM_SIGN_EXTEND (-i));
4143 case MINUS: /* Should only occur with (MINUS I reg) => rsb */
4144 case XOR:
4145 return 0;
4147 case IOR:
4148 if (TARGET_THUMB2)
4149 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
4150 return 0;
4152 case AND:
4153 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
4155 default:
4156 gcc_unreachable ();
4160 /* Return true if I is a valid di mode constant for the operation CODE. */
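/* Hand-worked illustration: for AND, 0xffffffff000000ff qualifies because
   the high word is all ones and the low word (0xff) is a valid immediate
   for a 32-bit AND, whereas 0x12345678deadbeef does not, since neither
   half is directly encodable.  */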
4162 const_ok_for_dimode_op (HOST_WIDE_INT i, enum rtx_code code)
4164 HOST_WIDE_INT hi_val = (i >> 32) & 0xFFFFFFFF;
4165 HOST_WIDE_INT lo_val = i & 0xFFFFFFFF;
4166 rtx hi = GEN_INT (hi_val);
4167 rtx lo = GEN_INT (lo_val);
4169 if (TARGET_THUMB1)
4170 return 0;
4172 switch (code)
4174 case AND:
4175 case IOR:
4176 case XOR:
4177 return (const_ok_for_op (hi_val, code) || hi_val == 0xFFFFFFFF)
4178 && (const_ok_for_op (lo_val, code) || lo_val == 0xFFFFFFFF);
4179 case PLUS:
4180 return arm_not_operand (hi, SImode) && arm_add_operand (lo, SImode);
4182 default:
4183 return 0;
4187 /* Emit a sequence of insns to handle a large constant.
4188 CODE is the code of the operation required, it can be any of SET, PLUS,
4189 IOR, AND, XOR, MINUS;
4190 MODE is the mode in which the operation is being performed;
4191 VAL is the integer to operate on;
4192 SOURCE is the other operand (a register, or a null-pointer for SET);
4193 SUBTARGETS means it is safe to create scratch registers if that will
4194 either produce a simpler sequence, or we will want to cse the values.
4195 Return value is the number of insns emitted. */
4197 /* ??? Tweak this for thumb2. */
4199 arm_split_constant (enum rtx_code code, machine_mode mode, rtx insn,
4200 HOST_WIDE_INT val, rtx target, rtx source, int subtargets)
4202 rtx cond;
4204 if (insn && GET_CODE (PATTERN (insn)) == COND_EXEC)
4205 cond = COND_EXEC_TEST (PATTERN (insn));
4206 else
4207 cond = NULL_RTX;
4209 if (subtargets || code == SET
4210 || (REG_P (target) && REG_P (source)
4211 && REGNO (target) != REGNO (source)))
4213 /* After arm_reorg has been called, we can't fix up expensive
4214 constants by pushing them into memory so we must synthesize
4215 them in-line, regardless of the cost. This is only likely to
4216 be more costly on chips that have load delay slots and we are
4217 compiling without running the scheduler (so no splitting
4218 occurred before the final instruction emission).
4220 Ref: gcc -O1 -mcpu=strongarm gcc.c-torture/compile/980506-2.c
4222 if (!cfun->machine->after_arm_reorg
4223 && !cond
4224 && (arm_gen_constant (code, mode, NULL_RTX, val, target, source,
4225 1, 0)
4226 > (arm_constant_limit (optimize_function_for_size_p (cfun))
4227 + (code != SET))))
4229 if (code == SET)
4231 /* Currently SET is the only monadic value for CODE, all
4232 the rest are dyadic. */
4233 if (TARGET_USE_MOVT)
4234 arm_emit_movpair (target, GEN_INT (val));
4235 else
4236 emit_set_insn (target, GEN_INT (val));
4238 return 1;
4240 else
4242 rtx temp = subtargets ? gen_reg_rtx (mode) : target;
4244 if (TARGET_USE_MOVT)
4245 arm_emit_movpair (temp, GEN_INT (val));
4246 else
4247 emit_set_insn (temp, GEN_INT (val));
4249 /* For MINUS, the value is subtracted from, since we never
4250 have subtraction of a constant. */
4251 if (code == MINUS)
4252 emit_set_insn (target, gen_rtx_MINUS (mode, temp, source));
4253 else
4254 emit_set_insn (target,
4255 gen_rtx_fmt_ee (code, mode, source, temp));
4256 return 2;
4261 return arm_gen_constant (code, mode, cond, val, target, source, subtargets,
4265 /* Return a sequence of integers, in RETURN_SEQUENCE that fit into
4266 ARM/THUMB2 immediates, and add up to VAL.
4267 The function return value gives the number of insns required.
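/* Hand-worked illustration: for VAL == 0x12340078 a possible
   RETURN_SEQUENCE is { 0x12000000, 0x00340000, 0x00000078 }; each element
   is an 8-bit value shifted into place, the three sum to VAL, and the
   return value would be 3.  */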
4268 static int
4269 optimal_immediate_sequence (enum rtx_code code, unsigned HOST_WIDE_INT val,
4270 struct four_ints *return_sequence)
4272 int best_consecutive_zeros = 0;
4273 int i;
4274 int best_start = 0;
4275 int insns1, insns2;
4276 struct four_ints tmp_sequence;
4278 /* If we aren't targeting ARM, the best place to start is always at
4279 the bottom, otherwise look more closely. */
4280 if (TARGET_ARM)
4282 for (i = 0; i < 32; i += 2)
4284 int consecutive_zeros = 0;
4286 if (!(val & (3 << i)))
4288 while ((i < 32) && !(val & (3 << i)))
4290 consecutive_zeros += 2;
4291 i += 2;
4293 if (consecutive_zeros > best_consecutive_zeros)
4295 best_consecutive_zeros = consecutive_zeros;
4296 best_start = i - consecutive_zeros;
4298 i -= 2;
4303 /* So long as it won't require any more insns to do so, it's
4304 desirable to emit a small constant (in bits 0...9) in the last
4305 insn. This way there is more chance that it can be combined with
4306 a later addressing insn to form a pre-indexed load or store
4307 operation. Consider:
4309 *((volatile int *)0xe0000100) = 1;
4310 *((volatile int *)0xe0000110) = 2;
4312 We want this to wind up as:
4314 mov rA, #0xe0000000
4315 mov rB, #1
4316 str rB, [rA, #0x100]
4317 mov rB, #2
4318 str rB, [rA, #0x110]
4320 rather than having to synthesize both large constants from scratch.
4322 Therefore, we calculate how many insns would be required to emit
4323 the constant starting from `best_start', and also starting from
4324 zero (i.e. with bit 31 first to be output). If `best_start' doesn't
4325 yield a shorter sequence, we may as well use zero. */
4326 insns1 = optimal_immediate_sequence_1 (code, val, return_sequence, best_start);
4327 if (best_start != 0
4328 && ((HOST_WIDE_INT_1U << best_start) < val))
4330 insns2 = optimal_immediate_sequence_1 (code, val, &tmp_sequence, 0);
4331 if (insns2 <= insns1)
4333 *return_sequence = tmp_sequence;
4334 insns1 = insns2;
4338 return insns1;
4341 /* As for optimal_immediate_sequence, but starting at bit-position I. */
4342 static int
4343 optimal_immediate_sequence_1 (enum rtx_code code, unsigned HOST_WIDE_INT val,
4344 struct four_ints *return_sequence, int i)
4346 int remainder = val & 0xffffffff;
4347 int insns = 0;
4349 /* Try to find a way of doing the job in either two or three
4350 instructions.
4352 In ARM mode we can use 8-bit constants, rotated to any 2-bit aligned
4353 location. We start at position I. This may be the MSB, or
4354 optimal_immediate_sequence may have positioned it at the largest block
4355 of zeros that are aligned on a 2-bit boundary. We then fill up the temps,
4356 wrapping around to the top of the word when we drop off the bottom.
4357 In the worst case this code should produce no more than four insns.
4359 In Thumb2 mode, we can use 32/16-bit replicated constants, and 8-bit
4360 constants, shifted to any arbitrary location. We should always start
4361 at the MSB. */
4364 int end;
4365 unsigned int b1, b2, b3, b4;
4366 unsigned HOST_WIDE_INT result;
4367 int loc;
4369 gcc_assert (insns < 4);
4371 if (i <= 0)
4372 i += 32;
4374 /* First, find the next normal 12/8-bit shifted/rotated immediate. */
4375 if (remainder & ((TARGET_ARM ? (3 << (i - 2)) : (1 << (i - 1)))))
4377 loc = i;
4378 if (i <= 12 && TARGET_THUMB2 && code == PLUS)
4379 /* We can use addw/subw for the last 12 bits. */
4380 result = remainder;
4381 else
4383 /* Use an 8-bit shifted/rotated immediate. */
4384 end = i - 8;
4385 if (end < 0)
4386 end += 32;
4387 result = remainder & ((0x0ff << end)
4388 | ((i < end) ? (0xff >> (32 - end))
4389 : 0));
4390 i -= 8;
4393 else
4395 /* Arm allows rotates by a multiple of two. Thumb-2 allows
4396 arbitrary shifts. */
4397 i -= TARGET_ARM ? 2 : 1;
4398 continue;
4401 /* Next, see if we can do a better job with a thumb2 replicated
4402 constant.
4404 We do it this way around to catch the cases like 0x01F001E0 where
4405 two 8-bit immediates would work, but a replicated constant would
4406 make it worse.
4408 TODO: 16-bit constants that don't clear all the bits, but still win.
4409 TODO: Arithmetic splitting for set/add/sub, rather than bitwise. */
4410 if (TARGET_THUMB2)
4412 b1 = (remainder & 0xff000000) >> 24;
4413 b2 = (remainder & 0x00ff0000) >> 16;
4414 b3 = (remainder & 0x0000ff00) >> 8;
4415 b4 = remainder & 0xff;
4417 if (loc > 24)
4419 /* The 8-bit immediate already found clears b1 (and maybe b2),
4420 but must leave b3 and b4 alone. */
4422 /* First try to find a 32-bit replicated constant that clears
4423 almost everything. We can assume that we can't do it in one,
4424 or else we wouldn't be here. */
4425 unsigned int tmp = b1 & b2 & b3 & b4;
4426 unsigned int tmp2 = tmp + (tmp << 8) + (tmp << 16)
4427 + (tmp << 24);
4428 unsigned int matching_bytes = (tmp == b1) + (tmp == b2)
4429 + (tmp == b3) + (tmp == b4);
4430 if (tmp
4431 && (matching_bytes >= 3
4432 || (matching_bytes == 2
4433 && const_ok_for_op (remainder & ~tmp2, code))))
4435 /* At least 3 of the bytes match, and the fourth has at
4436 least as many bits set, or two of the bytes match
4437 and it will only require one more insn to finish. */
4438 result = tmp2;
4439 i = tmp != b1 ? 32
4440 : tmp != b2 ? 24
4441 : tmp != b3 ? 16
4442 : 8;
4445 /* Second, try to find a 16-bit replicated constant that can
4446 leave three of the bytes clear. If b2 or b4 is already
4447 zero, then we can. If the 8-bit from above would not
4448 clear b2 anyway, then we still win. */
4449 else if (b1 == b3 && (!b2 || !b4
4450 || (remainder & 0x00ff0000 & ~result)))
4452 result = remainder & 0xff00ff00;
4453 i = 24;
4456 else if (loc > 16)
4458 /* The 8-bit immediate already found clears b2 (and maybe b3)
4459 and we don't get here unless b1 is already clear, but it will
4460 leave b4 unchanged. */
4462 /* If we can clear b2 and b4 at once, then we win, since the
4463 8-bits couldn't possibly reach that far. */
4464 if (b2 == b4)
4466 result = remainder & 0x00ff00ff;
4467 i = 16;
4472 return_sequence->i[insns++] = result;
4473 remainder &= ~result;
4475 if (code == SET || code == MINUS)
4476 code = PLUS;
4478 while (remainder);
4480 return insns;
4483 /* Emit an instruction with the indicated PATTERN. If COND is
4484 non-NULL, conditionalize the execution of the instruction on COND
4485 being true. */
4487 static void
4488 emit_constant_insn (rtx cond, rtx pattern)
4490 if (cond)
4491 pattern = gen_rtx_COND_EXEC (VOIDmode, copy_rtx (cond), pattern);
4492 emit_insn (pattern);
4495 /* As above, but extra parameter GENERATE which, if clear, suppresses
4496 RTL generation. */
4498 static int
4499 arm_gen_constant (enum rtx_code code, machine_mode mode, rtx cond,
4500 unsigned HOST_WIDE_INT val, rtx target, rtx source,
4501 int subtargets, int generate)
4503 int can_invert = 0;
4504 int can_negate = 0;
4505 int final_invert = 0;
4506 int i;
4507 int set_sign_bit_copies = 0;
4508 int clear_sign_bit_copies = 0;
4509 int clear_zero_bit_copies = 0;
4510 int set_zero_bit_copies = 0;
4511 int insns = 0, neg_insns, inv_insns;
4512 unsigned HOST_WIDE_INT temp1, temp2;
4513 unsigned HOST_WIDE_INT remainder = val & 0xffffffff;
4514 struct four_ints *immediates;
4515 struct four_ints pos_immediates, neg_immediates, inv_immediates;
4517 /* Find out which operations are safe for a given CODE. Also do a quick
4518 check for degenerate cases; these can occur when DImode operations
4519 are split. */
4520 switch (code)
4522 case SET:
4523 can_invert = 1;
4524 break;
4526 case PLUS:
4527 can_negate = 1;
4528 break;
4530 case IOR:
4531 if (remainder == 0xffffffff)
4533 if (generate)
4534 emit_constant_insn (cond,
4535 gen_rtx_SET (target,
4536 GEN_INT (ARM_SIGN_EXTEND (val))));
4537 return 1;
4540 if (remainder == 0)
4542 if (reload_completed && rtx_equal_p (target, source))
4543 return 0;
4545 if (generate)
4546 emit_constant_insn (cond, gen_rtx_SET (target, source));
4547 return 1;
4549 break;
4551 case AND:
4552 if (remainder == 0)
4554 if (generate)
4555 emit_constant_insn (cond, gen_rtx_SET (target, const0_rtx));
4556 return 1;
4558 if (remainder == 0xffffffff)
4560 if (reload_completed && rtx_equal_p (target, source))
4561 return 0;
4562 if (generate)
4563 emit_constant_insn (cond, gen_rtx_SET (target, source));
4564 return 1;
4566 can_invert = 1;
4567 break;
4569 case XOR:
4570 if (remainder == 0)
4572 if (reload_completed && rtx_equal_p (target, source))
4573 return 0;
4574 if (generate)
4575 emit_constant_insn (cond, gen_rtx_SET (target, source));
4576 return 1;
4579 if (remainder == 0xffffffff)
4581 if (generate)
4582 emit_constant_insn (cond,
4583 gen_rtx_SET (target,
4584 gen_rtx_NOT (mode, source)));
4585 return 1;
4587 final_invert = 1;
4588 break;
4590 case MINUS:
4591 /* We treat MINUS as (val - source), since (source - val) is always
4592 passed as (source + (-val)). */
4593 if (remainder == 0)
4595 if (generate)
4596 emit_constant_insn (cond,
4597 gen_rtx_SET (target,
4598 gen_rtx_NEG (mode, source)));
4599 return 1;
4601 if (const_ok_for_arm (val))
4603 if (generate)
4604 emit_constant_insn (cond,
4605 gen_rtx_SET (target,
4606 gen_rtx_MINUS (mode, GEN_INT (val),
4607 source)));
4608 return 1;
4611 break;
4613 default:
4614 gcc_unreachable ();
4617 /* If we can do it in one insn get out quickly. */
4618 if (const_ok_for_op (val, code))
4620 if (generate)
4621 emit_constant_insn (cond,
4622 gen_rtx_SET (target,
4623 (source
4624 ? gen_rtx_fmt_ee (code, mode, source,
4625 GEN_INT (val))
4626 : GEN_INT (val))));
4627 return 1;
4630 /* On targets with UXTH/UBFX, we can deal with AND (2^N)-1 in a single
4631 insn. */
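/* For instance (illustration only), "x & 0xffff" becomes a single UXTH on
   ARMv6 and later, and "x & 0x7ffff" becomes a single UBFX with lsb 0 and
   width 19 on Thumb-2 capable cores.  */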
4632 if (code == AND && (i = exact_log2 (remainder + 1)) > 0
4633 && (arm_arch_thumb2 || (i == 16 && arm_arch6 && mode == SImode)))
4635 if (generate)
4637 if (mode == SImode && i == 16)
4638 /* Use UXTH in preference to UBFX, since on Thumb2 it's a
4639 smaller insn. */
4640 emit_constant_insn (cond,
4641 gen_zero_extendhisi2
4642 (target, gen_lowpart (HImode, source)));
4643 else
4644 /* Extz only supports SImode, but we can coerce the operands
4645 into that mode. */
4646 emit_constant_insn (cond,
4647 gen_extzv_t2 (gen_lowpart (SImode, target),
4648 gen_lowpart (SImode, source),
4649 GEN_INT (i), const0_rtx));
4652 return 1;
4655 /* Calculate a few attributes that may be useful for specific
4656 optimizations. */
4657 /* Count number of leading zeros. */
4658 for (i = 31; i >= 0; i--)
4660 if ((remainder & (1 << i)) == 0)
4661 clear_sign_bit_copies++;
4662 else
4663 break;
4666 /* Count number of leading 1's. */
4667 for (i = 31; i >= 0; i--)
4669 if ((remainder & (1 << i)) != 0)
4670 set_sign_bit_copies++;
4671 else
4672 break;
4675 /* Count number of trailing zeros. */
4676 for (i = 0; i <= 31; i++)
4678 if ((remainder & (1 << i)) == 0)
4679 clear_zero_bit_copies++;
4680 else
4681 break;
4684 /* Count number of trailing 1's. */
4685 for (i = 0; i <= 31; i++)
4687 if ((remainder & (1 << i)) != 0)
4688 set_zero_bit_copies++;
4689 else
4690 break;
4693 switch (code)
4695 case SET:
4696 /* See if we can do this by sign_extending a constant that is known
4697 to be negative. This is a good way of doing it, since the shift
4698 may well merge into a subsequent insn. */
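/* Hand-worked illustration: to set 0xfffff000 (20 leading ones) we can
   emit "mov rT, #0x80000000" followed by "asr rD, rT, #19", loading a
   valid immediate and then shifting copies of the sign bit back in.  */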
4699 if (set_sign_bit_copies > 1)
4701 if (const_ok_for_arm
4702 (temp1 = ARM_SIGN_EXTEND (remainder
4703 << (set_sign_bit_copies - 1))))
4705 if (generate)
4707 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4708 emit_constant_insn (cond,
4709 gen_rtx_SET (new_src, GEN_INT (temp1)));
4710 emit_constant_insn (cond,
4711 gen_ashrsi3 (target, new_src,
4712 GEN_INT (set_sign_bit_copies - 1)));
4714 return 2;
4716 /* For an inverted constant, we will need to set the low bits,
4717 these will be shifted out of harm's way. */
4718 temp1 |= (1 << (set_sign_bit_copies - 1)) - 1;
4719 if (const_ok_for_arm (~temp1))
4721 if (generate)
4723 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4724 emit_constant_insn (cond,
4725 gen_rtx_SET (new_src, GEN_INT (temp1)));
4726 emit_constant_insn (cond,
4727 gen_ashrsi3 (target, new_src,
4728 GEN_INT (set_sign_bit_copies - 1)));
4730 return 2;
4734 /* See if we can calculate the value as the difference between two
4735 valid immediates. */
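/* Hand-worked illustration: 0x00ffff00 is not a valid immediate, but it
   equals 0x01000000 - 0x00000100, both of which are, so it can be built
   with a MOV followed by an ADD of the negated second immediate.  */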
4736 if (clear_sign_bit_copies + clear_zero_bit_copies <= 16)
4738 int topshift = clear_sign_bit_copies & ~1;
4740 temp1 = ARM_SIGN_EXTEND ((remainder + (0x00800000 >> topshift))
4741 & (0xff000000 >> topshift));
4743 /* If temp1 is zero, then that means the 9 most significant
4744 bits of remainder were 1 and we've caused it to overflow.
4745 When topshift is 0 we don't need to do anything since we
4746 can borrow from 'bit 32'. */
4747 if (temp1 == 0 && topshift != 0)
4748 temp1 = 0x80000000 >> (topshift - 1);
4750 temp2 = ARM_SIGN_EXTEND (temp1 - remainder);
4752 if (const_ok_for_arm (temp2))
4754 if (generate)
4756 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4757 emit_constant_insn (cond,
4758 gen_rtx_SET (new_src, GEN_INT (temp1)));
4759 emit_constant_insn (cond,
4760 gen_addsi3 (target, new_src,
4761 GEN_INT (-temp2)));
4764 return 2;
4768 /* See if we can generate this by setting the bottom (or the top)
4769 16 bits, and then shifting these into the other half of the
4770 word. We only look for the simplest cases, to do more would cost
4771 too much. Be careful, however, not to generate this when the
4772 alternative would take fewer insns. */
4773 if (val & 0xffff0000)
4775 temp1 = remainder & 0xffff0000;
4776 temp2 = remainder & 0x0000ffff;
4778 /* Overlaps outside this range are best done using other methods. */
4779 for (i = 9; i < 24; i++)
4781 if ((((temp2 | (temp2 << i)) & 0xffffffff) == remainder)
4782 && !const_ok_for_arm (temp2))
4784 rtx new_src = (subtargets
4785 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
4786 : target);
4787 insns = arm_gen_constant (code, mode, cond, temp2, new_src,
4788 source, subtargets, generate);
4789 source = new_src;
4790 if (generate)
4791 emit_constant_insn
4792 (cond,
4793 gen_rtx_SET
4794 (target,
4795 gen_rtx_IOR (mode,
4796 gen_rtx_ASHIFT (mode, source,
4797 GEN_INT (i)),
4798 source)));
4799 return insns + 1;
4803 /* Don't duplicate cases already considered. */
4804 for (i = 17; i < 24; i++)
4806 if (((temp1 | (temp1 >> i)) == remainder)
4807 && !const_ok_for_arm (temp1))
4809 rtx new_src = (subtargets
4810 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
4811 : target);
4812 insns = arm_gen_constant (code, mode, cond, temp1, new_src,
4813 source, subtargets, generate);
4814 source = new_src;
4815 if (generate)
4816 emit_constant_insn
4817 (cond,
4818 gen_rtx_SET (target,
4819 gen_rtx_IOR
4820 (mode,
4821 gen_rtx_LSHIFTRT (mode, source,
4822 GEN_INT (i)),
4823 source)));
4824 return insns + 1;
4828 break;
4830 case IOR:
4831 case XOR:
4832 /* If we have IOR or XOR, and the constant can be loaded in a
4833 single instruction, and we can find a temporary to put it in,
4834 then this can be done in two instructions instead of 3-4. */
4835 if (subtargets
4836 /* TARGET can't be NULL if SUBTARGETS is 0 */
4837 || (reload_completed && !reg_mentioned_p (target, source)))
4839 if (const_ok_for_arm (ARM_SIGN_EXTEND (~val)))
4841 if (generate)
4843 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
4845 emit_constant_insn (cond,
4846 gen_rtx_SET (sub, GEN_INT (val)));
4847 emit_constant_insn (cond,
4848 gen_rtx_SET (target,
4849 gen_rtx_fmt_ee (code, mode,
4850 source, sub)));
4852 return 2;
4856 if (code == XOR)
4857 break;
4859 /* Convert.
4860 x = y | constant (which is composed of set_sign_bit_copies of leading 1s
4861 and the remainder 0s for e.g. 0xfff00000)
4862 x = ~(~(y ashift set_sign_bit_copies) lshiftrt set_sign_bit_copies)
4864 This can be done in 2 instructions by using shifts with mov or mvn.
4865 e.g. for
4866 x = x | 0xfff00000;
4867 we generate.
4868 mvn r0, r0, asl #12
4869 mvn r0, r0, lsr #12 */
4870 if (set_sign_bit_copies > 8
4871 && (val & (HOST_WIDE_INT_M1U << (32 - set_sign_bit_copies))) == val)
4873 if (generate)
4875 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
4876 rtx shift = GEN_INT (set_sign_bit_copies);
4878 emit_constant_insn
4879 (cond,
4880 gen_rtx_SET (sub,
4881 gen_rtx_NOT (mode,
4882 gen_rtx_ASHIFT (mode,
4883 source,
4884 shift))));
4885 emit_constant_insn
4886 (cond,
4887 gen_rtx_SET (target,
4888 gen_rtx_NOT (mode,
4889 gen_rtx_LSHIFTRT (mode, sub,
4890 shift))));
4892 return 2;
4895 /* Convert
4896 x = y | constant (which has set_zero_bit_copies number of trailing ones).
4898 x = ~((~y lshiftrt set_zero_bit_copies) ashift set_zero_bit_copies).
4900 For example, r0 = r0 | 0xfff
4901 mvn r0, r0, lsr #12
4902 mvn r0, r0, asl #12
4905 if (set_zero_bit_copies > 8
4906 && (remainder & ((1 << set_zero_bit_copies) - 1)) == remainder)
4908 if (generate)
4910 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
4911 rtx shift = GEN_INT (set_zero_bit_copies);
4913 emit_constant_insn
4914 (cond,
4915 gen_rtx_SET (sub,
4916 gen_rtx_NOT (mode,
4917 gen_rtx_LSHIFTRT (mode,
4918 source,
4919 shift))));
4920 emit_constant_insn
4921 (cond,
4922 gen_rtx_SET (target,
4923 gen_rtx_NOT (mode,
4924 gen_rtx_ASHIFT (mode, sub,
4925 shift))));
4927 return 2;
4930 /* This will never be reached for Thumb2 because orn is a valid
4931 instruction. This is for Thumb1 and the ARM 32 bit cases.
4933 x = y | constant (such that ~constant is a valid constant)
4934 Transform this to
4935 x = ~(~y & ~constant).
4937 if (const_ok_for_arm (temp1 = ARM_SIGN_EXTEND (~val)))
4939 if (generate)
4941 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
4942 emit_constant_insn (cond,
4943 gen_rtx_SET (sub,
4944 gen_rtx_NOT (mode, source)));
4945 source = sub;
4946 if (subtargets)
4947 sub = gen_reg_rtx (mode);
4948 emit_constant_insn (cond,
4949 gen_rtx_SET (sub,
4950 gen_rtx_AND (mode, source,
4951 GEN_INT (temp1))));
4952 emit_constant_insn (cond,
4953 gen_rtx_SET (target,
4954 gen_rtx_NOT (mode, sub)));
4956 return 3;
4958 break;
4960 case AND:
4961 /* See if two shifts will do 2 or more insn's worth of work. */
4962 if (clear_sign_bit_copies >= 16 && clear_sign_bit_copies < 24)
4964 HOST_WIDE_INT shift_mask = ((0xffffffff
4965 << (32 - clear_sign_bit_copies))
4966 & 0xffffffff);
4968 if ((remainder | shift_mask) != 0xffffffff)
4970 HOST_WIDE_INT new_val
4971 = ARM_SIGN_EXTEND (remainder | shift_mask);
4973 if (generate)
4975 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4976 insns = arm_gen_constant (AND, SImode, cond, new_val,
4977 new_src, source, subtargets, 1);
4978 source = new_src;
4980 else
4982 rtx targ = subtargets ? NULL_RTX : target;
4983 insns = arm_gen_constant (AND, mode, cond, new_val,
4984 targ, source, subtargets, 0);
4988 if (generate)
4990 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4991 rtx shift = GEN_INT (clear_sign_bit_copies);
4993 emit_insn (gen_ashlsi3 (new_src, source, shift));
4994 emit_insn (gen_lshrsi3 (target, new_src, shift));
4997 return insns + 2;
5000 if (clear_zero_bit_copies >= 16 && clear_zero_bit_copies < 24)
5002 HOST_WIDE_INT shift_mask = (1 << clear_zero_bit_copies) - 1;
5004 if ((remainder | shift_mask) != 0xffffffff)
5006 HOST_WIDE_INT new_val
5007 = ARM_SIGN_EXTEND (remainder | shift_mask);
5008 if (generate)
5010 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
5012 insns = arm_gen_constant (AND, mode, cond, new_val,
5013 new_src, source, subtargets, 1);
5014 source = new_src;
5016 else
5018 rtx targ = subtargets ? NULL_RTX : target;
5020 insns = arm_gen_constant (AND, mode, cond, new_val,
5021 targ, source, subtargets, 0);
5025 if (generate)
5027 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
5028 rtx shift = GEN_INT (clear_zero_bit_copies);
5030 emit_insn (gen_lshrsi3 (new_src, source, shift));
5031 emit_insn (gen_ashlsi3 (target, new_src, shift));
5034 return insns + 2;
5037 break;
5039 default:
5040 break;
5043 /* Calculate what the instruction sequences would be if we generated it
5044 normally, negated, or inverted. */
5045 if (code == AND)
5046 /* AND cannot be split into multiple insns, so invert and use BIC. */
5047 insns = 99;
5048 else
5049 insns = optimal_immediate_sequence (code, remainder, &pos_immediates);
5051 if (can_negate)
5052 neg_insns = optimal_immediate_sequence (code, (-remainder) & 0xffffffff,
5053 &neg_immediates);
5054 else
5055 neg_insns = 99;
5057 if (can_invert || final_invert)
5058 inv_insns = optimal_immediate_sequence (code, remainder ^ 0xffffffff,
5059 &inv_immediates);
5060 else
5061 inv_insns = 99;
5063 immediates = &pos_immediates;
5065 /* Is the negated immediate sequence more efficient? */
5066 if (neg_insns < insns && neg_insns <= inv_insns)
5068 insns = neg_insns;
5069 immediates = &neg_immediates;
5071 else
5072 can_negate = 0;
5074 /* Is the inverted immediate sequence more efficient?
5075 We must allow for an extra NOT instruction for XOR operations, although
5076 there is some chance that the final 'mvn' will get optimized later. */
5077 if ((inv_insns + 1) < insns || (!final_invert && inv_insns < insns))
5079 insns = inv_insns;
5080 immediates = &inv_immediates;
5082 else
5084 can_invert = 0;
5085 final_invert = 0;
5088 /* Now output the chosen sequence as instructions. */
5089 if (generate)
5091 for (i = 0; i < insns; i++)
5093 rtx new_src, temp1_rtx;
5095 temp1 = immediates->i[i];
5097 if (code == SET || code == MINUS)
5098 new_src = (subtargets ? gen_reg_rtx (mode) : target);
5099 else if ((final_invert || i < (insns - 1)) && subtargets)
5100 new_src = gen_reg_rtx (mode);
5101 else
5102 new_src = target;
5104 if (can_invert)
5105 temp1 = ~temp1;
5106 else if (can_negate)
5107 temp1 = -temp1;
5109 temp1 = trunc_int_for_mode (temp1, mode);
5110 temp1_rtx = GEN_INT (temp1);
5112 if (code == SET)
5114 else if (code == MINUS)
5115 temp1_rtx = gen_rtx_MINUS (mode, temp1_rtx, source);
5116 else
5117 temp1_rtx = gen_rtx_fmt_ee (code, mode, source, temp1_rtx);
5119 emit_constant_insn (cond, gen_rtx_SET (new_src, temp1_rtx));
5120 source = new_src;
5122 if (code == SET)
5124 can_negate = can_invert;
5125 can_invert = 0;
5126 code = PLUS;
5128 else if (code == MINUS)
5129 code = PLUS;
5133 if (final_invert)
5135 if (generate)
5136 emit_constant_insn (cond, gen_rtx_SET (target,
5137 gen_rtx_NOT (mode, source)));
5138 insns++;
5141 return insns;
5144 /* Canonicalize a comparison so that we are more likely to recognize it.
5145 This can be done for a few constant compares, where we can make the
5146 immediate value easier to load. */
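/* A typical SImode example (illustration only): the constant in
   (GT reg 4095) cannot be encoded directly, but 4096 can, so the
   comparison is rewritten below as (GE reg 4096).  */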
5148 static void
5149 arm_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
5150 bool op0_preserve_value)
5152 machine_mode mode;
5153 unsigned HOST_WIDE_INT i, maxval;
5155 mode = GET_MODE (*op0);
5156 if (mode == VOIDmode)
5157 mode = GET_MODE (*op1);
5159 maxval = (HOST_WIDE_INT_1U << (GET_MODE_BITSIZE (mode) - 1)) - 1;
5161 /* For DImode, we have GE/LT/GEU/LTU comparisons. In ARM mode
5162 we can also use cmp/cmpeq for GTU/LEU. GT/LE must be either
5163 reversed or (for constant OP1) adjusted to GE/LT. Similarly
5164 for GTU/LEU in Thumb mode. */
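/* Illustration only: (GT:DI reg 255) is rewritten as (GE:DI reg 256),
   since only GE/LT (and GEU/LTU) are provided for DImode and both
   32-bit halves of 256 are valid immediates.  */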
5165 if (mode == DImode)
5168 if (*code == GT || *code == LE
5169 || (!TARGET_ARM && (*code == GTU || *code == LEU)))
5171 /* Missing comparison. First try to use an available
5172 comparison. */
5173 if (CONST_INT_P (*op1))
5175 i = INTVAL (*op1);
5176 switch (*code)
5178 case GT:
5179 case LE:
5180 if (i != maxval
5181 && arm_const_double_by_immediates (GEN_INT (i + 1)))
5183 *op1 = GEN_INT (i + 1);
5184 *code = *code == GT ? GE : LT;
5185 return;
5187 break;
5188 case GTU:
5189 case LEU:
5190 if (i != ~((unsigned HOST_WIDE_INT) 0)
5191 && arm_const_double_by_immediates (GEN_INT (i + 1)))
5193 *op1 = GEN_INT (i + 1);
5194 *code = *code == GTU ? GEU : LTU;
5195 return;
5197 break;
5198 default:
5199 gcc_unreachable ();
5203 /* If that did not work, reverse the condition. */
5204 if (!op0_preserve_value)
5206 std::swap (*op0, *op1);
5207 *code = (int)swap_condition ((enum rtx_code)*code);
5210 return;
5213 /* If *op0 is (zero_extend:SI (subreg:QI (reg:SI) 0)) and comparing
5214 with const0_rtx, change it to (and:SI (reg:SI) (const_int 255)),
5215 to facilitate possible combining with a cmp into 'ands'. */
5216 if (mode == SImode
5217 && GET_CODE (*op0) == ZERO_EXTEND
5218 && GET_CODE (XEXP (*op0, 0)) == SUBREG
5219 && GET_MODE (XEXP (*op0, 0)) == QImode
5220 && GET_MODE (SUBREG_REG (XEXP (*op0, 0))) == SImode
5221 && subreg_lowpart_p (XEXP (*op0, 0))
5222 && *op1 == const0_rtx)
5223 *op0 = gen_rtx_AND (SImode, SUBREG_REG (XEXP (*op0, 0)),
5224 GEN_INT (255));
5226 /* Comparisons smaller than DImode. Only adjust comparisons against
5227 an out-of-range constant. */
5228 if (!CONST_INT_P (*op1)
5229 || const_ok_for_arm (INTVAL (*op1))
5230 || const_ok_for_arm (- INTVAL (*op1)))
5231 return;
5233 i = INTVAL (*op1);
5235 switch (*code)
5237 case EQ:
5238 case NE:
5239 return;
5241 case GT:
5242 case LE:
5243 if (i != maxval
5244 && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
5246 *op1 = GEN_INT (ARM_SIGN_EXTEND (i + 1));
5247 *code = *code == GT ? GE : LT;
5248 return;
5250 break;
5252 case GE:
5253 case LT:
5254 if (i != ~maxval
5255 && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
5257 *op1 = GEN_INT (i - 1);
5258 *code = *code == GE ? GT : LE;
5259 return;
5261 break;
5263 case GTU:
5264 case LEU:
5265 if (i != ~((unsigned HOST_WIDE_INT) 0)
5266 && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
5268 *op1 = GEN_INT (ARM_SIGN_EXTEND (i + 1));
5269 *code = *code == GTU ? GEU : LTU;
5270 return;
5272 break;
5274 case GEU:
5275 case LTU:
5276 if (i != 0
5277 && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
5279 *op1 = GEN_INT (i - 1);
5280 *code = *code == GEU ? GTU : LEU;
5281 return;
5283 break;
5285 default:
5286 gcc_unreachable ();
5291 /* Define how to find the value returned by a function. */
5293 static rtx
5294 arm_function_value(const_tree type, const_tree func,
5295 bool outgoing ATTRIBUTE_UNUSED)
5297 machine_mode mode;
5298 int unsignedp ATTRIBUTE_UNUSED;
5299 rtx r ATTRIBUTE_UNUSED;
5301 mode = TYPE_MODE (type);
5303 if (TARGET_AAPCS_BASED)
5304 return aapcs_allocate_return_reg (mode, type, func);
5306 /* Promote integer types. */
5307 if (INTEGRAL_TYPE_P (type))
5308 mode = arm_promote_function_mode (type, mode, &unsignedp, func, 1);
5310 /* Promote small structs returned in a register to full-word size
5311 for big-endian AAPCS. */
5312 if (arm_return_in_msb (type))
5314 HOST_WIDE_INT size = int_size_in_bytes (type);
5315 if (size % UNITS_PER_WORD != 0)
5317 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
5318 mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
5322 return arm_libcall_value_1 (mode);
5325 /* libcall hashtable helpers. */
5327 struct libcall_hasher : nofree_ptr_hash <const rtx_def>
5329 static inline hashval_t hash (const rtx_def *);
5330 static inline bool equal (const rtx_def *, const rtx_def *);
5331 static inline void remove (rtx_def *);
5334 inline bool
5335 libcall_hasher::equal (const rtx_def *p1, const rtx_def *p2)
5337 return rtx_equal_p (p1, p2);
5340 inline hashval_t
5341 libcall_hasher::hash (const rtx_def *p1)
5343 return hash_rtx (p1, VOIDmode, NULL, NULL, FALSE);
5346 typedef hash_table<libcall_hasher> libcall_table_type;
5348 static void
5349 add_libcall (libcall_table_type *htab, rtx libcall)
5351 *htab->find_slot (libcall, INSERT) = libcall;
5354 static bool
5355 arm_libcall_uses_aapcs_base (const_rtx libcall)
5357 static bool init_done = false;
5358 static libcall_table_type *libcall_htab = NULL;
5360 if (!init_done)
5362 init_done = true;
5364 libcall_htab = new libcall_table_type (31);
5365 add_libcall (libcall_htab,
5366 convert_optab_libfunc (sfloat_optab, SFmode, SImode));
5367 add_libcall (libcall_htab,
5368 convert_optab_libfunc (sfloat_optab, DFmode, SImode));
5369 add_libcall (libcall_htab,
5370 convert_optab_libfunc (sfloat_optab, SFmode, DImode));
5371 add_libcall (libcall_htab,
5372 convert_optab_libfunc (sfloat_optab, DFmode, DImode));
5374 add_libcall (libcall_htab,
5375 convert_optab_libfunc (ufloat_optab, SFmode, SImode));
5376 add_libcall (libcall_htab,
5377 convert_optab_libfunc (ufloat_optab, DFmode, SImode));
5378 add_libcall (libcall_htab,
5379 convert_optab_libfunc (ufloat_optab, SFmode, DImode));
5380 add_libcall (libcall_htab,
5381 convert_optab_libfunc (ufloat_optab, DFmode, DImode));
5383 add_libcall (libcall_htab,
5384 convert_optab_libfunc (sext_optab, SFmode, HFmode));
5385 add_libcall (libcall_htab,
5386 convert_optab_libfunc (trunc_optab, HFmode, SFmode));
5387 add_libcall (libcall_htab,
5388 convert_optab_libfunc (sfix_optab, SImode, DFmode));
5389 add_libcall (libcall_htab,
5390 convert_optab_libfunc (ufix_optab, SImode, DFmode));
5391 add_libcall (libcall_htab,
5392 convert_optab_libfunc (sfix_optab, DImode, DFmode));
5393 add_libcall (libcall_htab,
5394 convert_optab_libfunc (ufix_optab, DImode, DFmode));
5395 add_libcall (libcall_htab,
5396 convert_optab_libfunc (sfix_optab, DImode, SFmode));
5397 add_libcall (libcall_htab,
5398 convert_optab_libfunc (ufix_optab, DImode, SFmode));
5400 /* Values from double-precision helper functions are returned in core
5401 registers if the selected core only supports single-precision
5402 arithmetic, even if we are using the hard-float ABI. The same is
5403 true for single-precision helpers, but we will never be using the
5404 hard-float ABI on a CPU which doesn't support single-precision
5405 operations in hardware. */
5406 add_libcall (libcall_htab, optab_libfunc (add_optab, DFmode));
5407 add_libcall (libcall_htab, optab_libfunc (sdiv_optab, DFmode));
5408 add_libcall (libcall_htab, optab_libfunc (smul_optab, DFmode));
5409 add_libcall (libcall_htab, optab_libfunc (neg_optab, DFmode));
5410 add_libcall (libcall_htab, optab_libfunc (sub_optab, DFmode));
5411 add_libcall (libcall_htab, optab_libfunc (eq_optab, DFmode));
5412 add_libcall (libcall_htab, optab_libfunc (lt_optab, DFmode));
5413 add_libcall (libcall_htab, optab_libfunc (le_optab, DFmode));
5414 add_libcall (libcall_htab, optab_libfunc (ge_optab, DFmode));
5415 add_libcall (libcall_htab, optab_libfunc (gt_optab, DFmode));
5416 add_libcall (libcall_htab, optab_libfunc (unord_optab, DFmode));
5417 add_libcall (libcall_htab, convert_optab_libfunc (sext_optab, DFmode,
5418 SFmode));
5419 add_libcall (libcall_htab, convert_optab_libfunc (trunc_optab, SFmode,
5420 DFmode));
5421 add_libcall (libcall_htab,
5422 convert_optab_libfunc (trunc_optab, HFmode, DFmode));
5425 return libcall && libcall_htab->find (libcall) != NULL;
5428 static rtx
5429 arm_libcall_value_1 (machine_mode mode)
5431 if (TARGET_AAPCS_BASED)
5432 return aapcs_libcall_value (mode);
5433 else if (TARGET_IWMMXT_ABI
5434 && arm_vector_mode_supported_p (mode))
5435 return gen_rtx_REG (mode, FIRST_IWMMXT_REGNUM);
5436 else
5437 return gen_rtx_REG (mode, ARG_REGISTER (1));
5440 /* Define how to find the value returned by a library function
5441 assuming the value has mode MODE. */
5443 static rtx
5444 arm_libcall_value (machine_mode mode, const_rtx libcall)
5446 if (TARGET_AAPCS_BASED && arm_pcs_default != ARM_PCS_AAPCS
5447 && GET_MODE_CLASS (mode) == MODE_FLOAT)
5449 /* The following libcalls return their result in integer registers,
5450 even though they return a floating point value. */
5451 if (arm_libcall_uses_aapcs_base (libcall))
5452 return gen_rtx_REG (mode, ARG_REGISTER(1));
5456 return arm_libcall_value_1 (mode);
5459 /* Implement TARGET_FUNCTION_VALUE_REGNO_P. */
5461 static bool
5462 arm_function_value_regno_p (const unsigned int regno)
5464 if (regno == ARG_REGISTER (1)
5465 || (TARGET_32BIT
5466 && TARGET_AAPCS_BASED
5467 && TARGET_HARD_FLOAT
5468 && regno == FIRST_VFP_REGNUM)
5469 || (TARGET_IWMMXT_ABI
5470 && regno == FIRST_IWMMXT_REGNUM))
5471 return true;
5473 return false;
5476 /* Determine the amount of memory needed to store the possible return
5477 registers of an untyped call. */
5479 arm_apply_result_size (void)
5481 int size = 16;
5483 if (TARGET_32BIT)
5485 if (TARGET_HARD_FLOAT_ABI)
5486 size += 32;
5487 if (TARGET_IWMMXT_ABI)
5488 size += 8;
5491 return size;
5494 /* Decide whether TYPE should be returned in memory (true)
5495 or in a register (false). FNTYPE is the type of the function making
5496 the call. */
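/* A rough illustration of the AAPCS branch below (hypothetical types,
   assuming TARGET_AAPCS_BASED and 4-byte words):

     struct s1 { char c; };                  1 byte   -> returned in r0
     struct s2 { int x[5]; };                20 bytes -> returned in memory
     typedef int v4si __attribute__ ((vector_size (16)));
                                             16 bytes -> returned in r0-r3  */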
5497 static bool
5498 arm_return_in_memory (const_tree type, const_tree fntype)
5500 HOST_WIDE_INT size;
5502 size = int_size_in_bytes (type); /* Negative if not fixed size. */
5504 if (TARGET_AAPCS_BASED)
5506 /* Simple, non-aggregate types (i.e. not including vectors and
5507 complex) are always returned in a register (or registers).
5508 We don't care about which register here, so we can short-cut
5509 some of the detail. */
5510 if (!AGGREGATE_TYPE_P (type)
5511 && TREE_CODE (type) != VECTOR_TYPE
5512 && TREE_CODE (type) != COMPLEX_TYPE)
5513 return false;
5515 /* Any return value that is no larger than one word can be
5516 returned in r0. */
5517 if (((unsigned HOST_WIDE_INT) size) <= UNITS_PER_WORD)
5518 return false;
5520 /* Check any available co-processors to see if they accept the
5521 type as a register candidate (VFP, for example, can return
5522 some aggregates in consecutive registers). These aren't
5523 available if the call is variadic. */
5524 if (aapcs_select_return_coproc (type, fntype) >= 0)
5525 return false;
5527 /* Vector values should be returned using ARM registers, not
5528 memory (unless they're over 16 bytes, which will break since
5529 we only have four call-clobbered registers to play with). */
5530 if (TREE_CODE (type) == VECTOR_TYPE)
5531 return (size < 0 || size > (4 * UNITS_PER_WORD));
5533 /* The rest go in memory. */
5534 return true;
5537 if (TREE_CODE (type) == VECTOR_TYPE)
5538 return (size < 0 || size > (4 * UNITS_PER_WORD));
5540 if (!AGGREGATE_TYPE_P (type) &&
5541 (TREE_CODE (type) != VECTOR_TYPE))
5542 /* All simple types are returned in registers. */
5543 return false;
5545 if (arm_abi != ARM_ABI_APCS)
5547 /* ATPCS and later return aggregate types in memory only if they are
5548 larger than a word (or are variable size). */
5549 return (size < 0 || size > UNITS_PER_WORD);
5552 /* For the arm-wince targets we choose to be compatible with Microsoft's
5553 ARM and Thumb compilers, which always return aggregates in memory. */
5554 #ifndef ARM_WINCE
5555 /* All structures/unions bigger than one word are returned in memory.
5556 Also catch the case where int_size_in_bytes returns -1. In this case
5557 the aggregate is either huge or of variable size, and in either case
5558 we will want to return it via memory and not in a register. */
5559 if (size < 0 || size > UNITS_PER_WORD)
5560 return true;
5562 if (TREE_CODE (type) == RECORD_TYPE)
5564 tree field;
5566 /* For a struct the APCS says that we only return in a register
5567 if the type is 'integer like' and every addressable element
5568 has an offset of zero. For practical purposes this means
5569 that the structure can have at most one non-bit-field element
5570 and that this element must be the first one in the structure. */
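   /* Purely illustrative examples of that rule (hypothetical types,
      4-byte words assumed):

        struct a { short s; };               single field       -> register
        struct b { int x : 8; int y : 8; };  trailing bit-field -> register
        struct c { float f; };               float first field  -> memory
        struct d { char p; char q; };        second field is
                                             addressable        -> memory  */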
5572 /* Find the first field, ignoring non FIELD_DECL things which will
5573 have been created by C++. */
5574 for (field = TYPE_FIELDS (type);
5575 field && TREE_CODE (field) != FIELD_DECL;
5576 field = DECL_CHAIN (field))
5577 continue;
5579 if (field == NULL)
5580 return false; /* An empty structure. Allowed by an extension to ANSI C. */
5582 /* Check that the first field is valid for returning in a register. */
5584 /* ... Floats are not allowed */
5585 if (FLOAT_TYPE_P (TREE_TYPE (field)))
5586 return true;
5588 /* ... Aggregates that are not themselves valid for returning in
5589 a register are not allowed. */
5590 if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
5591 return true;
5593 /* Now check the remaining fields, if any. Only bitfields are allowed,
5594 since they are not addressable. */
5595 for (field = DECL_CHAIN (field);
5596 field;
5597 field = DECL_CHAIN (field))
5599 if (TREE_CODE (field) != FIELD_DECL)
5600 continue;
5602 if (!DECL_BIT_FIELD_TYPE (field))
5603 return true;
5606 return false;
5609 if (TREE_CODE (type) == UNION_TYPE)
5611 tree field;
5613 /* Unions can be returned in registers if every element is
5614 integral, or can be returned in an integer register. */
5615 for (field = TYPE_FIELDS (type);
5616 field;
5617 field = DECL_CHAIN (field))
5619 if (TREE_CODE (field) != FIELD_DECL)
5620 continue;
5622 if (FLOAT_TYPE_P (TREE_TYPE (field)))
5623 return true;
5625 if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
5626 return true;
5629 return false;
5631 #endif /* not ARM_WINCE */
5633 /* Return all other types in memory. */
5634 return true;
5637 const struct pcs_attribute_arg
5639 const char *arg;
5640 enum arm_pcs value;
5641 } pcs_attribute_args[] =
5643 {"aapcs", ARM_PCS_AAPCS},
5644 {"aapcs-vfp", ARM_PCS_AAPCS_VFP},
5645 #if 0
5646 /* We could recognize these, but changes would be needed elsewhere
5647 * to implement them. */
5648 {"aapcs-iwmmxt", ARM_PCS_AAPCS_IWMMXT},
5649 {"atpcs", ARM_PCS_ATPCS},
5650 {"apcs", ARM_PCS_APCS},
5651 #endif
5652 {NULL, ARM_PCS_UNKNOWN}
5655 static enum arm_pcs
5656 arm_pcs_from_attribute (tree attr)
5658 const struct pcs_attribute_arg *ptr;
5659 const char *arg;
5661 /* Get the value of the argument. */
5662 if (TREE_VALUE (attr) == NULL_TREE
5663 || TREE_CODE (TREE_VALUE (attr)) != STRING_CST)
5664 return ARM_PCS_UNKNOWN;
5666 arg = TREE_STRING_POINTER (TREE_VALUE (attr));
5668 /* Check it against the list of known arguments. */
5669 for (ptr = pcs_attribute_args; ptr->arg != NULL; ptr++)
5670 if (streq (arg, ptr->arg))
5671 return ptr->value;
5673 /* An unrecognized PCS variant. */
5674 return ARM_PCS_UNKNOWN;
5677 /* Get the PCS variant to use for this call. TYPE is the function's type
5678 specification, DECL is the specific declaration. DECL may be null if
5679 the call could be indirect or if this is a library call. */
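/* For illustration only, the attribute looks like:

     double dadd (double, double) __attribute__ ((pcs ("aapcs")));

   which forces the base (soft-float) AAPCS convention for DADD even when
   the rest of the translation unit uses -mfloat-abi=hard.  */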
5680 static enum arm_pcs
5681 arm_get_pcs_model (const_tree type, const_tree decl)
5683 bool user_convention = false;
5684 enum arm_pcs user_pcs = arm_pcs_default;
5685 tree attr;
5687 gcc_assert (type);
5689 attr = lookup_attribute ("pcs", TYPE_ATTRIBUTES (type));
5690 if (attr)
5692 user_pcs = arm_pcs_from_attribute (TREE_VALUE (attr));
5693 user_convention = true;
5696 if (TARGET_AAPCS_BASED)
5698 /* Detect varargs functions. These always use the base rules
5699 (no argument is ever a candidate for a co-processor
5700 register). */
5701 bool base_rules = stdarg_p (type);
5703 if (user_convention)
5705 if (user_pcs > ARM_PCS_AAPCS_LOCAL)
5706 sorry ("non-AAPCS derived PCS variant");
5707 else if (base_rules && user_pcs != ARM_PCS_AAPCS)
5708 error ("variadic functions must use the base AAPCS variant");
5711 if (base_rules)
5712 return ARM_PCS_AAPCS;
5713 else if (user_convention)
5714 return user_pcs;
5715 else if (decl && flag_unit_at_a_time)
5717 /* Local functions never leak outside this compilation unit,
5718 so we are free to use whatever conventions are
5719 appropriate. */
5720 /* FIXME: remove CONST_CAST_TREE when cgraph is constified. */
5721 cgraph_local_info *i = cgraph_node::local_info (CONST_CAST_TREE(decl));
5722 if (i && i->local)
5723 return ARM_PCS_AAPCS_LOCAL;
5726 else if (user_convention && user_pcs != arm_pcs_default)
5727 sorry ("PCS variant");
5729 /* For everything else we use the target's default. */
5730 return arm_pcs_default;
5734 static void
5735 aapcs_vfp_cum_init (CUMULATIVE_ARGS *pcum ATTRIBUTE_UNUSED,
5736 const_tree fntype ATTRIBUTE_UNUSED,
5737 rtx libcall ATTRIBUTE_UNUSED,
5738 const_tree fndecl ATTRIBUTE_UNUSED)
5740 /* Record the unallocated VFP registers. */
5741 pcum->aapcs_vfp_regs_free = (1 << NUM_VFP_ARG_REGS) - 1;
5742 pcum->aapcs_vfp_reg_alloc = 0;
5745 /* Walk down the type tree of TYPE counting consecutive base elements.
5746 If *MODEP is VOIDmode, then set it to the first valid floating point
5747 type. If a non-floating point type is found, or if a floating point
5748 type that doesn't match a non-VOIDmode *MODEP is found, then return -1,
5749 otherwise return the count in the sub-tree. */
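   /* Illustrative only (hypothetical types): for

        struct hfa { double d[2]; double e; };

      the walk records DFmode and returns 3, making the struct a
      homogeneous-aggregate candidate, whereas for

        struct mix { double d; float f; };

      the SFmode member does not match the DFmode already in *MODEP and
      -1 is returned.  */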
5750 static int
5751 aapcs_vfp_sub_candidate (const_tree type, machine_mode *modep)
5753 machine_mode mode;
5754 HOST_WIDE_INT size;
5756 switch (TREE_CODE (type))
5758 case REAL_TYPE:
5759 mode = TYPE_MODE (type);
5760 if (mode != DFmode && mode != SFmode && mode != HFmode)
5761 return -1;
5763 if (*modep == VOIDmode)
5764 *modep = mode;
5766 if (*modep == mode)
5767 return 1;
5769 break;
5771 case COMPLEX_TYPE:
5772 mode = TYPE_MODE (TREE_TYPE (type));
5773 if (mode != DFmode && mode != SFmode)
5774 return -1;
5776 if (*modep == VOIDmode)
5777 *modep = mode;
5779 if (*modep == mode)
5780 return 2;
5782 break;
5784 case VECTOR_TYPE:
5785 /* Use V2SImode and V4SImode as representatives of all 64-bit
5786 and 128-bit vector types, whether or not those modes are
5787 supported with the present options. */
5788 size = int_size_in_bytes (type);
5789 switch (size)
5791 case 8:
5792 mode = V2SImode;
5793 break;
5794 case 16:
5795 mode = V4SImode;
5796 break;
5797 default:
5798 return -1;
5801 if (*modep == VOIDmode)
5802 *modep = mode;
5804 /* Vector modes are considered to be opaque: two vectors are
5805 equivalent for the purposes of being homogeneous aggregates
5806 if they are the same size. */
5807 if (*modep == mode)
5808 return 1;
5810 break;
5812 case ARRAY_TYPE:
5814 int count;
5815 tree index = TYPE_DOMAIN (type);
5817 /* Can't handle incomplete types nor sizes that are not
5818 fixed. */
5819 if (!COMPLETE_TYPE_P (type)
5820 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
5821 return -1;
5823 count = aapcs_vfp_sub_candidate (TREE_TYPE (type), modep);
5824 if (count == -1
5825 || !index
5826 || !TYPE_MAX_VALUE (index)
5827 || !tree_fits_uhwi_p (TYPE_MAX_VALUE (index))
5828 || !TYPE_MIN_VALUE (index)
5829 || !tree_fits_uhwi_p (TYPE_MIN_VALUE (index))
5830 || count < 0)
5831 return -1;
5833 count *= (1 + tree_to_uhwi (TYPE_MAX_VALUE (index))
5834 - tree_to_uhwi (TYPE_MIN_VALUE (index)));
5836 /* There must be no padding. */
5837 if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
5838 return -1;
5840 return count;
5843 case RECORD_TYPE:
5845 int count = 0;
5846 int sub_count;
5847 tree field;
5849 /* Can't handle incomplete types nor sizes that are not
5850 fixed. */
5851 if (!COMPLETE_TYPE_P (type)
5852 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
5853 return -1;
5855 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
5857 if (TREE_CODE (field) != FIELD_DECL)
5858 continue;
5860 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
5861 if (sub_count < 0)
5862 return -1;
5863 count += sub_count;
5866 /* There must be no padding. */
5867 if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
5868 return -1;
5870 return count;
5873 case UNION_TYPE:
5874 case QUAL_UNION_TYPE:
5876 /* These aren't very interesting except in a degenerate case. */
5877 int count = 0;
5878 int sub_count;
5879 tree field;
5881 /* Can't handle incomplete types nor sizes that are not
5882 fixed. */
5883 if (!COMPLETE_TYPE_P (type)
5884 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
5885 return -1;
5887 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
5889 if (TREE_CODE (field) != FIELD_DECL)
5890 continue;
5892 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
5893 if (sub_count < 0)
5894 return -1;
5895 count = count > sub_count ? count : sub_count;
5898 /* There must be no padding. */
5899 if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
5900 return -1;
5902 return count;
5905 default:
5906 break;
5909 return -1;
5912 /* Return true if PCS_VARIANT should use VFP registers. */
5913 static bool
5914 use_vfp_abi (enum arm_pcs pcs_variant, bool is_double)
5916 if (pcs_variant == ARM_PCS_AAPCS_VFP)
5918 static bool seen_thumb1_vfp = false;
5920 if (TARGET_THUMB1 && !seen_thumb1_vfp)
5922 sorry ("Thumb-1 hard-float VFP ABI");
5923 /* sorry() is not immediately fatal, so only display this once. */
5924 seen_thumb1_vfp = true;
5927 return true;
5930 if (pcs_variant != ARM_PCS_AAPCS_LOCAL)
5931 return false;
5933 return (TARGET_32BIT && TARGET_HARD_FLOAT &&
5934 (TARGET_VFP_DOUBLE || !is_double));
5937 /* Return true if an argument whose type is TYPE, or mode is MODE, is
5938 suitable for passing or returning in VFP registers for the PCS
5939 variant selected. If it is, then *BASE_MODE is updated to contain
5940 a machine mode describing each element of the argument's type and
5941 *COUNT to hold the number of such elements. */
5942 static bool
5943 aapcs_vfp_is_call_or_return_candidate (enum arm_pcs pcs_variant,
5944 machine_mode mode, const_tree type,
5945 machine_mode *base_mode, int *count)
5947 machine_mode new_mode = VOIDmode;
5949 /* If we have the type information, prefer that to working things
5950 out from the mode. */
5951 if (type)
5953 int ag_count = aapcs_vfp_sub_candidate (type, &new_mode);
5955 if (ag_count > 0 && ag_count <= 4)
5956 *count = ag_count;
5957 else
5958 return false;
5960 else if (GET_MODE_CLASS (mode) == MODE_FLOAT
5961 || GET_MODE_CLASS (mode) == MODE_VECTOR_INT
5962 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
5964 *count = 1;
5965 new_mode = mode;
5967 else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
5969 *count = 2;
5970 new_mode = (mode == DCmode ? DFmode : SFmode);
5972 else
5973 return false;
5976 if (!use_vfp_abi (pcs_variant, ARM_NUM_REGS (new_mode) > 1))
5977 return false;
5979 *base_mode = new_mode;
5980 return true;
5983 static bool
5984 aapcs_vfp_is_return_candidate (enum arm_pcs pcs_variant,
5985 machine_mode mode, const_tree type)
5987 int count ATTRIBUTE_UNUSED;
5988 machine_mode ag_mode ATTRIBUTE_UNUSED;
5990 if (!use_vfp_abi (pcs_variant, false))
5991 return false;
5992 return aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type,
5993 &ag_mode, &count);
5996 static bool
5997 aapcs_vfp_is_call_candidate (CUMULATIVE_ARGS *pcum, machine_mode mode,
5998 const_tree type)
6000 if (!use_vfp_abi (pcum->pcs_variant, false))
6001 return false;
6003 return aapcs_vfp_is_call_or_return_candidate (pcum->pcs_variant, mode, type,
6004 &pcum->aapcs_vfp_rmode,
6005 &pcum->aapcs_vfp_rcount);
6008 /* Implement the allocate field in aapcs_cp_arg_layout. See the comment there
6009 for the behaviour of this function. */
6011 static bool
6012 aapcs_vfp_allocate (CUMULATIVE_ARGS *pcum, machine_mode mode,
6013 const_tree type ATTRIBUTE_UNUSED)
6015 int rmode_size
6016 = MAX (GET_MODE_SIZE (pcum->aapcs_vfp_rmode), GET_MODE_SIZE (SFmode));
6017 int shift = rmode_size / GET_MODE_SIZE (SFmode);
6018 unsigned mask = (1 << (shift * pcum->aapcs_vfp_rcount)) - 1;
6019 int regno;
6021 for (regno = 0; regno < NUM_VFP_ARG_REGS; regno += shift)
6022 if (((pcum->aapcs_vfp_regs_free >> regno) & mask) == mask)
6024 pcum->aapcs_vfp_reg_alloc = mask << regno;
6025 if (mode == BLKmode
6026 || (mode == TImode && ! TARGET_NEON)
6027 || ! arm_hard_regno_mode_ok (FIRST_VFP_REGNUM + regno, mode))
6029 int i;
6030 int rcount = pcum->aapcs_vfp_rcount;
6031 int rshift = shift;
6032 machine_mode rmode = pcum->aapcs_vfp_rmode;
6033 rtx par;
6034 if (!TARGET_NEON)
6036 /* Avoid using unsupported vector modes. */
6037 if (rmode == V2SImode)
6038 rmode = DImode;
6039 else if (rmode == V4SImode)
6041 rmode = DImode;
6042 rcount *= 2;
6043 rshift /= 2;
6046 par = gen_rtx_PARALLEL (mode, rtvec_alloc (rcount));
6047 for (i = 0; i < rcount; i++)
6049 rtx tmp = gen_rtx_REG (rmode,
6050 FIRST_VFP_REGNUM + regno + i * rshift);
6051 tmp = gen_rtx_EXPR_LIST
6052 (VOIDmode, tmp,
6053 GEN_INT (i * GET_MODE_SIZE (rmode)));
6054 XVECEXP (par, 0, i) = tmp;
6057 pcum->aapcs_reg = par;
6059 else
6060 pcum->aapcs_reg = gen_rtx_REG (mode, FIRST_VFP_REGNUM + regno);
6061 return true;
6063 return false;
6066 /* Implement the allocate_return_reg field in aapcs_cp_arg_layout. See the
6067 comment there for the behaviour of this function. */
6069 static rtx
6070 aapcs_vfp_allocate_return_reg (enum arm_pcs pcs_variant ATTRIBUTE_UNUSED,
6071 machine_mode mode,
6072 const_tree type ATTRIBUTE_UNUSED)
6074 if (!use_vfp_abi (pcs_variant, false))
6075 return NULL;
6077 if (mode == BLKmode
6078 || (GET_MODE_CLASS (mode) == MODE_INT
6079 && GET_MODE_SIZE (mode) >= GET_MODE_SIZE (TImode)
6080 && !TARGET_NEON))
6082 int count;
6083 machine_mode ag_mode;
6084 int i;
6085 rtx par;
6086 int shift;
6088 aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type,
6089 &ag_mode, &count);
6091 if (!TARGET_NEON)
6093 if (ag_mode == V2SImode)
6094 ag_mode = DImode;
6095 else if (ag_mode == V4SImode)
6097 ag_mode = DImode;
6098 count *= 2;
6101 shift = GET_MODE_SIZE(ag_mode) / GET_MODE_SIZE(SFmode);
6102 par = gen_rtx_PARALLEL (mode, rtvec_alloc (count));
6103 for (i = 0; i < count; i++)
6105 rtx tmp = gen_rtx_REG (ag_mode, FIRST_VFP_REGNUM + i * shift);
6106 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
6107 GEN_INT (i * GET_MODE_SIZE (ag_mode)));
6108 XVECEXP (par, 0, i) = tmp;
6111 return par;
6114 return gen_rtx_REG (mode, FIRST_VFP_REGNUM);
6117 static void
6118 aapcs_vfp_advance (CUMULATIVE_ARGS *pcum ATTRIBUTE_UNUSED,
6119 machine_mode mode ATTRIBUTE_UNUSED,
6120 const_tree type ATTRIBUTE_UNUSED)
6122 pcum->aapcs_vfp_regs_free &= ~pcum->aapcs_vfp_reg_alloc;
6123 pcum->aapcs_vfp_reg_alloc = 0;
6124 return;
6127 #define AAPCS_CP(X) \
6129 aapcs_ ## X ## _cum_init, \
6130 aapcs_ ## X ## _is_call_candidate, \
6131 aapcs_ ## X ## _allocate, \
6132 aapcs_ ## X ## _is_return_candidate, \
6133 aapcs_ ## X ## _allocate_return_reg, \
6134 aapcs_ ## X ## _advance \
6137 /* Table of co-processors that can be used to pass arguments in
6138 registers. Ideally no argument should be a candidate for more than
6139 one co-processor table entry, but the table is processed in order
6140 and stops after the first match. If that entry then fails to put
6141 the argument into a co-processor register, the argument will go on
6142 the stack. */
6143 static struct
6145 /* Initialize co-processor related state in CUMULATIVE_ARGS structure. */
6146 void (*cum_init) (CUMULATIVE_ARGS *, const_tree, rtx, const_tree);
6148 /* Return true if an argument of mode MODE (or type TYPE if MODE is
6149 BLKmode) is a candidate for this co-processor's registers; this
6150 function should ignore any position-dependent state in
6151 CUMULATIVE_ARGS and only use call-type dependent information. */
6152 bool (*is_call_candidate) (CUMULATIVE_ARGS *, machine_mode, const_tree);
6154 /* Return true if the argument does get a co-processor register; it
6155 should set aapcs_reg to an RTX of the register allocated as is
6156 required for a return from FUNCTION_ARG. */
6157 bool (*allocate) (CUMULATIVE_ARGS *, machine_mode, const_tree);
6159 /* Return true if a result of mode MODE (or type TYPE if MODE is BLKmode) can
6160 be returned in this co-processor's registers. */
6161 bool (*is_return_candidate) (enum arm_pcs, machine_mode, const_tree);
6163 /* Allocate and return an RTX element to hold the return type of a call. This
6164 routine must not fail and will only be called if is_return_candidate
6165 returned true with the same parameters. */
6166 rtx (*allocate_return_reg) (enum arm_pcs, machine_mode, const_tree);
6168 /* Finish processing this argument and prepare to start processing
6169 the next one. */
6170 void (*advance) (CUMULATIVE_ARGS *, machine_mode, const_tree);
6171 } aapcs_cp_arg_layout[ARM_NUM_COPROC_SLOTS] =
6173 AAPCS_CP(vfp)
6176 #undef AAPCS_CP
6178 static int
6179 aapcs_select_call_coproc (CUMULATIVE_ARGS *pcum, machine_mode mode,
6180 const_tree type)
6182 int i;
6184 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
6185 if (aapcs_cp_arg_layout[i].is_call_candidate (pcum, mode, type))
6186 return i;
6188 return -1;
6191 static int
6192 aapcs_select_return_coproc (const_tree type, const_tree fntype)
6194 /* We aren't passed a decl, so we can't check that a call is local.
6195 However, it isn't clear that that would be a win anyway, since it
6196 might limit some tail-calling opportunities. */
6197 enum arm_pcs pcs_variant;
6199 if (fntype)
6201 const_tree fndecl = NULL_TREE;
6203 if (TREE_CODE (fntype) == FUNCTION_DECL)
6205 fndecl = fntype;
6206 fntype = TREE_TYPE (fntype);
6209 pcs_variant = arm_get_pcs_model (fntype, fndecl);
6211 else
6212 pcs_variant = arm_pcs_default;
6214 if (pcs_variant != ARM_PCS_AAPCS)
6216 int i;
6218 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
6219 if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant,
6220 TYPE_MODE (type),
6221 type))
6222 return i;
6224 return -1;
6227 static rtx
6228 aapcs_allocate_return_reg (machine_mode mode, const_tree type,
6229 const_tree fntype)
6231 /* We aren't passed a decl, so we can't check that a call is local.
6232 However, it isn't clear that that would be a win anyway, since it
6233 might limit some tail-calling opportunities. */
6234 enum arm_pcs pcs_variant;
6235 int unsignedp ATTRIBUTE_UNUSED;
6237 if (fntype)
6239 const_tree fndecl = NULL_TREE;
6241 if (TREE_CODE (fntype) == FUNCTION_DECL)
6243 fndecl = fntype;
6244 fntype = TREE_TYPE (fntype);
6247 pcs_variant = arm_get_pcs_model (fntype, fndecl);
6249 else
6250 pcs_variant = arm_pcs_default;
6252 /* Promote integer types. */
6253 if (type && INTEGRAL_TYPE_P (type))
6254 mode = arm_promote_function_mode (type, mode, &unsignedp, fntype, 1);
6256 if (pcs_variant != ARM_PCS_AAPCS)
6258 int i;
6260 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
6261 if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant, mode,
6262 type))
6263 return aapcs_cp_arg_layout[i].allocate_return_reg (pcs_variant,
6264 mode, type);
6267 /* Promotes small structs returned in a register to full-word size
6268 for big-endian AAPCS. */
6269 if (type && arm_return_in_msb (type))
6271 HOST_WIDE_INT size = int_size_in_bytes (type);
6272 if (size % UNITS_PER_WORD != 0)
6274 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
6275 mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
6279 return gen_rtx_REG (mode, R0_REGNUM);
6282 static rtx
6283 aapcs_libcall_value (machine_mode mode)
6285 if (BYTES_BIG_ENDIAN && ALL_FIXED_POINT_MODE_P (mode)
6286 && GET_MODE_SIZE (mode) <= 4)
6287 mode = SImode;
6289 return aapcs_allocate_return_reg (mode, NULL_TREE, NULL_TREE);
6292 /* Lay out a function argument using the AAPCS rules. The rule
6293 numbers referred to here are those in the AAPCS. */
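/* A sketch of how the rules below play out (hypothetical prototypes,
   base AAPCS variant and 4-byte words assumed):

     void f (int a, long long b, int c);
        a -> r0; C3 rounds the NCRN up so that b -> r2/r3; c -> stack.

     struct big { int x[4]; };
     void g (int a, struct big s);
        a -> r0; C5 splits s between r1-r3 and 4 bytes of stack.  */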
6294 static void
6295 aapcs_layout_arg (CUMULATIVE_ARGS *pcum, machine_mode mode,
6296 const_tree type, bool named)
6298 int nregs, nregs2;
6299 int ncrn;
6301 /* We only need to do this once per argument. */
6302 if (pcum->aapcs_arg_processed)
6303 return;
6305 pcum->aapcs_arg_processed = true;
6307 /* Special case: if named is false then we are handling an incoming
6308 anonymous argument which is on the stack. */
6309 if (!named)
6310 return;
6312 /* Is this a potential co-processor register candidate? */
6313 if (pcum->pcs_variant != ARM_PCS_AAPCS)
6315 int slot = aapcs_select_call_coproc (pcum, mode, type);
6316 pcum->aapcs_cprc_slot = slot;
6318 /* We don't have to apply any of the rules from part B of the
6319 preparation phase, these are handled elsewhere in the
6320 compiler. */
6322 if (slot >= 0)
6324 /* A co-processor register candidate goes either in its own
6325 class of registers or on the stack. */
6326 if (!pcum->aapcs_cprc_failed[slot])
6328 /* C1.cp - Try to allocate the argument to co-processor
6329 registers. */
6330 if (aapcs_cp_arg_layout[slot].allocate (pcum, mode, type))
6331 return;
6333 /* C2.cp - Put the argument on the stack and note that we
6334 can't assign any more candidates in this slot. We also
6335 need to note that we have allocated stack space, so that
6336 we won't later try to split a non-cprc candidate between
6337 core registers and the stack. */
6338 pcum->aapcs_cprc_failed[slot] = true;
6339 pcum->can_split = false;
6342 /* We didn't get a register, so this argument goes on the
6343 stack. */
6344 gcc_assert (pcum->can_split == false);
6345 return;
6349 /* C3 - For double-word aligned arguments, round the NCRN up to the
6350 next even number. */
6351 ncrn = pcum->aapcs_ncrn;
6352 if ((ncrn & 1) && arm_needs_doubleword_align (mode, type))
6353 ncrn++;
6355 nregs = ARM_NUM_REGS2(mode, type);
6357 /* Sigh, this test should really assert that nregs > 0, but a GCC
6358 extension allows empty structs and then gives them empty size; it
6359 then allows such a structure to be passed by value. For some of
6360 the code below we have to pretend that such an argument has
6361 non-zero size so that we 'locate' it correctly either in
6362 registers or on the stack. */
6363 gcc_assert (nregs >= 0);
6365 nregs2 = nregs ? nregs : 1;
6367 /* C4 - Argument fits entirely in core registers. */
6368 if (ncrn + nregs2 <= NUM_ARG_REGS)
6370 pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
6371 pcum->aapcs_next_ncrn = ncrn + nregs;
6372 return;
6375 /* C5 - Some core registers left and there are no arguments already
6376 on the stack: split this argument between the remaining core
6377 registers and the stack. */
6378 if (ncrn < NUM_ARG_REGS && pcum->can_split)
6380 pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
6381 pcum->aapcs_next_ncrn = NUM_ARG_REGS;
6382 pcum->aapcs_partial = (NUM_ARG_REGS - ncrn) * UNITS_PER_WORD;
6383 return;
6386 /* C6 - NCRN is set to 4. */
6387 pcum->aapcs_next_ncrn = NUM_ARG_REGS;
6389 /* C7,C8 - argument goes on the stack. We have nothing to do here. */
6390 return;
6393 /* Initialize a variable CUM of type CUMULATIVE_ARGS
6394 for a call to a function whose data type is FNTYPE.
6395 For a library call, FNTYPE is NULL. */
6396 void
6397 arm_init_cumulative_args (CUMULATIVE_ARGS *pcum, tree fntype,
6398 rtx libname,
6399 tree fndecl ATTRIBUTE_UNUSED)
6401 /* Long call handling. */
6402 if (fntype)
6403 pcum->pcs_variant = arm_get_pcs_model (fntype, fndecl);
6404 else
6405 pcum->pcs_variant = arm_pcs_default;
6407 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
6409 if (arm_libcall_uses_aapcs_base (libname))
6410 pcum->pcs_variant = ARM_PCS_AAPCS;
6412 pcum->aapcs_ncrn = pcum->aapcs_next_ncrn = 0;
6413 pcum->aapcs_reg = NULL_RTX;
6414 pcum->aapcs_partial = 0;
6415 pcum->aapcs_arg_processed = false;
6416 pcum->aapcs_cprc_slot = -1;
6417 pcum->can_split = true;
6419 if (pcum->pcs_variant != ARM_PCS_AAPCS)
6421 int i;
6423 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
6425 pcum->aapcs_cprc_failed[i] = false;
6426 aapcs_cp_arg_layout[i].cum_init (pcum, fntype, libname, fndecl);
6429 return;
6432 /* Legacy ABIs */
6434 /* On the ARM, the offset starts at 0. */
6435 pcum->nregs = 0;
6436 pcum->iwmmxt_nregs = 0;
6437 pcum->can_split = true;
6439 /* Varargs vectors are treated the same as long long.
6440 named_count avoids having to change the way arm handles 'named'. */
6441 pcum->named_count = 0;
6442 pcum->nargs = 0;
6444 if (TARGET_REALLY_IWMMXT && fntype)
6446 tree fn_arg;
6448 for (fn_arg = TYPE_ARG_TYPES (fntype);
6449 fn_arg;
6450 fn_arg = TREE_CHAIN (fn_arg))
6451 pcum->named_count += 1;
6453 if (! pcum->named_count)
6454 pcum->named_count = INT_MAX;
6458 /* Return true if mode/type need doubleword alignment. */
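/* For illustration: under the AAPCS a double or long long argument
   answers true here and is therefore placed in an even-numbered register
   pair (r0/r1 or r2/r3) or at an 8-byte aligned stack slot, whereas int
   and float arguments do not.  */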
6459 static bool
6460 arm_needs_doubleword_align (machine_mode mode, const_tree type)
6462 if (!type)
6463 return PARM_BOUNDARY < GET_MODE_ALIGNMENT (mode);
6465 /* Scalar and vector types: Use natural alignment, i.e. of base type. */
6466 if (!AGGREGATE_TYPE_P (type))
6467 return TYPE_ALIGN (TYPE_MAIN_VARIANT (type)) > PARM_BOUNDARY;
6469 /* Array types: Use member alignment of element type. */
6470 if (TREE_CODE (type) == ARRAY_TYPE)
6471 return TYPE_ALIGN (TREE_TYPE (type)) > PARM_BOUNDARY;
6473 /* Record/aggregate types: Use greatest member alignment of any member. */
6474 for (tree field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6475 if (DECL_ALIGN (field) > PARM_BOUNDARY)
6476 return true;
6478 return false;
6482 /* Determine where to put an argument to a function.
6483 Value is zero to push the argument on the stack,
6484 or a hard register in which to store the argument.
6486 MODE is the argument's machine mode.
6487 TYPE is the data type of the argument (as a tree).
6488 This is null for libcalls where that information may
6489 not be available.
6490 CUM is a variable of type CUMULATIVE_ARGS which gives info about
6491 the preceding args and about the function being called.
6492 NAMED is nonzero if this argument is a named parameter
6493 (otherwise it is an extra parameter matching an ellipsis).
6495 On the ARM, normally the first 16 bytes are passed in registers r0-r3; all
6496 other arguments are passed on the stack. If (NAMED == 0) (which happens
6497 only in assign_parms, since TARGET_SETUP_INCOMING_VARARGS is
6498 defined), say it is passed on the stack (function_prologue will
6499 indeed make it pass on the stack if necessary). */
6501 static rtx
6502 arm_function_arg (cumulative_args_t pcum_v, machine_mode mode,
6503 const_tree type, bool named)
6505 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
6506 int nregs;
6508 /* Handle the special case quickly. Pick an arbitrary value for op2 of
6509 a call insn (op3 of a call_value insn). */
6510 if (mode == VOIDmode)
6511 return const0_rtx;
6513 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
6515 aapcs_layout_arg (pcum, mode, type, named);
6516 return pcum->aapcs_reg;
6519 /* Varargs vectors are treated the same as long long.
6520 named_count avoids having to change the way arm handles 'named'. */
6521 if (TARGET_IWMMXT_ABI
6522 && arm_vector_mode_supported_p (mode)
6523 && pcum->named_count > pcum->nargs + 1)
6525 if (pcum->iwmmxt_nregs <= 9)
6526 return gen_rtx_REG (mode, pcum->iwmmxt_nregs + FIRST_IWMMXT_REGNUM);
6527 else
6529 pcum->can_split = false;
6530 return NULL_RTX;
6534 /* Put doubleword aligned quantities in even register pairs. */
6535 if (pcum->nregs & 1
6536 && ARM_DOUBLEWORD_ALIGN
6537 && arm_needs_doubleword_align (mode, type))
6538 pcum->nregs++;
6540 /* Only allow splitting an arg between regs and memory if all preceding
6541 args were allocated to regs. For args passed by reference we only count
6542 the reference pointer. */
6543 if (pcum->can_split)
6544 nregs = 1;
6545 else
6546 nregs = ARM_NUM_REGS2 (mode, type);
6548 if (!named || pcum->nregs + nregs > NUM_ARG_REGS)
6549 return NULL_RTX;
6551 return gen_rtx_REG (mode, pcum->nregs);
6554 static unsigned int
6555 arm_function_arg_boundary (machine_mode mode, const_tree type)
6557 return (ARM_DOUBLEWORD_ALIGN && arm_needs_doubleword_align (mode, type)
6558 ? DOUBLEWORD_ALIGNMENT
6559 : PARM_BOUNDARY);
6562 static int
6563 arm_arg_partial_bytes (cumulative_args_t pcum_v, machine_mode mode,
6564 tree type, bool named)
6566 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
6567 int nregs = pcum->nregs;
6569 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
6571 aapcs_layout_arg (pcum, mode, type, named);
6572 return pcum->aapcs_partial;
6575 if (TARGET_IWMMXT_ABI && arm_vector_mode_supported_p (mode))
6576 return 0;
6578 if (NUM_ARG_REGS > nregs
6579 && (NUM_ARG_REGS < nregs + ARM_NUM_REGS2 (mode, type))
6580 && pcum->can_split)
6581 return (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
6583 return 0;
6586 /* Update the data in PCUM to advance over an argument
6587 of mode MODE and data type TYPE.
6588 (TYPE is null for libcalls where that information may not be available.) */
6590 static void
6591 arm_function_arg_advance (cumulative_args_t pcum_v, machine_mode mode,
6592 const_tree type, bool named)
6594 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
6596 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
6598 aapcs_layout_arg (pcum, mode, type, named);
6600 if (pcum->aapcs_cprc_slot >= 0)
6602 aapcs_cp_arg_layout[pcum->aapcs_cprc_slot].advance (pcum, mode,
6603 type);
6604 pcum->aapcs_cprc_slot = -1;
6607 /* Generic stuff. */
6608 pcum->aapcs_arg_processed = false;
6609 pcum->aapcs_ncrn = pcum->aapcs_next_ncrn;
6610 pcum->aapcs_reg = NULL_RTX;
6611 pcum->aapcs_partial = 0;
6613 else
6615 pcum->nargs += 1;
6616 if (arm_vector_mode_supported_p (mode)
6617 && pcum->named_count > pcum->nargs
6618 && TARGET_IWMMXT_ABI)
6619 pcum->iwmmxt_nregs += 1;
6620 else
6621 pcum->nregs += ARM_NUM_REGS2 (mode, type);
6625 /* Variable sized types are passed by reference. This is a GCC
6626 extension to the ARM ABI. */
6628 static bool
6629 arm_pass_by_reference (cumulative_args_t cum ATTRIBUTE_UNUSED,
6630 machine_mode mode ATTRIBUTE_UNUSED,
6631 const_tree type, bool named ATTRIBUTE_UNUSED)
6633 return type && TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST;
6636 /* Encode the current state of the #pragma [no_]long_calls. */
6637 typedef enum
6639 OFF, /* No #pragma [no_]long_calls is in effect. */
6640 LONG, /* #pragma long_calls is in effect. */
6641 SHORT /* #pragma no_long_calls is in effect. */
6642 } arm_pragma_enum;
6644 static arm_pragma_enum arm_pragma_long_calls = OFF;
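/* Typical usage of these pragmas (illustrative only):

     #pragma long_calls
     extern void far_away (void);     calls use a long-call sequence
     #pragma no_long_calls
     extern void nearby (void);       calls are forced to be short
     #pragma long_calls_off
                                      back to the command-line default  */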
6646 void
6647 arm_pr_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
6649 arm_pragma_long_calls = LONG;
6652 void
6653 arm_pr_no_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
6655 arm_pragma_long_calls = SHORT;
6658 void
6659 arm_pr_long_calls_off (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
6661 arm_pragma_long_calls = OFF;
6664 /* Handle an attribute requiring a FUNCTION_DECL;
6665 arguments as in struct attribute_spec.handler. */
6666 static tree
6667 arm_handle_fndecl_attribute (tree *node, tree name, tree args ATTRIBUTE_UNUSED,
6668 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
6670 if (TREE_CODE (*node) != FUNCTION_DECL)
6672 warning (OPT_Wattributes, "%qE attribute only applies to functions",
6673 name);
6674 *no_add_attrs = true;
6677 return NULL_TREE;
6680 /* Handle an "interrupt" or "isr" attribute;
6681 arguments as in struct attribute_spec.handler. */
6682 static tree
6683 arm_handle_isr_attribute (tree *node, tree name, tree args, int flags,
6684 bool *no_add_attrs)
6686 if (DECL_P (*node))
6688 if (TREE_CODE (*node) != FUNCTION_DECL)
6690 warning (OPT_Wattributes, "%qE attribute only applies to functions",
6691 name);
6692 *no_add_attrs = true;
6694 /* FIXME: the argument if any is checked for type attributes;
6695 should it be checked for decl ones? */
6697 else
6699 if (TREE_CODE (*node) == FUNCTION_TYPE
6700 || TREE_CODE (*node) == METHOD_TYPE)
6702 if (arm_isr_value (args) == ARM_FT_UNKNOWN)
6704 warning (OPT_Wattributes, "%qE attribute ignored",
6705 name);
6706 *no_add_attrs = true;
6709 else if (TREE_CODE (*node) == POINTER_TYPE
6710 && (TREE_CODE (TREE_TYPE (*node)) == FUNCTION_TYPE
6711 || TREE_CODE (TREE_TYPE (*node)) == METHOD_TYPE)
6712 && arm_isr_value (args) != ARM_FT_UNKNOWN)
6714 *node = build_variant_type_copy (*node);
6715 TREE_TYPE (*node) = build_type_attribute_variant
6716 (TREE_TYPE (*node),
6717 tree_cons (name, args, TYPE_ATTRIBUTES (TREE_TYPE (*node))));
6718 *no_add_attrs = true;
6720 else
6722 /* Possibly pass this attribute on from the type to a decl. */
6723 if (flags & ((int) ATTR_FLAG_DECL_NEXT
6724 | (int) ATTR_FLAG_FUNCTION_NEXT
6725 | (int) ATTR_FLAG_ARRAY_NEXT))
6727 *no_add_attrs = true;
6728 return tree_cons (name, args, NULL_TREE);
6730 else
6732 warning (OPT_Wattributes, "%qE attribute ignored",
6733 name);
6738 return NULL_TREE;
6741 /* Handle a "pcs" attribute; arguments as in struct
6742 attribute_spec.handler. */
6743 static tree
6744 arm_handle_pcs_attribute (tree *node ATTRIBUTE_UNUSED, tree name, tree args,
6745 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
6747 if (arm_pcs_from_attribute (args) == ARM_PCS_UNKNOWN)
6749 warning (OPT_Wattributes, "%qE attribute ignored", name);
6750 *no_add_attrs = true;
6752 return NULL_TREE;
6755 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
6756 /* Handle the "notshared" attribute. This attribute is another way of
6757 requesting hidden visibility. ARM's compiler supports
6758 "__declspec(notshared)"; we support the same thing via an
6759 attribute. */
6761 static tree
6762 arm_handle_notshared_attribute (tree *node,
6763 tree name ATTRIBUTE_UNUSED,
6764 tree args ATTRIBUTE_UNUSED,
6765 int flags ATTRIBUTE_UNUSED,
6766 bool *no_add_attrs)
6768 tree decl = TYPE_NAME (*node);
6770 if (decl)
6772 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
6773 DECL_VISIBILITY_SPECIFIED (decl) = 1;
6774 *no_add_attrs = false;
6776 return NULL_TREE;
6778 #endif
6780 /* This function returns true if a function with declaration FNDECL and type
6781 FNTYPE uses the stack to pass arguments or return variables and false
6782 otherwise. This is used for functions with the attributes
6783 'cmse_nonsecure_call' or 'cmse_nonsecure_entry' and this function will issue
6784 diagnostic messages if the stack is used. NAME is the name of the attribute
6785 used. */
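/* For example (hypothetical declarations, compiled with -mcmse):

     int __attribute__ ((cmse_nonsecure_entry)) get_status (int idx);
        accepted: argument and return value fit in core registers.

     struct big { int x[6]; };
     struct big __attribute__ ((cmse_nonsecure_entry)) get_blob (void);
        rejected: the 24-byte return value would be passed on the stack.  */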
6787 static bool
6788 cmse_func_args_or_return_in_stack (tree fndecl, tree name, tree fntype)
6790 function_args_iterator args_iter;
6791 CUMULATIVE_ARGS args_so_far_v;
6792 cumulative_args_t args_so_far;
6793 bool first_param = true;
6794 tree arg_type, prev_arg_type = NULL_TREE, ret_type;
6796 /* Error out if any argument is passed on the stack. */
6797 arm_init_cumulative_args (&args_so_far_v, fntype, NULL_RTX, fndecl);
6798 args_so_far = pack_cumulative_args (&args_so_far_v);
6799 FOREACH_FUNCTION_ARGS (fntype, arg_type, args_iter)
6801 rtx arg_rtx;
6802 machine_mode arg_mode = TYPE_MODE (arg_type);
6804 prev_arg_type = arg_type;
6805 if (VOID_TYPE_P (arg_type))
6806 continue;
6808 if (!first_param)
6809 arm_function_arg_advance (args_so_far, arg_mode, arg_type, true);
6810 arg_rtx = arm_function_arg (args_so_far, arg_mode, arg_type, true);
6811 if (!arg_rtx
6812 || arm_arg_partial_bytes (args_so_far, arg_mode, arg_type, true))
6814 error ("%qE attribute not available to functions with arguments "
6815 "passed on the stack", name);
6816 return true;
6818 first_param = false;
6821 /* Error out for variadic functions since we cannot control how many
6822 arguments will be passed and thus the stack could be used. stdarg_p () is not
6823 used for the checking to avoid browsing arguments twice. */
6824 if (prev_arg_type != NULL_TREE && !VOID_TYPE_P (prev_arg_type))
6826 error ("%qE attribute not available to functions with variable number "
6827 "of arguments", name);
6828 return true;
6831 /* Error out if return value is passed on the stack. */
6832 ret_type = TREE_TYPE (fntype);
6833 if (arm_return_in_memory (ret_type, fntype))
6835 error ("%qE attribute not available to functions that return value on "
6836 "the stack", name);
6837 return true;
6839 return false;
6842 /* Called upon detection of the use of the cmse_nonsecure_entry attribute, this
6843 function will check whether the attribute is allowed here and will add the
6844 attribute to the function declaration tree or otherwise issue a warning. */
6846 static tree
6847 arm_handle_cmse_nonsecure_entry (tree *node, tree name,
6848 tree /* args */,
6849 int /* flags */,
6850 bool *no_add_attrs)
6852 tree fndecl;
6854 if (!use_cmse)
6856 *no_add_attrs = true;
6857 warning (OPT_Wattributes, "%qE attribute ignored without -mcmse option.",
6858 name);
6859 return NULL_TREE;
6862 /* Ignore attribute for function types. */
6863 if (TREE_CODE (*node) != FUNCTION_DECL)
6865 warning (OPT_Wattributes, "%qE attribute only applies to functions",
6866 name);
6867 *no_add_attrs = true;
6868 return NULL_TREE;
6871 fndecl = *node;
6873 /* Warn for static linkage functions. */
6874 if (!TREE_PUBLIC (fndecl))
6876 warning (OPT_Wattributes, "%qE attribute has no effect on functions "
6877 "with static linkage", name);
6878 *no_add_attrs = true;
6879 return NULL_TREE;
6882 *no_add_attrs |= cmse_func_args_or_return_in_stack (fndecl, name,
6883 TREE_TYPE (fndecl));
6884 return NULL_TREE;
6888 /* Called upon detection of the use of the cmse_nonsecure_call attribute, this
6889 function will check whether the attribute is allowed here and will add the
6890 attribute to the function type tree or otherwise issue a diagnostic. The
6891 reason we check this at declaration time is to only allow the use of the
6892 attribute with declarations of function pointers and not function
6893 declarations. This function checks NODE is of the expected type and issues
6894 diagnostics otherwise using NAME. If it is not of the expected type
6895 *NO_ADD_ATTRS will be set to true. */
6897 static tree
6898 arm_handle_cmse_nonsecure_call (tree *node, tree name,
6899 tree /* args */,
6900 int /* flags */,
6901 bool *no_add_attrs)
6903 tree decl = NULL_TREE, fntype = NULL_TREE;
6904 tree type;
6906 if (!use_cmse)
6908 *no_add_attrs = true;
6909 warning (OPT_Wattributes, "%qE attribute ignored without -mcmse option.",
6910 name);
6911 return NULL_TREE;
6914 if (TREE_CODE (*node) == VAR_DECL || TREE_CODE (*node) == TYPE_DECL)
6916 decl = *node;
6917 fntype = TREE_TYPE (decl);
6920 while (fntype != NULL_TREE && TREE_CODE (fntype) == POINTER_TYPE)
6921 fntype = TREE_TYPE (fntype);
6923 if (!decl || TREE_CODE (fntype) != FUNCTION_TYPE)
6925 warning (OPT_Wattributes, "%qE attribute only applies to base type of a "
6926 "function pointer", name);
6927 *no_add_attrs = true;
6928 return NULL_TREE;
6931 *no_add_attrs |= cmse_func_args_or_return_in_stack (NULL, name, fntype);
6933 if (*no_add_attrs)
6934 return NULL_TREE;
6936 /* Prevent trees being shared among function types with and without
6937 cmse_nonsecure_call attribute. */
6938 type = TREE_TYPE (decl);
6940 type = build_distinct_type_copy (type);
6941 TREE_TYPE (decl) = type;
6942 fntype = type;
6944 while (TREE_CODE (fntype) != FUNCTION_TYPE)
6946 type = fntype;
6947 fntype = TREE_TYPE (fntype);
6948 fntype = build_distinct_type_copy (fntype);
6949 TREE_TYPE (type) = fntype;
6952 /* Construct a type attribute and add it to the function type. */
6953 tree attrs = tree_cons (get_identifier ("cmse_nonsecure_call"), NULL_TREE,
6954 TYPE_ATTRIBUTES (fntype));
6955 TYPE_ATTRIBUTES (fntype) = attrs;
6956 return NULL_TREE;
6959 /* Return 0 if the attributes for two types are incompatible, 1 if they
6960 are compatible, and 2 if they are nearly compatible (which causes a
6961 warning to be generated). */
6962 static int
6963 arm_comp_type_attributes (const_tree type1, const_tree type2)
6965 int l1, l2, s1, s2;
6967 /* Check for mismatch of non-default calling convention. */
6968 if (TREE_CODE (type1) != FUNCTION_TYPE)
6969 return 1;
6971 /* Check for mismatched call attributes. */
6972 l1 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type1)) != NULL;
6973 l2 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type2)) != NULL;
6974 s1 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type1)) != NULL;
6975 s2 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type2)) != NULL;
6977 /* Only bother to check if an attribute is defined. */
6978 if (l1 | l2 | s1 | s2)
6980 /* If one type has an attribute, the other must have the same attribute. */
6981 if ((l1 != l2) || (s1 != s2))
6982 return 0;
6984 /* Disallow mixed attributes. */
6985 if ((l1 & s2) || (l2 & s1))
6986 return 0;
6989 /* Check for mismatched ISR attribute. */
6990 l1 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type1)) != NULL;
6991 if (! l1)
6992 l1 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type1)) != NULL;
6993 l2 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type2)) != NULL;
6994 if (! l2)
6995 l1 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type2)) != NULL;
6996 if (l1 != l2)
6997 return 0;
6999 l1 = lookup_attribute ("cmse_nonsecure_call",
7000 TYPE_ATTRIBUTES (type1)) != NULL;
7001 l2 = lookup_attribute ("cmse_nonsecure_call",
7002 TYPE_ATTRIBUTES (type2)) != NULL;
7004 if (l1 != l2)
7005 return 0;
7007 return 1;
7010 /* Assigns default attributes to newly defined type. This is used to
7011 set short_call/long_call attributes for function types of
7012 functions defined inside corresponding #pragma scopes. */
7013 static void
7014 arm_set_default_type_attributes (tree type)
7016 /* Add __attribute__ ((long_call)) to all functions, when
7017 inside #pragma long_calls or __attribute__ ((short_call)),
7018 when inside #pragma no_long_calls. */
7019 if (TREE_CODE (type) == FUNCTION_TYPE || TREE_CODE (type) == METHOD_TYPE)
7021 tree type_attr_list, attr_name;
7022 type_attr_list = TYPE_ATTRIBUTES (type);
7024 if (arm_pragma_long_calls == LONG)
7025 attr_name = get_identifier ("long_call");
7026 else if (arm_pragma_long_calls == SHORT)
7027 attr_name = get_identifier ("short_call");
7028 else
7029 return;
7031 type_attr_list = tree_cons (attr_name, NULL_TREE, type_attr_list);
7032 TYPE_ATTRIBUTES (type) = type_attr_list;
7036 /* Return true if DECL is known to be linked into section SECTION. */
7038 static bool
7039 arm_function_in_section_p (tree decl, section *section)
7041 /* We can only be certain about the prevailing symbol definition. */
7042 if (!decl_binds_to_current_def_p (decl))
7043 return false;
7045 /* If DECL_SECTION_NAME is set, assume it is trustworthy. */
7046 if (!DECL_SECTION_NAME (decl))
7048 /* Make sure that we will not create a unique section for DECL. */
7049 if (flag_function_sections || DECL_COMDAT_GROUP (decl))
7050 return false;
7053 return function_section (decl) == section;
7056 /* Return nonzero if a 32-bit "long_call" should be generated for
7057 a call from the current function to DECL. We generate a long_call
7058 if the function:
7060 a. has an __attribute__ ((long_call))
7061 or b. is within the scope of a #pragma long_calls
7062 or c. the -mlong-calls command line switch has been specified
7064 However we do not generate a long call if the function:
7066 d. has an __attribute__ ((short_call))
7067 or e. is inside the scope of a #pragma no_long_calls
7068 or f. is defined in the same section as the current function. */
7070 bool
7071 arm_is_long_call_p (tree decl)
7073 tree attrs;
7075 if (!decl)
7076 return TARGET_LONG_CALLS;
7078 attrs = TYPE_ATTRIBUTES (TREE_TYPE (decl));
7079 if (lookup_attribute ("short_call", attrs))
7080 return false;
7082 /* For "f", be conservative, and only cater for cases in which the
7083 whole of the current function is placed in the same section. */
7084 if (!flag_reorder_blocks_and_partition
7085 && TREE_CODE (decl) == FUNCTION_DECL
7086 && arm_function_in_section_p (decl, current_function_section ()))
7087 return false;
7089 if (lookup_attribute ("long_call", attrs))
7090 return true;
7092 return TARGET_LONG_CALLS;
7095 /* Return nonzero if it is ok to make a tail-call to DECL. */
7096 static bool
7097 arm_function_ok_for_sibcall (tree decl, tree exp)
7099 unsigned long func_type;
7101 if (cfun->machine->sibcall_blocked)
7102 return false;
7104 /* Never tailcall something if we are generating code for Thumb-1. */
7105 if (TARGET_THUMB1)
7106 return false;
7108 /* The PIC register is live on entry to VxWorks PLT entries, so we
7109 must make the call before restoring the PIC register. */
7110 if (TARGET_VXWORKS_RTP && flag_pic && decl && !targetm.binds_local_p (decl))
7111 return false;
7113 /* ??? Cannot tail-call to long calls with APCS frame and VFP, because IP
7114 may be used both as target of the call and base register for restoring
7115 the VFP registers. */
7116 if (TARGET_APCS_FRAME && TARGET_ARM
7117 && TARGET_HARD_FLOAT
7118 && decl && arm_is_long_call_p (decl))
7119 return false;
7121 /* If we are interworking and the function is not declared static
7122 then we can't tail-call it unless we know that it exists in this
7123 compilation unit (since it might be a Thumb routine). */
7124 if (TARGET_INTERWORK && decl && TREE_PUBLIC (decl)
7125 && !TREE_ASM_WRITTEN (decl))
7126 return false;
7128 func_type = arm_current_func_type ();
7129 /* Never tailcall from an ISR routine - it needs a special exit sequence. */
7130 if (IS_INTERRUPT (func_type))
7131 return false;
7133 /* ARMv8-M non-secure entry functions need to return with bxns which is only
7134 generated for entry functions themselves. */
7135 if (IS_CMSE_ENTRY (arm_current_func_type ()))
7136 return false;
7138 /* We do not allow ARMv8-M non-secure calls to be turned into sibling calls,
7139 this would complicate matters for later code generation. */
7140 if (TREE_CODE (exp) == CALL_EXPR)
7142 tree fntype = TREE_TYPE (TREE_TYPE (CALL_EXPR_FN (exp)));
7143 if (lookup_attribute ("cmse_nonsecure_call", TYPE_ATTRIBUTES (fntype)))
7144 return false;
7147 if (!VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
7149 /* Check that the return value locations are the same. For
7150 example that we aren't returning a value from the sibling in
7151 a VFP register but then need to transfer it to a core
7152 register. */
7153 rtx a, b;
7154 tree decl_or_type = decl;
7156 /* If it is an indirect function pointer, get the function type. */
7157 if (!decl)
7158 decl_or_type = TREE_TYPE (TREE_TYPE (CALL_EXPR_FN (exp)));
7160 a = arm_function_value (TREE_TYPE (exp), decl_or_type, false);
7161 b = arm_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
7162 cfun->decl, false);
7163 if (!rtx_equal_p (a, b))
7164 return false;
7167 /* Never tailcall if function may be called with a misaligned SP. */
7168 if (IS_STACKALIGN (func_type))
7169 return false;
7171 /* The AAPCS says that, on bare-metal, calls to unresolved weak
7172 references should become a NOP. Don't convert such calls into
7173 sibling calls. */
7174 if (TARGET_AAPCS_BASED
7175 && arm_abi == ARM_ABI_AAPCS
7176 && decl
7177 && DECL_WEAK (decl))
7178 return false;
7180 /* We cannot do a tailcall for an indirect call by descriptor if all the
7181 argument registers are used because the only register left to load the
7182 address is IP and it will already contain the static chain. */
7183 if (!decl && CALL_EXPR_BY_DESCRIPTOR (exp) && !flag_trampolines)
7185 tree fntype = TREE_TYPE (TREE_TYPE (CALL_EXPR_FN (exp)));
7186 CUMULATIVE_ARGS cum;
7187 cumulative_args_t cum_v;
7189 arm_init_cumulative_args (&cum, fntype, NULL_RTX, NULL_TREE);
7190 cum_v = pack_cumulative_args (&cum);
7192 for (tree t = TYPE_ARG_TYPES (fntype); t; t = TREE_CHAIN (t))
7194 tree type = TREE_VALUE (t);
7195 if (!VOID_TYPE_P (type))
7196 arm_function_arg_advance (cum_v, TYPE_MODE (type), type, true);
7199 if (!arm_function_arg (cum_v, SImode, integer_type_node, true))
7200 return false;
7203 /* Everything else is ok. */
7204 return true;
7208 /* Addressing mode support functions. */
7210 /* Return nonzero if X is a legitimate immediate operand when compiling
7211 for PIC. We know that X satisfies CONSTANT_P and flag_pic is true. */
7213 legitimate_pic_operand_p (rtx x)
7215 if (GET_CODE (x) == SYMBOL_REF
7216 || (GET_CODE (x) == CONST
7217 && GET_CODE (XEXP (x, 0)) == PLUS
7218 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF))
7219 return 0;
7221 return 1;
7224 /* Record that the current function needs a PIC register. Initialize
7225 cfun->machine->pic_reg if we have not already done so. */
7227 static void
7228 require_pic_register (void)
7230 /* A lot of the logic here is made obscure by the fact that this
7231 routine gets called as part of the rtx cost estimation process.
7232 We don't want those calls to affect any assumptions about the real
7233 function; and further, we can't call entry_of_function() until we
7234 start the real expansion process. */
7235 if (!crtl->uses_pic_offset_table)
7237 gcc_assert (can_create_pseudo_p ());
7238 if (arm_pic_register != INVALID_REGNUM
7239 && !(TARGET_THUMB1 && arm_pic_register > LAST_LO_REGNUM))
7241 if (!cfun->machine->pic_reg)
7242 cfun->machine->pic_reg = gen_rtx_REG (Pmode, arm_pic_register);
7244 /* Play games to avoid marking the function as needing pic
7245 if we are being called as part of the cost-estimation
7246 process. */
7247 if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
7248 crtl->uses_pic_offset_table = 1;
7250 else
7252 rtx_insn *seq, *insn;
7254 if (!cfun->machine->pic_reg)
7255 cfun->machine->pic_reg = gen_reg_rtx (Pmode);
7257 /* Play games to avoid marking the function as needing pic
7258 if we are being called as part of the cost-estimation
7259 process. */
7260 if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
7262 crtl->uses_pic_offset_table = 1;
7263 start_sequence ();
7265 if (TARGET_THUMB1 && arm_pic_register != INVALID_REGNUM
7266 && arm_pic_register > LAST_LO_REGNUM)
7267 emit_move_insn (cfun->machine->pic_reg,
7268 gen_rtx_REG (Pmode, arm_pic_register));
7269 else
7270 arm_load_pic_register (0UL);
7272 seq = get_insns ();
7273 end_sequence ();
7275 for (insn = seq; insn; insn = NEXT_INSN (insn))
7276 if (INSN_P (insn))
7277 INSN_LOCATION (insn) = prologue_location;
7279 /* We can be called during expansion of PHI nodes, where
7280 we can't yet emit instructions directly in the final
7281 insn stream. Queue the insns on the entry edge; they will
7282 be committed after everything else is expanded. */
7283 insert_insn_on_edge (seq,
7284 single_succ_edge (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
7291 legitimize_pic_address (rtx orig, machine_mode mode, rtx reg)
7293 if (GET_CODE (orig) == SYMBOL_REF
7294 || GET_CODE (orig) == LABEL_REF)
7296 if (reg == 0)
7298 gcc_assert (can_create_pseudo_p ());
7299 reg = gen_reg_rtx (Pmode);
7302 /* VxWorks does not impose a fixed gap between segments; the run-time
7303 gap can be different from the object-file gap. We therefore can't
7304 use GOTOFF unless we are absolutely sure that the symbol is in the
7305 same segment as the GOT. Unfortunately, the flexibility of linker
7306 scripts means that we can't be sure of that in general, so assume
7307 that GOTOFF is never valid on VxWorks. */
7308 /* References to weak symbols cannot be resolved locally: they
7309 may be overridden by a non-weak definition at link time. */
7310 rtx_insn *insn;
7311 if ((GET_CODE (orig) == LABEL_REF
7312 || (GET_CODE (orig) == SYMBOL_REF
7313 && SYMBOL_REF_LOCAL_P (orig)
7314 && (SYMBOL_REF_DECL (orig)
7315 ? !DECL_WEAK (SYMBOL_REF_DECL (orig)) : 1)))
7316 && NEED_GOT_RELOC
7317 && arm_pic_data_is_text_relative)
7318 insn = arm_pic_static_addr (orig, reg);
7319 else
7321 rtx pat;
7322 rtx mem;
7324 /* If this function doesn't have a pic register, create one now. */
7325 require_pic_register ();
7327 pat = gen_calculate_pic_address (reg, cfun->machine->pic_reg, orig);
7329 /* Make the MEM as close to a constant as possible. */
7330 mem = SET_SRC (pat);
7331 gcc_assert (MEM_P (mem) && !MEM_VOLATILE_P (mem));
7332 MEM_READONLY_P (mem) = 1;
7333 MEM_NOTRAP_P (mem) = 1;
7335 insn = emit_insn (pat);
7338 /* Put a REG_EQUAL note on this insn, so that it can be optimized
7339 by the loop optimizer. */
7340 set_unique_reg_note (insn, REG_EQUAL, orig);
7342 return reg;
7344 else if (GET_CODE (orig) == CONST)
7346 rtx base, offset;
7348 if (GET_CODE (XEXP (orig, 0)) == PLUS
7349 && XEXP (XEXP (orig, 0), 0) == cfun->machine->pic_reg)
7350 return orig;
7352 /* Handle the case where we have: const (UNSPEC_TLS). */
7353 if (GET_CODE (XEXP (orig, 0)) == UNSPEC
7354 && XINT (XEXP (orig, 0), 1) == UNSPEC_TLS)
7355 return orig;
7357 /* Handle the case where we have:
7358 const (plus (UNSPEC_TLS) (ADDEND)). The ADDEND must be a
7359 CONST_INT. */
7360 if (GET_CODE (XEXP (orig, 0)) == PLUS
7361 && GET_CODE (XEXP (XEXP (orig, 0), 0)) == UNSPEC
7362 && XINT (XEXP (XEXP (orig, 0), 0), 1) == UNSPEC_TLS)
7364 gcc_assert (CONST_INT_P (XEXP (XEXP (orig, 0), 1)));
7365 return orig;
7368 if (reg == 0)
7370 gcc_assert (can_create_pseudo_p ());
7371 reg = gen_reg_rtx (Pmode);
7374 gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);
7376 base = legitimize_pic_address (XEXP (XEXP (orig, 0), 0), Pmode, reg);
7377 offset = legitimize_pic_address (XEXP (XEXP (orig, 0), 1), Pmode,
7378 base == reg ? 0 : reg);
7380 if (CONST_INT_P (offset))
7382 /* The base register doesn't really matter; we only want to
7383 test the index for the appropriate mode. */
7384 if (!arm_legitimate_index_p (mode, offset, SET, 0))
7386 gcc_assert (can_create_pseudo_p ());
7387 offset = force_reg (Pmode, offset);
7390 if (CONST_INT_P (offset))
7391 return plus_constant (Pmode, base, INTVAL (offset));
7394 if (GET_MODE_SIZE (mode) > 4
7395 && (GET_MODE_CLASS (mode) == MODE_INT
7396 || TARGET_SOFT_FLOAT))
7398 emit_insn (gen_addsi3 (reg, base, offset));
7399 return reg;
7402 return gen_rtx_PLUS (Pmode, base, offset);
7405 return orig;
7409 /* Find a spare register to use during the prolog of a function. */
7411 static int
7412 thumb_find_work_register (unsigned long pushed_regs_mask)
7414 int reg;
7416 /* Check the argument registers first as these are call-used. The
7417 register allocation order means that sometimes r3 might be used
7418 but earlier argument registers might not, so check them all. */
7419 for (reg = LAST_ARG_REGNUM; reg >= 0; reg --)
7420 if (!df_regs_ever_live_p (reg))
7421 return reg;
7423 /* Before going on to check the call-saved registers we can try a couple
7424 more ways of deducing that r3 is available. The first is when we are
7425 pushing anonymous arguments onto the stack and we have fewer than 4
7426 registers' worth of fixed arguments(*). In this case r3 will be part of
7427 the variable argument list and so we can be sure that it will be
7428 pushed right at the start of the function. Hence it will be available
7429 for the rest of the prologue.
7430 (*): i.e. crtl->args.pretend_args_size is greater than 0. */
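/* Illustrative example (an assumption for exposition, not from the original
   comment): for a variadic function such as "int f (int a, ...)" only r0
   carries a fixed argument, so r1-r3 belong to the anonymous part and are
   pushed as pretend args (crtl->args.pretend_args_size == 12); r3 is then
   free for use as a work register during the rest of the prologue.  */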
7431 if (cfun->machine->uses_anonymous_args
7432 && crtl->args.pretend_args_size > 0)
7433 return LAST_ARG_REGNUM;
7435 /* The other case is when we have fixed arguments but fewer than 4 registers'
7436 worth. In this case r3 might be used in the body of the function, but
7437 it is not being used to convey an argument into the function. In theory
7438 we could just check crtl->args.size to see how many bytes are
7439 being passed in argument registers, but it seems that it is unreliable.
7440 Sometimes it will have the value 0 when in fact arguments are being
7441 passed. (See testcase execute/20021111-1.c for an example). So we also
7442 check the args_info.nregs field as well. The problem with this field is
7443 that it makes no allowances for arguments that are passed to the
7444 function but which are not used. Hence we could miss an opportunity
7445 when a function has an unused argument in r3. But it is better to be
7446 safe than to be sorry. */
7447 if (! cfun->machine->uses_anonymous_args
7448 && crtl->args.size >= 0
7449 && crtl->args.size <= (LAST_ARG_REGNUM * UNITS_PER_WORD)
7450 && (TARGET_AAPCS_BASED
7451 ? crtl->args.info.aapcs_ncrn < 4
7452 : crtl->args.info.nregs < 4))
7453 return LAST_ARG_REGNUM;
7455 /* Otherwise look for a call-saved register that is going to be pushed. */
7456 for (reg = LAST_LO_REGNUM; reg > LAST_ARG_REGNUM; reg --)
7457 if (pushed_regs_mask & (1 << reg))
7458 return reg;
7460 if (TARGET_THUMB2)
7462 /* Thumb-2 can use high regs. */
7463 for (reg = FIRST_HI_REGNUM; reg < 15; reg ++)
7464 if (pushed_regs_mask & (1 << reg))
7465 return reg;
7467 /* Something went wrong - thumb_compute_save_reg_mask()
7468 should have arranged for a suitable register to be pushed. */
7469 gcc_unreachable ();
7472 static GTY(()) int pic_labelno;
7474 /* Generate code to load the PIC register. In thumb mode SCRATCH is a
7475 low register. */
7477 void
7478 arm_load_pic_register (unsigned long saved_regs ATTRIBUTE_UNUSED)
7480 rtx l1, labelno, pic_tmp, pic_rtx, pic_reg;
7482 if (crtl->uses_pic_offset_table == 0 || TARGET_SINGLE_PIC_BASE)
7483 return;
7485 gcc_assert (flag_pic);
7487 pic_reg = cfun->machine->pic_reg;
7488 if (TARGET_VXWORKS_RTP)
7490 pic_rtx = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE);
7491 pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
7492 emit_insn (gen_pic_load_addr_32bit (pic_reg, pic_rtx));
7494 emit_insn (gen_rtx_SET (pic_reg, gen_rtx_MEM (Pmode, pic_reg)));
7496 pic_tmp = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
7497 emit_insn (gen_pic_offset_arm (pic_reg, pic_reg, pic_tmp));
7499 else
7501 /* We use an UNSPEC rather than a LABEL_REF because this label
7502 never appears in the code stream. */
7504 labelno = GEN_INT (pic_labelno++);
7505 l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
7506 l1 = gen_rtx_CONST (VOIDmode, l1);
7508 /* On the ARM the PC register contains 'dot + 8' at the time of the
7509 addition, on the Thumb it is 'dot + 4'. */
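/* Illustrative sketch of the sequence this builds (an assumption for
   exposition, not part of the original comment):
       ldr   rPIC, .Loffset      @ .Loffset holds GOT_BASE - (.LPIC0 + 8)
   .LPIC0:
       add   rPIC, pc, rPIC      @ in ARM state pc reads as .LPIC0 + 8
   Adding the pipeline offset of 8 (or 4 for Thumb) here lets the
   UNSPEC_GOTSYM_OFF constant cancel it out at run time.  */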
7510 pic_rtx = plus_constant (Pmode, l1, TARGET_ARM ? 8 : 4);
7511 pic_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, pic_rtx),
7512 UNSPEC_GOTSYM_OFF);
7513 pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
7515 if (TARGET_32BIT)
7517 emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
7519 else /* TARGET_THUMB1 */
7521 if (arm_pic_register != INVALID_REGNUM
7522 && REGNO (pic_reg) > LAST_LO_REGNUM)
7524 /* We will have pushed the pic register, so we should always be
7525 able to find a work register. */
7526 pic_tmp = gen_rtx_REG (SImode,
7527 thumb_find_work_register (saved_regs));
7528 emit_insn (gen_pic_load_addr_thumb1 (pic_tmp, pic_rtx));
7529 emit_insn (gen_movsi (pic_offset_table_rtx, pic_tmp));
7530 emit_insn (gen_pic_add_dot_plus_four (pic_reg, pic_reg, labelno));
7532 else if (arm_pic_register != INVALID_REGNUM
7533 && arm_pic_register > LAST_LO_REGNUM
7534 && REGNO (pic_reg) <= LAST_LO_REGNUM)
7536 emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
7537 emit_move_insn (gen_rtx_REG (Pmode, arm_pic_register), pic_reg);
7538 emit_use (gen_rtx_REG (Pmode, arm_pic_register));
7540 else
7541 emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
7545 /* Need to emit this whether or not we obey regdecls,
7546 since setjmp/longjmp can corrupt the liveness information. */
7547 emit_use (pic_reg);
7550 /* Generate code to load the address of a static var when flag_pic is set. */
7551 static rtx_insn *
7552 arm_pic_static_addr (rtx orig, rtx reg)
7554 rtx l1, labelno, offset_rtx;
7556 gcc_assert (flag_pic);
7558 /* We use an UNSPEC rather than a LABEL_REF because this label
7559 never appears in the code stream. */
7560 labelno = GEN_INT (pic_labelno++);
7561 l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
7562 l1 = gen_rtx_CONST (VOIDmode, l1);
7564 /* On the ARM the PC register contains 'dot + 8' at the time of the
7565 addition, on the Thumb it is 'dot + 4'. */
7566 offset_rtx = plus_constant (Pmode, l1, TARGET_ARM ? 8 : 4);
7567 offset_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, orig, offset_rtx),
7568 UNSPEC_SYMBOL_OFFSET);
7569 offset_rtx = gen_rtx_CONST (Pmode, offset_rtx);
7571 return emit_insn (gen_pic_load_addr_unified (reg, offset_rtx, labelno));
7574 /* Return nonzero if X is valid as an ARM state addressing register. */
7575 static int
7576 arm_address_register_rtx_p (rtx x, int strict_p)
7578 int regno;
7580 if (!REG_P (x))
7581 return 0;
7583 regno = REGNO (x);
7585 if (strict_p)
7586 return ARM_REGNO_OK_FOR_BASE_P (regno);
7588 return (regno <= LAST_ARM_REGNUM
7589 || regno >= FIRST_PSEUDO_REGISTER
7590 || regno == FRAME_POINTER_REGNUM
7591 || regno == ARG_POINTER_REGNUM);
7594 /* Return TRUE if this rtx is the difference of a symbol and a label,
7595 and will reduce to a PC-relative relocation in the object file.
7596 Expressions like this can be left alone when generating PIC, rather
7597 than forced through the GOT. */
7598 static int
7599 pcrel_constant_p (rtx x)
7601 if (GET_CODE (x) == MINUS)
7602 return symbol_mentioned_p (XEXP (x, 0)) && label_mentioned_p (XEXP (x, 1));
7604 return FALSE;
7607 /* Return true if X will surely end up in an index register after the
7608 next splitting pass. */
7609 static bool
7610 will_be_in_index_register (const_rtx x)
7612 /* arm.md: calculate_pic_address will split this into a register. */
7613 return GET_CODE (x) == UNSPEC && (XINT (x, 1) == UNSPEC_PIC_SYM);
7616 /* Return nonzero if X is a valid ARM state address operand. */
7618 arm_legitimate_address_outer_p (machine_mode mode, rtx x, RTX_CODE outer,
7619 int strict_p)
7621 bool use_ldrd;
7622 enum rtx_code code = GET_CODE (x);
7624 if (arm_address_register_rtx_p (x, strict_p))
7625 return 1;
7627 use_ldrd = (TARGET_LDRD
7628 && (mode == DImode || mode == DFmode));
7630 if (code == POST_INC || code == PRE_DEC
7631 || ((code == PRE_INC || code == POST_DEC)
7632 && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
7633 return arm_address_register_rtx_p (XEXP (x, 0), strict_p);
7635 else if ((code == POST_MODIFY || code == PRE_MODIFY)
7636 && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
7637 && GET_CODE (XEXP (x, 1)) == PLUS
7638 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
7640 rtx addend = XEXP (XEXP (x, 1), 1);
7642 /* Don't allow ldrd post-increment by register because it's hard
7643 to fix up invalid register choices. */
7644 if (use_ldrd
7645 && GET_CODE (x) == POST_MODIFY
7646 && REG_P (addend))
7647 return 0;
7649 return ((use_ldrd || GET_MODE_SIZE (mode) <= 4)
7650 && arm_legitimate_index_p (mode, addend, outer, strict_p));
7653 /* After reload, constants split into minipools will have addresses
7654 from a LABEL_REF. */
7655 else if (reload_completed
7656 && (code == LABEL_REF
7657 || (code == CONST
7658 && GET_CODE (XEXP (x, 0)) == PLUS
7659 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
7660 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
7661 return 1;
7663 else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))
7664 return 0;
7666 else if (code == PLUS)
7668 rtx xop0 = XEXP (x, 0);
7669 rtx xop1 = XEXP (x, 1);
7671 return ((arm_address_register_rtx_p (xop0, strict_p)
7672 && ((CONST_INT_P (xop1)
7673 && arm_legitimate_index_p (mode, xop1, outer, strict_p))
7674 || (!strict_p && will_be_in_index_register (xop1))))
7675 || (arm_address_register_rtx_p (xop1, strict_p)
7676 && arm_legitimate_index_p (mode, xop0, outer, strict_p)));
7679 #if 0
7680 /* Reload currently can't handle MINUS, so disable this for now */
7681 else if (GET_CODE (x) == MINUS)
7683 rtx xop0 = XEXP (x, 0);
7684 rtx xop1 = XEXP (x, 1);
7686 return (arm_address_register_rtx_p (xop0, strict_p)
7687 && arm_legitimate_index_p (mode, xop1, outer, strict_p));
7689 #endif
7691 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
7692 && code == SYMBOL_REF
7693 && CONSTANT_POOL_ADDRESS_P (x)
7694 && ! (flag_pic
7695 && symbol_mentioned_p (get_pool_constant (x))
7696 && ! pcrel_constant_p (get_pool_constant (x))))
7697 return 1;
7699 return 0;
7702 /* Return nonzero if X is a valid Thumb-2 address operand. */
7703 static int
7704 thumb2_legitimate_address_p (machine_mode mode, rtx x, int strict_p)
7706 bool use_ldrd;
7707 enum rtx_code code = GET_CODE (x);
7709 if (arm_address_register_rtx_p (x, strict_p))
7710 return 1;
7712 use_ldrd = (TARGET_LDRD
7713 && (mode == DImode || mode == DFmode));
7715 if (code == POST_INC || code == PRE_DEC
7716 || ((code == PRE_INC || code == POST_DEC)
7717 && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
7718 return arm_address_register_rtx_p (XEXP (x, 0), strict_p);
7720 else if ((code == POST_MODIFY || code == PRE_MODIFY)
7721 && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
7722 && GET_CODE (XEXP (x, 1)) == PLUS
7723 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
7725 /* Thumb-2 only has autoincrement by constant. */
7726 rtx addend = XEXP (XEXP (x, 1), 1);
7727 HOST_WIDE_INT offset;
7729 if (!CONST_INT_P (addend))
7730 return 0;
7732 offset = INTVAL (addend);
7733 if (GET_MODE_SIZE (mode) <= 4)
7734 return (offset > -256 && offset < 256);
7736 return (use_ldrd && offset > -1024 && offset < 1024
7737 && (offset & 3) == 0);
7740 /* After reload, constants split into minipools will have addresses
7741 from a LABEL_REF. */
7742 else if (reload_completed
7743 && (code == LABEL_REF
7744 || (code == CONST
7745 && GET_CODE (XEXP (x, 0)) == PLUS
7746 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
7747 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
7748 return 1;
7750 else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))
7751 return 0;
7753 else if (code == PLUS)
7755 rtx xop0 = XEXP (x, 0);
7756 rtx xop1 = XEXP (x, 1);
7758 return ((arm_address_register_rtx_p (xop0, strict_p)
7759 && (thumb2_legitimate_index_p (mode, xop1, strict_p)
7760 || (!strict_p && will_be_in_index_register (xop1))))
7761 || (arm_address_register_rtx_p (xop1, strict_p)
7762 && thumb2_legitimate_index_p (mode, xop0, strict_p)));
7765 /* Normally we can assign constant values to target registers without
7766 the help of a constant pool. But there are cases where we have to use a
7767 constant pool, such as:
7768 1) assigning a label to a register;
7769 2) sign-extending an 8-bit value to 32 bits and then assigning it to a register.
7771 A constant pool access of the form:
7772 (set (reg r0) (mem (symbol_ref (".LC0"))))
7773 will cause the use of the literal pool (later in function arm_reorg).
7774 So here we mark such a format as invalid; the compiler will then
7775 adjust it into:
7776 (set (reg r0) (symbol_ref (".LC0")))
7777 (set (reg r0) (mem (reg r0))).
7778 No extra register is required, and (mem (reg r0)) won't cause the use
7779 of literal pools. */
7780 else if (arm_disable_literal_pool && code == SYMBOL_REF
7781 && CONSTANT_POOL_ADDRESS_P (x))
7782 return 0;
7784 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
7785 && code == SYMBOL_REF
7786 && CONSTANT_POOL_ADDRESS_P (x)
7787 && ! (flag_pic
7788 && symbol_mentioned_p (get_pool_constant (x))
7789 && ! pcrel_constant_p (get_pool_constant (x))))
7790 return 1;
7792 return 0;
7795 /* Return nonzero if INDEX is valid for an address index operand in
7796 ARM state. */
7797 static int
7798 arm_legitimate_index_p (machine_mode mode, rtx index, RTX_CODE outer,
7799 int strict_p)
7801 HOST_WIDE_INT range;
7802 enum rtx_code code = GET_CODE (index);
7804 /* Standard coprocessor addressing modes. */
7805 if (TARGET_HARD_FLOAT
7806 && (mode == SFmode || mode == DFmode))
7807 return (code == CONST_INT && INTVAL (index) < 1024
7808 && INTVAL (index) > -1024
7809 && (INTVAL (index) & 3) == 0);
7811 /* For quad modes, we restrict the constant offset to be slightly less
7812 than what the instruction format permits. We do this because for
7813 quad mode moves, we will actually decompose them into two separate
7814 double-mode reads or writes. INDEX must therefore be a valid
7815 (double-mode) offset and so should INDEX+8. */
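/* Worked example (illustrative): an offset of 1012 is accepted because both
   1012 and 1012 + 8 == 1020 fit the double-mode range below, whereas 1016
   would push the second access to 1024 and out of range.  */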
7816 if (TARGET_NEON && VALID_NEON_QREG_MODE (mode))
7817 return (code == CONST_INT
7818 && INTVAL (index) < 1016
7819 && INTVAL (index) > -1024
7820 && (INTVAL (index) & 3) == 0);
7822 /* We have no such constraint on double mode offsets, so we permit the
7823 full range of the instruction format. */
7824 if (TARGET_NEON && VALID_NEON_DREG_MODE (mode))
7825 return (code == CONST_INT
7826 && INTVAL (index) < 1024
7827 && INTVAL (index) > -1024
7828 && (INTVAL (index) & 3) == 0);
7830 if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
7831 return (code == CONST_INT
7832 && INTVAL (index) < 1024
7833 && INTVAL (index) > -1024
7834 && (INTVAL (index) & 3) == 0);
7836 if (arm_address_register_rtx_p (index, strict_p)
7837 && (GET_MODE_SIZE (mode) <= 4))
7838 return 1;
7840 if (mode == DImode || mode == DFmode)
7842 if (code == CONST_INT)
7844 HOST_WIDE_INT val = INTVAL (index);
7846 if (TARGET_LDRD)
7847 return val > -256 && val < 256;
7848 else
7849 return val > -4096 && val < 4092;
7852 return TARGET_LDRD && arm_address_register_rtx_p (index, strict_p);
7855 if (GET_MODE_SIZE (mode) <= 4
7856 && ! (arm_arch4
7857 && (mode == HImode
7858 || mode == HFmode
7859 || (mode == QImode && outer == SIGN_EXTEND))))
7861 if (code == MULT)
7863 rtx xiop0 = XEXP (index, 0);
7864 rtx xiop1 = XEXP (index, 1);
7866 return ((arm_address_register_rtx_p (xiop0, strict_p)
7867 && power_of_two_operand (xiop1, SImode))
7868 || (arm_address_register_rtx_p (xiop1, strict_p)
7869 && power_of_two_operand (xiop0, SImode)));
7871 else if (code == LSHIFTRT || code == ASHIFTRT
7872 || code == ASHIFT || code == ROTATERT)
7874 rtx op = XEXP (index, 1);
7876 return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
7877 && CONST_INT_P (op)
7878 && INTVAL (op) > 0
7879 && INTVAL (op) <= 31);
7883 /* For ARM v4 we may be doing a sign-extend operation during the
7884 load. */
7885 if (arm_arch4)
7887 if (mode == HImode
7888 || mode == HFmode
7889 || (outer == SIGN_EXTEND && mode == QImode))
7890 range = 256;
7891 else
7892 range = 4096;
7894 else
7895 range = (mode == HImode || mode == HFmode) ? 4095 : 4096;
7897 return (code == CONST_INT
7898 && INTVAL (index) < range
7899 && INTVAL (index) > -range);
7902 /* Return true if OP is a valid index scaling factor for a Thumb-2 address
7903 index operand, i.e. 1, 2, 4 or 8. */
7904 static bool
7905 thumb2_index_mul_operand (rtx op)
7907 HOST_WIDE_INT val;
7909 if (!CONST_INT_P (op))
7910 return false;
7912 val = INTVAL (op);
7913 return (val == 1 || val == 2 || val == 4 || val == 8);
7916 /* Return nonzero if INDEX is a valid Thumb-2 address index operand. */
7917 static int
7918 thumb2_legitimate_index_p (machine_mode mode, rtx index, int strict_p)
7920 enum rtx_code code = GET_CODE (index);
7922 /* ??? Combine arm and thumb2 coprocessor addressing modes. */
7923 /* Standard coprocessor addressing modes. */
7924 if (TARGET_HARD_FLOAT
7925 && (mode == SFmode || mode == DFmode))
7926 return (code == CONST_INT && INTVAL (index) < 1024
7927 /* Thumb-2 allows only a > -256 index range for its core register
7928 load/stores. Since we allow SF/DF in core registers, we have
7929 to use the intersection between -256~4096 (core) and -1024~1024
7930 (coprocessor). */
7931 && INTVAL (index) > -256
7932 && (INTVAL (index) & 3) == 0);
7934 if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
7936 /* For DImode assume values will usually live in core regs
7937 and only allow LDRD addressing modes. */
7938 if (!TARGET_LDRD || mode != DImode)
7939 return (code == CONST_INT
7940 && INTVAL (index) < 1024
7941 && INTVAL (index) > -1024
7942 && (INTVAL (index) & 3) == 0);
7945 /* For quad modes, we restrict the constant offset to be slightly less
7946 than what the instruction format permits. We do this because for
7947 quad mode moves, we will actually decompose them into two separate
7948 double-mode reads or writes. INDEX must therefore be a valid
7949 (double-mode) offset and so should INDEX+8. */
7950 if (TARGET_NEON && VALID_NEON_QREG_MODE (mode))
7951 return (code == CONST_INT
7952 && INTVAL (index) < 1016
7953 && INTVAL (index) > -1024
7954 && (INTVAL (index) & 3) == 0);
7956 /* We have no such constraint on double mode offsets, so we permit the
7957 full range of the instruction format. */
7958 if (TARGET_NEON && VALID_NEON_DREG_MODE (mode))
7959 return (code == CONST_INT
7960 && INTVAL (index) < 1024
7961 && INTVAL (index) > -1024
7962 && (INTVAL (index) & 3) == 0);
7964 if (arm_address_register_rtx_p (index, strict_p)
7965 && (GET_MODE_SIZE (mode) <= 4))
7966 return 1;
7968 if (mode == DImode || mode == DFmode)
7970 if (code == CONST_INT)
7972 HOST_WIDE_INT val = INTVAL (index);
7973 /* ??? Can we assume ldrd for thumb2? */
7974 /* Thumb-2 ldrd only has reg+const addressing modes. */
7975 /* ldrd supports offsets of +-1020.
7976 However the ldr fallback does not. */
7977 return val > -256 && val < 256 && (val & 3) == 0;
7979 else
7980 return 0;
7983 if (code == MULT)
7985 rtx xiop0 = XEXP (index, 0);
7986 rtx xiop1 = XEXP (index, 1);
7988 return ((arm_address_register_rtx_p (xiop0, strict_p)
7989 && thumb2_index_mul_operand (xiop1))
7990 || (arm_address_register_rtx_p (xiop1, strict_p)
7991 && thumb2_index_mul_operand (xiop0)));
7993 else if (code == ASHIFT)
7995 rtx op = XEXP (index, 1);
7997 return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
7998 && CONST_INT_P (op)
7999 && INTVAL (op) > 0
8000 && INTVAL (op) <= 3);
8003 return (code == CONST_INT
8004 && INTVAL (index) < 4096
8005 && INTVAL (index) > -256);
8008 /* Return nonzero if X is valid as a 16-bit Thumb state base register. */
8009 static int
8010 thumb1_base_register_rtx_p (rtx x, machine_mode mode, int strict_p)
8012 int regno;
8014 if (!REG_P (x))
8015 return 0;
8017 regno = REGNO (x);
8019 if (strict_p)
8020 return THUMB1_REGNO_MODE_OK_FOR_BASE_P (regno, mode);
8022 return (regno <= LAST_LO_REGNUM
8023 || regno > LAST_VIRTUAL_REGISTER
8024 || regno == FRAME_POINTER_REGNUM
8025 || (GET_MODE_SIZE (mode) >= 4
8026 && (regno == STACK_POINTER_REGNUM
8027 || regno >= FIRST_PSEUDO_REGISTER
8028 || x == hard_frame_pointer_rtx
8029 || x == arg_pointer_rtx)));
8032 /* Return nonzero if x is a legitimate index register. This is the case
8033 for any base register that can access a QImode object. */
8034 inline static int
8035 thumb1_index_register_rtx_p (rtx x, int strict_p)
8037 return thumb1_base_register_rtx_p (x, QImode, strict_p);
8040 /* Return nonzero if x is a legitimate 16-bit Thumb-state address.
8042 The AP may be eliminated to either the SP or the FP, so we use the
8043 least common denominator, e.g. SImode, and offsets from 0 to 64.
8045 ??? Verify whether the above is the right approach.
8047 ??? Also, the FP may be eliminated to the SP, so perhaps that
8048 needs special handling also.
8050 ??? Look at how the mips16 port solves this problem. It probably uses
8051 better ways to solve some of these problems.
8053 Although it is not incorrect, we don't accept QImode and HImode
8054 addresses based on the frame pointer or arg pointer until the
8055 reload pass starts. This is so that eliminating such addresses
8056 into stack based ones won't produce impossible code. */
8058 thumb1_legitimate_address_p (machine_mode mode, rtx x, int strict_p)
8060 /* ??? Not clear if this is right. Experiment. */
8061 if (GET_MODE_SIZE (mode) < 4
8062 && !(reload_in_progress || reload_completed)
8063 && (reg_mentioned_p (frame_pointer_rtx, x)
8064 || reg_mentioned_p (arg_pointer_rtx, x)
8065 || reg_mentioned_p (virtual_incoming_args_rtx, x)
8066 || reg_mentioned_p (virtual_outgoing_args_rtx, x)
8067 || reg_mentioned_p (virtual_stack_dynamic_rtx, x)
8068 || reg_mentioned_p (virtual_stack_vars_rtx, x)))
8069 return 0;
8071 /* Accept any base register. SP only in SImode or larger. */
8072 else if (thumb1_base_register_rtx_p (x, mode, strict_p))
8073 return 1;
8075 /* This is PC relative data before arm_reorg runs. */
8076 else if (GET_MODE_SIZE (mode) >= 4 && CONSTANT_P (x)
8077 && GET_CODE (x) == SYMBOL_REF
8078 && CONSTANT_POOL_ADDRESS_P (x) && !flag_pic)
8079 return 1;
8081 /* This is PC relative data after arm_reorg runs. */
8082 else if ((GET_MODE_SIZE (mode) >= 4 || mode == HFmode)
8083 && reload_completed
8084 && (GET_CODE (x) == LABEL_REF
8085 || (GET_CODE (x) == CONST
8086 && GET_CODE (XEXP (x, 0)) == PLUS
8087 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
8088 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
8089 return 1;
8091 /* Post-inc indexing only supported for SImode and larger. */
8092 else if (GET_CODE (x) == POST_INC && GET_MODE_SIZE (mode) >= 4
8093 && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p))
8094 return 1;
8096 else if (GET_CODE (x) == PLUS)
8098 /* REG+REG address can be any two index registers. */
8099 /* We disallow FRAME+REG addressing since we know that FRAME
8100 will be replaced with STACK, and SP relative addressing only
8101 permits SP+OFFSET. */
8102 if (GET_MODE_SIZE (mode) <= 4
8103 && XEXP (x, 0) != frame_pointer_rtx
8104 && XEXP (x, 1) != frame_pointer_rtx
8105 && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
8106 && (thumb1_index_register_rtx_p (XEXP (x, 1), strict_p)
8107 || (!strict_p && will_be_in_index_register (XEXP (x, 1)))))
8108 return 1;
8110 /* REG+const has 5-7 bit offset for non-SP registers. */
8111 else if ((thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
8112 || XEXP (x, 0) == arg_pointer_rtx)
8113 && CONST_INT_P (XEXP (x, 1))
8114 && thumb_legitimate_offset_p (mode, INTVAL (XEXP (x, 1))))
8115 return 1;
8117 /* REG+const has 10-bit offset for SP, but only SImode and
8118 larger is supported. */
8119 /* ??? Should probably check for DI/DFmode overflow here
8120 just like GO_IF_LEGITIMATE_OFFSET does. */
8121 else if (REG_P (XEXP (x, 0))
8122 && REGNO (XEXP (x, 0)) == STACK_POINTER_REGNUM
8123 && GET_MODE_SIZE (mode) >= 4
8124 && CONST_INT_P (XEXP (x, 1))
8125 && INTVAL (XEXP (x, 1)) >= 0
8126 && INTVAL (XEXP (x, 1)) + GET_MODE_SIZE (mode) <= 1024
8127 && (INTVAL (XEXP (x, 1)) & 3) == 0)
8128 return 1;
8130 else if (REG_P (XEXP (x, 0))
8131 && (REGNO (XEXP (x, 0)) == FRAME_POINTER_REGNUM
8132 || REGNO (XEXP (x, 0)) == ARG_POINTER_REGNUM
8133 || (REGNO (XEXP (x, 0)) >= FIRST_VIRTUAL_REGISTER
8134 && REGNO (XEXP (x, 0))
8135 <= LAST_VIRTUAL_POINTER_REGISTER))
8136 && GET_MODE_SIZE (mode) >= 4
8137 && CONST_INT_P (XEXP (x, 1))
8138 && (INTVAL (XEXP (x, 1)) & 3) == 0)
8139 return 1;
8142 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
8143 && GET_MODE_SIZE (mode) == 4
8144 && GET_CODE (x) == SYMBOL_REF
8145 && CONSTANT_POOL_ADDRESS_P (x)
8146 && ! (flag_pic
8147 && symbol_mentioned_p (get_pool_constant (x))
8148 && ! pcrel_constant_p (get_pool_constant (x))))
8149 return 1;
8151 return 0;
8154 /* Return nonzero if VAL can be used as an offset in a Thumb-state address
8155 instruction of mode MODE. */
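/* Roughly (an illustrative summary of the checks below): byte accesses allow
   offsets 0..31, halfword accesses even offsets 0..62, and word or larger
   accesses word-aligned offsets with offset + size <= 128, i.e. 0..124 for
   SImode and 0..120 for DImode.  */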
8157 thumb_legitimate_offset_p (machine_mode mode, HOST_WIDE_INT val)
8159 switch (GET_MODE_SIZE (mode))
8161 case 1:
8162 return val >= 0 && val < 32;
8164 case 2:
8165 return val >= 0 && val < 64 && (val & 1) == 0;
8167 default:
8168 return (val >= 0
8169 && (val + GET_MODE_SIZE (mode)) <= 128
8170 && (val & 3) == 0);
8174 bool
8175 arm_legitimate_address_p (machine_mode mode, rtx x, bool strict_p)
8177 if (TARGET_ARM)
8178 return arm_legitimate_address_outer_p (mode, x, SET, strict_p);
8179 else if (TARGET_THUMB2)
8180 return thumb2_legitimate_address_p (mode, x, strict_p);
8181 else /* if (TARGET_THUMB1) */
8182 return thumb1_legitimate_address_p (mode, x, strict_p);
8185 /* Worker function for TARGET_PREFERRED_RELOAD_CLASS.
8187 Given an rtx X being reloaded into a reg required to be
8188 in class CLASS, return the class of reg to actually use.
8189 In general this is just CLASS, but for the Thumb core registers and
8190 immediate constants we prefer a LO_REGS class or a subset. */
8192 static reg_class_t
8193 arm_preferred_reload_class (rtx x ATTRIBUTE_UNUSED, reg_class_t rclass)
8195 if (TARGET_32BIT)
8196 return rclass;
8197 else
8199 if (rclass == GENERAL_REGS)
8200 return LO_REGS;
8201 else
8202 return rclass;
8206 /* Build the SYMBOL_REF for __tls_get_addr. */
8208 static GTY(()) rtx tls_get_addr_libfunc;
8210 static rtx
8211 get_tls_get_addr (void)
8213 if (!tls_get_addr_libfunc)
8214 tls_get_addr_libfunc = init_one_libfunc ("__tls_get_addr");
8215 return tls_get_addr_libfunc;
8219 arm_load_tp (rtx target)
8221 if (!target)
8222 target = gen_reg_rtx (SImode);
8224 if (TARGET_HARD_TP)
8226 /* Can return in any reg. */
8227 emit_insn (gen_load_tp_hard (target));
8229 else
8231 /* Always returned in r0. Immediately copy the result into a pseudo,
8232 otherwise other uses of r0 (e.g. setting up function arguments) may
8233 clobber the value. */
8235 rtx tmp;
8237 emit_insn (gen_load_tp_soft ());
8239 tmp = gen_rtx_REG (SImode, R0_REGNUM);
8240 emit_move_insn (target, tmp);
8242 return target;
8245 static rtx
8246 load_tls_operand (rtx x, rtx reg)
8248 rtx tmp;
8250 if (reg == NULL_RTX)
8251 reg = gen_reg_rtx (SImode);
8253 tmp = gen_rtx_CONST (SImode, x);
8255 emit_move_insn (reg, tmp);
8257 return reg;
8260 static rtx_insn *
8261 arm_call_tls_get_addr (rtx x, rtx reg, rtx *valuep, int reloc)
8263 rtx label, labelno, sum;
8265 gcc_assert (reloc != TLS_DESCSEQ);
8266 start_sequence ();
8268 labelno = GEN_INT (pic_labelno++);
8269 label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
8270 label = gen_rtx_CONST (VOIDmode, label);
8272 sum = gen_rtx_UNSPEC (Pmode,
8273 gen_rtvec (4, x, GEN_INT (reloc), label,
8274 GEN_INT (TARGET_ARM ? 8 : 4)),
8275 UNSPEC_TLS);
8276 reg = load_tls_operand (sum, reg);
8278 if (TARGET_ARM)
8279 emit_insn (gen_pic_add_dot_plus_eight (reg, reg, labelno));
8280 else
8281 emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
8283 *valuep = emit_library_call_value (get_tls_get_addr (), NULL_RTX,
8284 LCT_PURE, /* LCT_CONST? */
8285 Pmode, 1, reg, Pmode);
8287 rtx_insn *insns = get_insns ();
8288 end_sequence ();
8290 return insns;
8293 static rtx
8294 arm_tls_descseq_addr (rtx x, rtx reg)
8296 rtx labelno = GEN_INT (pic_labelno++);
8297 rtx label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
8298 rtx sum = gen_rtx_UNSPEC (Pmode,
8299 gen_rtvec (4, x, GEN_INT (TLS_DESCSEQ),
8300 gen_rtx_CONST (VOIDmode, label),
8301 GEN_INT (!TARGET_ARM)),
8302 UNSPEC_TLS);
8303 rtx reg0 = load_tls_operand (sum, gen_rtx_REG (SImode, R0_REGNUM));
8305 emit_insn (gen_tlscall (x, labelno));
8306 if (!reg)
8307 reg = gen_reg_rtx (SImode);
8308 else
8309 gcc_assert (REGNO (reg) != R0_REGNUM);
8311 emit_move_insn (reg, reg0);
8313 return reg;
8317 legitimize_tls_address (rtx x, rtx reg)
8319 rtx dest, tp, label, labelno, sum, ret, eqv, addend;
8320 rtx_insn *insns;
8321 unsigned int model = SYMBOL_REF_TLS_MODEL (x);
8323 switch (model)
8325 case TLS_MODEL_GLOBAL_DYNAMIC:
8326 if (TARGET_GNU2_TLS)
8328 reg = arm_tls_descseq_addr (x, reg);
8330 tp = arm_load_tp (NULL_RTX);
8332 dest = gen_rtx_PLUS (Pmode, tp, reg);
8334 else
8336 /* Original scheme */
8337 insns = arm_call_tls_get_addr (x, reg, &ret, TLS_GD32);
8338 dest = gen_reg_rtx (Pmode);
8339 emit_libcall_block (insns, dest, ret, x);
8341 return dest;
8343 case TLS_MODEL_LOCAL_DYNAMIC:
8344 if (TARGET_GNU2_TLS)
8346 reg = arm_tls_descseq_addr (x, reg);
8348 tp = arm_load_tp (NULL_RTX);
8350 dest = gen_rtx_PLUS (Pmode, tp, reg);
8352 else
8354 insns = arm_call_tls_get_addr (x, reg, &ret, TLS_LDM32);
8356 /* Attach a unique REG_EQUIV, to allow the RTL optimizers to
8357 share the LDM result with other LD model accesses. */
8358 eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const1_rtx),
8359 UNSPEC_TLS);
8360 dest = gen_reg_rtx (Pmode);
8361 emit_libcall_block (insns, dest, ret, eqv);
8363 /* Load the addend. */
8364 addend = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, x,
8365 GEN_INT (TLS_LDO32)),
8366 UNSPEC_TLS);
8367 addend = force_reg (SImode, gen_rtx_CONST (SImode, addend));
8368 dest = gen_rtx_PLUS (Pmode, dest, addend);
8370 return dest;
8372 case TLS_MODEL_INITIAL_EXEC:
8373 labelno = GEN_INT (pic_labelno++);
8374 label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
8375 label = gen_rtx_CONST (VOIDmode, label);
8376 sum = gen_rtx_UNSPEC (Pmode,
8377 gen_rtvec (4, x, GEN_INT (TLS_IE32), label,
8378 GEN_INT (TARGET_ARM ? 8 : 4)),
8379 UNSPEC_TLS);
8380 reg = load_tls_operand (sum, reg);
8382 if (TARGET_ARM)
8383 emit_insn (gen_tls_load_dot_plus_eight (reg, reg, labelno));
8384 else if (TARGET_THUMB2)
8385 emit_insn (gen_tls_load_dot_plus_four (reg, NULL, reg, labelno));
8386 else
8388 emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
8389 emit_move_insn (reg, gen_const_mem (SImode, reg));
8392 tp = arm_load_tp (NULL_RTX);
8394 return gen_rtx_PLUS (Pmode, tp, reg);
8396 case TLS_MODEL_LOCAL_EXEC:
8397 tp = arm_load_tp (NULL_RTX);
8399 reg = gen_rtx_UNSPEC (Pmode,
8400 gen_rtvec (2, x, GEN_INT (TLS_LE32)),
8401 UNSPEC_TLS);
8402 reg = force_reg (SImode, gen_rtx_CONST (SImode, reg));
8404 return gen_rtx_PLUS (Pmode, tp, reg);
8406 default:
8407 abort ();
8411 /* Try machine-dependent ways of modifying an illegitimate address
8412 to be legitimate. If we find one, return the new, valid address. */
8414 arm_legitimize_address (rtx x, rtx orig_x, machine_mode mode)
8416 if (arm_tls_referenced_p (x))
8418 rtx addend = NULL;
8420 if (GET_CODE (x) == CONST && GET_CODE (XEXP (x, 0)) == PLUS)
8422 addend = XEXP (XEXP (x, 0), 1);
8423 x = XEXP (XEXP (x, 0), 0);
8426 if (GET_CODE (x) != SYMBOL_REF)
8427 return x;
8429 gcc_assert (SYMBOL_REF_TLS_MODEL (x) != 0);
8431 x = legitimize_tls_address (x, NULL_RTX);
8433 if (addend)
8435 x = gen_rtx_PLUS (SImode, x, addend);
8436 orig_x = x;
8438 else
8439 return x;
8442 if (!TARGET_ARM)
8444 /* TODO: legitimize_address for Thumb2. */
8445 if (TARGET_THUMB2)
8446 return x;
8447 return thumb_legitimize_address (x, orig_x, mode);
8450 if (GET_CODE (x) == PLUS)
8452 rtx xop0 = XEXP (x, 0);
8453 rtx xop1 = XEXP (x, 1);
8455 if (CONSTANT_P (xop0) && !symbol_mentioned_p (xop0))
8456 xop0 = force_reg (SImode, xop0);
8458 if (CONSTANT_P (xop1) && !CONST_INT_P (xop1)
8459 && !symbol_mentioned_p (xop1))
8460 xop1 = force_reg (SImode, xop1);
8462 if (ARM_BASE_REGISTER_RTX_P (xop0)
8463 && CONST_INT_P (xop1))
8465 HOST_WIDE_INT n, low_n;
8466 rtx base_reg, val;
8467 n = INTVAL (xop1);
8469 /* VFP addressing modes actually allow greater offsets, but for
8470 now we just stick with the lowest common denominator. */
8471 if (mode == DImode || mode == DFmode)
8473 low_n = n & 0x0f;
8474 n &= ~0x0f;
8475 if (low_n > 4)
8477 n += 16;
8478 low_n -= 16;
8481 else
8483 low_n = ((mode) == TImode ? 0
8484 : n >= 0 ? (n & 0xfff) : -((-n) & 0xfff));
8485 n -= low_n;
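/* Worked example (illustrative, not from the original sources): for a DFmode
   access at xop0 + 0x108 the code above gives low_n = 8, which is greater
   than 4, so n becomes 0x110 and low_n becomes -8; the add below then
   materialises xop0 + 0x110 in base_reg and the access itself uses the
   small offset -8.  */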
8488 base_reg = gen_reg_rtx (SImode);
8489 val = force_operand (plus_constant (Pmode, xop0, n), NULL_RTX);
8490 emit_move_insn (base_reg, val);
8491 x = plus_constant (Pmode, base_reg, low_n);
8493 else if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
8494 x = gen_rtx_PLUS (SImode, xop0, xop1);
8497 /* XXX We don't allow MINUS any more -- see comment in
8498 arm_legitimate_address_outer_p (). */
8499 else if (GET_CODE (x) == MINUS)
8501 rtx xop0 = XEXP (x, 0);
8502 rtx xop1 = XEXP (x, 1);
8504 if (CONSTANT_P (xop0))
8505 xop0 = force_reg (SImode, xop0);
8507 if (CONSTANT_P (xop1) && ! symbol_mentioned_p (xop1))
8508 xop1 = force_reg (SImode, xop1);
8510 if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
8511 x = gen_rtx_MINUS (SImode, xop0, xop1);
8514 /* Make sure to take full advantage of the pre-indexed addressing mode
8515 with absolute addresses which often allows for the base register to
8516 be factorized for multiple adjacent memory references, and it might
8517 even allow for the minipool to be avoided entirely. */
8518 else if (CONST_INT_P (x) && optimize > 0)
8520 unsigned int bits;
8521 HOST_WIDE_INT mask, base, index;
8522 rtx base_reg;
8524 /* ldr and ldrb can use a 12-bit index, ldrsb and the rest can only
8525 use an 8-bit index. So let's use a 12-bit index for SImode only and
8526 hope that arm_gen_constant will enable ldrb to use more bits. */
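/* Worked example (illustrative): loading an SImode value from the absolute
   address 0x3004 splits into base = 0x3000 and index = 4, so a neighbouring
   access to 0x3008 can reuse the same base register; only when the base has
   many set bits does the code below flip to a negative index instead.  */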
8527 bits = (mode == SImode) ? 12 : 8;
8528 mask = (1 << bits) - 1;
8529 base = INTVAL (x) & ~mask;
8530 index = INTVAL (x) & mask;
8531 if (bit_count (base & 0xffffffff) > (32 - bits)/2)
8533 /* It'll most probably be more efficient to generate the base
8534 with more bits set and use a negative index instead. */
8535 base |= mask;
8536 index -= mask;
8538 base_reg = force_reg (SImode, GEN_INT (base));
8539 x = plus_constant (Pmode, base_reg, index);
8542 if (flag_pic)
8544 /* We need to find and carefully transform any SYMBOL and LABEL
8545 references; so go back to the original address expression. */
8546 rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX);
8548 if (new_x != orig_x)
8549 x = new_x;
8552 return x;
8556 /* Try machine-dependent ways of modifying an illegitimate Thumb address
8557 to be legitimate. If we find one, return the new, valid address. */
8559 thumb_legitimize_address (rtx x, rtx orig_x, machine_mode mode)
8561 if (GET_CODE (x) == PLUS
8562 && CONST_INT_P (XEXP (x, 1))
8563 && (INTVAL (XEXP (x, 1)) >= 32 * GET_MODE_SIZE (mode)
8564 || INTVAL (XEXP (x, 1)) < 0))
8566 rtx xop0 = XEXP (x, 0);
8567 rtx xop1 = XEXP (x, 1);
8568 HOST_WIDE_INT offset = INTVAL (xop1);
8570 /* Try and fold the offset into a biasing of the base register and
8571 then offsetting that. Don't do this when optimizing for space
8572 since it can cause too many CSEs. */
8573 if (optimize_size && offset >= 0
8574 && offset < 256 + 31 * GET_MODE_SIZE (mode))
8576 HOST_WIDE_INT delta;
8578 if (offset >= 256)
8579 delta = offset - (256 - GET_MODE_SIZE (mode));
8580 else if (offset < 32 * GET_MODE_SIZE (mode) + 8)
8581 delta = 31 * GET_MODE_SIZE (mode);
8582 else
8583 delta = offset & (~31 * GET_MODE_SIZE (mode));
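/* Worked example (illustrative): an SImode access at base + 260, with
   optimize_size set, takes the first branch above: delta = 260 - 252 = 8,
   so the code below adds 252 to the base once and the load itself uses the
   in-range offset 8.  */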
8585 xop0 = force_operand (plus_constant (Pmode, xop0, offset - delta),
8586 NULL_RTX);
8587 x = plus_constant (Pmode, xop0, delta);
8589 else if (offset < 0 && offset > -256)
8590 /* Small negative offsets are best done with a subtract before the
8591 dereference; forcing these into a register normally takes two
8592 instructions. */
8593 x = force_operand (x, NULL_RTX);
8594 else
8596 /* For the remaining cases, force the constant into a register. */
8597 xop1 = force_reg (SImode, xop1);
8598 x = gen_rtx_PLUS (SImode, xop0, xop1);
8601 else if (GET_CODE (x) == PLUS
8602 && s_register_operand (XEXP (x, 1), SImode)
8603 && !s_register_operand (XEXP (x, 0), SImode))
8605 rtx xop0 = force_operand (XEXP (x, 0), NULL_RTX);
8607 x = gen_rtx_PLUS (SImode, xop0, XEXP (x, 1));
8610 if (flag_pic)
8612 /* We need to find and carefully transform any SYMBOL and LABEL
8613 references; so go back to the original address expression. */
8614 rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX);
8616 if (new_x != orig_x)
8617 x = new_x;
8620 return x;
8623 /* Return TRUE if X contains any TLS symbol references. */
8625 bool
8626 arm_tls_referenced_p (rtx x)
8628 if (! TARGET_HAVE_TLS)
8629 return false;
8631 subrtx_iterator::array_type array;
8632 FOR_EACH_SUBRTX (iter, array, x, ALL)
8634 const_rtx x = *iter;
8635 if (GET_CODE (x) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (x) != 0)
8636 return true;
8638 /* Don't recurse into UNSPEC_TLS looking for TLS symbols; these are
8639 TLS offsets, not real symbol references. */
8640 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
8641 iter.skip_subrtxes ();
8643 return false;
8646 /* Implement TARGET_LEGITIMATE_CONSTANT_P.
8648 On the ARM, allow any integer (invalid ones are removed later by insn
8649 patterns), nice doubles and symbol_refs which refer to the function's
8650 constant pool XXX.
8652 When generating pic allow anything. */
8654 static bool
8655 arm_legitimate_constant_p_1 (machine_mode, rtx x)
8657 return flag_pic || !label_mentioned_p (x);
8660 static bool
8661 thumb_legitimate_constant_p (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
8663 /* Splitters for TARGET_USE_MOVT call arm_emit_movpair which creates HIGH
8664 RTXs. These RTXs must therefore be allowed for Thumb-1 so that when run
8665 for ARMv8-M Baseline or later the result is valid. */
8666 if (TARGET_HAVE_MOVT && GET_CODE (x) == HIGH)
8667 x = XEXP (x, 0);
8669 return (CONST_INT_P (x)
8670 || CONST_DOUBLE_P (x)
8671 || CONSTANT_ADDRESS_P (x)
8672 || flag_pic);
8675 static bool
8676 arm_legitimate_constant_p (machine_mode mode, rtx x)
8678 return (!arm_cannot_force_const_mem (mode, x)
8679 && (TARGET_32BIT
8680 ? arm_legitimate_constant_p_1 (mode, x)
8681 : thumb_legitimate_constant_p (mode, x)));
8684 /* Implement TARGET_CANNOT_FORCE_CONST_MEM. */
8686 static bool
8687 arm_cannot_force_const_mem (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
8689 rtx base, offset;
8691 if (ARM_OFFSETS_MUST_BE_WITHIN_SECTIONS_P)
8693 split_const (x, &base, &offset);
8694 if (GET_CODE (base) == SYMBOL_REF
8695 && !offset_within_block_p (base, INTVAL (offset)))
8696 return true;
8698 return arm_tls_referenced_p (x);
8701 #define REG_OR_SUBREG_REG(X) \
8702 (REG_P (X) \
8703 || (GET_CODE (X) == SUBREG && REG_P (SUBREG_REG (X))))
8705 #define REG_OR_SUBREG_RTX(X) \
8706 (REG_P (X) ? (X) : SUBREG_REG (X))
8708 static inline int
8709 thumb1_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
8711 machine_mode mode = GET_MODE (x);
8712 int total, words;
8714 switch (code)
8716 case ASHIFT:
8717 case ASHIFTRT:
8718 case LSHIFTRT:
8719 case ROTATERT:
8720 return (mode == SImode) ? COSTS_N_INSNS (1) : COSTS_N_INSNS (2);
8722 case PLUS:
8723 case MINUS:
8724 case COMPARE:
8725 case NEG:
8726 case NOT:
8727 return COSTS_N_INSNS (1);
8729 case MULT:
8730 if (CONST_INT_P (XEXP (x, 1)))
8732 int cycles = 0;
8733 unsigned HOST_WIDE_INT i = INTVAL (XEXP (x, 1));
8735 while (i)
8737 i >>= 2;
8738 cycles++;
8740 return COSTS_N_INSNS (2) + cycles;
8742 return COSTS_N_INSNS (1) + 16;
8744 case SET:
8745 /* A SET doesn't have a mode, so let's look at the SET_DEST to get
8746 the mode. */
8747 words = ARM_NUM_INTS (GET_MODE_SIZE (GET_MODE (SET_DEST (x))));
8748 return (COSTS_N_INSNS (words)
8749 + 4 * ((MEM_P (SET_SRC (x)))
8750 + MEM_P (SET_DEST (x))));
8752 case CONST_INT:
8753 if (outer == SET)
8755 if (UINTVAL (x) < 256
8756 /* 16-bit constant. */
8757 || (TARGET_HAVE_MOVT && !(INTVAL (x) & 0xffff0000)))
8758 return 0;
8759 if (thumb_shiftable_const (INTVAL (x)))
8760 return COSTS_N_INSNS (2);
8761 return COSTS_N_INSNS (3);
8763 else if ((outer == PLUS || outer == COMPARE)
8764 && INTVAL (x) < 256 && INTVAL (x) > -256)
8765 return 0;
8766 else if ((outer == IOR || outer == XOR || outer == AND)
8767 && INTVAL (x) < 256 && INTVAL (x) >= -256)
8768 return COSTS_N_INSNS (1);
8769 else if (outer == AND)
8771 int i;
8772 /* This duplicates the tests in the andsi3 expander. */
8773 for (i = 9; i <= 31; i++)
8774 if ((HOST_WIDE_INT_1 << i) - 1 == INTVAL (x)
8775 || (HOST_WIDE_INT_1 << i) - 1 == ~INTVAL (x))
8776 return COSTS_N_INSNS (2);
8778 else if (outer == ASHIFT || outer == ASHIFTRT
8779 || outer == LSHIFTRT)
8780 return 0;
8781 return COSTS_N_INSNS (2);
8783 case CONST:
8784 case CONST_DOUBLE:
8785 case LABEL_REF:
8786 case SYMBOL_REF:
8787 return COSTS_N_INSNS (3);
8789 case UDIV:
8790 case UMOD:
8791 case DIV:
8792 case MOD:
8793 return 100;
8795 case TRUNCATE:
8796 return 99;
8798 case AND:
8799 case XOR:
8800 case IOR:
8801 /* XXX guess. */
8802 return 8;
8804 case MEM:
8805 /* XXX another guess. */
8806 /* Memory costs quite a lot for the first word, but subsequent words
8807 load at the equivalent of a single insn each. */
8808 return (10 + 4 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
8809 + ((GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
8810 ? 4 : 0));
8812 case IF_THEN_ELSE:
8813 /* XXX a guess. */
8814 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
8815 return 14;
8816 return 2;
8818 case SIGN_EXTEND:
8819 case ZERO_EXTEND:
8820 total = mode == DImode ? COSTS_N_INSNS (1) : 0;
8821 total += thumb1_rtx_costs (XEXP (x, 0), GET_CODE (XEXP (x, 0)), code);
8823 if (mode == SImode)
8824 return total;
8826 if (arm_arch6)
8827 return total + COSTS_N_INSNS (1);
8829 /* Assume a two-shift sequence. Increase the cost slightly so
8830 we prefer actual shifts over an extend operation. */
8831 return total + 1 + COSTS_N_INSNS (2);
8833 default:
8834 return 99;
8838 /* Estimates the size cost of thumb1 instructions.
8839 For now most of the code is copied from thumb1_rtx_costs. We need more
8840 fine-grained tuning when we have more related test cases. */
8841 static inline int
8842 thumb1_size_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
8844 machine_mode mode = GET_MODE (x);
8845 int words, cost;
8847 switch (code)
8849 case ASHIFT:
8850 case ASHIFTRT:
8851 case LSHIFTRT:
8852 case ROTATERT:
8853 return (mode == SImode) ? COSTS_N_INSNS (1) : COSTS_N_INSNS (2);
8855 case PLUS:
8856 case MINUS:
8857 /* Thumb-1 needs two instructions to fulfill shiftadd/shiftsub0/shiftsub1
8858 defined by RTL expansion, especially for the expansion of
8859 multiplication. */
8860 if ((GET_CODE (XEXP (x, 0)) == MULT
8861 && power_of_two_operand (XEXP (XEXP (x,0),1), SImode))
8862 || (GET_CODE (XEXP (x, 1)) == MULT
8863 && power_of_two_operand (XEXP (XEXP (x, 1), 1), SImode)))
8864 return COSTS_N_INSNS (2);
8865 /* Fall through. */
8866 case COMPARE:
8867 case NEG:
8868 case NOT:
8869 return COSTS_N_INSNS (1);
8871 case MULT:
8872 if (CONST_INT_P (XEXP (x, 1)))
8874 /* The Thumb-1 mul instruction can't operate on a constant. We must
8875 load it into a register first. */
8876 int const_size = thumb1_size_rtx_costs (XEXP (x, 1), CONST_INT, SET);
8877 /* For targets that have a very small and high-latency multiply
8878 unit, we prefer to synthesize the mult with up to 5 instructions,
8879 giving a good balance between size and performance. */
8880 if (arm_arch6m && arm_m_profile_small_mul)
8881 return COSTS_N_INSNS (5);
8882 else
8883 return COSTS_N_INSNS (1) + const_size;
8885 return COSTS_N_INSNS (1);
8887 case SET:
8888 /* A SET doesn't have a mode, so let's look at the SET_DEST to get
8889 the mode. */
8890 words = ARM_NUM_INTS (GET_MODE_SIZE (GET_MODE (SET_DEST (x))));
8891 cost = COSTS_N_INSNS (words);
8892 if (satisfies_constraint_J (SET_SRC (x))
8893 || satisfies_constraint_K (SET_SRC (x))
8894 /* Too big an immediate for a 2-byte mov, using MOVT. */
8895 || (CONST_INT_P (SET_SRC (x))
8896 && UINTVAL (SET_SRC (x)) >= 256
8897 && TARGET_HAVE_MOVT
8898 && satisfies_constraint_j (SET_SRC (x)))
8899 /* thumb1_movdi_insn. */
8900 || ((words > 1) && MEM_P (SET_SRC (x))))
8901 cost += COSTS_N_INSNS (1);
8902 return cost;
8904 case CONST_INT:
8905 if (outer == SET)
8907 if (UINTVAL (x) < 256)
8908 return COSTS_N_INSNS (1);
8909 /* movw is 4 bytes long. */
8910 if (TARGET_HAVE_MOVT && !(INTVAL (x) & 0xffff0000))
8911 return COSTS_N_INSNS (2);
8912 /* See split "TARGET_THUMB1 && satisfies_constraint_J". */
8913 if (INTVAL (x) >= -255 && INTVAL (x) <= -1)
8914 return COSTS_N_INSNS (2);
8915 /* See split "TARGET_THUMB1 && satisfies_constraint_K". */
8916 if (thumb_shiftable_const (INTVAL (x)))
8917 return COSTS_N_INSNS (2);
8918 return COSTS_N_INSNS (3);
8920 else if ((outer == PLUS || outer == COMPARE)
8921 && INTVAL (x) < 256 && INTVAL (x) > -256)
8922 return 0;
8923 else if ((outer == IOR || outer == XOR || outer == AND)
8924 && INTVAL (x) < 256 && INTVAL (x) >= -256)
8925 return COSTS_N_INSNS (1);
8926 else if (outer == AND)
8928 int i;
8929 /* This duplicates the tests in the andsi3 expander. */
8930 for (i = 9; i <= 31; i++)
8931 if ((HOST_WIDE_INT_1 << i) - 1 == INTVAL (x)
8932 || (HOST_WIDE_INT_1 << i) - 1 == ~INTVAL (x))
8933 return COSTS_N_INSNS (2);
8935 else if (outer == ASHIFT || outer == ASHIFTRT
8936 || outer == LSHIFTRT)
8937 return 0;
8938 return COSTS_N_INSNS (2);
8940 case CONST:
8941 case CONST_DOUBLE:
8942 case LABEL_REF:
8943 case SYMBOL_REF:
8944 return COSTS_N_INSNS (3);
8946 case UDIV:
8947 case UMOD:
8948 case DIV:
8949 case MOD:
8950 return 100;
8952 case TRUNCATE:
8953 return 99;
8955 case AND:
8956 case XOR:
8957 case IOR:
8958 return COSTS_N_INSNS (1);
8960 case MEM:
8961 return (COSTS_N_INSNS (1)
8962 + COSTS_N_INSNS (1)
8963 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
8964 + ((GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
8965 ? COSTS_N_INSNS (1) : 0));
8967 case IF_THEN_ELSE:
8968 /* XXX a guess. */
8969 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
8970 return 14;
8971 return 2;
8973 case ZERO_EXTEND:
8974 /* XXX still guessing. */
8975 switch (GET_MODE (XEXP (x, 0)))
8977 case QImode:
8978 return (1 + (mode == DImode ? 4 : 0)
8979 + (MEM_P (XEXP (x, 0)) ? 10 : 0));
8981 case HImode:
8982 return (4 + (mode == DImode ? 4 : 0)
8983 + (MEM_P (XEXP (x, 0)) ? 10 : 0));
8985 case SImode:
8986 return (1 + (MEM_P (XEXP (x, 0)) ? 10 : 0));
8988 default:
8989 return 99;
8992 default:
8993 return 99;
8997 /* Helper function for arm_rtx_costs. If the operand is a valid shift
8998 operand, then return the operand that is being shifted. If the shift
8999 is not by a constant, then set SHIFT_REG to point to the operand.
9000 Return NULL if OP is not a shifter operand. */
9001 static rtx
9002 shifter_op_p (rtx op, rtx *shift_reg)
9004 enum rtx_code code = GET_CODE (op);
9006 if (code == MULT && CONST_INT_P (XEXP (op, 1))
9007 && exact_log2 (INTVAL (XEXP (op, 1))) > 0)
9008 return XEXP (op, 0);
9009 else if (code == ROTATE && CONST_INT_P (XEXP (op, 1)))
9010 return XEXP (op, 0);
9011 else if (code == ROTATERT || code == ASHIFT || code == LSHIFTRT
9012 || code == ASHIFTRT)
9014 if (!CONST_INT_P (XEXP (op, 1)))
9015 *shift_reg = XEXP (op, 1);
9016 return XEXP (op, 0);
9019 return NULL;
9022 static bool
9023 arm_unspec_cost (rtx x, enum rtx_code /* outer_code */, bool speed_p, int *cost)
9025 const struct cpu_cost_table *extra_cost = current_tune->insn_extra_cost;
9026 rtx_code code = GET_CODE (x);
9027 gcc_assert (code == UNSPEC || code == UNSPEC_VOLATILE);
9029 switch (XINT (x, 1))
9031 case UNSPEC_UNALIGNED_LOAD:
9032 /* We can only do unaligned loads into the integer unit, and we can't
9033 use LDM or LDRD. */
9034 *cost = COSTS_N_INSNS (ARM_NUM_REGS (GET_MODE (x)));
9035 if (speed_p)
9036 *cost += (ARM_NUM_REGS (GET_MODE (x)) * extra_cost->ldst.load
9037 + extra_cost->ldst.load_unaligned);
9039 #ifdef NOT_YET
9040 *cost += arm_address_cost (XEXP (XVECEXP (x, 0, 0), 0), GET_MODE (x),
9041 ADDR_SPACE_GENERIC, speed_p);
9042 #endif
9043 return true;
9045 case UNSPEC_UNALIGNED_STORE:
9046 *cost = COSTS_N_INSNS (ARM_NUM_REGS (GET_MODE (x)));
9047 if (speed_p)
9048 *cost += (ARM_NUM_REGS (GET_MODE (x)) * extra_cost->ldst.store
9049 + extra_cost->ldst.store_unaligned);
9051 *cost += rtx_cost (XVECEXP (x, 0, 0), VOIDmode, UNSPEC, 0, speed_p);
9052 #ifdef NOT_YET
9053 *cost += arm_address_cost (XEXP (XVECEXP (x, 0, 0), 0), GET_MODE (x),
9054 ADDR_SPACE_GENERIC, speed_p);
9055 #endif
9056 return true;
9058 case UNSPEC_VRINTZ:
9059 case UNSPEC_VRINTP:
9060 case UNSPEC_VRINTM:
9061 case UNSPEC_VRINTR:
9062 case UNSPEC_VRINTX:
9063 case UNSPEC_VRINTA:
9064 if (speed_p)
9065 *cost += extra_cost->fp[GET_MODE (x) == DFmode].roundint;
9067 return true;
9068 default:
9069 *cost = COSTS_N_INSNS (2);
9070 break;
9072 return true;
9075 /* Cost of a libcall. We assume one insn per argument, an amount for the
9076 call (one insn for -Os) and then one for processing the result. */
9077 #define LIBCALL_COST(N) COSTS_N_INSNS (N + (speed_p ? 18 : 2))
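/* For instance (illustrative), LIBCALL_COST (2) evaluates to
   COSTS_N_INSNS (20) when optimizing for speed and COSTS_N_INSNS (4)
   when optimizing for size.  */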
9079 #define HANDLE_NARROW_SHIFT_ARITH(OP, IDX) \
9080 do \
9082 shift_op = shifter_op_p (XEXP (x, IDX), &shift_reg); \
9083 if (shift_op != NULL \
9084 && arm_rtx_shift_left_p (XEXP (x, IDX))) \
9086 if (shift_reg) \
9088 if (speed_p) \
9089 *cost += extra_cost->alu.arith_shift_reg; \
9090 *cost += rtx_cost (shift_reg, GET_MODE (shift_reg), \
9091 ASHIFT, 1, speed_p); \
9093 else if (speed_p) \
9094 *cost += extra_cost->alu.arith_shift; \
9096 *cost += (rtx_cost (shift_op, GET_MODE (shift_op), \
9097 ASHIFT, 0, speed_p) \
9098 + rtx_cost (XEXP (x, 1 - IDX), \
9099 GET_MODE (shift_op), \
9100 OP, 1, speed_p)); \
9101 return true; \
9104 while (0);
9106 /* RTX costs. Make an estimate of the cost of executing the operation
9107 X, which is contained with an operation with code OUTER_CODE.
9108 SPEED_P indicates whether the cost desired is the performance cost,
9109 or the size cost. The estimate is stored in COST and the return
9110 value is TRUE if the cost calculation is final, or FALSE if the
9111 caller should recurse through the operands of X to add additional
9112 costs.
9114 We currently make no attempt to model the size savings of Thumb-2
9115 16-bit instructions. At the normal points in compilation where
9116 this code is called we have no measure of whether the condition
9117 flags are live or not, and thus no realistic way to determine what
9118 the size will eventually be. */
9119 static bool
9120 arm_rtx_costs_internal (rtx x, enum rtx_code code, enum rtx_code outer_code,
9121 const struct cpu_cost_table *extra_cost,
9122 int *cost, bool speed_p)
9124 machine_mode mode = GET_MODE (x);
9126 *cost = COSTS_N_INSNS (1);
9128 if (TARGET_THUMB1)
9130 if (speed_p)
9131 *cost = thumb1_rtx_costs (x, code, outer_code);
9132 else
9133 *cost = thumb1_size_rtx_costs (x, code, outer_code);
9134 return true;
9137 switch (code)
9139 case SET:
9140 *cost = 0;
9141 /* SET RTXs don't have a mode so we get it from the destination. */
9142 mode = GET_MODE (SET_DEST (x));
9144 if (REG_P (SET_SRC (x))
9145 && REG_P (SET_DEST (x)))
9147 /* Assume that most copies can be done with a single insn,
9148 unless we don't have HW FP, in which case everything
9149 larger than word mode will require two insns. */
9150 *cost = COSTS_N_INSNS (((!TARGET_HARD_FLOAT
9151 && GET_MODE_SIZE (mode) > 4)
9152 || mode == DImode)
9153 ? 2 : 1);
9154 /* Conditional register moves can be encoded
9155 in 16 bits in Thumb mode. */
9156 if (!speed_p && TARGET_THUMB && outer_code == COND_EXEC)
9157 *cost >>= 1;
9159 return true;
9162 if (CONST_INT_P (SET_SRC (x)))
9164 /* Handle CONST_INT here, since the value doesn't have a mode
9165 and we would otherwise be unable to work out the true cost. */
9166 *cost = rtx_cost (SET_DEST (x), GET_MODE (SET_DEST (x)), SET,
9167 0, speed_p);
9168 outer_code = SET;
9169 /* Slightly lower the cost of setting a core reg to a constant.
9170 This helps break up chains and allows for better scheduling. */
9171 if (REG_P (SET_DEST (x))
9172 && REGNO (SET_DEST (x)) <= LR_REGNUM)
9173 *cost -= 1;
9174 x = SET_SRC (x);
9175 /* Immediate moves with an immediate in the range [0, 255] can be
9176 encoded in 16 bits in Thumb mode. */
9177 if (!speed_p && TARGET_THUMB && GET_MODE (x) == SImode
9178 && INTVAL (x) >= 0 && INTVAL (x) <= 255)
9179 *cost >>= 1;
9180 goto const_int_cost;
9183 return false;
9185 case MEM:
9186 /* A memory access costs 1 insn if the mode is small, or the address is
9187 a single register, otherwise it costs one insn per word. */
9188 if (REG_P (XEXP (x, 0)))
9189 *cost = COSTS_N_INSNS (1);
9190 else if (flag_pic
9191 && GET_CODE (XEXP (x, 0)) == PLUS
9192 && will_be_in_index_register (XEXP (XEXP (x, 0), 1)))
9193 /* This will be split into two instructions.
9194 See arm.md:calculate_pic_address. */
9195 *cost = COSTS_N_INSNS (2);
9196 else
9197 *cost = COSTS_N_INSNS (ARM_NUM_REGS (mode));
9199 /* For speed optimizations, add the costs of the address and
9200 accessing memory. */
9201 if (speed_p)
9202 #ifdef NOT_YET
9203 *cost += (extra_cost->ldst.load
9204 + arm_address_cost (XEXP (x, 0), mode,
9205 ADDR_SPACE_GENERIC, speed_p));
9206 #else
9207 *cost += extra_cost->ldst.load;
9208 #endif
9209 return true;
9211 case PARALLEL:
9213 /* Calculations of LDM costs are complex. We assume an initial cost
9214 (ldm_1st) which will load the number of registers mentioned in
9215 ldm_regs_per_insn_1st registers; then each additional
9216 ldm_regs_per_insn_subsequent registers cost one more insn. The
9217 formula for N regs is thus:
9219 ldm_1st + COSTS_N_INSNS ((max (N - ldm_regs_per_insn_1st, 0)
9220 + ldm_regs_per_insn_subsequent - 1)
9221 / ldm_regs_per_insn_subsequent).
9223 Additional costs may also be added for addressing. A similar
9224 formula is used for STM. */
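/* Worked example (the tuning values are illustrative): with
   ldm_regs_per_insn_1st == 2 and ldm_regs_per_insn_subsequent == 2,
   a 5-register LDM costs
   ldm_1st + COSTS_N_INSNS ((MAX (5 - 2, 0) + 2 - 1) / 2)
   == ldm_1st + COSTS_N_INSNS (2).  */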
9226 bool is_ldm = load_multiple_operation (x, SImode);
9227 bool is_stm = store_multiple_operation (x, SImode);
9229 if (is_ldm || is_stm)
9231 if (speed_p)
9233 HOST_WIDE_INT nregs = XVECLEN (x, 0);
9234 HOST_WIDE_INT regs_per_insn_1st = is_ldm
9235 ? extra_cost->ldst.ldm_regs_per_insn_1st
9236 : extra_cost->ldst.stm_regs_per_insn_1st;
9237 HOST_WIDE_INT regs_per_insn_sub = is_ldm
9238 ? extra_cost->ldst.ldm_regs_per_insn_subsequent
9239 : extra_cost->ldst.stm_regs_per_insn_subsequent;
9241 *cost += regs_per_insn_1st
9242 + COSTS_N_INSNS (((MAX (nregs - regs_per_insn_1st, 0))
9243 + regs_per_insn_sub - 1)
9244 / regs_per_insn_sub);
9245 return true;
9249 return false;
9251 case DIV:
9252 case UDIV:
9253 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9254 && (mode == SFmode || !TARGET_VFP_SINGLE))
9255 *cost += COSTS_N_INSNS (speed_p
9256 ? extra_cost->fp[mode != SFmode].div : 0);
9257 else if (mode == SImode && TARGET_IDIV)
9258 *cost += COSTS_N_INSNS (speed_p ? extra_cost->mult[0].idiv : 0);
9259 else
9260 *cost = LIBCALL_COST (2);
9261 return false; /* All arguments must be in registers. */
9263 case MOD:
9264 /* MOD by a power of 2 can be expanded as:
9265 rsbs r1, r0, #0
9266 and r0, r0, #(n - 1)
9267 and r1, r1, #(n - 1)
9268 rsbpl r0, r1, #0. */
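/* For example, x % 8 (n == 8) uses the sequence above with the masks
   #(n - 1) == #7, and is costed below as three extra insns plus, when
   optimizing for speed, two logical operations and one arithmetic
   operation.  */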
9269 if (CONST_INT_P (XEXP (x, 1))
9270 && exact_log2 (INTVAL (XEXP (x, 1))) > 0
9271 && mode == SImode)
9273 *cost += COSTS_N_INSNS (3);
9275 if (speed_p)
9276 *cost += 2 * extra_cost->alu.logical
9277 + extra_cost->alu.arith;
9278 return true;
9281 /* Fall-through. */
9282 case UMOD:
9283 *cost = LIBCALL_COST (2);
9284 return false; /* All arguments must be in registers. */
9286 case ROTATE:
9287 if (mode == SImode && REG_P (XEXP (x, 1)))
9289 *cost += (COSTS_N_INSNS (1)
9290 + rtx_cost (XEXP (x, 0), mode, code, 0, speed_p));
9291 if (speed_p)
9292 *cost += extra_cost->alu.shift_reg;
9293 return true;
9295 /* Fall through */
9296 case ROTATERT:
9297 case ASHIFT:
9298 case LSHIFTRT:
9299 case ASHIFTRT:
9300 if (mode == DImode && CONST_INT_P (XEXP (x, 1)))
9302 *cost += (COSTS_N_INSNS (2)
9303 + rtx_cost (XEXP (x, 0), mode, code, 0, speed_p));
9304 if (speed_p)
9305 *cost += 2 * extra_cost->alu.shift;
9306 return true;
9308 else if (mode == SImode)
9310 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
9311 /* Slightly disparage register shifts at -Os, but not by much. */
9312 if (!CONST_INT_P (XEXP (x, 1)))
9313 *cost += (speed_p ? extra_cost->alu.shift_reg : 1
9314 + rtx_cost (XEXP (x, 1), mode, code, 1, speed_p));
9315 return true;
9317 else if (GET_MODE_CLASS (mode) == MODE_INT
9318 && GET_MODE_SIZE (mode) < 4)
9320 if (code == ASHIFT)
9322 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
9323 /* Slightly disparage register shifts at -Os, but not by
9324 much. */
9325 if (!CONST_INT_P (XEXP (x, 1)))
9326 *cost += (speed_p ? extra_cost->alu.shift_reg : 1
9327 + rtx_cost (XEXP (x, 1), mode, code, 1, speed_p));
9329 else if (code == LSHIFTRT || code == ASHIFTRT)
9331 if (arm_arch_thumb2 && CONST_INT_P (XEXP (x, 1)))
9333 /* Can use SBFX/UBFX. */
9334 if (speed_p)
9335 *cost += extra_cost->alu.bfx;
9336 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
9338 else
9340 *cost += COSTS_N_INSNS (1);
9341 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
9342 if (speed_p)
9344 if (CONST_INT_P (XEXP (x, 1)))
9345 *cost += 2 * extra_cost->alu.shift;
9346 else
9347 *cost += (extra_cost->alu.shift
9348 + extra_cost->alu.shift_reg);
9350 else
9351 /* Slightly disparage register shifts. */
9352 *cost += !CONST_INT_P (XEXP (x, 1));
9355 else /* Rotates. */
9357 *cost = COSTS_N_INSNS (2 + !CONST_INT_P (XEXP (x, 1)));
9358 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
9359 if (speed_p)
9361 if (CONST_INT_P (XEXP (x, 1)))
9362 *cost += (2 * extra_cost->alu.shift
9363 + extra_cost->alu.log_shift);
9364 else
9365 *cost += (extra_cost->alu.shift
9366 + extra_cost->alu.shift_reg
9367 + extra_cost->alu.log_shift_reg);
9370 return true;
9373 *cost = LIBCALL_COST (2);
9374 return false;
9376 case BSWAP:
9377 if (arm_arch6)
9379 if (mode == SImode)
9381 if (speed_p)
9382 *cost += extra_cost->alu.rev;
9384 return false;
9387 else
9389 /* No rev instruction available. Look at arm_legacy_rev
9390 and thumb_legacy_rev for the form of RTL used then. */
9391 if (TARGET_THUMB)
9393 *cost += COSTS_N_INSNS (9);
9395 if (speed_p)
9397 *cost += 6 * extra_cost->alu.shift;
9398 *cost += 3 * extra_cost->alu.logical;
9401 else
9403 *cost += COSTS_N_INSNS (4);
9405 if (speed_p)
9407 *cost += 2 * extra_cost->alu.shift;
9408 *cost += extra_cost->alu.arith_shift;
9409 *cost += 2 * extra_cost->alu.logical;
9412 return true;
9414 return false;
9416 case MINUS:
9417 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9418 && (mode == SFmode || !TARGET_VFP_SINGLE))
9420 if (GET_CODE (XEXP (x, 0)) == MULT
9421 || GET_CODE (XEXP (x, 1)) == MULT)
9423 rtx mul_op0, mul_op1, sub_op;
9425 if (speed_p)
9426 *cost += extra_cost->fp[mode != SFmode].mult_addsub;
9428 if (GET_CODE (XEXP (x, 0)) == MULT)
9430 mul_op0 = XEXP (XEXP (x, 0), 0);
9431 mul_op1 = XEXP (XEXP (x, 0), 1);
9432 sub_op = XEXP (x, 1);
9434 else
9436 mul_op0 = XEXP (XEXP (x, 1), 0);
9437 mul_op1 = XEXP (XEXP (x, 1), 1);
9438 sub_op = XEXP (x, 0);
9441 /* The first operand of the multiply may be optionally
9442 negated. */
9443 if (GET_CODE (mul_op0) == NEG)
9444 mul_op0 = XEXP (mul_op0, 0);
9446 *cost += (rtx_cost (mul_op0, mode, code, 0, speed_p)
9447 + rtx_cost (mul_op1, mode, code, 0, speed_p)
9448 + rtx_cost (sub_op, mode, code, 0, speed_p));
9450 return true;
9453 if (speed_p)
9454 *cost += extra_cost->fp[mode != SFmode].addsub;
9455 return false;
9458 if (mode == SImode)
9460 rtx shift_by_reg = NULL;
9461 rtx shift_op;
9462 rtx non_shift_op;
9464 shift_op = shifter_op_p (XEXP (x, 0), &shift_by_reg);
9465 if (shift_op == NULL)
9467 shift_op = shifter_op_p (XEXP (x, 1), &shift_by_reg);
9468 non_shift_op = XEXP (x, 0);
9470 else
9471 non_shift_op = XEXP (x, 1);
9473 if (shift_op != NULL)
9475 if (shift_by_reg != NULL)
9477 if (speed_p)
9478 *cost += extra_cost->alu.arith_shift_reg;
9479 *cost += rtx_cost (shift_by_reg, mode, code, 0, speed_p);
9481 else if (speed_p)
9482 *cost += extra_cost->alu.arith_shift;
9484 *cost += rtx_cost (shift_op, mode, code, 0, speed_p);
9485 *cost += rtx_cost (non_shift_op, mode, code, 0, speed_p);
9486 return true;
9489 if (arm_arch_thumb2
9490 && GET_CODE (XEXP (x, 1)) == MULT)
9492 /* MLS. */
9493 if (speed_p)
9494 *cost += extra_cost->mult[0].add;
9495 *cost += rtx_cost (XEXP (x, 0), mode, MINUS, 0, speed_p);
9496 *cost += rtx_cost (XEXP (XEXP (x, 1), 0), mode, MULT, 0, speed_p);
9497 *cost += rtx_cost (XEXP (XEXP (x, 1), 1), mode, MULT, 1, speed_p);
9498 return true;
9501 if (CONST_INT_P (XEXP (x, 0)))
9503 int insns = arm_gen_constant (MINUS, SImode, NULL_RTX,
9504 INTVAL (XEXP (x, 0)), NULL_RTX,
9505 NULL_RTX, 1, 0);
9506 *cost = COSTS_N_INSNS (insns);
9507 if (speed_p)
9508 *cost += insns * extra_cost->alu.arith;
9509 *cost += rtx_cost (XEXP (x, 1), mode, code, 1, speed_p);
9510 return true;
9512 else if (speed_p)
9513 *cost += extra_cost->alu.arith;
9515 return false;
9518 if (GET_MODE_CLASS (mode) == MODE_INT
9519 && GET_MODE_SIZE (mode) < 4)
9521 rtx shift_op, shift_reg;
9522 shift_reg = NULL;
9524 /* We check both sides of the MINUS for shifter operands since,
9525 unlike PLUS, it's not commutative. */
9527 HANDLE_NARROW_SHIFT_ARITH (MINUS, 0)
9528 HANDLE_NARROW_SHIFT_ARITH (MINUS, 1)
9530 /* Slightly disparage, as we might need to widen the result. */
9531 *cost += 1;
9532 if (speed_p)
9533 *cost += extra_cost->alu.arith;
9535 if (CONST_INT_P (XEXP (x, 0)))
9537 *cost += rtx_cost (XEXP (x, 1), mode, code, 1, speed_p);
9538 return true;
9541 return false;
9544 if (mode == DImode)
9546 *cost += COSTS_N_INSNS (1);
9548 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND)
9550 rtx op1 = XEXP (x, 1);
9552 if (speed_p)
9553 *cost += 2 * extra_cost->alu.arith;
9555 if (GET_CODE (op1) == ZERO_EXTEND)
9556 *cost += rtx_cost (XEXP (op1, 0), VOIDmode, ZERO_EXTEND,
9557 0, speed_p);
9558 else
9559 *cost += rtx_cost (op1, mode, MINUS, 1, speed_p);
9560 *cost += rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode, ZERO_EXTEND,
9561 0, speed_p);
9562 return true;
9564 else if (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
9566 if (speed_p)
9567 *cost += extra_cost->alu.arith + extra_cost->alu.arith_shift;
9568 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode, SIGN_EXTEND,
9569 0, speed_p)
9570 + rtx_cost (XEXP (x, 1), mode, MINUS, 1, speed_p));
9571 return true;
9573 else if (GET_CODE (XEXP (x, 1)) == ZERO_EXTEND
9574 || GET_CODE (XEXP (x, 1)) == SIGN_EXTEND)
9576 if (speed_p)
9577 *cost += (extra_cost->alu.arith
9578 + (GET_CODE (XEXP (x, 1)) == ZERO_EXTEND
9579 ? extra_cost->alu.arith
9580 : extra_cost->alu.arith_shift));
9581 *cost += (rtx_cost (XEXP (x, 0), mode, MINUS, 0, speed_p)
9582 + rtx_cost (XEXP (XEXP (x, 1), 0), VOIDmode,
9583 GET_CODE (XEXP (x, 1)), 0, speed_p));
9584 return true;
9587 if (speed_p)
9588 *cost += 2 * extra_cost->alu.arith;
9589 return false;
9592 /* Vector mode? */
9594 *cost = LIBCALL_COST (2);
9595 return false;
9597 case PLUS:
9598 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9599 && (mode == SFmode || !TARGET_VFP_SINGLE))
9601 if (GET_CODE (XEXP (x, 0)) == MULT)
9603 rtx mul_op0, mul_op1, add_op;
9605 if (speed_p)
9606 *cost += extra_cost->fp[mode != SFmode].mult_addsub;
9608 mul_op0 = XEXP (XEXP (x, 0), 0);
9609 mul_op1 = XEXP (XEXP (x, 0), 1);
9610 add_op = XEXP (x, 1);
9612 *cost += (rtx_cost (mul_op0, mode, code, 0, speed_p)
9613 + rtx_cost (mul_op1, mode, code, 0, speed_p)
9614 + rtx_cost (add_op, mode, code, 0, speed_p));
9616 return true;
9619 if (speed_p)
9620 *cost += extra_cost->fp[mode != SFmode].addsub;
9621 return false;
9623 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
9625 *cost = LIBCALL_COST (2);
9626 return false;
9629 /* Narrow modes can be synthesized in SImode, but the range
9630 of useful sub-operations is limited. Check for shift operations
9631 on one of the operands. Only left shifts can be used in the
9632 narrow modes. */
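/* Illustrative case: a HImode (plus (ashift (reg) (const_int 2)) (reg))
   is costed as a single add with a shifter operand via
   HANDLE_NARROW_SHIFT_ARITH above; a right shift in the same position
   does not match and falls through to the generic narrow-mode handling
   below.  */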
9633 if (GET_MODE_CLASS (mode) == MODE_INT
9634 && GET_MODE_SIZE (mode) < 4)
9636 rtx shift_op, shift_reg;
9637 shift_reg = NULL;
9639 HANDLE_NARROW_SHIFT_ARITH (PLUS, 0)
9641 if (CONST_INT_P (XEXP (x, 1)))
9643 int insns = arm_gen_constant (PLUS, SImode, NULL_RTX,
9644 INTVAL (XEXP (x, 1)), NULL_RTX,
9645 NULL_RTX, 1, 0);
9646 *cost = COSTS_N_INSNS (insns);
9647 if (speed_p)
9648 *cost += insns * extra_cost->alu.arith;
9649 /* Slightly penalize a narrow operation as the result may
9650 need widening. */
9651 *cost += 1 + rtx_cost (XEXP (x, 0), mode, PLUS, 0, speed_p);
9652 return true;
9655 /* Slightly penalize a narrow operation as the result may
9656 need widening. */
9657 *cost += 1;
9658 if (speed_p)
9659 *cost += extra_cost->alu.arith;
9661 return false;
9664 if (mode == SImode)
9666 rtx shift_op, shift_reg;
9668 if (TARGET_INT_SIMD
9669 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
9670 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
9672 /* UXTA[BH] or SXTA[BH]. */
9673 if (speed_p)
9674 *cost += extra_cost->alu.extend_arith;
9675 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode, ZERO_EXTEND,
9676 0, speed_p)
9677 + rtx_cost (XEXP (x, 1), mode, PLUS, 0, speed_p));
9678 return true;
9681 shift_reg = NULL;
9682 shift_op = shifter_op_p (XEXP (x, 0), &shift_reg);
9683 if (shift_op != NULL)
9685 if (shift_reg)
9687 if (speed_p)
9688 *cost += extra_cost->alu.arith_shift_reg;
9689 *cost += rtx_cost (shift_reg, mode, ASHIFT, 1, speed_p);
9691 else if (speed_p)
9692 *cost += extra_cost->alu.arith_shift;
9694 *cost += (rtx_cost (shift_op, mode, ASHIFT, 0, speed_p)
9695 + rtx_cost (XEXP (x, 1), mode, PLUS, 1, speed_p));
9696 return true;
9698 if (GET_CODE (XEXP (x, 0)) == MULT)
9700 rtx mul_op = XEXP (x, 0);
9702 if (TARGET_DSP_MULTIPLY
9703 && ((GET_CODE (XEXP (mul_op, 0)) == SIGN_EXTEND
9704 && (GET_CODE (XEXP (mul_op, 1)) == SIGN_EXTEND
9705 || (GET_CODE (XEXP (mul_op, 1)) == ASHIFTRT
9706 && CONST_INT_P (XEXP (XEXP (mul_op, 1), 1))
9707 && INTVAL (XEXP (XEXP (mul_op, 1), 1)) == 16)))
9708 || (GET_CODE (XEXP (mul_op, 0)) == ASHIFTRT
9709 && CONST_INT_P (XEXP (XEXP (mul_op, 0), 1))
9710 && INTVAL (XEXP (XEXP (mul_op, 0), 1)) == 16
9711 && (GET_CODE (XEXP (mul_op, 1)) == SIGN_EXTEND
9712 || (GET_CODE (XEXP (mul_op, 1)) == ASHIFTRT
9713 && CONST_INT_P (XEXP (XEXP (mul_op, 1), 1))
9714 && (INTVAL (XEXP (XEXP (mul_op, 1), 1))
9715 == 16))))))
9717 /* SMLA[BT][BT]. */
9718 if (speed_p)
9719 *cost += extra_cost->mult[0].extend_add;
9720 *cost += (rtx_cost (XEXP (XEXP (mul_op, 0), 0), mode,
9721 SIGN_EXTEND, 0, speed_p)
9722 + rtx_cost (XEXP (XEXP (mul_op, 1), 0), mode,
9723 SIGN_EXTEND, 0, speed_p)
9724 + rtx_cost (XEXP (x, 1), mode, PLUS, 1, speed_p));
9725 return true;
9728 if (speed_p)
9729 *cost += extra_cost->mult[0].add;
9730 *cost += (rtx_cost (XEXP (mul_op, 0), mode, MULT, 0, speed_p)
9731 + rtx_cost (XEXP (mul_op, 1), mode, MULT, 1, speed_p)
9732 + rtx_cost (XEXP (x, 1), mode, PLUS, 1, speed_p));
9733 return true;
9735 if (CONST_INT_P (XEXP (x, 1)))
9737 int insns = arm_gen_constant (PLUS, SImode, NULL_RTX,
9738 INTVAL (XEXP (x, 1)), NULL_RTX,
9739 NULL_RTX, 1, 0);
9740 *cost = COSTS_N_INSNS (insns);
9741 if (speed_p)
9742 *cost += insns * extra_cost->alu.arith;
9743 *cost += rtx_cost (XEXP (x, 0), mode, PLUS, 0, speed_p);
9744 return true;
9746 else if (speed_p)
9747 *cost += extra_cost->alu.arith;
9749 return false;
9752 if (mode == DImode)
9754 if (arm_arch3m
9755 && GET_CODE (XEXP (x, 0)) == MULT
9756 && ((GET_CODE (XEXP (XEXP (x, 0), 0)) == ZERO_EXTEND
9757 && GET_CODE (XEXP (XEXP (x, 0), 1)) == ZERO_EXTEND)
9758 || (GET_CODE (XEXP (XEXP (x, 0), 0)) == SIGN_EXTEND
9759 && GET_CODE (XEXP (XEXP (x, 0), 1)) == SIGN_EXTEND)))
9761 if (speed_p)
9762 *cost += extra_cost->mult[1].extend_add;
9763 *cost += (rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0), mode,
9764 ZERO_EXTEND, 0, speed_p)
9765 + rtx_cost (XEXP (XEXP (XEXP (x, 0), 1), 0), mode,
9766 ZERO_EXTEND, 0, speed_p)
9767 + rtx_cost (XEXP (x, 1), mode, PLUS, 1, speed_p));
9768 return true;
9771 *cost += COSTS_N_INSNS (1);
9773 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
9774 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
9776 if (speed_p)
9777 *cost += (extra_cost->alu.arith
9778 + (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
9779 ? extra_cost->alu.arith
9780 : extra_cost->alu.arith_shift));
9782 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode, ZERO_EXTEND,
9783 0, speed_p)
9784 + rtx_cost (XEXP (x, 1), mode, PLUS, 1, speed_p));
9785 return true;
9788 if (speed_p)
9789 *cost += 2 * extra_cost->alu.arith;
9790 return false;
9793 /* Vector mode? */
9794 *cost = LIBCALL_COST (2);
9795 return false;
9796 case IOR:
9797 if (mode == SImode && arm_arch6 && aarch_rev16_p (x))
9799 if (speed_p)
9800 *cost += extra_cost->alu.rev;
9802 return true;
9804 /* Fall through. */
9805 case AND: case XOR:
9806 if (mode == SImode)
9808 enum rtx_code subcode = GET_CODE (XEXP (x, 0));
9809 rtx op0 = XEXP (x, 0);
9810 rtx shift_op, shift_reg;
9812 if (subcode == NOT
9813 && (code == AND
9814 || (code == IOR && TARGET_THUMB2)))
9815 op0 = XEXP (op0, 0);
9817 shift_reg = NULL;
9818 shift_op = shifter_op_p (op0, &shift_reg);
9819 if (shift_op != NULL)
9821 if (shift_reg)
9823 if (speed_p)
9824 *cost += extra_cost->alu.log_shift_reg;
9825 *cost += rtx_cost (shift_reg, mode, ASHIFT, 1, speed_p);
9827 else if (speed_p)
9828 *cost += extra_cost->alu.log_shift;
9830 *cost += (rtx_cost (shift_op, mode, ASHIFT, 0, speed_p)
9831 + rtx_cost (XEXP (x, 1), mode, code, 1, speed_p));
9832 return true;
9835 if (CONST_INT_P (XEXP (x, 1)))
9837 int insns = arm_gen_constant (code, SImode, NULL_RTX,
9838 INTVAL (XEXP (x, 1)), NULL_RTX,
9839 NULL_RTX, 1, 0);
9841 *cost = COSTS_N_INSNS (insns);
9842 if (speed_p)
9843 *cost += insns * extra_cost->alu.logical;
9844 *cost += rtx_cost (op0, mode, code, 0, speed_p);
9845 return true;
9848 if (speed_p)
9849 *cost += extra_cost->alu.logical;
9850 *cost += (rtx_cost (op0, mode, code, 0, speed_p)
9851 + rtx_cost (XEXP (x, 1), mode, code, 1, speed_p));
9852 return true;
9855 if (mode == DImode)
9857 rtx op0 = XEXP (x, 0);
9858 enum rtx_code subcode = GET_CODE (op0);
9860 *cost += COSTS_N_INSNS (1);
9862 if (subcode == NOT
9863 && (code == AND
9864 || (code == IOR && TARGET_THUMB2)))
9865 op0 = XEXP (op0, 0);
9867 if (GET_CODE (op0) == ZERO_EXTEND)
9869 if (speed_p)
9870 *cost += 2 * extra_cost->alu.logical;
9872 *cost += (rtx_cost (XEXP (op0, 0), VOIDmode, ZERO_EXTEND,
9873 0, speed_p)
9874 + rtx_cost (XEXP (x, 1), mode, code, 0, speed_p));
9875 return true;
9877 else if (GET_CODE (op0) == SIGN_EXTEND)
9879 if (speed_p)
9880 *cost += extra_cost->alu.logical + extra_cost->alu.log_shift;
9882 *cost += (rtx_cost (XEXP (op0, 0), VOIDmode, SIGN_EXTEND,
9883 0, speed_p)
9884 + rtx_cost (XEXP (x, 1), mode, code, 0, speed_p));
9885 return true;
9888 if (speed_p)
9889 *cost += 2 * extra_cost->alu.logical;
9891 return true;
9893 /* Vector mode? */
9895 *cost = LIBCALL_COST (2);
9896 return false;
9898 case MULT:
9899 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9900 && (mode == SFmode || !TARGET_VFP_SINGLE))
9902 rtx op0 = XEXP (x, 0);
9904 if (GET_CODE (op0) == NEG && !flag_rounding_math)
9905 op0 = XEXP (op0, 0);
9907 if (speed_p)
9908 *cost += extra_cost->fp[mode != SFmode].mult;
9910 *cost += (rtx_cost (op0, mode, MULT, 0, speed_p)
9911 + rtx_cost (XEXP (x, 1), mode, MULT, 1, speed_p));
9912 return true;
9914 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
9916 *cost = LIBCALL_COST (2);
9917 return false;
9920 if (mode == SImode)
9922 if (TARGET_DSP_MULTIPLY
9923 && ((GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
9924 && (GET_CODE (XEXP (x, 1)) == SIGN_EXTEND
9925 || (GET_CODE (XEXP (x, 1)) == ASHIFTRT
9926 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
9927 && INTVAL (XEXP (XEXP (x, 1), 1)) == 16)))
9928 || (GET_CODE (XEXP (x, 0)) == ASHIFTRT
9929 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
9930 && INTVAL (XEXP (XEXP (x, 0), 1)) == 16
9931 && (GET_CODE (XEXP (x, 1)) == SIGN_EXTEND
9932 || (GET_CODE (XEXP (x, 1)) == ASHIFTRT
9933 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
9934 && (INTVAL (XEXP (XEXP (x, 1), 1))
9935 == 16))))))
9937 /* SMUL[TB][TB]. */
9938 if (speed_p)
9939 *cost += extra_cost->mult[0].extend;
9940 *cost += rtx_cost (XEXP (XEXP (x, 0), 0), mode,
9941 SIGN_EXTEND, 0, speed_p);
9942 *cost += rtx_cost (XEXP (XEXP (x, 1), 0), mode,
9943 SIGN_EXTEND, 1, speed_p);
9944 return true;
9946 if (speed_p)
9947 *cost += extra_cost->mult[0].simple;
9948 return false;
9951 if (mode == DImode)
9953 if (arm_arch3m
9954 && ((GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
9955 && GET_CODE (XEXP (x, 1)) == ZERO_EXTEND)
9956 || (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
9957 && GET_CODE (XEXP (x, 1)) == SIGN_EXTEND)))
9959 if (speed_p)
9960 *cost += extra_cost->mult[1].extend;
9961 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode,
9962 ZERO_EXTEND, 0, speed_p)
9963 + rtx_cost (XEXP (XEXP (x, 1), 0), VOIDmode,
9964 ZERO_EXTEND, 0, speed_p));
9965 return true;
9968 *cost = LIBCALL_COST (2);
9969 return false;
9972 /* Vector mode? */
9973 *cost = LIBCALL_COST (2);
9974 return false;
9976 case NEG:
9977 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9978 && (mode == SFmode || !TARGET_VFP_SINGLE))
9980 if (GET_CODE (XEXP (x, 0)) == MULT)
9982 /* VNMUL. */
9983 *cost = rtx_cost (XEXP (x, 0), mode, NEG, 0, speed_p);
9984 return true;
9987 if (speed_p)
9988 *cost += extra_cost->fp[mode != SFmode].neg;
9990 return false;
9992 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
9994 *cost = LIBCALL_COST (1);
9995 return false;
9998 if (mode == SImode)
10000 if (GET_CODE (XEXP (x, 0)) == ABS)
10002 *cost += COSTS_N_INSNS (1);
10003 /* Assume the non-flag-changing variant. */
10004 if (speed_p)
10005 *cost += (extra_cost->alu.log_shift
10006 + extra_cost->alu.arith_shift);
10007 *cost += rtx_cost (XEXP (XEXP (x, 0), 0), mode, ABS, 0, speed_p);
10008 return true;
10011 if (GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMPARE
10012 || GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMM_COMPARE)
10014 *cost += COSTS_N_INSNS (1);
10015 /* No extra cost for MOV imm and MVN imm. */
10016 /* If the comparison op is using the flags, there's no further
10017 cost, otherwise we need to add the cost of the comparison. */
10018 if (!(REG_P (XEXP (XEXP (x, 0), 0))
10019 && REGNO (XEXP (XEXP (x, 0), 0)) == CC_REGNUM
10020 && XEXP (XEXP (x, 0), 1) == const0_rtx))
10022 mode = GET_MODE (XEXP (XEXP (x, 0), 0));
10023 *cost += (COSTS_N_INSNS (1)
10024 + rtx_cost (XEXP (XEXP (x, 0), 0), mode, COMPARE,
10025 0, speed_p)
10026 + rtx_cost (XEXP (XEXP (x, 0), 1), mode, COMPARE,
10027 1, speed_p));
10028 if (speed_p)
10029 *cost += extra_cost->alu.arith;
10031 return true;
10034 if (speed_p)
10035 *cost += extra_cost->alu.arith;
10036 return false;
10039 if (GET_MODE_CLASS (mode) == MODE_INT
10040 && GET_MODE_SIZE (mode) < 4)
10042 /* Slightly disparage, as we might need an extend operation. */
10043 *cost += 1;
10044 if (speed_p)
10045 *cost += extra_cost->alu.arith;
10046 return false;
10049 if (mode == DImode)
10051 *cost += COSTS_N_INSNS (1);
10052 if (speed_p)
10053 *cost += 2 * extra_cost->alu.arith;
10054 return false;
10057 /* Vector mode? */
10058 *cost = LIBCALL_COST (1);
10059 return false;
10061 case NOT:
10062 if (mode == SImode)
10064 rtx shift_op;
10065 rtx shift_reg = NULL;
10067 shift_op = shifter_op_p (XEXP (x, 0), &shift_reg);
10069 if (shift_op)
10071 if (shift_reg != NULL)
10073 if (speed_p)
10074 *cost += extra_cost->alu.log_shift_reg;
10075 *cost += rtx_cost (shift_reg, mode, ASHIFT, 1, speed_p);
10077 else if (speed_p)
10078 *cost += extra_cost->alu.log_shift;
10079 *cost += rtx_cost (shift_op, mode, ASHIFT, 0, speed_p);
10080 return true;
10083 if (speed_p)
10084 *cost += extra_cost->alu.logical;
10085 return false;
10087 if (mode == DImode)
10089 *cost += COSTS_N_INSNS (1);
10090 return false;
10093 /* Vector mode? */
10095 *cost += LIBCALL_COST (1);
10096 return false;
10098 case IF_THEN_ELSE:
10100 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
10102 *cost += COSTS_N_INSNS (3);
10103 return true;
10105 int op1cost = rtx_cost (XEXP (x, 1), mode, SET, 1, speed_p);
10106 int op2cost = rtx_cost (XEXP (x, 2), mode, SET, 1, speed_p);
10108 *cost = rtx_cost (XEXP (x, 0), mode, IF_THEN_ELSE, 0, speed_p);
10109 /* Assume that if one arm of the if_then_else is a register,
10110 then it will be tied with the result and the conditional
10111 insn will be eliminated. */
10112 if (REG_P (XEXP (x, 1)))
10113 *cost += op2cost;
10114 else if (REG_P (XEXP (x, 2)))
10115 *cost += op1cost;
10116 else
10118 if (speed_p)
10120 if (extra_cost->alu.non_exec_costs_exec)
10121 *cost += op1cost + op2cost + extra_cost->alu.non_exec;
10122 else
10123 *cost += MAX (op1cost, op2cost) + extra_cost->alu.non_exec;
10125 else
10126 *cost += op1cost + op2cost;
10129 return true;
10131 case COMPARE:
10132 if (cc_register (XEXP (x, 0), VOIDmode) && XEXP (x, 1) == const0_rtx)
10133 *cost = 0;
10134 else
10136 machine_mode op0mode;
10137 /* We'll mostly assume that the cost of a compare is the cost of the
10138 LHS. However, there are some notable exceptions. */
10140 /* Floating point compares are never done as side-effects. */
10141 op0mode = GET_MODE (XEXP (x, 0));
10142 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (op0mode) == MODE_FLOAT
10143 && (op0mode == SFmode || !TARGET_VFP_SINGLE))
10145 if (speed_p)
10146 *cost += extra_cost->fp[op0mode != SFmode].compare;
10148 if (XEXP (x, 1) == CONST0_RTX (op0mode))
10150 *cost += rtx_cost (XEXP (x, 0), op0mode, code, 0, speed_p);
10151 return true;
10154 return false;
10156 else if (GET_MODE_CLASS (op0mode) == MODE_FLOAT)
10158 *cost = LIBCALL_COST (2);
10159 return false;
10162 /* DImode compares normally take two insns. */
10163 if (op0mode == DImode)
10165 *cost += COSTS_N_INSNS (1);
10166 if (speed_p)
10167 *cost += 2 * extra_cost->alu.arith;
10168 return false;
10171 if (op0mode == SImode)
10173 rtx shift_op;
10174 rtx shift_reg;
10176 if (XEXP (x, 1) == const0_rtx
10177 && !(REG_P (XEXP (x, 0))
10178 || (GET_CODE (XEXP (x, 0)) == SUBREG
10179 && REG_P (SUBREG_REG (XEXP (x, 0))))))
10181 *cost = rtx_cost (XEXP (x, 0), op0mode, COMPARE, 0, speed_p);
10183 /* Multiply operations that set the flags are often
10184 significantly more expensive. */
10185 if (speed_p
10186 && GET_CODE (XEXP (x, 0)) == MULT
10187 && !power_of_two_operand (XEXP (XEXP (x, 0), 1), mode))
10188 *cost += extra_cost->mult[0].flag_setting;
10190 if (speed_p
10191 && GET_CODE (XEXP (x, 0)) == PLUS
10192 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
10193 && !power_of_two_operand (XEXP (XEXP (XEXP (x, 0),
10194 0), 1), mode))
10195 *cost += extra_cost->mult[0].flag_setting;
10196 return true;
10199 shift_reg = NULL;
10200 shift_op = shifter_op_p (XEXP (x, 0), &shift_reg);
10201 if (shift_op != NULL)
10203 if (shift_reg != NULL)
10205 *cost += rtx_cost (shift_reg, op0mode, ASHIFT,
10206 1, speed_p);
10207 if (speed_p)
10208 *cost += extra_cost->alu.arith_shift_reg;
10210 else if (speed_p)
10211 *cost += extra_cost->alu.arith_shift;
10212 *cost += rtx_cost (shift_op, op0mode, ASHIFT, 0, speed_p);
10213 *cost += rtx_cost (XEXP (x, 1), op0mode, COMPARE, 1, speed_p);
10214 return true;
10217 if (speed_p)
10218 *cost += extra_cost->alu.arith;
10219 if (CONST_INT_P (XEXP (x, 1))
10220 && const_ok_for_op (INTVAL (XEXP (x, 1)), COMPARE))
10222 *cost += rtx_cost (XEXP (x, 0), op0mode, COMPARE, 0, speed_p);
10223 return true;
10225 return false;
10228 /* Vector mode? */
10230 *cost = LIBCALL_COST (2);
10231 return false;
10233 return true;
10235 case EQ:
10236 case NE:
10237 case LT:
10238 case LE:
10239 case GT:
10240 case GE:
10241 case LTU:
10242 case LEU:
10243 case GEU:
10244 case GTU:
10245 case ORDERED:
10246 case UNORDERED:
10247 case UNEQ:
10248 case UNLE:
10249 case UNLT:
10250 case UNGE:
10251 case UNGT:
10252 case LTGT:
10253 if (outer_code == SET)
10255 /* Is it a store-flag operation? */
10256 if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM
10257 && XEXP (x, 1) == const0_rtx)
10259 /* Thumb also needs an IT insn. */
10260 *cost += COSTS_N_INSNS (TARGET_THUMB ? 2 : 1);
10261 return true;
10263 if (XEXP (x, 1) == const0_rtx)
10265 switch (code)
10267 case LT:
10268 /* LSR Rd, Rn, #31. */
10269 if (speed_p)
10270 *cost += extra_cost->alu.shift;
10271 break;
10273 case EQ:
10274 /* RSBS T1, Rn, #0
10275 ADC Rd, Rn, T1. */
10277 case NE:
10278 /* SUBS T1, Rn, #1
10279 SBC Rd, Rn, T1. */
10280 *cost += COSTS_N_INSNS (1);
10281 break;
10283 case LE:
10284 /* RSBS T1, Rn, Rn, LSR #31
10285 ADC Rd, Rn, T1. */
10286 *cost += COSTS_N_INSNS (1);
10287 if (speed_p)
10288 *cost += extra_cost->alu.arith_shift;
10289 break;
10291 case GT:
10292 /* RSB Rd, Rn, Rn, ASR #1
10293 LSR Rd, Rd, #31. */
10294 *cost += COSTS_N_INSNS (1);
10295 if (speed_p)
10296 *cost += (extra_cost->alu.arith_shift
10297 + extra_cost->alu.shift);
10298 break;
10300 case GE:
10301 /* ASR Rd, Rn, #31
10302 ADD Rd, Rn, #1. */
10303 *cost += COSTS_N_INSNS (1);
10304 if (speed_p)
10305 *cost += extra_cost->alu.shift;
10306 break;
10308 default:
10309 /* Remaining cases are either meaningless or would take
10310 three insns anyway. */
10311 *cost = COSTS_N_INSNS (3);
10312 break;
10314 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
10315 return true;
10317 else
10319 *cost += COSTS_N_INSNS (TARGET_THUMB ? 3 : 2);
10320 if (CONST_INT_P (XEXP (x, 1))
10321 && const_ok_for_op (INTVAL (XEXP (x, 1)), COMPARE))
10323 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
10324 return true;
10327 return false;
10330 /* Not directly inside a set. If it involves the condition code
10331 register it must be the condition for a branch, cond_exec or
10332 I_T_E operation. Since the comparison is performed elsewhere
10333 this is just the control part which has no additional
10334 cost. */
10335 else if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM
10336 && XEXP (x, 1) == const0_rtx)
10338 *cost = 0;
10339 return true;
10341 return false;
10343 case ABS:
10344 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10345 && (mode == SFmode || !TARGET_VFP_SINGLE))
10347 if (speed_p)
10348 *cost += extra_cost->fp[mode != SFmode].neg;
10350 return false;
10352 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
10354 *cost = LIBCALL_COST (1);
10355 return false;
10358 if (mode == SImode)
10360 if (speed_p)
10361 *cost += extra_cost->alu.log_shift + extra_cost->alu.arith_shift;
10362 return false;
10364 /* Vector mode? */
10365 *cost = LIBCALL_COST (1);
10366 return false;
10368 case SIGN_EXTEND:
10369 if ((arm_arch4 || GET_MODE (XEXP (x, 0)) == SImode)
10370 && MEM_P (XEXP (x, 0)))
10372 if (mode == DImode)
10373 *cost += COSTS_N_INSNS (1);
10375 if (!speed_p)
10376 return true;
10378 if (GET_MODE (XEXP (x, 0)) == SImode)
10379 *cost += extra_cost->ldst.load;
10380 else
10381 *cost += extra_cost->ldst.load_sign_extend;
10383 if (mode == DImode)
10384 *cost += extra_cost->alu.shift;
10386 return true;
10389 /* Widening from less than 32-bits requires an extend operation. */
10390 if (GET_MODE (XEXP (x, 0)) != SImode && arm_arch6)
10392 /* We have SXTB/SXTH. */
10393 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
10394 if (speed_p)
10395 *cost += extra_cost->alu.extend;
10397 else if (GET_MODE (XEXP (x, 0)) != SImode)
10399 /* Needs two shifts. */
10400 *cost += COSTS_N_INSNS (1);
10401 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
10402 if (speed_p)
10403 *cost += 2 * extra_cost->alu.shift;
10406 /* Widening beyond 32-bits requires one more insn. */
10407 if (mode == DImode)
10409 *cost += COSTS_N_INSNS (1);
10410 if (speed_p)
10411 *cost += extra_cost->alu.shift;
10414 return true;
10416 case ZERO_EXTEND:
10417 if ((arm_arch4
10418 || GET_MODE (XEXP (x, 0)) == SImode
10419 || GET_MODE (XEXP (x, 0)) == QImode)
10420 && MEM_P (XEXP (x, 0)))
10422 *cost = rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
10424 if (mode == DImode)
10425 *cost += COSTS_N_INSNS (1); /* No speed penalty. */
10427 return true;
10430 /* Widening from less than 32-bits requires an extend operation. */
10431 if (GET_MODE (XEXP (x, 0)) == QImode)
10433 /* UXTB can be a shorter instruction in Thumb2, but it might
10434 be slower than the AND Rd, Rn, #255 alternative. When
10435 optimizing for speed it should never be slower to use
10436 AND, and we don't really model 16-bit vs 32-bit insns
10437 here. */
10438 if (speed_p)
10439 *cost += extra_cost->alu.logical;
10441 else if (GET_MODE (XEXP (x, 0)) != SImode && arm_arch6)
10443 /* We have UXTB/UXTH. */
10444 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
10445 if (speed_p)
10446 *cost += extra_cost->alu.extend;
10448 else if (GET_MODE (XEXP (x, 0)) != SImode)
10450 /* Needs two shifts. It's marginally preferable to use
10451 shifts rather than two BIC instructions as the second
10452 shift may merge with a subsequent insn as a shifter
10453 op. */
10454 *cost = COSTS_N_INSNS (2);
10455 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
10456 if (speed_p)
10457 *cost += 2 * extra_cost->alu.shift;
10460 /* Widening beyond 32-bits requires one more insn. */
10461 if (mode == DImode)
10463 *cost += COSTS_N_INSNS (1); /* No speed penalty. */
10466 return true;
10468 case CONST_INT:
10469 *cost = 0;
10470 /* CONST_INT has no mode, so we cannot tell for sure how many
10471 insns are really going to be needed. The best we can do is
10472 look at the value passed. If it fits in SImode, then assume
10473 that's the mode it will be used for. Otherwise assume it
10474 will be used in DImode. */
10475 if (INTVAL (x) == trunc_int_for_mode (INTVAL (x), SImode))
10476 mode = SImode;
10477 else
10478 mode = DImode;
10480 /* Avoid blowing up in arm_gen_constant (). */
10481 if (!(outer_code == PLUS
10482 || outer_code == AND
10483 || outer_code == IOR
10484 || outer_code == XOR
10485 || outer_code == MINUS))
10486 outer_code = SET;
10488 const_int_cost:
10489 if (mode == SImode)
10491 *cost += COSTS_N_INSNS (arm_gen_constant (outer_code, SImode, NULL,
10492 INTVAL (x), NULL, NULL,
10493 0, 0));
10494 /* Extra costs? */
10496 else
10498 *cost += COSTS_N_INSNS (arm_gen_constant
10499 (outer_code, SImode, NULL,
10500 trunc_int_for_mode (INTVAL (x), SImode),
10501 NULL, NULL, 0, 0)
10502 + arm_gen_constant (outer_code, SImode, NULL,
10503 INTVAL (x) >> 32, NULL,
10504 NULL, 0, 0));
10505 /* Extra costs? */
10508 return true;
10510 case CONST:
10511 case LABEL_REF:
10512 case SYMBOL_REF:
10513 if (speed_p)
10515 if (arm_arch_thumb2 && !flag_pic)
10516 *cost += COSTS_N_INSNS (1);
10517 else
10518 *cost += extra_cost->ldst.load;
10520 else
10521 *cost += COSTS_N_INSNS (1);
10523 if (flag_pic)
10525 *cost += COSTS_N_INSNS (1);
10526 if (speed_p)
10527 *cost += extra_cost->alu.arith;
10530 return true;
10532 case CONST_FIXED:
10533 *cost = COSTS_N_INSNS (4);
10534 /* Fixme. */
10535 return true;
10537 case CONST_DOUBLE:
10538 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10539 && (mode == SFmode || !TARGET_VFP_SINGLE))
10541 if (vfp3_const_double_rtx (x))
10543 if (speed_p)
10544 *cost += extra_cost->fp[mode == DFmode].fpconst;
10545 return true;
10548 if (speed_p)
10550 if (mode == DFmode)
10551 *cost += extra_cost->ldst.loadd;
10552 else
10553 *cost += extra_cost->ldst.loadf;
10555 else
10556 *cost += COSTS_N_INSNS (1 + (mode == DFmode));
10558 return true;
10560 *cost = COSTS_N_INSNS (4);
10561 return true;
10563 case CONST_VECTOR:
10564 /* Fixme. */
10565 if (TARGET_NEON
10566 && TARGET_HARD_FLOAT
10567 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
10568 && neon_immediate_valid_for_move (x, mode, NULL, NULL))
10569 *cost = COSTS_N_INSNS (1);
10570 else
10571 *cost = COSTS_N_INSNS (4);
10572 return true;
10574 case HIGH:
10575 case LO_SUM:
10576 /* When optimizing for size, we prefer constant pool entries to
10577 MOVW/MOVT pairs, so bump the cost of these slightly. */
10578 if (!speed_p)
10579 *cost += 1;
10580 return true;
10582 case CLZ:
10583 if (speed_p)
10584 *cost += extra_cost->alu.clz;
10585 return false;
10587 case SMIN:
10588 if (XEXP (x, 1) == const0_rtx)
10590 if (speed_p)
10591 *cost += extra_cost->alu.log_shift;
10592 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
10593 return true;
10595 /* Fall through. */
10596 case SMAX:
10597 case UMIN:
10598 case UMAX:
10599 *cost += COSTS_N_INSNS (1);
10600 return false;
10602 case TRUNCATE:
10603 if (GET_CODE (XEXP (x, 0)) == ASHIFTRT
10604 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
10605 && INTVAL (XEXP (XEXP (x, 0), 1)) == 32
10606 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
10607 && ((GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == SIGN_EXTEND
10608 && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == SIGN_EXTEND)
10609 || (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == ZERO_EXTEND
10610 && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1))
10611 == ZERO_EXTEND))))
10613 if (speed_p)
10614 *cost += extra_cost->mult[1].extend;
10615 *cost += (rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0), VOIDmode,
10616 ZERO_EXTEND, 0, speed_p)
10617 + rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 1), VOIDmode,
10618 ZERO_EXTEND, 0, speed_p));
10619 return true;
10621 *cost = LIBCALL_COST (1);
10622 return false;
10624 case UNSPEC_VOLATILE:
10625 case UNSPEC:
10626 return arm_unspec_cost (x, outer_code, speed_p, cost);
10628 case PC:
10629 /* Reading the PC is like reading any other register. Writing it
10630 is more expensive, but we take that into account elsewhere. */
10631 *cost = 0;
10632 return true;
10634 case ZERO_EXTRACT:
10635 /* TODO: Simple zero_extract of bottom bits using AND. */
10636 /* Fall through. */
10637 case SIGN_EXTRACT:
10638 if (arm_arch6
10639 && mode == SImode
10640 && CONST_INT_P (XEXP (x, 1))
10641 && CONST_INT_P (XEXP (x, 2)))
10643 if (speed_p)
10644 *cost += extra_cost->alu.bfx;
10645 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
10646 return true;
10648 /* Without UBFX/SBFX, need to resort to shift operations. */
10649 *cost += COSTS_N_INSNS (1);
10650 if (speed_p)
10651 *cost += 2 * extra_cost->alu.shift;
10652 *cost += rtx_cost (XEXP (x, 0), mode, ASHIFT, 0, speed_p);
10653 return true;
10655 case FLOAT_EXTEND:
10656 if (TARGET_HARD_FLOAT)
10658 if (speed_p)
10659 *cost += extra_cost->fp[mode == DFmode].widen;
10660 if (!TARGET_FPU_ARMV8
10661 && GET_MODE (XEXP (x, 0)) == HFmode)
10663 /* Pre v8, widening HF->DF is a two-step process, first
10664 widening to SFmode. */
10665 *cost += COSTS_N_INSNS (1);
10666 if (speed_p)
10667 *cost += extra_cost->fp[0].widen;
10669 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
10670 return true;
10673 *cost = LIBCALL_COST (1);
10674 return false;
10676 case FLOAT_TRUNCATE:
10677 if (TARGET_HARD_FLOAT)
10679 if (speed_p)
10680 *cost += extra_cost->fp[mode == DFmode].narrow;
10681 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
10682 return true;
10683 /* Vector modes? */
10685 *cost = LIBCALL_COST (1);
10686 return false;
10688 case FMA:
10689 if (TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_FMA)
10691 rtx op0 = XEXP (x, 0);
10692 rtx op1 = XEXP (x, 1);
10693 rtx op2 = XEXP (x, 2);
10696 /* vfms or vfnma. */
10697 if (GET_CODE (op0) == NEG)
10698 op0 = XEXP (op0, 0);
10700 /* vfnms or vfnma. */
10701 if (GET_CODE (op2) == NEG)
10702 op2 = XEXP (op2, 0);
10704 *cost += rtx_cost (op0, mode, FMA, 0, speed_p);
10705 *cost += rtx_cost (op1, mode, FMA, 1, speed_p);
10706 *cost += rtx_cost (op2, mode, FMA, 2, speed_p);
10708 if (speed_p)
10709 *cost += extra_cost->fp[mode == DFmode].fma;
10711 return true;
10714 *cost = LIBCALL_COST (3);
10715 return false;
10717 case FIX:
10718 case UNSIGNED_FIX:
10719 if (TARGET_HARD_FLOAT)
10721 /* The *combine_vcvtf2i reduces a vmul+vcvt into
10722 a vcvt fixed-point conversion. */
10723 if (code == FIX && mode == SImode
10724 && GET_CODE (XEXP (x, 0)) == FIX
10725 && GET_MODE (XEXP (x, 0)) == SFmode
10726 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
10727 && vfp3_const_double_for_bits (XEXP (XEXP (XEXP (x, 0), 0), 1))
10728 > 0)
10730 if (speed_p)
10731 *cost += extra_cost->fp[0].toint;
10733 *cost += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0), mode,
10734 code, 0, speed_p);
10735 return true;
10738 if (GET_MODE_CLASS (mode) == MODE_INT)
10740 mode = GET_MODE (XEXP (x, 0));
10741 if (speed_p)
10742 *cost += extra_cost->fp[mode == DFmode].toint;
10743 /* Strip off the 'cost' of rounding towards zero. */
10744 if (GET_CODE (XEXP (x, 0)) == FIX)
10745 *cost += rtx_cost (XEXP (XEXP (x, 0), 0), mode, code,
10746 0, speed_p);
10747 else
10748 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
10749 /* ??? Increase the cost to deal with transferring from
10750 FP -> CORE registers? */
10751 return true;
10753 else if (GET_MODE_CLASS (mode) == MODE_FLOAT
10754 && TARGET_FPU_ARMV8)
10756 if (speed_p)
10757 *cost += extra_cost->fp[mode == DFmode].roundint;
10758 return false;
10760 /* Vector costs? */
10762 *cost = LIBCALL_COST (1);
10763 return false;
10765 case FLOAT:
10766 case UNSIGNED_FLOAT:
10767 if (TARGET_HARD_FLOAT)
10769 /* ??? Increase the cost to deal with transferring from CORE
10770 -> FP registers? */
10771 if (speed_p)
10772 *cost += extra_cost->fp[mode == DFmode].fromint;
10773 return false;
10775 *cost = LIBCALL_COST (1);
10776 return false;
10778 case CALL:
10779 return true;
10781 case ASM_OPERANDS:
10783 /* Just a guess. Guess number of instructions in the asm
10784 plus one insn per input. Always a minimum of COSTS_N_INSNS (1)
10785 though (see PR60663). */
10786 int asm_length = MAX (1, asm_str_count (ASM_OPERANDS_TEMPLATE (x)));
10787 int num_operands = ASM_OPERANDS_INPUT_LENGTH (x);
10789 *cost = COSTS_N_INSNS (asm_length + num_operands);
10790 return true;
10792 default:
10793 if (mode != VOIDmode)
10794 *cost = COSTS_N_INSNS (ARM_NUM_REGS (mode));
10795 else
10796 *cost = COSTS_N_INSNS (4); /* Who knows? */
10797 return false;
10801 #undef HANDLE_NARROW_SHIFT_ARITH
10803 /* RTX costs entry point. */
10805 static bool
10806 arm_rtx_costs (rtx x, machine_mode mode ATTRIBUTE_UNUSED, int outer_code,
10807 int opno ATTRIBUTE_UNUSED, int *total, bool speed)
10809 bool result;
10810 int code = GET_CODE (x);
10811 gcc_assert (current_tune->insn_extra_cost);
10813 result = arm_rtx_costs_internal (x, (enum rtx_code) code,
10814 (enum rtx_code) outer_code,
10815 current_tune->insn_extra_cost,
10816 total, speed);
10818 if (dump_file && (dump_flags & TDF_DETAILS))
10820 print_rtl_single (dump_file, x);
10821 fprintf (dump_file, "\n%s cost: %d (%s)\n", speed ? "Hot" : "Cold",
10822 *total, result ? "final" : "partial");
10824 return result;
10827 /* All address computations that can be done are free, but rtx cost returns
10828 the same for practically all of them. So we weight the different types
10829 of address here in the order (most pref first):
10830 PRE/POST_INC/DEC, SHIFT or NON-INT sum, INT sum, REG, MEM or LABEL. */
10831 static inline int
10832 arm_arm_address_cost (rtx x)
10834 enum rtx_code c = GET_CODE (x);
10836 if (c == PRE_INC || c == PRE_DEC || c == POST_INC || c == POST_DEC)
10837 return 0;
10838 if (c == MEM || c == LABEL_REF || c == SYMBOL_REF)
10839 return 10;
10841 if (c == PLUS)
10843 if (CONST_INT_P (XEXP (x, 1)))
10844 return 2;
10846 if (ARITHMETIC_P (XEXP (x, 0)) || ARITHMETIC_P (XEXP (x, 1)))
10847 return 3;
10849 return 4;
10852 return 6;
10855 static inline int
10856 arm_thumb_address_cost (rtx x)
10858 enum rtx_code c = GET_CODE (x);
10860 if (c == REG)
10861 return 1;
10862 if (c == PLUS
10863 && REG_P (XEXP (x, 0))
10864 && CONST_INT_P (XEXP (x, 1)))
10865 return 1;
10867 return 2;
10870 static int
10871 arm_address_cost (rtx x, machine_mode mode ATTRIBUTE_UNUSED,
10872 addr_space_t as ATTRIBUTE_UNUSED, bool speed ATTRIBUTE_UNUSED)
10874 return TARGET_32BIT ? arm_arm_address_cost (x) : arm_thumb_address_cost (x);
10877 /* Adjust cost hook for XScale. */
10878 static bool
10879 xscale_sched_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep,
10880 int * cost)
10882 /* Some true dependencies can have a higher cost depending
10883 on precisely how certain input operands are used. */
10884 if (dep_type == 0
10885 && recog_memoized (insn) >= 0
10886 && recog_memoized (dep) >= 0)
10888 int shift_opnum = get_attr_shift (insn);
10889 enum attr_type attr_type = get_attr_type (dep);
10891 /* If nonzero, SHIFT_OPNUM contains the operand number of a shifted
10892 operand for INSN. If we have a shifted input operand and the
10893 instruction we depend on is another ALU instruction, then we may
10894 have to account for an additional stall. */
10895 if (shift_opnum != 0
10896 && (attr_type == TYPE_ALU_SHIFT_IMM
10897 || attr_type == TYPE_ALUS_SHIFT_IMM
10898 || attr_type == TYPE_LOGIC_SHIFT_IMM
10899 || attr_type == TYPE_LOGICS_SHIFT_IMM
10900 || attr_type == TYPE_ALU_SHIFT_REG
10901 || attr_type == TYPE_ALUS_SHIFT_REG
10902 || attr_type == TYPE_LOGIC_SHIFT_REG
10903 || attr_type == TYPE_LOGICS_SHIFT_REG
10904 || attr_type == TYPE_MOV_SHIFT
10905 || attr_type == TYPE_MVN_SHIFT
10906 || attr_type == TYPE_MOV_SHIFT_REG
10907 || attr_type == TYPE_MVN_SHIFT_REG))
10909 rtx shifted_operand;
10910 int opno;
10912 /* Get the shifted operand. */
10913 extract_insn (insn);
10914 shifted_operand = recog_data.operand[shift_opnum];
10916 /* Iterate over all the operands in DEP. If we write an operand
10917 that overlaps with SHIFTED_OPERAND, then we have to increase the
10918 cost of this dependency. */
10919 extract_insn (dep);
10920 preprocess_constraints (dep);
10921 for (opno = 0; opno < recog_data.n_operands; opno++)
10923 /* We can ignore strict inputs. */
10924 if (recog_data.operand_type[opno] == OP_IN)
10925 continue;
10927 if (reg_overlap_mentioned_p (recog_data.operand[opno],
10928 shifted_operand))
10930 *cost = 2;
10931 return false;
10936 return true;
10939 /* Adjust cost hook for Cortex A9. */
10940 static bool
10941 cortex_a9_sched_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep,
10942 int * cost)
10944 switch (dep_type)
10946 case REG_DEP_ANTI:
10947 *cost = 0;
10948 return false;
10950 case REG_DEP_TRUE:
10951 case REG_DEP_OUTPUT:
10952 if (recog_memoized (insn) >= 0
10953 && recog_memoized (dep) >= 0)
10955 if (GET_CODE (PATTERN (insn)) == SET)
10957 if (GET_MODE_CLASS
10958 (GET_MODE (SET_DEST (PATTERN (insn)))) == MODE_FLOAT
10959 || GET_MODE_CLASS
10960 (GET_MODE (SET_SRC (PATTERN (insn)))) == MODE_FLOAT)
10962 enum attr_type attr_type_insn = get_attr_type (insn);
10963 enum attr_type attr_type_dep = get_attr_type (dep);
10965 /* By default all dependencies of the form
10966 s0 = s0 <op> s1
10967 s0 = s0 <op> s2
10968 have an extra latency of 1 cycle because
10969 of the input and output dependency in this
10970 case. However this gets modeled as a true
10971 dependency and hence all these checks. */
10972 if (REG_P (SET_DEST (PATTERN (insn)))
10973 && reg_set_p (SET_DEST (PATTERN (insn)), dep))
10975 /* FMACS is a special case where the dependent
10976 instruction can be issued 3 cycles before
10977 the normal latency in case of an output
10978 dependency. */
10979 if ((attr_type_insn == TYPE_FMACS
10980 || attr_type_insn == TYPE_FMACD)
10981 && (attr_type_dep == TYPE_FMACS
10982 || attr_type_dep == TYPE_FMACD))
10984 if (dep_type == REG_DEP_OUTPUT)
10985 *cost = insn_default_latency (dep) - 3;
10986 else
10987 *cost = insn_default_latency (dep);
10988 return false;
10990 else
10992 if (dep_type == REG_DEP_OUTPUT)
10993 *cost = insn_default_latency (dep) + 1;
10994 else
10995 *cost = insn_default_latency (dep);
10997 return false;
11002 break;
11004 default:
11005 gcc_unreachable ();
11008 return true;
11011 /* Adjust cost hook for FA726TE. */
11012 static bool
11013 fa726te_sched_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep,
11014 int * cost)
11016 /* For FA726TE, a true dependency on CPSR (i.e. set cond followed by predicated)
11017 has a penalty of 3. */
11018 if (dep_type == REG_DEP_TRUE
11019 && recog_memoized (insn) >= 0
11020 && recog_memoized (dep) >= 0
11021 && get_attr_conds (dep) == CONDS_SET)
11023 /* Use of carry (e.g. 64-bit arithmetic) in ALU: 3-cycle latency. */
11024 if (get_attr_conds (insn) == CONDS_USE
11025 && get_attr_type (insn) != TYPE_BRANCH)
11027 *cost = 3;
11028 return false;
11031 if (GET_CODE (PATTERN (insn)) == COND_EXEC
11032 || get_attr_conds (insn) == CONDS_USE)
11034 *cost = 0;
11035 return false;
11039 return true;
11042 /* Implement TARGET_REGISTER_MOVE_COST.
11044 A move between VFP_REGS and GENERAL_REGS is a single insn, but
11045 it is typically more expensive than a single memory access. We set
11046 the cost to less than two memory accesses so that floating
11047 point to integer conversion does not go through memory. */
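/* Concretely, with the numbers used in these hooks: a VFP<->core
   register move is costed at 15, below the 2 * 10 = 20 cost of a
   round trip through memory on a 32-bit target (see
   arm_memory_move_cost below).  */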
11050 arm_register_move_cost (machine_mode mode ATTRIBUTE_UNUSED,
11051 reg_class_t from, reg_class_t to)
11053 if (TARGET_32BIT)
11055 if ((IS_VFP_CLASS (from) && !IS_VFP_CLASS (to))
11056 || (!IS_VFP_CLASS (from) && IS_VFP_CLASS (to)))
11057 return 15;
11058 else if ((from == IWMMXT_REGS && to != IWMMXT_REGS)
11059 || (from != IWMMXT_REGS && to == IWMMXT_REGS))
11060 return 4;
11061 else if (from == IWMMXT_GR_REGS || to == IWMMXT_GR_REGS)
11062 return 20;
11063 else
11064 return 2;
11066 else
11068 if (from == HI_REGS || to == HI_REGS)
11069 return 4;
11070 else
11071 return 2;
11075 /* Implement TARGET_MEMORY_MOVE_COST. */
11078 arm_memory_move_cost (machine_mode mode, reg_class_t rclass,
11079 bool in ATTRIBUTE_UNUSED)
11081 if (TARGET_32BIT)
11082 return 10;
11083 else
11085 if (GET_MODE_SIZE (mode) < 4)
11086 return 8;
11087 else
11088 return ((2 * GET_MODE_SIZE (mode)) * (rclass == LO_REGS ? 1 : 2));
11092 /* Vectorizer cost model implementation. */
11094 /* Implement targetm.vectorize.builtin_vectorization_cost. */
11095 static int
11096 arm_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
11097 tree vectype,
11098 int misalign ATTRIBUTE_UNUSED)
11100 unsigned elements;
11102 switch (type_of_cost)
11104 case scalar_stmt:
11105 return current_tune->vec_costs->scalar_stmt_cost;
11107 case scalar_load:
11108 return current_tune->vec_costs->scalar_load_cost;
11110 case scalar_store:
11111 return current_tune->vec_costs->scalar_store_cost;
11113 case vector_stmt:
11114 return current_tune->vec_costs->vec_stmt_cost;
11116 case vector_load:
11117 return current_tune->vec_costs->vec_align_load_cost;
11119 case vector_store:
11120 return current_tune->vec_costs->vec_store_cost;
11122 case vec_to_scalar:
11123 return current_tune->vec_costs->vec_to_scalar_cost;
11125 case scalar_to_vec:
11126 return current_tune->vec_costs->scalar_to_vec_cost;
11128 case unaligned_load:
11129 return current_tune->vec_costs->vec_unalign_load_cost;
11131 case unaligned_store:
11132 return current_tune->vec_costs->vec_unalign_store_cost;
11134 case cond_branch_taken:
11135 return current_tune->vec_costs->cond_taken_branch_cost;
11137 case cond_branch_not_taken:
11138 return current_tune->vec_costs->cond_not_taken_branch_cost;
11140 case vec_perm:
11141 case vec_promote_demote:
11142 return current_tune->vec_costs->vec_stmt_cost;
11144 case vec_construct:
11145 elements = TYPE_VECTOR_SUBPARTS (vectype);
11146 return elements / 2 + 1;
11148 default:
11149 gcc_unreachable ();
11153 /* Implement targetm.vectorize.add_stmt_cost. */
11155 static unsigned
11156 arm_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
11157 struct _stmt_vec_info *stmt_info, int misalign,
11158 enum vect_cost_model_location where)
11160 unsigned *cost = (unsigned *) data;
11161 unsigned retval = 0;
11163 if (flag_vect_cost_model)
11165 tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
11166 int stmt_cost = arm_builtin_vectorization_cost (kind, vectype, misalign);
11168 /* Statements in an inner loop relative to the loop being
11169 vectorized are weighted more heavily. The value here is
11170 arbitrary and could potentially be improved with analysis. */
11171 if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info))
11172 count *= 50; /* FIXME. */
11174 retval = (unsigned) (count * stmt_cost);
11175 cost[where] += retval;
11178 return retval;
11181 /* Return true if and only if this insn can dual-issue only as older. */
11182 static bool
11183 cortexa7_older_only (rtx_insn *insn)
11185 if (recog_memoized (insn) < 0)
11186 return false;
11188 switch (get_attr_type (insn))
11190 case TYPE_ALU_DSP_REG:
11191 case TYPE_ALU_SREG:
11192 case TYPE_ALUS_SREG:
11193 case TYPE_LOGIC_REG:
11194 case TYPE_LOGICS_REG:
11195 case TYPE_ADC_REG:
11196 case TYPE_ADCS_REG:
11197 case TYPE_ADR:
11198 case TYPE_BFM:
11199 case TYPE_REV:
11200 case TYPE_MVN_REG:
11201 case TYPE_SHIFT_IMM:
11202 case TYPE_SHIFT_REG:
11203 case TYPE_LOAD_BYTE:
11204 case TYPE_LOAD1:
11205 case TYPE_STORE1:
11206 case TYPE_FFARITHS:
11207 case TYPE_FADDS:
11208 case TYPE_FFARITHD:
11209 case TYPE_FADDD:
11210 case TYPE_FMOV:
11211 case TYPE_F_CVT:
11212 case TYPE_FCMPS:
11213 case TYPE_FCMPD:
11214 case TYPE_FCONSTS:
11215 case TYPE_FCONSTD:
11216 case TYPE_FMULS:
11217 case TYPE_FMACS:
11218 case TYPE_FMULD:
11219 case TYPE_FMACD:
11220 case TYPE_FDIVS:
11221 case TYPE_FDIVD:
11222 case TYPE_F_MRC:
11223 case TYPE_F_MRRC:
11224 case TYPE_F_FLAG:
11225 case TYPE_F_LOADS:
11226 case TYPE_F_STORES:
11227 return true;
11228 default:
11229 return false;
11233 /* Return true if and only if this insn can dual-issue as younger. */
11234 static bool
11235 cortexa7_younger (FILE *file, int verbose, rtx_insn *insn)
11237 if (recog_memoized (insn) < 0)
11239 if (verbose > 5)
11240 fprintf (file, ";; not cortexa7_younger %d\n", INSN_UID (insn));
11241 return false;
11244 switch (get_attr_type (insn))
11246 case TYPE_ALU_IMM:
11247 case TYPE_ALUS_IMM:
11248 case TYPE_LOGIC_IMM:
11249 case TYPE_LOGICS_IMM:
11250 case TYPE_EXTEND:
11251 case TYPE_MVN_IMM:
11252 case TYPE_MOV_IMM:
11253 case TYPE_MOV_REG:
11254 case TYPE_MOV_SHIFT:
11255 case TYPE_MOV_SHIFT_REG:
11256 case TYPE_BRANCH:
11257 case TYPE_CALL:
11258 return true;
11259 default:
11260 return false;
11265 /* Look for an instruction that can dual issue only as an older
11266 instruction, and move it in front of any instructions that can
11267 dual-issue as younger, while preserving the relative order of all
11268 other instructions in the ready list. This is a heuristic to help
11269 dual-issue in later cycles, by postponing issue of more flexible
11270 instructions. This heuristic may affect dual issue opportunities
11271 in the current cycle. */
11272 static void
11273 cortexa7_sched_reorder (FILE *file, int verbose, rtx_insn **ready,
11274 int *n_readyp, int clock)
11276 int i;
11277 int first_older_only = -1, first_younger = -1;
11279 if (verbose > 5)
11280 fprintf (file,
11281 ";; sched_reorder for cycle %d with %d insns in ready list\n",
11282 clock,
11283 *n_readyp);
11285 /* Traverse the ready list from the head (the instruction to issue
11286 first), looking for the first instruction that can issue as
11287 younger and the first instruction that can dual-issue only as
11288 older. */
11289 for (i = *n_readyp - 1; i >= 0; i--)
11291 rtx_insn *insn = ready[i];
11292 if (cortexa7_older_only (insn))
11294 first_older_only = i;
11295 if (verbose > 5)
11296 fprintf (file, ";; reorder older found %d\n", INSN_UID (insn));
11297 break;
11299 else if (cortexa7_younger (file, verbose, insn) && first_younger == -1)
11300 first_younger = i;
11303 /* Nothing to reorder because either no younger insn found or insn
11304 that can dual-issue only as older appears before any insn that
11305 can dual-issue as younger. */
11306 if (first_younger == -1)
11308 if (verbose > 5)
11309 fprintf (file, ";; sched_reorder nothing to reorder as no younger\n");
11310 return;
11313 /* Nothing to reorder because no older-only insn in the ready list. */
11314 if (first_older_only == -1)
11316 if (verbose > 5)
11317 fprintf (file, ";; sched_reorder nothing to reorder as no older_only\n");
11318 return;
11321 /* Move first_older_only insn before first_younger. */
11322 if (verbose > 5)
11323 fprintf (file, ";; cortexa7_sched_reorder insn %d before %d\n",
11324 INSN_UID(ready [first_older_only]),
11325 INSN_UID(ready [first_younger]));
11326 rtx_insn *first_older_only_insn = ready [first_older_only];
11327 for (i = first_older_only; i < first_younger; i++)
11329 ready[i] = ready[i+1];
11332 ready[i] = first_older_only_insn;
11333 return;
11336 /* Implement TARGET_SCHED_REORDER. */
11337 static int
11338 arm_sched_reorder (FILE *file, int verbose, rtx_insn **ready, int *n_readyp,
11339 int clock)
11341 switch (arm_tune)
11343 case TARGET_CPU_cortexa7:
11344 cortexa7_sched_reorder (file, verbose, ready, n_readyp, clock);
11345 break;
11346 default:
11347 /* Do nothing for other cores. */
11348 break;
11351 return arm_issue_rate ();
11354 /* This function implements the target macro TARGET_SCHED_ADJUST_COST.
11355 It corrects the value of COST based on the relationship between
11356 INSN and DEP through the dependence LINK. It returns the new
11357 value. There is a per-core adjust_cost hook to adjust scheduler costs
11358 and the per-core hook can choose to completely override the generic
11359 adjust_cost function. Only put bits of code into arm_adjust_cost that
11360 are common across all cores. */
11361 static int
11362 arm_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep, int cost,
11363 unsigned int)
11365 rtx i_pat, d_pat;
11367 /* When generating Thumb-1 code, we want to place flag-setting operations
11368 close to a conditional branch which depends on them, so that we can
11369 omit the comparison. */
11370 if (TARGET_THUMB1
11371 && dep_type == 0
11372 && recog_memoized (insn) == CODE_FOR_cbranchsi4_insn
11373 && recog_memoized (dep) >= 0
11374 && get_attr_conds (dep) == CONDS_SET)
11375 return 0;
11377 if (current_tune->sched_adjust_cost != NULL)
11379 if (!current_tune->sched_adjust_cost (insn, dep_type, dep, &cost))
11380 return cost;
11383 /* XXX Is this strictly true? */
11384 if (dep_type == REG_DEP_ANTI
11385 || dep_type == REG_DEP_OUTPUT)
11386 return 0;
11388 /* Call insns don't incur a stall, even if they follow a load. */
11389 if (dep_type == 0
11390 && CALL_P (insn))
11391 return 1;
11393 if ((i_pat = single_set (insn)) != NULL
11394 && MEM_P (SET_SRC (i_pat))
11395 && (d_pat = single_set (dep)) != NULL
11396 && MEM_P (SET_DEST (d_pat)))
11398 rtx src_mem = XEXP (SET_SRC (i_pat), 0);
11399 /* This is a load after a store; there is no conflict if the load reads
11400 from a cached area. Assume that loads from the stack, and from the
11401 constant pool are cached, and that others will miss. This is a
11402 hack. */
11404 if ((GET_CODE (src_mem) == SYMBOL_REF
11405 && CONSTANT_POOL_ADDRESS_P (src_mem))
11406 || reg_mentioned_p (stack_pointer_rtx, src_mem)
11407 || reg_mentioned_p (frame_pointer_rtx, src_mem)
11408 || reg_mentioned_p (hard_frame_pointer_rtx, src_mem))
11409 return 1;
11412 return cost;
11416 arm_max_conditional_execute (void)
11418 return max_insns_skipped;
11421 static int
11422 arm_default_branch_cost (bool speed_p, bool predictable_p ATTRIBUTE_UNUSED)
11424 if (TARGET_32BIT)
11425 return (TARGET_THUMB2 && !speed_p) ? 1 : 4;
11426 else
11427 return (optimize > 0) ? 2 : 0;
11430 static int
11431 arm_cortex_a5_branch_cost (bool speed_p, bool predictable_p)
11433 return speed_p ? 0 : arm_default_branch_cost (speed_p, predictable_p);
11436 /* Thumb-2 branches are relatively cheap on Cortex-M processors ("1 + P cycles"
11437 on Cortex-M4, where P varies from 1 to 3 according to some criteria), since
11438 sequences of non-executed instructions in IT blocks probably take the same
11439 amount of time as executed instructions (and the IT instruction itself takes
11440 space in icache). This function was experimentally determined to give good
11441 results on a popular embedded benchmark. */
11443 static int
11444 arm_cortex_m_branch_cost (bool speed_p, bool predictable_p)
11446 return (TARGET_32BIT && speed_p) ? 1
11447 : arm_default_branch_cost (speed_p, predictable_p);
11450 static int
11451 arm_cortex_m7_branch_cost (bool speed_p, bool predictable_p)
11453 return speed_p ? 0 : arm_default_branch_cost (speed_p, predictable_p);
11456 static bool fp_consts_inited = false;
11458 static REAL_VALUE_TYPE value_fp0;
11460 static void
11461 init_fp_table (void)
11463 REAL_VALUE_TYPE r;
11465 r = REAL_VALUE_ATOF ("0", DFmode);
11466 value_fp0 = r;
11467 fp_consts_inited = true;
11470 /* Return TRUE if rtx X is a valid immediate FP constant. */
11472 arm_const_double_rtx (rtx x)
11474 const REAL_VALUE_TYPE *r;
11476 if (!fp_consts_inited)
11477 init_fp_table ();
11479 r = CONST_DOUBLE_REAL_VALUE (x);
11480 if (REAL_VALUE_MINUS_ZERO (*r))
11481 return 0;
11483 if (real_equal (r, &value_fp0))
11484 return 1;
11486 return 0;
11489 /* VFPv3 has a fairly wide range of representable immediates, formed from
11490 "quarter-precision" floating-point values. These can be evaluated using this
11491 formula (with ^ for exponentiation):
11493 -1^s * n * 2^-r
11495 Where 's' is a sign bit (0/1), 'n' and 'r' are integers such that
11496 16 <= n <= 31 and 0 <= r <= 7.
11498 These values are mapped onto an 8-bit integer ABCDEFGH s.t.
11500 - A (most-significant) is the sign bit.
11501 - BCD are the exponent (encoded as r XOR 3).
11502 - EFGH are the mantissa (encoded as n - 16).
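/* Worked example (illustration only, not part of the original sources):
   1.0 = 16 * 2^-4, so s = 0, n = 16 and r = 4. The encoded byte ABCDEFGH is
   then A = 0, BCD = 4 XOR 3 = 0b111, EFGH = 16 - 16 = 0b0000, giving the
   imm8 value 0b01110000 (0x70) used by fconst[sd] to load 1.0. */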
11505 /* Return an integer index for a VFPv3 immediate operand X suitable for the
11506 fconst[sd] instruction, or -1 if X isn't suitable. */
11507 static int
11508 vfp3_const_double_index (rtx x)
11510 REAL_VALUE_TYPE r, m;
11511 int sign, exponent;
11512 unsigned HOST_WIDE_INT mantissa, mant_hi;
11513 unsigned HOST_WIDE_INT mask;
11514 int point_pos = 2 * HOST_BITS_PER_WIDE_INT - 1;
11515 bool fail;
11517 if (!TARGET_VFP3 || !CONST_DOUBLE_P (x))
11518 return -1;
11520 r = *CONST_DOUBLE_REAL_VALUE (x);
11522 /* We can't represent these things, so detect them first. */
11523 if (REAL_VALUE_ISINF (r) || REAL_VALUE_ISNAN (r) || REAL_VALUE_MINUS_ZERO (r))
11524 return -1;
11526 /* Extract sign, exponent and mantissa. */
11527 sign = REAL_VALUE_NEGATIVE (r) ? 1 : 0;
11528 r = real_value_abs (&r);
11529 exponent = REAL_EXP (&r);
11530 /* For the mantissa, we expand into two HOST_WIDE_INTS, apart from the
11531 highest (sign) bit, with a fixed binary point at bit point_pos.
11532 WARNING: If there's ever a VFP version which uses more than 2 * H_W_I - 1
11533 bits for the mantissa, this may fail (low bits would be lost). */
11534 real_ldexp (&m, &r, point_pos - exponent);
11535 wide_int w = real_to_integer (&m, &fail, HOST_BITS_PER_WIDE_INT * 2);
11536 mantissa = w.elt (0);
11537 mant_hi = w.elt (1);
11539 /* If there are bits set in the low part of the mantissa, we can't
11540 represent this value. */
11541 if (mantissa != 0)
11542 return -1;
11544 /* Now make it so that mantissa contains the most-significant bits, and move
11545 the point_pos to indicate that the least-significant bits have been
11546 discarded. */
11547 point_pos -= HOST_BITS_PER_WIDE_INT;
11548 mantissa = mant_hi;
11550 /* We can permit four significant bits of mantissa only, plus a high bit
11551 which is always 1. */
11552 mask = (HOST_WIDE_INT_1U << (point_pos - 5)) - 1;
11553 if ((mantissa & mask) != 0)
11554 return -1;
11556 /* Now we know the mantissa is in range, chop off the unneeded bits. */
11557 mantissa >>= point_pos - 5;
11559 /* The mantissa may be zero. Disallow that case. (It's possible to load the
11560 floating-point immediate zero with Neon using an integer-zero load, but
11561 that case is handled elsewhere.) */
11562 if (mantissa == 0)
11563 return -1;
11565 gcc_assert (mantissa >= 16 && mantissa <= 31);
11567 /* The value of 5 here would be 4 if GCC used IEEE754-like encoding (where
11568 normalized significands are in the range [1, 2)). (Our mantissa is shifted
11569 left 4 places at this point relative to normalized IEEE754 values). GCC
11570 internally uses [0.5, 1) (see real.c), so the exponent returned from
11571 REAL_EXP must be altered. */
11572 exponent = 5 - exponent;
11574 if (exponent < 0 || exponent > 7)
11575 return -1;
11577 /* Sign, mantissa and exponent are now in the correct form to plug into the
11578 formula described in the comment above. */
11579 return (sign << 7) | ((exponent ^ 3) << 4) | (mantissa - 16);
11582 /* Return TRUE if rtx X is a valid immediate VFPv3 constant. */
11584 vfp3_const_double_rtx (rtx x)
11586 if (!TARGET_VFP3)
11587 return 0;
11589 return vfp3_const_double_index (x) != -1;
11592 /* Recognize immediates which can be used in various Neon instructions. Legal
11593 immediates are described by the following table (for VMVN variants, the
11594 bitwise inverse of the constant shown is recognized. In either case, VMOV
11595 is output and the correct instruction to use for a given constant is chosen
11596 by the assembler). The constant shown is replicated across all elements of
11597 the destination vector.
11599 insn elems variant constant (binary)
11600 ---- ----- ------- -----------------
11601 vmov i32 0 00000000 00000000 00000000 abcdefgh
11602 vmov i32 1 00000000 00000000 abcdefgh 00000000
11603 vmov i32 2 00000000 abcdefgh 00000000 00000000
11604 vmov i32 3 abcdefgh 00000000 00000000 00000000
11605 vmov i16 4 00000000 abcdefgh
11606 vmov i16 5 abcdefgh 00000000
11607 vmvn i32 6 00000000 00000000 00000000 abcdefgh
11608 vmvn i32 7 00000000 00000000 abcdefgh 00000000
11609 vmvn i32 8 00000000 abcdefgh 00000000 00000000
11610 vmvn i32 9 abcdefgh 00000000 00000000 00000000
11611 vmvn i16 10 00000000 abcdefgh
11612 vmvn i16 11 abcdefgh 00000000
11613 vmov i32 12 00000000 00000000 abcdefgh 11111111
11614 vmvn i32 13 00000000 00000000 abcdefgh 11111111
11615 vmov i32 14 00000000 abcdefgh 11111111 11111111
11616 vmvn i32 15 00000000 abcdefgh 11111111 11111111
11617 vmov i8 16 abcdefgh
11618 vmov i64 17 aaaaaaaa bbbbbbbb cccccccc dddddddd
11619 eeeeeeee ffffffff gggggggg hhhhhhhh
11620 vmov f32 18 aBbbbbbc defgh000 00000000 00000000
11621 vmov f32 19 00000000 00000000 00000000 00000000
11623 For case 18, B = !b. Representable values are exactly those accepted by
11624 vfp3_const_double_index, but are output as floating-point numbers rather
11625 than indices.
11627 For case 19, we will change it to vmov.i32 when assembling.
11629 Variants 0-5 (inclusive) may also be used as immediates for the second
11630 operand of VORR/VBIC instructions.
11632 The INVERSE argument causes the bitwise inverse of the given operand to be
11633 recognized instead (used for recognizing legal immediates for the VAND/VORN
11634 pseudo-instructions). If INVERSE is true, the value placed in *MODCONST is
11635 *not* inverted (i.e. the pseudo-instruction forms vand/vorn should still be
11636 output, rather than the real insns vbic/vorr).
11638 INVERSE makes no difference to the recognition of float vectors.
11640 The return value is the variant of immediate as shown in the above table, or
11641 -1 if the given value doesn't match any of the listed patterns.
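/* Worked example (illustration only): a V4SImode CONST_VECTOR whose four
   elements are all 0x0000ab00 splats to the per-element bytes
   { 0x00, 0xab, 0x00, 0x00 } and so matches variant 1 above, with
   *ELEMENTWIDTH set to 32 and *MODCONST set to 0x0000ab00. */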
11643 static int
11644 neon_valid_immediate (rtx op, machine_mode mode, int inverse,
11645 rtx *modconst, int *elementwidth)
11647 #define CHECK(STRIDE, ELSIZE, CLASS, TEST) \
11648 matches = 1; \
11649 for (i = 0; i < idx; i += (STRIDE)) \
11650 if (!(TEST)) \
11651 matches = 0; \
11652 if (matches) \
11654 immtype = (CLASS); \
11655 elsize = (ELSIZE); \
11656 break; \
11659 unsigned int i, elsize = 0, idx = 0, n_elts;
11660 unsigned int innersize;
11661 unsigned char bytes[16];
11662 int immtype = -1, matches;
11663 unsigned int invmask = inverse ? 0xff : 0;
11664 bool vector = GET_CODE (op) == CONST_VECTOR;
11666 if (vector)
11667 n_elts = CONST_VECTOR_NUNITS (op);
11668 else
11670 n_elts = 1;
11671 if (mode == VOIDmode)
11672 mode = DImode;
11675 innersize = GET_MODE_UNIT_SIZE (mode);
11677 /* Vectors of float constants. */
11678 if (GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
11680 rtx el0 = CONST_VECTOR_ELT (op, 0);
11682 if (!vfp3_const_double_rtx (el0) && el0 != CONST0_RTX (GET_MODE (el0)))
11683 return -1;
11685 /* FP16 vectors cannot be represented. */
11686 if (GET_MODE_INNER (mode) == HFmode)
11687 return -1;
11689 /* All elements in the vector must be the same. Note that 0.0 and -0.0
11690 are distinct in this context. */
11691 if (!const_vec_duplicate_p (op))
11692 return -1;
11694 if (modconst)
11695 *modconst = CONST_VECTOR_ELT (op, 0);
11697 if (elementwidth)
11698 *elementwidth = 0;
11700 if (el0 == CONST0_RTX (GET_MODE (el0)))
11701 return 19;
11702 else
11703 return 18;
11706 /* The tricks done in the code below apply for little-endian vector layout.
11707 For big-endian vectors only allow vectors of the form { a, a, a..., a }.
11708 FIXME: Implement logic for big-endian vectors. */
11709 if (BYTES_BIG_ENDIAN && vector && !const_vec_duplicate_p (op))
11710 return -1;
11712 /* Splat vector constant out into a byte vector. */
11713 for (i = 0; i < n_elts; i++)
11715 rtx el = vector ? CONST_VECTOR_ELT (op, i) : op;
11716 unsigned HOST_WIDE_INT elpart;
11718 gcc_assert (CONST_INT_P (el));
11719 elpart = INTVAL (el);
11721 for (unsigned int byte = 0; byte < innersize; byte++)
11723 bytes[idx++] = (elpart & 0xff) ^ invmask;
11724 elpart >>= BITS_PER_UNIT;
11728 /* Sanity check. */
11729 gcc_assert (idx == GET_MODE_SIZE (mode));
11733 CHECK (4, 32, 0, bytes[i] == bytes[0] && bytes[i + 1] == 0
11734 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
11736 CHECK (4, 32, 1, bytes[i] == 0 && bytes[i + 1] == bytes[1]
11737 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
11739 CHECK (4, 32, 2, bytes[i] == 0 && bytes[i + 1] == 0
11740 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);
11742 CHECK (4, 32, 3, bytes[i] == 0 && bytes[i + 1] == 0
11743 && bytes[i + 2] == 0 && bytes[i + 3] == bytes[3]);
11745 CHECK (2, 16, 4, bytes[i] == bytes[0] && bytes[i + 1] == 0);
11747 CHECK (2, 16, 5, bytes[i] == 0 && bytes[i + 1] == bytes[1]);
11749 CHECK (4, 32, 6, bytes[i] == bytes[0] && bytes[i + 1] == 0xff
11750 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
11752 CHECK (4, 32, 7, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
11753 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
11755 CHECK (4, 32, 8, bytes[i] == 0xff && bytes[i + 1] == 0xff
11756 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);
11758 CHECK (4, 32, 9, bytes[i] == 0xff && bytes[i + 1] == 0xff
11759 && bytes[i + 2] == 0xff && bytes[i + 3] == bytes[3]);
11761 CHECK (2, 16, 10, bytes[i] == bytes[0] && bytes[i + 1] == 0xff);
11763 CHECK (2, 16, 11, bytes[i] == 0xff && bytes[i + 1] == bytes[1]);
11765 CHECK (4, 32, 12, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
11766 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
11768 CHECK (4, 32, 13, bytes[i] == 0 && bytes[i + 1] == bytes[1]
11769 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
11771 CHECK (4, 32, 14, bytes[i] == 0xff && bytes[i + 1] == 0xff
11772 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);
11774 CHECK (4, 32, 15, bytes[i] == 0 && bytes[i + 1] == 0
11775 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);
11777 CHECK (1, 8, 16, bytes[i] == bytes[0]);
11779 CHECK (1, 64, 17, (bytes[i] == 0 || bytes[i] == 0xff)
11780 && bytes[i] == bytes[(i + 8) % idx]);
11782 while (0);
11784 if (immtype == -1)
11785 return -1;
11787 if (elementwidth)
11788 *elementwidth = elsize;
11790 if (modconst)
11792 unsigned HOST_WIDE_INT imm = 0;
11794 /* Un-invert bytes of recognized vector, if necessary. */
11795 if (invmask != 0)
11796 for (i = 0; i < idx; i++)
11797 bytes[i] ^= invmask;
11799 if (immtype == 17)
11801 /* FIXME: Broken on 32-bit H_W_I hosts. */
11802 gcc_assert (sizeof (HOST_WIDE_INT) == 8);
11804 for (i = 0; i < 8; i++)
11805 imm |= (unsigned HOST_WIDE_INT) (bytes[i] ? 0xff : 0)
11806 << (i * BITS_PER_UNIT);
11808 *modconst = GEN_INT (imm);
11810 else
11812 unsigned HOST_WIDE_INT imm = 0;
11814 for (i = 0; i < elsize / BITS_PER_UNIT; i++)
11815 imm |= (unsigned HOST_WIDE_INT) bytes[i] << (i * BITS_PER_UNIT);
11817 *modconst = GEN_INT (imm);
11821 return immtype;
11822 #undef CHECK
11825 /* Return TRUE if rtx X is legal for use as either a Neon VMOV (or, implicitly,
11826 VMVN) immediate. Write back width per element to *ELEMENTWIDTH (or zero for
11827 float elements), and a modified constant (whatever should be output for a
11828 VMOV) in *MODCONST. */
11831 neon_immediate_valid_for_move (rtx op, machine_mode mode,
11832 rtx *modconst, int *elementwidth)
11834 rtx tmpconst;
11835 int tmpwidth;
11836 int retval = neon_valid_immediate (op, mode, 0, &tmpconst, &tmpwidth);
11838 if (retval == -1)
11839 return 0;
11841 if (modconst)
11842 *modconst = tmpconst;
11844 if (elementwidth)
11845 *elementwidth = tmpwidth;
11847 return 1;
11850 /* Return TRUE if rtx X is legal for use in a VORR or VBIC instruction. If
11851 the immediate is valid, write a constant suitable for using as an operand
11852 to VORR/VBIC/VAND/VORN to *MODCONST and the corresponding element width to
11853 *ELEMENTWIDTH. See neon_valid_immediate for description of INVERSE. */
11856 neon_immediate_valid_for_logic (rtx op, machine_mode mode, int inverse,
11857 rtx *modconst, int *elementwidth)
11859 rtx tmpconst;
11860 int tmpwidth;
11861 int retval = neon_valid_immediate (op, mode, inverse, &tmpconst, &tmpwidth);
11863 if (retval < 0 || retval > 5)
11864 return 0;
11866 if (modconst)
11867 *modconst = tmpconst;
11869 if (elementwidth)
11870 *elementwidth = tmpwidth;
11872 return 1;
11875 /* Return TRUE if rtx OP is legal for use in a VSHR or VSHL instruction. If
11876 the immediate is valid, write a constant suitable for using as an operand
11877 to VSHR/VSHL to *MODCONST and the corresponding element width to
11878 *ELEMENTWIDTH. ISLEFTSHIFT is true for a left shift and false for a
11879 right shift; the two kinds of shift have different limitations.
11882 neon_immediate_valid_for_shift (rtx op, machine_mode mode,
11883 rtx *modconst, int *elementwidth,
11884 bool isleftshift)
11886 unsigned int innersize = GET_MODE_UNIT_SIZE (mode);
11887 unsigned int n_elts = CONST_VECTOR_NUNITS (op), i;
11888 unsigned HOST_WIDE_INT last_elt = 0;
11889 unsigned HOST_WIDE_INT maxshift;
11891 /* All vector elements must be equal; extract the common value. */
11892 for (i = 0; i < n_elts; i++)
11894 rtx el = CONST_VECTOR_ELT (op, i);
11895 unsigned HOST_WIDE_INT elpart;
11897 if (CONST_INT_P (el))
11898 elpart = INTVAL (el);
11899 else if (CONST_DOUBLE_P (el))
11900 return 0;
11901 else
11902 gcc_unreachable ();
11904 if (i != 0 && elpart != last_elt)
11905 return 0;
11907 last_elt = elpart;
11910 /* Shift less than element size. */
11911 maxshift = innersize * 8;
11913 if (isleftshift)
11915 /* Left shift immediate value can be from 0 to <size>-1. */
11916 if (last_elt >= maxshift)
11917 return 0;
11919 else
11921 /* Right shift immediate value can be from 1 to <size>. */
11922 if (last_elt == 0 || last_elt > maxshift)
11923 return 0;
11926 if (elementwidth)
11927 *elementwidth = innersize * 8;
11929 if (modconst)
11930 *modconst = CONST_VECTOR_ELT (op, 0);
11932 return 1;
11935 /* Return a string suitable for output of Neon immediate logic operation
11936 MNEM. */
11938 char *
11939 neon_output_logic_immediate (const char *mnem, rtx *op2, machine_mode mode,
11940 int inverse, int quad)
11942 int width, is_valid;
11943 static char templ[40];
11945 is_valid = neon_immediate_valid_for_logic (*op2, mode, inverse, op2, &width);
11947 gcc_assert (is_valid != 0);
11949 if (quad)
11950 sprintf (templ, "%s.i%d\t%%q0, %%2", mnem, width);
11951 else
11952 sprintf (templ, "%s.i%d\t%%P0, %%2", mnem, width);
11954 return templ;
11957 /* Return a string suitable for output of Neon immediate shift operation
11958 (VSHR or VSHL) MNEM. */
11960 char *
11961 neon_output_shift_immediate (const char *mnem, char sign, rtx *op2,
11962 machine_mode mode, int quad,
11963 bool isleftshift)
11965 int width, is_valid;
11966 static char templ[40];
11968 is_valid = neon_immediate_valid_for_shift (*op2, mode, op2, &width, isleftshift);
11969 gcc_assert (is_valid != 0);
11971 if (quad)
11972 sprintf (templ, "%s.%c%d\t%%q0, %%q1, %%2", mnem, sign, width);
11973 else
11974 sprintf (templ, "%s.%c%d\t%%P0, %%P1, %%2", mnem, sign, width);
11976 return templ;
11979 /* Output a sequence of pairwise operations to implement a reduction.
11980 NOTE: We do "too much work" here, because pairwise operations work on two
11981 registers-worth of operands in one go. Unfortunately I don't think we can
11982 exploit those extra calculations to do the full operation in fewer steps.
11983 Although all vector elements of the result but the first are ignored, we
11984 actually calculate the same result in each of the elements. An alternative
11985 such as initially loading a vector with zero to use as each of the second
11986 operands would use up an additional register and take an extra instruction,
11987 for no particular gain. */
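/* Illustrative sketch (not from the sources): reducing a 4-element vector
   {a, b, c, d} with an addition REDUC takes two pairwise steps, first
   producing {a+b, c+d, a+b, c+d} and then {a+b+c+d, ...}, so element 0 of
   OP0 ends up holding the full reduction (as do the other elements). */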
11989 void
11990 neon_pairwise_reduce (rtx op0, rtx op1, machine_mode mode,
11991 rtx (*reduc) (rtx, rtx, rtx))
11993 unsigned int i, parts = GET_MODE_SIZE (mode) / GET_MODE_UNIT_SIZE (mode);
11994 rtx tmpsum = op1;
11996 for (i = parts / 2; i >= 1; i /= 2)
11998 rtx dest = (i == 1) ? op0 : gen_reg_rtx (mode);
11999 emit_insn (reduc (dest, tmpsum, tmpsum));
12000 tmpsum = dest;
12004 /* If VALS is a vector constant that can be loaded into a register
12005 using VDUP, generate instructions to do so and return an RTX to
12006 assign to the register. Otherwise return NULL_RTX. */
12008 static rtx
12009 neon_vdup_constant (rtx vals)
12011 machine_mode mode = GET_MODE (vals);
12012 machine_mode inner_mode = GET_MODE_INNER (mode);
12013 rtx x;
12015 if (GET_CODE (vals) != CONST_VECTOR || GET_MODE_SIZE (inner_mode) > 4)
12016 return NULL_RTX;
12018 if (!const_vec_duplicate_p (vals, &x))
12019 /* The elements are not all the same. We could handle repeating
12020 patterns of a mode larger than INNER_MODE here (e.g. int8x8_t
12021 {0, C, 0, C, 0, C, 0, C} which can be loaded using
12022 vdup.i16). */
12023 return NULL_RTX;
12025 /* We can load this constant by using VDUP and a constant in a
12026 single ARM register. This will be cheaper than a vector
12027 load. */
12029 x = copy_to_mode_reg (inner_mode, x);
12030 return gen_rtx_VEC_DUPLICATE (mode, x);
12033 /* Generate code to load VALS, which is a PARALLEL containing only
12034 constants (for vec_init) or CONST_VECTOR, efficiently into a
12035 register. Returns an RTX to copy into the register, or NULL_RTX
12036 for a PARALLEL that cannot be converted into a CONST_VECTOR. */
12039 neon_make_constant (rtx vals)
12041 machine_mode mode = GET_MODE (vals);
12042 rtx target;
12043 rtx const_vec = NULL_RTX;
12044 int n_elts = GET_MODE_NUNITS (mode);
12045 int n_const = 0;
12046 int i;
12048 if (GET_CODE (vals) == CONST_VECTOR)
12049 const_vec = vals;
12050 else if (GET_CODE (vals) == PARALLEL)
12052 /* A CONST_VECTOR must contain only CONST_INTs and
12053 CONST_DOUBLEs, but CONSTANT_P allows more (e.g. SYMBOL_REF).
12054 Only store valid constants in a CONST_VECTOR. */
12055 for (i = 0; i < n_elts; ++i)
12057 rtx x = XVECEXP (vals, 0, i);
12058 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
12059 n_const++;
12061 if (n_const == n_elts)
12062 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
12064 else
12065 gcc_unreachable ();
12067 if (const_vec != NULL
12068 && neon_immediate_valid_for_move (const_vec, mode, NULL, NULL))
12069 /* Load using VMOV. On Cortex-A8 this takes one cycle. */
12070 return const_vec;
12071 else if ((target = neon_vdup_constant (vals)) != NULL_RTX)
12072 /* Loaded using VDUP. On Cortex-A8 the VDUP takes one NEON
12073 pipeline cycle; creating the constant takes one or two ARM
12074 pipeline cycles. */
12075 return target;
12076 else if (const_vec != NULL_RTX)
12077 /* Load from constant pool. On Cortex-A8 this takes two cycles
12078 (for either double or quad vectors). We cannot take advantage
12079 of single-cycle VLD1 because we need a PC-relative addressing
12080 mode. */
12081 return const_vec;
12082 else
12083 /* A PARALLEL containing something not valid inside CONST_VECTOR.
12084 We cannot construct an initializer. */
12085 return NULL_RTX;
12088 /* Initialize vector TARGET to VALS. */
12090 void
12091 neon_expand_vector_init (rtx target, rtx vals)
12093 machine_mode mode = GET_MODE (target);
12094 machine_mode inner_mode = GET_MODE_INNER (mode);
12095 int n_elts = GET_MODE_NUNITS (mode);
12096 int n_var = 0, one_var = -1;
12097 bool all_same = true;
12098 rtx x, mem;
12099 int i;
12101 for (i = 0; i < n_elts; ++i)
12103 x = XVECEXP (vals, 0, i);
12104 if (!CONSTANT_P (x))
12105 ++n_var, one_var = i;
12107 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
12108 all_same = false;
12111 if (n_var == 0)
12113 rtx constant = neon_make_constant (vals);
12114 if (constant != NULL_RTX)
12116 emit_move_insn (target, constant);
12117 return;
12121 /* Splat a single non-constant element if we can. */
12122 if (all_same && GET_MODE_SIZE (inner_mode) <= 4)
12124 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
12125 emit_insn (gen_rtx_SET (target, gen_rtx_VEC_DUPLICATE (mode, x)));
12126 return;
12129 /* One field is non-constant. Load constant then overwrite varying
12130 field. This is more efficient than using the stack. */
12131 if (n_var == 1)
12133 rtx copy = copy_rtx (vals);
12134 rtx index = GEN_INT (one_var);
12136 /* Load constant part of vector, substitute neighboring value for
12137 varying element. */
12138 XVECEXP (copy, 0, one_var) = XVECEXP (vals, 0, (one_var + 1) % n_elts);
12139 neon_expand_vector_init (target, copy);
12141 /* Insert variable. */
12142 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, one_var));
12143 switch (mode)
12145 case V8QImode:
12146 emit_insn (gen_neon_vset_lanev8qi (target, x, target, index));
12147 break;
12148 case V16QImode:
12149 emit_insn (gen_neon_vset_lanev16qi (target, x, target, index));
12150 break;
12151 case V4HImode:
12152 emit_insn (gen_neon_vset_lanev4hi (target, x, target, index));
12153 break;
12154 case V8HImode:
12155 emit_insn (gen_neon_vset_lanev8hi (target, x, target, index));
12156 break;
12157 case V2SImode:
12158 emit_insn (gen_neon_vset_lanev2si (target, x, target, index));
12159 break;
12160 case V4SImode:
12161 emit_insn (gen_neon_vset_lanev4si (target, x, target, index));
12162 break;
12163 case V2SFmode:
12164 emit_insn (gen_neon_vset_lanev2sf (target, x, target, index));
12165 break;
12166 case V4SFmode:
12167 emit_insn (gen_neon_vset_lanev4sf (target, x, target, index));
12168 break;
12169 case V2DImode:
12170 emit_insn (gen_neon_vset_lanev2di (target, x, target, index));
12171 break;
12172 default:
12173 gcc_unreachable ();
12175 return;
12178 /* Construct the vector in memory one field at a time
12179 and load the whole vector. */
12180 mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
12181 for (i = 0; i < n_elts; i++)
12182 emit_move_insn (adjust_address_nv (mem, inner_mode,
12183 i * GET_MODE_SIZE (inner_mode)),
12184 XVECEXP (vals, 0, i));
12185 emit_move_insn (target, mem);
12188 /* Ensure OPERAND lies between LOW (inclusive) and HIGH (exclusive). Raise
12189 an error if it doesn't. EXP indicates the source location, which includes the
12190 inlining history for intrinsics. */
12192 static void
12193 bounds_check (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high,
12194 const_tree exp, const char *desc)
12196 HOST_WIDE_INT lane;
12198 gcc_assert (CONST_INT_P (operand));
12200 lane = INTVAL (operand);
12202 if (lane < low || lane >= high)
12204 if (exp)
12205 error ("%K%s %wd out of range %wd - %wd",
12206 exp, desc, lane, low, high - 1);
12207 else
12208 error ("%s %wd out of range %wd - %wd", desc, lane, low, high - 1);
12212 /* Bounds-check lanes. */
12214 void
12215 neon_lane_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high,
12216 const_tree exp)
12218 bounds_check (operand, low, high, exp, "lane");
12221 /* Bounds-check constants. */
12223 void
12224 arm_const_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
12226 bounds_check (operand, low, high, NULL_TREE, "constant");
12229 HOST_WIDE_INT
12230 neon_element_bits (machine_mode mode)
12232 return GET_MODE_UNIT_BITSIZE (mode);
12236 /* Predicates for `match_operand' and `match_operator'. */
12238 /* Return TRUE if OP is a valid coprocessor memory address pattern.
12239 WB is true if full writeback address modes are allowed and is false
12240 if limited writeback address modes (POST_INC and PRE_DEC) are
12241 allowed. */
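/* Illustrative example (not from the sources): (mem (plus (reg r4)
   (const_int 256))) is accepted below, since the offset is a multiple of 4
   lying strictly between -1024 and 1024, which is consistent with the
   vldr/vstr offset range. */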
12244 arm_coproc_mem_operand (rtx op, bool wb)
12246 rtx ind;
12248 /* Reject eliminable registers. */
12249 if (! (reload_in_progress || reload_completed || lra_in_progress)
12250 && ( reg_mentioned_p (frame_pointer_rtx, op)
12251 || reg_mentioned_p (arg_pointer_rtx, op)
12252 || reg_mentioned_p (virtual_incoming_args_rtx, op)
12253 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
12254 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
12255 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
12256 return FALSE;
12258 /* Constants are converted into offsets from labels. */
12259 if (!MEM_P (op))
12260 return FALSE;
12262 ind = XEXP (op, 0);
12264 if (reload_completed
12265 && (GET_CODE (ind) == LABEL_REF
12266 || (GET_CODE (ind) == CONST
12267 && GET_CODE (XEXP (ind, 0)) == PLUS
12268 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
12269 && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
12270 return TRUE;
12272 /* Match: (mem (reg)). */
12273 if (REG_P (ind))
12274 return arm_address_register_rtx_p (ind, 0);
12276 /* Autoincrement addressing modes. POST_INC and PRE_DEC are
12277 acceptable in any case (subject to verification by
12278 arm_address_register_rtx_p). We need WB to be true to accept
12279 PRE_INC and POST_DEC. */
12280 if (GET_CODE (ind) == POST_INC
12281 || GET_CODE (ind) == PRE_DEC
12282 || (wb
12283 && (GET_CODE (ind) == PRE_INC
12284 || GET_CODE (ind) == POST_DEC)))
12285 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
12287 if (wb
12288 && (GET_CODE (ind) == POST_MODIFY || GET_CODE (ind) == PRE_MODIFY)
12289 && arm_address_register_rtx_p (XEXP (ind, 0), 0)
12290 && GET_CODE (XEXP (ind, 1)) == PLUS
12291 && rtx_equal_p (XEXP (XEXP (ind, 1), 0), XEXP (ind, 0)))
12292 ind = XEXP (ind, 1);
12294 /* Match:
12295 (plus (reg)
12296 (const)). */
12297 if (GET_CODE (ind) == PLUS
12298 && REG_P (XEXP (ind, 0))
12299 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
12300 && CONST_INT_P (XEXP (ind, 1))
12301 && INTVAL (XEXP (ind, 1)) > -1024
12302 && INTVAL (XEXP (ind, 1)) < 1024
12303 && (INTVAL (XEXP (ind, 1)) & 3) == 0)
12304 return TRUE;
12306 return FALSE;
12309 /* Return TRUE if OP is a memory operand which we can load or store a vector
12310 to/from. TYPE is one of the following values:
12311 0 - Vector load/store (vldr)
12312 1 - Core registers (ldm)
12313 2 - Element/structure loads (vld1)
12316 neon_vector_mem_operand (rtx op, int type, bool strict)
12318 rtx ind;
12320 /* Reject eliminable registers. */
12321 if (strict && ! (reload_in_progress || reload_completed)
12322 && (reg_mentioned_p (frame_pointer_rtx, op)
12323 || reg_mentioned_p (arg_pointer_rtx, op)
12324 || reg_mentioned_p (virtual_incoming_args_rtx, op)
12325 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
12326 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
12327 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
12328 return FALSE;
12330 /* Constants are converted into offsets from labels. */
12331 if (!MEM_P (op))
12332 return FALSE;
12334 ind = XEXP (op, 0);
12336 if (reload_completed
12337 && (GET_CODE (ind) == LABEL_REF
12338 || (GET_CODE (ind) == CONST
12339 && GET_CODE (XEXP (ind, 0)) == PLUS
12340 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
12341 && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
12342 return TRUE;
12344 /* Match: (mem (reg)). */
12345 if (REG_P (ind))
12346 return arm_address_register_rtx_p (ind, 0);
12348 /* Allow post-increment with Neon registers. */
12349 if ((type != 1 && GET_CODE (ind) == POST_INC)
12350 || (type == 0 && GET_CODE (ind) == PRE_DEC))
12351 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
12353 /* Allow post-increment by register for VLDn. */
12354 if (type == 2 && GET_CODE (ind) == POST_MODIFY
12355 && GET_CODE (XEXP (ind, 1)) == PLUS
12356 && REG_P (XEXP (XEXP (ind, 1), 1)))
12357 return true;
12359 /* Match:
12360 (plus (reg)
12361 (const)). */
12362 if (type == 0
12363 && GET_CODE (ind) == PLUS
12364 && REG_P (XEXP (ind, 0))
12365 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
12366 && CONST_INT_P (XEXP (ind, 1))
12367 && INTVAL (XEXP (ind, 1)) > -1024
12368 /* For quad modes, we restrict the constant offset to be slightly less
12369 than what the instruction format permits. We have no such constraint
12370 on double mode offsets. (This must match arm_legitimate_index_p.) */
12371 && (INTVAL (XEXP (ind, 1))
12372 < (VALID_NEON_QREG_MODE (GET_MODE (op))? 1016 : 1024))
12373 && (INTVAL (XEXP (ind, 1)) & 3) == 0)
12374 return TRUE;
12376 return FALSE;
12379 /* Return TRUE if OP is a mem suitable for loading/storing a Neon struct
12380 type. */
12382 neon_struct_mem_operand (rtx op)
12384 rtx ind;
12386 /* Reject eliminable registers. */
12387 if (! (reload_in_progress || reload_completed)
12388 && ( reg_mentioned_p (frame_pointer_rtx, op)
12389 || reg_mentioned_p (arg_pointer_rtx, op)
12390 || reg_mentioned_p (virtual_incoming_args_rtx, op)
12391 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
12392 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
12393 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
12394 return FALSE;
12396 /* Constants are converted into offsets from labels. */
12397 if (!MEM_P (op))
12398 return FALSE;
12400 ind = XEXP (op, 0);
12402 if (reload_completed
12403 && (GET_CODE (ind) == LABEL_REF
12404 || (GET_CODE (ind) == CONST
12405 && GET_CODE (XEXP (ind, 0)) == PLUS
12406 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
12407 && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
12408 return TRUE;
12410 /* Match: (mem (reg)). */
12411 if (REG_P (ind))
12412 return arm_address_register_rtx_p (ind, 0);
12414 /* vldm/vstm allows POST_INC (ia) and PRE_DEC (db). */
12415 if (GET_CODE (ind) == POST_INC
12416 || GET_CODE (ind) == PRE_DEC)
12417 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
12419 return FALSE;
12422 /* Return true if X is a register that will be eliminated later on. */
12424 arm_eliminable_register (rtx x)
12426 return REG_P (x) && (REGNO (x) == FRAME_POINTER_REGNUM
12427 || REGNO (x) == ARG_POINTER_REGNUM
12428 || (REGNO (x) >= FIRST_VIRTUAL_REGISTER
12429 && REGNO (x) <= LAST_VIRTUAL_REGISTER));
12432 /* Return GENERAL_REGS if a scratch register is required to reload X to/from
12433 coprocessor registers. Otherwise return NO_REGS. */
12435 enum reg_class
12436 coproc_secondary_reload_class (machine_mode mode, rtx x, bool wb)
12438 if (mode == HFmode)
12440 if (!TARGET_NEON_FP16 && !TARGET_VFP_FP16INST)
12441 return GENERAL_REGS;
12442 if (s_register_operand (x, mode) || neon_vector_mem_operand (x, 2, true))
12443 return NO_REGS;
12444 return GENERAL_REGS;
12447 /* The neon move patterns handle all legitimate vector and struct
12448 addresses. */
12449 if (TARGET_NEON
12450 && (MEM_P (x) || GET_CODE (x) == CONST_VECTOR)
12451 && (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
12452 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT
12453 || VALID_NEON_STRUCT_MODE (mode)))
12454 return NO_REGS;
12456 if (arm_coproc_mem_operand (x, wb) || s_register_operand (x, mode))
12457 return NO_REGS;
12459 return GENERAL_REGS;
12462 /* Values which must be returned in the most-significant end of the return
12463 register. */
12465 static bool
12466 arm_return_in_msb (const_tree valtype)
12468 return (TARGET_AAPCS_BASED
12469 && BYTES_BIG_ENDIAN
12470 && (AGGREGATE_TYPE_P (valtype)
12471 || TREE_CODE (valtype) == COMPLEX_TYPE
12472 || FIXED_POINT_TYPE_P (valtype)));
12475 /* Return TRUE if X references a SYMBOL_REF. */
12477 symbol_mentioned_p (rtx x)
12479 const char * fmt;
12480 int i;
12482 if (GET_CODE (x) == SYMBOL_REF)
12483 return 1;
12485 /* UNSPEC_TLS entries for a symbol include the SYMBOL_REF, but they
12486 are constant offsets, not symbols. */
12487 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
12488 return 0;
12490 fmt = GET_RTX_FORMAT (GET_CODE (x));
12492 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
12494 if (fmt[i] == 'E')
12496 int j;
12498 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
12499 if (symbol_mentioned_p (XVECEXP (x, i, j)))
12500 return 1;
12502 else if (fmt[i] == 'e' && symbol_mentioned_p (XEXP (x, i)))
12503 return 1;
12506 return 0;
12509 /* Return TRUE if X references a LABEL_REF. */
12511 label_mentioned_p (rtx x)
12513 const char * fmt;
12514 int i;
12516 if (GET_CODE (x) == LABEL_REF)
12517 return 1;
12519 /* UNSPEC_TLS entries for a symbol include a LABEL_REF for the referencing
12520 instruction, but they are constant offsets, not symbols. */
12521 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
12522 return 0;
12524 fmt = GET_RTX_FORMAT (GET_CODE (x));
12525 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
12527 if (fmt[i] == 'E')
12529 int j;
12531 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
12532 if (label_mentioned_p (XVECEXP (x, i, j)))
12533 return 1;
12535 else if (fmt[i] == 'e' && label_mentioned_p (XEXP (x, i)))
12536 return 1;
12539 return 0;
12543 tls_mentioned_p (rtx x)
12545 switch (GET_CODE (x))
12547 case CONST:
12548 return tls_mentioned_p (XEXP (x, 0));
12550 case UNSPEC:
12551 if (XINT (x, 1) == UNSPEC_TLS)
12552 return 1;
12554 /* Fall through. */
12555 default:
12556 return 0;
12560 /* Must not copy any rtx that uses a pc-relative address.
12561 Also, disallow copying of load-exclusive instructions that
12562 may appear after splitting of compare-and-swap-style operations
12563 so as to prevent those loops from being transformed away from their
12564 canonical forms (see PR 69904). */
12566 static bool
12567 arm_cannot_copy_insn_p (rtx_insn *insn)
12569 /* The tls call insn cannot be copied, as it is paired with a data
12570 word. */
12571 if (recog_memoized (insn) == CODE_FOR_tlscall)
12572 return true;
12574 subrtx_iterator::array_type array;
12575 FOR_EACH_SUBRTX (iter, array, PATTERN (insn), ALL)
12577 const_rtx x = *iter;
12578 if (GET_CODE (x) == UNSPEC
12579 && (XINT (x, 1) == UNSPEC_PIC_BASE
12580 || XINT (x, 1) == UNSPEC_PIC_UNIFIED))
12581 return true;
12584 rtx set = single_set (insn);
12585 if (set)
12587 rtx src = SET_SRC (set);
12588 if (GET_CODE (src) == ZERO_EXTEND)
12589 src = XEXP (src, 0);
12591 /* Catch the load-exclusive and load-acquire operations. */
12592 if (GET_CODE (src) == UNSPEC_VOLATILE
12593 && (XINT (src, 1) == VUNSPEC_LL
12594 || XINT (src, 1) == VUNSPEC_LAX))
12595 return true;
12597 return false;
12600 enum rtx_code
12601 minmax_code (rtx x)
12603 enum rtx_code code = GET_CODE (x);
12605 switch (code)
12607 case SMAX:
12608 return GE;
12609 case SMIN:
12610 return LE;
12611 case UMIN:
12612 return LEU;
12613 case UMAX:
12614 return GEU;
12615 default:
12616 gcc_unreachable ();
12620 /* Match pair of min/max operators that can be implemented via usat/ssat. */
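/* Worked examples (illustration only): HI_BOUND = 255 and LO_BOUND = 0 give
   log = exact_log2 (256) = 8, so *MASK = 8 and *SIGNED_SAT = false (usat #8);
   HI_BOUND = 127 and LO_BOUND = -128 give log = 7 with -128 == -127 - 1, so
   *MASK = 8 and *SIGNED_SAT = true (ssat #8). */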
12622 bool
12623 arm_sat_operator_match (rtx lo_bound, rtx hi_bound,
12624 int *mask, bool *signed_sat)
12626 /* The high bound must be a power of two minus one. */
12627 int log = exact_log2 (INTVAL (hi_bound) + 1);
12628 if (log == -1)
12629 return false;
12631 /* The low bound is either zero (for usat) or one less than the
12632 negation of the high bound (for ssat). */
12633 if (INTVAL (lo_bound) == 0)
12635 if (mask)
12636 *mask = log;
12637 if (signed_sat)
12638 *signed_sat = false;
12640 return true;
12643 if (INTVAL (lo_bound) == -INTVAL (hi_bound) - 1)
12645 if (mask)
12646 *mask = log + 1;
12647 if (signed_sat)
12648 *signed_sat = true;
12650 return true;
12653 return false;
12656 /* Return 1 if memory locations are adjacent. */
12658 adjacent_mem_locations (rtx a, rtx b)
12660 /* We don't guarantee to preserve the order of these memory refs. */
12661 if (volatile_refs_p (a) || volatile_refs_p (b))
12662 return 0;
12664 if ((REG_P (XEXP (a, 0))
12665 || (GET_CODE (XEXP (a, 0)) == PLUS
12666 && CONST_INT_P (XEXP (XEXP (a, 0), 1))))
12667 && (REG_P (XEXP (b, 0))
12668 || (GET_CODE (XEXP (b, 0)) == PLUS
12669 && CONST_INT_P (XEXP (XEXP (b, 0), 1)))))
12671 HOST_WIDE_INT val0 = 0, val1 = 0;
12672 rtx reg0, reg1;
12673 int val_diff;
12675 if (GET_CODE (XEXP (a, 0)) == PLUS)
12677 reg0 = XEXP (XEXP (a, 0), 0);
12678 val0 = INTVAL (XEXP (XEXP (a, 0), 1));
12680 else
12681 reg0 = XEXP (a, 0);
12683 if (GET_CODE (XEXP (b, 0)) == PLUS)
12685 reg1 = XEXP (XEXP (b, 0), 0);
12686 val1 = INTVAL (XEXP (XEXP (b, 0), 1));
12688 else
12689 reg1 = XEXP (b, 0);
12691 /* Don't accept any offset that will require multiple
12692 instructions to handle, since this would cause the
12693 arith_adjacentmem pattern to output an overlong sequence. */
12694 if (!const_ok_for_op (val0, PLUS) || !const_ok_for_op (val1, PLUS))
12695 return 0;
12697 /* Don't allow an eliminable register: register elimination can make
12698 the offset too large. */
12699 if (arm_eliminable_register (reg0))
12700 return 0;
12702 val_diff = val1 - val0;
12704 if (arm_ld_sched)
12706 /* If the target has load delay slots, then there's no benefit
12707 to using an ldm instruction unless the offset is zero and
12708 we are optimizing for size. */
12709 return (optimize_size && (REGNO (reg0) == REGNO (reg1))
12710 && (val0 == 0 || val1 == 0 || val0 == 4 || val1 == 4)
12711 && (val_diff == 4 || val_diff == -4));
12714 return ((REGNO (reg0) == REGNO (reg1))
12715 && (val_diff == 4 || val_diff == -4));
12718 return 0;
12721 /* Return true if OP is a valid load or store multiple operation. LOAD is true
12722 for load operations, false for store operations. CONSECUTIVE is true
12723 if the register numbers in the operation must be consecutive in the register
12724 bank. RETURN_PC is true if the value is to be loaded into PC.
12725 The pattern we are trying to match for load is:
12726 [(SET (R_d0) (MEM (PLUS (addr) (offset))))
12727 (SET (R_d1) (MEM (PLUS (addr) (offset + <reg_increment>))))
12730 (SET (R_dn) (MEM (PLUS (addr) (offset + n * <reg_increment>))))
12732 where
12733 1. If offset is 0, first insn should be (SET (R_d0) (MEM (src_addr))).
12734 2. REGNO (R_d0) < REGNO (R_d1) < ... < REGNO (R_dn).
12735 3. If consecutive is TRUE, then for kth register being loaded,
12736 REGNO (R_dk) = REGNO (R_d0) + k.
12737 The pattern for store is similar. */
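/* Illustrative example (not from the sources): an ldmia of r5, r6 from the
   base register r4 is matched in the form
     (parallel [(set (reg:SI r5) (mem:SI (reg:SI r4)))
                (set (reg:SI r6) (mem:SI (plus:SI (reg:SI r4)
                                                  (const_int 4))))])
   with offset 0, ascending register numbers and <reg_increment> = 4. */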
12738 bool
12739 ldm_stm_operation_p (rtx op, bool load, machine_mode mode,
12740 bool consecutive, bool return_pc)
12742 HOST_WIDE_INT count = XVECLEN (op, 0);
12743 rtx reg, mem, addr;
12744 unsigned regno;
12745 unsigned first_regno;
12746 HOST_WIDE_INT i = 1, base = 0, offset = 0;
12747 rtx elt;
12748 bool addr_reg_in_reglist = false;
12749 bool update = false;
12750 int reg_increment;
12751 int offset_adj;
12752 int regs_per_val;
12754 /* If not in SImode, then registers must be consecutive
12755 (e.g., VLDM instructions for DFmode). */
12756 gcc_assert ((mode == SImode) || consecutive);
12757 /* Setting return_pc for stores is illegal. */
12758 gcc_assert (!return_pc || load);
12760 /* Set up the increments and the regs per val based on the mode. */
12761 reg_increment = GET_MODE_SIZE (mode);
12762 regs_per_val = reg_increment / 4;
12763 offset_adj = return_pc ? 1 : 0;
12765 if (count <= 1
12766 || GET_CODE (XVECEXP (op, 0, offset_adj)) != SET
12767 || (load && !REG_P (SET_DEST (XVECEXP (op, 0, offset_adj)))))
12768 return false;
12770 /* Check if this is a write-back. */
12771 elt = XVECEXP (op, 0, offset_adj);
12772 if (GET_CODE (SET_SRC (elt)) == PLUS)
12774 i++;
12775 base = 1;
12776 update = true;
12778 /* The offset adjustment must be the number of registers being
12779 popped times the size of a single register. */
12780 if (!REG_P (SET_DEST (elt))
12781 || !REG_P (XEXP (SET_SRC (elt), 0))
12782 || (REGNO (SET_DEST (elt)) != REGNO (XEXP (SET_SRC (elt), 0)))
12783 || !CONST_INT_P (XEXP (SET_SRC (elt), 1))
12784 || INTVAL (XEXP (SET_SRC (elt), 1)) !=
12785 ((count - 1 - offset_adj) * reg_increment))
12786 return false;
12789 i = i + offset_adj;
12790 base = base + offset_adj;
12791 /* Perform a quick check so we don't blow up below. If only one reg is loaded,
12792 success depends on the type: VLDM can do just one reg,
12793 LDM must do at least two. */
12794 if ((count <= i) && (mode == SImode))
12795 return false;
12797 elt = XVECEXP (op, 0, i - 1);
12798 if (GET_CODE (elt) != SET)
12799 return false;
12801 if (load)
12803 reg = SET_DEST (elt);
12804 mem = SET_SRC (elt);
12806 else
12808 reg = SET_SRC (elt);
12809 mem = SET_DEST (elt);
12812 if (!REG_P (reg) || !MEM_P (mem))
12813 return false;
12815 regno = REGNO (reg);
12816 first_regno = regno;
12817 addr = XEXP (mem, 0);
12818 if (GET_CODE (addr) == PLUS)
12820 if (!CONST_INT_P (XEXP (addr, 1)))
12821 return false;
12823 offset = INTVAL (XEXP (addr, 1));
12824 addr = XEXP (addr, 0);
12827 if (!REG_P (addr))
12828 return false;
12830 /* Don't allow SP to be loaded unless it is also the base register. It
12831 guarantees that SP is reset correctly when an LDM instruction
12832 is interrupted. Otherwise, we might end up with a corrupt stack. */
12833 if (load && (REGNO (reg) == SP_REGNUM) && (REGNO (addr) != SP_REGNUM))
12834 return false;
12836 for (; i < count; i++)
12838 elt = XVECEXP (op, 0, i);
12839 if (GET_CODE (elt) != SET)
12840 return false;
12842 if (load)
12844 reg = SET_DEST (elt);
12845 mem = SET_SRC (elt);
12847 else
12849 reg = SET_SRC (elt);
12850 mem = SET_DEST (elt);
12853 if (!REG_P (reg)
12854 || GET_MODE (reg) != mode
12855 || REGNO (reg) <= regno
12856 || (consecutive
12857 && (REGNO (reg) !=
12858 (unsigned int) (first_regno + regs_per_val * (i - base))))
12859 /* Don't allow SP to be loaded unless it is also the base register. It
12860 guarantees that SP is reset correctly when an LDM instruction
12861 is interrupted. Otherwise, we might end up with a corrupt stack. */
12862 || (load && (REGNO (reg) == SP_REGNUM) && (REGNO (addr) != SP_REGNUM))
12863 || !MEM_P (mem)
12864 || GET_MODE (mem) != mode
12865 || ((GET_CODE (XEXP (mem, 0)) != PLUS
12866 || !rtx_equal_p (XEXP (XEXP (mem, 0), 0), addr)
12867 || !CONST_INT_P (XEXP (XEXP (mem, 0), 1))
12868 || (INTVAL (XEXP (XEXP (mem, 0), 1)) !=
12869 offset + (i - base) * reg_increment))
12870 && (!REG_P (XEXP (mem, 0))
12871 || offset + (i - base) * reg_increment != 0)))
12872 return false;
12874 regno = REGNO (reg);
12875 if (regno == REGNO (addr))
12876 addr_reg_in_reglist = true;
12879 if (load)
12881 if (update && addr_reg_in_reglist)
12882 return false;
12884 /* For Thumb-1, the address register is always modified, either by write-back
12885 or by an explicit load. If the pattern does not describe an update,
12886 then the address register must be in the list of loaded registers. */
12887 if (TARGET_THUMB1)
12888 return update || addr_reg_in_reglist;
12891 return true;
12894 /* Return true iff it would be profitable to turn a sequence of NOPS loads
12895 or stores (depending on IS_STORE) into a load-multiple or store-multiple
12896 instruction. ADD_OFFSET is nonzero if the base address register needs
12897 to be modified with an add instruction before we can use it. */
12899 static bool
12900 multiple_operation_profitable_p (bool is_store ATTRIBUTE_UNUSED,
12901 int nops, HOST_WIDE_INT add_offset)
12903 /* For ARM8,9 & StrongARM, 2 ldr instructions are faster than an ldm
12904 if the offset isn't small enough. The reason 2 ldrs are faster
12905 is because these ARMs are able to do more than one cache access
12906 in a single cycle. The ARM9 and StrongARM have Harvard caches,
12907 whilst the ARM8 has a double bandwidth cache. This means that
12908 these cores can do both an instruction fetch and a data fetch in
12909 a single cycle, so the trick of calculating the address into a
12910 scratch register (one of the result regs) and then doing a load
12911 multiple actually becomes slower (and no smaller in code size).
12912 That is the transformation
12914 ldr rd1, [rbase + offset]
12915 ldr rd2, [rbase + offset + 4]
12919 add rd1, rbase, offset
12920 ldmia rd1, {rd1, rd2}
12922 produces worse code -- '3 cycles + any stalls on rd2' instead of
12923 '2 cycles + any stalls on rd2'. On ARMs with only one cache
12924 access per cycle, the first sequence could never complete in less
12925 than 6 cycles, whereas the ldm sequence would only take 5 and
12926 would make better use of sequential accesses if not hitting the
12927 cache.
12929 We cheat here and test 'arm_ld_sched' which we currently know to
12930 only be true for the ARM8, ARM9 and StrongARM. If this ever
12931 changes, then the test below needs to be reworked. */
12932 if (nops == 2 && arm_ld_sched && add_offset != 0)
12933 return false;
12935 /* XScale has load-store double instructions, but they have stricter
12936 alignment requirements than load-store multiple, so we cannot
12937 use them.
12939 For XScale ldm requires 2 + NREGS cycles to complete and blocks
12940 the pipeline until completion.
12942 NREGS CYCLES
12948 An ldr instruction takes 1-3 cycles, but does not block the
12949 pipeline.
12951 NREGS CYCLES
12952 1 1-3
12953 2 2-6
12954 3 3-9
12955 4 4-12
12957 Best case ldr will always win. However, the more ldr instructions
12958 we issue, the less likely we are to be able to schedule them well.
12959 Using ldr instructions also increases code size.
12961 As a compromise, we use ldr for counts of 1 or 2 regs, and ldm
12962 for counts of 3 or 4 regs. */
12963 if (nops <= 2 && arm_tune_xscale && !optimize_size)
12964 return false;
12965 return true;
12968 /* Subroutine of load_multiple_sequence and store_multiple_sequence.
12969 Given an array of UNSORTED_OFFSETS, of which there are NOPS, compute
12970 an array ORDER which describes the sequence to use when accessing the
12971 offsets that produces an ascending order. In this sequence, each
12972 offset must be larger by exactly 4 than the previous one. ORDER[0]
12973 must have been filled in with the lowest offset by the caller.
12974 If UNSORTED_REGS is nonnull, it is an array of register numbers that
12975 we use to verify that ORDER produces an ascending order of registers.
12976 Return true if it was possible to construct such an order, false if
12977 not. */
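/* Worked example (illustration only): for UNSORTED_OFFSETS = {8, 0, 4, 12}
   the caller sets ORDER[0] = 1 (the index of the lowest offset); the loop
   below then fills ORDER = {1, 2, 0, 3}, visiting the offsets as
   0, 4, 8, 12. */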
12979 static bool
12980 compute_offset_order (int nops, HOST_WIDE_INT *unsorted_offsets, int *order,
12981 int *unsorted_regs)
12983 int i;
12984 for (i = 1; i < nops; i++)
12986 int j;
12988 order[i] = order[i - 1];
12989 for (j = 0; j < nops; j++)
12990 if (unsorted_offsets[j] == unsorted_offsets[order[i - 1]] + 4)
12992 /* We must find exactly one offset that is higher than the
12993 previous one by 4. */
12994 if (order[i] != order[i - 1])
12995 return false;
12996 order[i] = j;
12998 if (order[i] == order[i - 1])
12999 return false;
13000 /* The register numbers must be ascending. */
13001 if (unsorted_regs != NULL
13002 && unsorted_regs[order[i]] <= unsorted_regs[order[i - 1]])
13003 return false;
13005 return true;
13008 /* Used to determine in a peephole whether a sequence of load
13009 instructions can be changed into a load-multiple instruction.
13010 NOPS is the number of separate load instructions we are examining. The
13011 first NOPS entries in OPERANDS are the destination registers, the
13012 next NOPS entries are memory operands. If this function is
13013 successful, *BASE is set to the common base register of the memory
13014 accesses; *LOAD_OFFSET is set to the first memory location's offset
13015 from that base register.
13016 REGS is an array filled in with the destination register numbers.
13017 SAVED_ORDER (if nonnull) is an array filled in with an order that maps
13018 insn numbers to an ascending order of loads. If CHECK_REGS is true,
13019 the sequence of registers in REGS matches the loads from ascending memory
13020 locations, and the function verifies that the register numbers are
13021 themselves ascending. If CHECK_REGS is false, the register numbers
13022 are stored in the order they are found in the operands. */
13023 static int
13024 load_multiple_sequence (rtx *operands, int nops, int *regs, int *saved_order,
13025 int *base, HOST_WIDE_INT *load_offset, bool check_regs)
13027 int unsorted_regs[MAX_LDM_STM_OPS];
13028 HOST_WIDE_INT unsorted_offsets[MAX_LDM_STM_OPS];
13029 int order[MAX_LDM_STM_OPS];
13030 rtx base_reg_rtx = NULL;
13031 int base_reg = -1;
13032 int i, ldm_case;
13034 /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
13035 easily extended if required. */
13036 gcc_assert (nops >= 2 && nops <= MAX_LDM_STM_OPS);
13038 memset (order, 0, MAX_LDM_STM_OPS * sizeof (int));
13040 /* Loop over the operands and check that the memory references are
13041 suitable (i.e. immediate offsets from the same base register). At
13042 the same time, extract the target register, and the memory
13043 offsets. */
13044 for (i = 0; i < nops; i++)
13046 rtx reg;
13047 rtx offset;
13049 /* Convert a subreg of a mem into the mem itself. */
13050 if (GET_CODE (operands[nops + i]) == SUBREG)
13051 operands[nops + i] = alter_subreg (operands + (nops + i), true);
13053 gcc_assert (MEM_P (operands[nops + i]));
13055 /* Don't reorder volatile memory references; it doesn't seem worth
13056 looking for the case where the order is ok anyway. */
13057 if (MEM_VOLATILE_P (operands[nops + i]))
13058 return 0;
13060 offset = const0_rtx;
13062 if ((REG_P (reg = XEXP (operands[nops + i], 0))
13063 || (GET_CODE (reg) == SUBREG
13064 && REG_P (reg = SUBREG_REG (reg))))
13065 || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
13066 && ((REG_P (reg = XEXP (XEXP (operands[nops + i], 0), 0)))
13067 || (GET_CODE (reg) == SUBREG
13068 && REG_P (reg = SUBREG_REG (reg))))
13069 && (CONST_INT_P (offset
13070 = XEXP (XEXP (operands[nops + i], 0), 1)))))
13072 if (i == 0)
13074 base_reg = REGNO (reg);
13075 base_reg_rtx = reg;
13076 if (TARGET_THUMB1 && base_reg > LAST_LO_REGNUM)
13077 return 0;
13079 else if (base_reg != (int) REGNO (reg))
13080 /* Not addressed from the same base register. */
13081 return 0;
13083 unsorted_regs[i] = (REG_P (operands[i])
13084 ? REGNO (operands[i])
13085 : REGNO (SUBREG_REG (operands[i])));
13087 /* If it isn't an integer register, or if it overwrites the
13088 base register but isn't the last insn in the list, then
13089 we can't do this. */
13090 if (unsorted_regs[i] < 0
13091 || (TARGET_THUMB1 && unsorted_regs[i] > LAST_LO_REGNUM)
13092 || unsorted_regs[i] > 14
13093 || (i != nops - 1 && unsorted_regs[i] == base_reg))
13094 return 0;
13096 /* Don't allow SP to be loaded unless it is also the base
13097 register. It guarantees that SP is reset correctly when
13098 an LDM instruction is interrupted. Otherwise, we might
13099 end up with a corrupt stack. */
13100 if (unsorted_regs[i] == SP_REGNUM && base_reg != SP_REGNUM)
13101 return 0;
13103 unsorted_offsets[i] = INTVAL (offset);
13104 if (i == 0 || unsorted_offsets[i] < unsorted_offsets[order[0]])
13105 order[0] = i;
13107 else
13108 /* Not a suitable memory address. */
13109 return 0;
13112 /* All the useful information has now been extracted from the
13113 operands into unsorted_regs and unsorted_offsets; additionally,
13114 order[0] has been set to the lowest offset in the list. Sort
13115 the offsets into order, verifying that they are adjacent, and
13116 check that the register numbers are ascending. */
13117 if (!compute_offset_order (nops, unsorted_offsets, order,
13118 check_regs ? unsorted_regs : NULL))
13119 return 0;
13121 if (saved_order)
13122 memcpy (saved_order, order, sizeof order);
13124 if (base)
13126 *base = base_reg;
13128 for (i = 0; i < nops; i++)
13129 regs[i] = unsorted_regs[check_regs ? order[i] : i];
13131 *load_offset = unsorted_offsets[order[0]];
13134 if (TARGET_THUMB1
13135 && !peep2_reg_dead_p (nops, base_reg_rtx))
13136 return 0;
13138 if (unsorted_offsets[order[0]] == 0)
13139 ldm_case = 1; /* ldmia */
13140 else if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
13141 ldm_case = 2; /* ldmib */
13142 else if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
13143 ldm_case = 3; /* ldmda */
13144 else if (TARGET_32BIT && unsorted_offsets[order[nops - 1]] == -4)
13145 ldm_case = 4; /* ldmdb */
13146 else if (const_ok_for_arm (unsorted_offsets[order[0]])
13147 || const_ok_for_arm (-unsorted_offsets[order[0]]))
13148 ldm_case = 5;
13149 else
13150 return 0;
13152 if (!multiple_operation_profitable_p (false, nops,
13153 ldm_case == 5
13154 ? unsorted_offsets[order[0]] : 0))
13155 return 0;
13157 return ldm_case;
13160 /* Used to determine in a peephole whether a sequence of store instructions can
13161 be changed into a store-multiple instruction.
13162 NOPS is the number of separate store instructions we are examining.
13163 NOPS_TOTAL is the total number of instructions recognized by the peephole
13164 pattern.
13165 The first NOPS entries in OPERANDS are the source registers, the next
13166 NOPS entries are memory operands. If this function is successful, *BASE is
13167 set to the common base register of the memory accesses; *LOAD_OFFSET is set
13168 to the first memory location's offset from that base register. REGS is an
13169 array filled in with the source register numbers, REG_RTXS (if nonnull) is
13170 likewise filled with the corresponding rtx's.
13171 SAVED_ORDER (if nonnull) is an array filled in with an order that maps insn
13172 numbers to an ascending order of stores.
13173 If CHECK_REGS is true, the sequence of registers in *REGS matches the stores
13174 from ascending memory locations, and the function verifies that the register
13175 numbers are themselves ascending. If CHECK_REGS is false, the register
13176 numbers are stored in the order they are found in the operands. */
13177 static int
13178 store_multiple_sequence (rtx *operands, int nops, int nops_total,
13179 int *regs, rtx *reg_rtxs, int *saved_order, int *base,
13180 HOST_WIDE_INT *load_offset, bool check_regs)
13182 int unsorted_regs[MAX_LDM_STM_OPS];
13183 rtx unsorted_reg_rtxs[MAX_LDM_STM_OPS];
13184 HOST_WIDE_INT unsorted_offsets[MAX_LDM_STM_OPS];
13185 int order[MAX_LDM_STM_OPS];
13186 int base_reg = -1;
13187 rtx base_reg_rtx = NULL;
13188 int i, stm_case;
13190 /* Write-back of the base register is currently only supported for Thumb 1. */
13191 int base_writeback = TARGET_THUMB1;
13193 /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
13194 easily extended if required. */
13195 gcc_assert (nops >= 2 && nops <= MAX_LDM_STM_OPS);
13197 memset (order, 0, MAX_LDM_STM_OPS * sizeof (int));
13199 /* Loop over the operands and check that the memory references are
13200 suitable (i.e. immediate offsets from the same base register). At
13201 the same time, extract the target register, and the memory
13202 offsets. */
13203 for (i = 0; i < nops; i++)
13205 rtx reg;
13206 rtx offset;
13208 /* Convert a subreg of a mem into the mem itself. */
13209 if (GET_CODE (operands[nops + i]) == SUBREG)
13210 operands[nops + i] = alter_subreg (operands + (nops + i), true);
13212 gcc_assert (MEM_P (operands[nops + i]));
13214 /* Don't reorder volatile memory references; it doesn't seem worth
13215 looking for the case where the order is ok anyway. */
13216 if (MEM_VOLATILE_P (operands[nops + i]))
13217 return 0;
13219 offset = const0_rtx;
13221 if ((REG_P (reg = XEXP (operands[nops + i], 0))
13222 || (GET_CODE (reg) == SUBREG
13223 && REG_P (reg = SUBREG_REG (reg))))
13224 || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
13225 && ((REG_P (reg = XEXP (XEXP (operands[nops + i], 0), 0)))
13226 || (GET_CODE (reg) == SUBREG
13227 && REG_P (reg = SUBREG_REG (reg))))
13228 && (CONST_INT_P (offset
13229 = XEXP (XEXP (operands[nops + i], 0), 1)))))
13231 unsorted_reg_rtxs[i] = (REG_P (operands[i])
13232 ? operands[i] : SUBREG_REG (operands[i]));
13233 unsorted_regs[i] = REGNO (unsorted_reg_rtxs[i]);
13235 if (i == 0)
13237 base_reg = REGNO (reg);
13238 base_reg_rtx = reg;
13239 if (TARGET_THUMB1 && base_reg > LAST_LO_REGNUM)
13240 return 0;
13242 else if (base_reg != (int) REGNO (reg))
13243 /* Not addressed from the same base register. */
13244 return 0;
13246 /* If it isn't an integer register, then we can't do this. */
13247 if (unsorted_regs[i] < 0
13248 || (TARGET_THUMB1 && unsorted_regs[i] > LAST_LO_REGNUM)
13249 /* The effects are unpredictable if the base register is
13250 both updated and stored. */
13251 || (base_writeback && unsorted_regs[i] == base_reg)
13252 || (TARGET_THUMB2 && unsorted_regs[i] == SP_REGNUM)
13253 || unsorted_regs[i] > 14)
13254 return 0;
13256 unsorted_offsets[i] = INTVAL (offset);
13257 if (i == 0 || unsorted_offsets[i] < unsorted_offsets[order[0]])
13258 order[0] = i;
13260 else
13261 /* Not a suitable memory address. */
13262 return 0;
13265 /* All the useful information has now been extracted from the
13266 operands into unsorted_regs and unsorted_offsets; additionally,
13267 order[0] has been set to the lowest offset in the list. Sort
13268 the offsets into order, verifying that they are adjacent, and
13269 check that the register numbers are ascending. */
13270 if (!compute_offset_order (nops, unsorted_offsets, order,
13271 check_regs ? unsorted_regs : NULL))
13272 return 0;
13274 if (saved_order)
13275 memcpy (saved_order, order, sizeof order);
13277 if (base)
13279 *base = base_reg;
13281 for (i = 0; i < nops; i++)
13283 regs[i] = unsorted_regs[check_regs ? order[i] : i];
13284 if (reg_rtxs)
13285 reg_rtxs[i] = unsorted_reg_rtxs[check_regs ? order[i] : i];
13288 *load_offset = unsorted_offsets[order[0]];
13291 if (TARGET_THUMB1
13292 && !peep2_reg_dead_p (nops_total, base_reg_rtx))
13293 return 0;
13295 if (unsorted_offsets[order[0]] == 0)
13296 stm_case = 1; /* stmia */
13297 else if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
13298 stm_case = 2; /* stmib */
13299 else if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
13300 stm_case = 3; /* stmda */
13301 else if (TARGET_32BIT && unsorted_offsets[order[nops - 1]] == -4)
13302 stm_case = 4; /* stmdb */
13303 else
13304 return 0;
13306 if (!multiple_operation_profitable_p (false, nops, 0))
13307 return 0;
13309 return stm_case;
13312 /* Routines for use in generating RTL. */
13314 /* Generate a load-multiple instruction. COUNT is the number of loads in
13315 the instruction; REGS and MEMS are arrays containing the operands.
13316 BASEREG is the base register to be used in addressing the memory operands.
13317 WBACK_OFFSET is nonzero if the instruction should update the base
13318 register. */
13320 static rtx
13321 arm_gen_load_multiple_1 (int count, int *regs, rtx *mems, rtx basereg,
13322 HOST_WIDE_INT wback_offset)
13324 int i = 0, j;
13325 rtx result;
13327 if (!multiple_operation_profitable_p (false, count, 0))
13329 rtx seq;
13331 start_sequence ();
13333 for (i = 0; i < count; i++)
13334 emit_move_insn (gen_rtx_REG (SImode, regs[i]), mems[i]);
13336 if (wback_offset != 0)
13337 emit_move_insn (basereg, plus_constant (Pmode, basereg, wback_offset));
13339 seq = get_insns ();
13340 end_sequence ();
13342 return seq;
13345 result = gen_rtx_PARALLEL (VOIDmode,
13346 rtvec_alloc (count + (wback_offset != 0 ? 1 : 0)));
13347 if (wback_offset != 0)
13349 XVECEXP (result, 0, 0)
13350 = gen_rtx_SET (basereg, plus_constant (Pmode, basereg, wback_offset));
13351 i = 1;
13352 count++;
13355 for (j = 0; i < count; i++, j++)
13356 XVECEXP (result, 0, i)
13357 = gen_rtx_SET (gen_rtx_REG (SImode, regs[j]), mems[j]);
13359 return result;
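/* For instance, with COUNT == 2, REGS == {4, 5} and WBACK_OFFSET == 8 the
   function builds
     (parallel [(set basereg (plus basereg (const_int 8)))
                (set (reg:SI 4) mems[0])
                (set (reg:SI 5) mems[1])])
   ready to be matched as a load multiple with writeback.  */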
13362 /* Generate a store-multiple instruction. COUNT is the number of stores in
13363 the instruction; REGS and MEMS are arrays containing the operands.
13364 BASEREG is the base register to be used in addressing the memory operands.
13365 WBACK_OFFSET is nonzero if the instruction should update the base
13366 register. */
13368 static rtx
13369 arm_gen_store_multiple_1 (int count, int *regs, rtx *mems, rtx basereg,
13370 HOST_WIDE_INT wback_offset)
13372 int i = 0, j;
13373 rtx result;
13375 if (GET_CODE (basereg) == PLUS)
13376 basereg = XEXP (basereg, 0);
13378 if (!multiple_operation_profitable_p (false, count, 0))
13380 rtx seq;
13382 start_sequence ();
13384 for (i = 0; i < count; i++)
13385 emit_move_insn (mems[i], gen_rtx_REG (SImode, regs[i]));
13387 if (wback_offset != 0)
13388 emit_move_insn (basereg, plus_constant (Pmode, basereg, wback_offset));
13390 seq = get_insns ();
13391 end_sequence ();
13393 return seq;
13396 result = gen_rtx_PARALLEL (VOIDmode,
13397 rtvec_alloc (count + (wback_offset != 0 ? 1 : 0)));
13398 if (wback_offset != 0)
13400 XVECEXP (result, 0, 0)
13401 = gen_rtx_SET (basereg, plus_constant (Pmode, basereg, wback_offset));
13402 i = 1;
13403 count++;
13406 for (j = 0; i < count; i++, j++)
13407 XVECEXP (result, 0, i)
13408 = gen_rtx_SET (mems[j], gen_rtx_REG (SImode, regs[j]));
13410 return result;
13413 /* Generate either a load-multiple or a store-multiple instruction. This
13414 function can be used in situations where we can start with a single MEM
13415 rtx and adjust its address upwards.
13416 COUNT is the number of operations in the instruction, not counting a
13417 possible update of the base register. REGS is an array containing the
13418 register operands.
13419 BASEREG is the base register to be used in addressing the memory operands,
13420 which are constructed from BASEMEM.
13421 WRITE_BACK specifies whether the generated instruction should include an
13422 update of the base register.
13423 OFFSETP is used to pass an offset to and from this function; this offset
13424 is not used when constructing the address (instead BASEMEM should have an
13425 appropriate offset in its address), it is used only for setting
13426 MEM_OFFSET. It is updated only if WRITE_BACK is true. */
13428 static rtx
13429 arm_gen_multiple_op (bool is_load, int *regs, int count, rtx basereg,
13430 bool write_back, rtx basemem, HOST_WIDE_INT *offsetp)
13432 rtx mems[MAX_LDM_STM_OPS];
13433 HOST_WIDE_INT offset = *offsetp;
13434 int i;
13436 gcc_assert (count <= MAX_LDM_STM_OPS);
13438 if (GET_CODE (basereg) == PLUS)
13439 basereg = XEXP (basereg, 0);
13441 for (i = 0; i < count; i++)
13443 rtx addr = plus_constant (Pmode, basereg, i * 4);
13444 mems[i] = adjust_automodify_address_nv (basemem, SImode, addr, offset);
13445 offset += 4;
13448 if (write_back)
13449 *offsetp = offset;
13451 if (is_load)
13452 return arm_gen_load_multiple_1 (count, regs, mems, basereg,
13453 write_back ? 4 * count : 0);
13454 else
13455 return arm_gen_store_multiple_1 (count, regs, mems, basereg,
13456 write_back ? 4 * count : 0);
13460 arm_gen_load_multiple (int *regs, int count, rtx basereg, int write_back,
13461 rtx basemem, HOST_WIDE_INT *offsetp)
13463 return arm_gen_multiple_op (TRUE, regs, count, basereg, write_back, basemem,
13464 offsetp);
13468 arm_gen_store_multiple (int *regs, int count, rtx basereg, int write_back,
13469 rtx basemem, HOST_WIDE_INT *offsetp)
13471 return arm_gen_multiple_op (FALSE, regs, count, basereg, write_back, basemem,
13472 offsetp);
13475 /* Called from a peephole2 expander to turn a sequence of loads into an
13476 LDM instruction. OPERANDS are the operands found by the peephole matcher;
13477 NOPS indicates how many separate loads we are trying to combine. SORT_REGS
13478 is true if we can reorder the registers because they are used commutatively
13479 subsequently.
13480 Returns true iff we could generate a new instruction. */
13482 bool
13483 gen_ldm_seq (rtx *operands, int nops, bool sort_regs)
13485 int regs[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
13486 rtx mems[MAX_LDM_STM_OPS];
13487 int i, j, base_reg;
13488 rtx base_reg_rtx;
13489 HOST_WIDE_INT offset;
13490 int write_back = FALSE;
13491 int ldm_case;
13492 rtx addr;
13494 ldm_case = load_multiple_sequence (operands, nops, regs, mem_order,
13495 &base_reg, &offset, !sort_regs);
13497 if (ldm_case == 0)
13498 return false;
13500 if (sort_regs)
13501 for (i = 0; i < nops - 1; i++)
13502 for (j = i + 1; j < nops; j++)
13503 if (regs[i] > regs[j])
13505 int t = regs[i];
13506 regs[i] = regs[j];
13507 regs[j] = t;
13509 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
13511 if (TARGET_THUMB1)
13513 gcc_assert (peep2_reg_dead_p (nops, base_reg_rtx));
13514 gcc_assert (ldm_case == 1 || ldm_case == 5);
13515 write_back = TRUE;
13518 if (ldm_case == 5)
13520 rtx newbase = TARGET_THUMB1 ? base_reg_rtx : gen_rtx_REG (SImode, regs[0]);
13521 emit_insn (gen_addsi3 (newbase, base_reg_rtx, GEN_INT (offset)));
13522 offset = 0;
13523 if (!TARGET_THUMB1)
13525 base_reg = regs[0];
13526 base_reg_rtx = newbase;
13530 for (i = 0; i < nops; i++)
13532 addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
13533 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
13534 SImode, addr, 0);
13536 emit_insn (arm_gen_load_multiple_1 (nops, regs, mems, base_reg_rtx,
13537 write_back ? offset + i * 4 : 0));
13538 return true;
13541 /* Called from a peephole2 expander to turn a sequence of stores into an
13542 STM instruction. OPERANDS are the operands found by the peephole matcher;
13543 NOPS indicates how many separate stores we are trying to combine.
13544 Returns true iff we could generate a new instruction. */
13546 bool
13547 gen_stm_seq (rtx *operands, int nops)
13549 int i;
13550 int regs[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
13551 rtx mems[MAX_LDM_STM_OPS];
13552 int base_reg;
13553 rtx base_reg_rtx;
13554 HOST_WIDE_INT offset;
13555 int write_back = FALSE;
13556 int stm_case;
13557 rtx addr;
13558 bool base_reg_dies;
13560 stm_case = store_multiple_sequence (operands, nops, nops, regs, NULL,
13561 mem_order, &base_reg, &offset, true);
13563 if (stm_case == 0)
13564 return false;
13566 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
13568 base_reg_dies = peep2_reg_dead_p (nops, base_reg_rtx);
13569 if (TARGET_THUMB1)
13571 gcc_assert (base_reg_dies);
13572 write_back = TRUE;
13575 if (stm_case == 5)
13577 gcc_assert (base_reg_dies);
13578 emit_insn (gen_addsi3 (base_reg_rtx, base_reg_rtx, GEN_INT (offset)));
13579 offset = 0;
13582 addr = plus_constant (Pmode, base_reg_rtx, offset);
13584 for (i = 0; i < nops; i++)
13586 addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
13587 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
13588 SImode, addr, 0);
13590 emit_insn (arm_gen_store_multiple_1 (nops, regs, mems, base_reg_rtx,
13591 write_back ? offset + i * 4 : 0));
13592 return true;
13595 /* Called from a peephole2 expander to turn a sequence of stores that are
13596 preceded by constant loads into an STM instruction. OPERANDS are the
13597 operands found by the peephole matcher; NOPS indicates how many
13598 separate stores we are trying to combine; there are 2 * NOPS
13599 instructions in the peephole.
13600 Returns true iff we could generate a new instruction. */
13602 bool
13603 gen_const_stm_seq (rtx *operands, int nops)
13605 int regs[MAX_LDM_STM_OPS], sorted_regs[MAX_LDM_STM_OPS];
13606 int reg_order[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
13607 rtx reg_rtxs[MAX_LDM_STM_OPS], orig_reg_rtxs[MAX_LDM_STM_OPS];
13608 rtx mems[MAX_LDM_STM_OPS];
13609 int base_reg;
13610 rtx base_reg_rtx;
13611 HOST_WIDE_INT offset;
13612 int write_back = FALSE;
13613 int stm_case;
13614 rtx addr;
13615 bool base_reg_dies;
13616 int i, j;
13617 HARD_REG_SET allocated;
13619 stm_case = store_multiple_sequence (operands, nops, 2 * nops, regs, reg_rtxs,
13620 mem_order, &base_reg, &offset, false);
13622 if (stm_case == 0)
13623 return false;
13625 memcpy (orig_reg_rtxs, reg_rtxs, sizeof orig_reg_rtxs);
13627 /* If the same register is used more than once, try to find a free
13628 register. */
13629 CLEAR_HARD_REG_SET (allocated);
13630 for (i = 0; i < nops; i++)
13632 for (j = i + 1; j < nops; j++)
13633 if (regs[i] == regs[j])
13635 rtx t = peep2_find_free_register (0, nops * 2,
13636 TARGET_THUMB1 ? "l" : "r",
13637 SImode, &allocated);
13638 if (t == NULL_RTX)
13639 return false;
13640 reg_rtxs[i] = t;
13641 regs[i] = REGNO (t);
13645 /* Compute an ordering that maps the register numbers to an ascending
13646 sequence. */
13647 reg_order[0] = 0;
13648 for (i = 0; i < nops; i++)
13649 if (regs[i] < regs[reg_order[0]])
13650 reg_order[0] = i;
13652 for (i = 1; i < nops; i++)
13654 int this_order = reg_order[i - 1];
13655 for (j = 0; j < nops; j++)
13656 if (regs[j] > regs[reg_order[i - 1]]
13657 && (this_order == reg_order[i - 1]
13658 || regs[j] < regs[this_order]))
13659 this_order = j;
13660 reg_order[i] = this_order;
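/* For example, REGS == {3, 1, 2} gives REG_ORDER == {1, 2, 0}, i.e. the
   stores are visited in ascending register-number order.  */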
13663 /* Ensure that registers that must be live after the instruction end
13664 up with the correct value. */
13665 for (i = 0; i < nops; i++)
13667 int this_order = reg_order[i];
13668 if ((this_order != mem_order[i]
13669 || orig_reg_rtxs[this_order] != reg_rtxs[this_order])
13670 && !peep2_reg_dead_p (nops * 2, orig_reg_rtxs[this_order]))
13671 return false;
13674 /* Load the constants. */
13675 for (i = 0; i < nops; i++)
13677 rtx op = operands[2 * nops + mem_order[i]];
13678 sorted_regs[i] = regs[reg_order[i]];
13679 emit_move_insn (reg_rtxs[reg_order[i]], op);
13682 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
13684 base_reg_dies = peep2_reg_dead_p (nops * 2, base_reg_rtx);
13685 if (TARGET_THUMB1)
13687 gcc_assert (base_reg_dies);
13688 write_back = TRUE;
13691 if (stm_case == 5)
13693 gcc_assert (base_reg_dies);
13694 emit_insn (gen_addsi3 (base_reg_rtx, base_reg_rtx, GEN_INT (offset)));
13695 offset = 0;
13698 addr = plus_constant (Pmode, base_reg_rtx, offset);
13700 for (i = 0; i < nops; i++)
13702 addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
13703 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
13704 SImode, addr, 0);
13706 emit_insn (arm_gen_store_multiple_1 (nops, sorted_regs, mems, base_reg_rtx,
13707 write_back ? offset + i * 4 : 0));
13708 return true;
13711 /* Copy a block of memory using plain ldr/str/ldrh/strh instructions, to permit
13712 unaligned copies on processors which support unaligned semantics for those
13713 instructions. INTERLEAVE_FACTOR can be used to attempt to hide load latency
13714 (using more registers) by doing e.g. load/load/store/store for a factor of 2.
13715 An interleave factor of 1 (the minimum) will perform no interleaving.
13716 Load/store multiple are used for aligned addresses where possible. */
13718 static void
13719 arm_block_move_unaligned_straight (rtx dstbase, rtx srcbase,
13720 HOST_WIDE_INT length,
13721 unsigned int interleave_factor)
13723 rtx *regs = XALLOCAVEC (rtx, interleave_factor);
13724 int *regnos = XALLOCAVEC (int, interleave_factor);
13725 HOST_WIDE_INT block_size_bytes = interleave_factor * UNITS_PER_WORD;
13726 HOST_WIDE_INT i, j;
13727 HOST_WIDE_INT remaining = length, words;
13728 rtx halfword_tmp = NULL, byte_tmp = NULL;
13729 rtx dst, src;
13730 bool src_aligned = MEM_ALIGN (srcbase) >= BITS_PER_WORD;
13731 bool dst_aligned = MEM_ALIGN (dstbase) >= BITS_PER_WORD;
13732 HOST_WIDE_INT srcoffset, dstoffset;
13733 HOST_WIDE_INT src_autoinc, dst_autoinc;
13734 rtx mem, addr;
13736 gcc_assert (1 <= interleave_factor && interleave_factor <= 4);
13738 /* Use hard registers if we have aligned source or destination so we can use
13739 load/store multiple with contiguous registers. */
13740 if (dst_aligned || src_aligned)
13741 for (i = 0; i < interleave_factor; i++)
13742 regs[i] = gen_rtx_REG (SImode, i);
13743 else
13744 for (i = 0; i < interleave_factor; i++)
13745 regs[i] = gen_reg_rtx (SImode);
13747 dst = copy_addr_to_reg (XEXP (dstbase, 0));
13748 src = copy_addr_to_reg (XEXP (srcbase, 0));
13750 srcoffset = dstoffset = 0;
13752 /* Calls to arm_gen_load_multiple and arm_gen_store_multiple update SRC/DST.
13753 For copying the last bytes we want to subtract this offset again. */
13754 src_autoinc = dst_autoinc = 0;
13756 for (i = 0; i < interleave_factor; i++)
13757 regnos[i] = i;
13759 /* Copy BLOCK_SIZE_BYTES chunks. */
13761 for (i = 0; i + block_size_bytes <= length; i += block_size_bytes)
13763 /* Load words. */
13764 if (src_aligned && interleave_factor > 1)
13766 emit_insn (arm_gen_load_multiple (regnos, interleave_factor, src,
13767 TRUE, srcbase, &srcoffset));
13768 src_autoinc += UNITS_PER_WORD * interleave_factor;
13770 else
13772 for (j = 0; j < interleave_factor; j++)
13774 addr = plus_constant (Pmode, src, (srcoffset + j * UNITS_PER_WORD
13775 - src_autoinc));
13776 mem = adjust_automodify_address (srcbase, SImode, addr,
13777 srcoffset + j * UNITS_PER_WORD);
13778 emit_insn (gen_unaligned_loadsi (regs[j], mem));
13780 srcoffset += block_size_bytes;
13783 /* Store words. */
13784 if (dst_aligned && interleave_factor > 1)
13786 emit_insn (arm_gen_store_multiple (regnos, interleave_factor, dst,
13787 TRUE, dstbase, &dstoffset));
13788 dst_autoinc += UNITS_PER_WORD * interleave_factor;
13790 else
13792 for (j = 0; j < interleave_factor; j++)
13794 addr = plus_constant (Pmode, dst, (dstoffset + j * UNITS_PER_WORD
13795 - dst_autoinc));
13796 mem = adjust_automodify_address (dstbase, SImode, addr,
13797 dstoffset + j * UNITS_PER_WORD);
13798 emit_insn (gen_unaligned_storesi (mem, regs[j]));
13800 dstoffset += block_size_bytes;
13803 remaining -= block_size_bytes;
13806 /* Copy any whole words left (note these aren't interleaved with any
13807 subsequent halfword/byte load/stores in the interests of simplicity). */
13809 words = remaining / UNITS_PER_WORD;
13811 gcc_assert (words < interleave_factor);
13813 if (src_aligned && words > 1)
13815 emit_insn (arm_gen_load_multiple (regnos, words, src, TRUE, srcbase,
13816 &srcoffset));
13817 src_autoinc += UNITS_PER_WORD * words;
13819 else
13821 for (j = 0; j < words; j++)
13823 addr = plus_constant (Pmode, src,
13824 srcoffset + j * UNITS_PER_WORD - src_autoinc);
13825 mem = adjust_automodify_address (srcbase, SImode, addr,
13826 srcoffset + j * UNITS_PER_WORD);
13827 if (src_aligned)
13828 emit_move_insn (regs[j], mem);
13829 else
13830 emit_insn (gen_unaligned_loadsi (regs[j], mem));
13832 srcoffset += words * UNITS_PER_WORD;
13835 if (dst_aligned && words > 1)
13837 emit_insn (arm_gen_store_multiple (regnos, words, dst, TRUE, dstbase,
13838 &dstoffset));
13839 dst_autoinc += words * UNITS_PER_WORD;
13841 else
13843 for (j = 0; j < words; j++)
13845 addr = plus_constant (Pmode, dst,
13846 dstoffset + j * UNITS_PER_WORD - dst_autoinc);
13847 mem = adjust_automodify_address (dstbase, SImode, addr,
13848 dstoffset + j * UNITS_PER_WORD);
13849 if (dst_aligned)
13850 emit_move_insn (mem, regs[j]);
13851 else
13852 emit_insn (gen_unaligned_storesi (mem, regs[j]));
13854 dstoffset += words * UNITS_PER_WORD;
13857 remaining -= words * UNITS_PER_WORD;
13859 gcc_assert (remaining < 4);
13861 /* Copy a halfword if necessary. */
13863 if (remaining >= 2)
13865 halfword_tmp = gen_reg_rtx (SImode);
13867 addr = plus_constant (Pmode, src, srcoffset - src_autoinc);
13868 mem = adjust_automodify_address (srcbase, HImode, addr, srcoffset);
13869 emit_insn (gen_unaligned_loadhiu (halfword_tmp, mem));
13871 /* Either write out immediately, or delay until we've loaded the last
13872 byte, depending on interleave factor. */
13873 if (interleave_factor == 1)
13875 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
13876 mem = adjust_automodify_address (dstbase, HImode, addr, dstoffset);
13877 emit_insn (gen_unaligned_storehi (mem,
13878 gen_lowpart (HImode, halfword_tmp)));
13879 halfword_tmp = NULL;
13880 dstoffset += 2;
13883 remaining -= 2;
13884 srcoffset += 2;
13887 gcc_assert (remaining < 2);
13889 /* Copy last byte. */
13891 if ((remaining & 1) != 0)
13893 byte_tmp = gen_reg_rtx (SImode);
13895 addr = plus_constant (Pmode, src, srcoffset - src_autoinc);
13896 mem = adjust_automodify_address (srcbase, QImode, addr, srcoffset);
13897 emit_move_insn (gen_lowpart (QImode, byte_tmp), mem);
13899 if (interleave_factor == 1)
13901 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
13902 mem = adjust_automodify_address (dstbase, QImode, addr, dstoffset);
13903 emit_move_insn (mem, gen_lowpart (QImode, byte_tmp));
13904 byte_tmp = NULL;
13905 dstoffset++;
13908 remaining--;
13909 srcoffset++;
13912 /* Store last halfword if we haven't done so already. */
13914 if (halfword_tmp)
13916 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
13917 mem = adjust_automodify_address (dstbase, HImode, addr, dstoffset);
13918 emit_insn (gen_unaligned_storehi (mem,
13919 gen_lowpart (HImode, halfword_tmp)));
13920 dstoffset += 2;
13923 /* Likewise for last byte. */
13925 if (byte_tmp)
13927 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
13928 mem = adjust_automodify_address (dstbase, QImode, addr, dstoffset);
13929 emit_move_insn (mem, gen_lowpart (QImode, byte_tmp));
13930 dstoffset++;
13933 gcc_assert (remaining == 0 && srcoffset == dstoffset);
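/* For illustration, LENGTH == 11 with INTERLEAVE_FACTOR == 2 copies one
   8-byte chunk in the main loop, no whole words afterwards, and then one
   halfword and one byte; because the interleave factor is greater than 1,
   the trailing halfword and byte stores are deferred to the end.  */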
13936 /* From mips_adjust_block_mem:
13938 Helper function for doing a loop-based block operation on memory
13939 reference MEM. Each iteration of the loop will operate on LENGTH
13940 bytes of MEM.
13942 Create a new base register for use within the loop and point it to
13943 the start of MEM. Create a new memory reference that uses this
13944 register. Store them in *LOOP_REG and *LOOP_MEM respectively. */
13946 static void
13947 arm_adjust_block_mem (rtx mem, HOST_WIDE_INT length, rtx *loop_reg,
13948 rtx *loop_mem)
13950 *loop_reg = copy_addr_to_reg (XEXP (mem, 0));
13952 /* Although the new mem does not refer to a known location,
13953 it does keep up to LENGTH bytes of alignment. */
13954 *loop_mem = change_address (mem, BLKmode, *loop_reg);
13955 set_mem_align (*loop_mem, MIN (MEM_ALIGN (mem), length * BITS_PER_UNIT));
13958 /* From mips_block_move_loop:
13960 Move LENGTH bytes from SRC to DEST using a loop that moves BYTES_PER_ITER
13961 bytes at a time. LENGTH must be at least BYTES_PER_ITER. Assume that
13962 the memory regions do not overlap. */
13964 static void
13965 arm_block_move_unaligned_loop (rtx dest, rtx src, HOST_WIDE_INT length,
13966 unsigned int interleave_factor,
13967 HOST_WIDE_INT bytes_per_iter)
13969 rtx src_reg, dest_reg, final_src, test;
13970 HOST_WIDE_INT leftover;
13972 leftover = length % bytes_per_iter;
13973 length -= leftover;
13975 /* Create registers and memory references for use within the loop. */
13976 arm_adjust_block_mem (src, bytes_per_iter, &src_reg, &src);
13977 arm_adjust_block_mem (dest, bytes_per_iter, &dest_reg, &dest);
13979 /* Calculate the value that SRC_REG should have after the last iteration of
13980 the loop. */
13981 final_src = expand_simple_binop (Pmode, PLUS, src_reg, GEN_INT (length),
13982 0, 0, OPTAB_WIDEN);
13984 /* Emit the start of the loop. */
13985 rtx_code_label *label = gen_label_rtx ();
13986 emit_label (label);
13988 /* Emit the loop body. */
13989 arm_block_move_unaligned_straight (dest, src, bytes_per_iter,
13990 interleave_factor);
13992 /* Move on to the next block. */
13993 emit_move_insn (src_reg, plus_constant (Pmode, src_reg, bytes_per_iter));
13994 emit_move_insn (dest_reg, plus_constant (Pmode, dest_reg, bytes_per_iter));
13996 /* Emit the loop condition. */
13997 test = gen_rtx_NE (VOIDmode, src_reg, final_src);
13998 emit_jump_insn (gen_cbranchsi4 (test, src_reg, final_src, label));
14000 /* Mop up any left-over bytes. */
14001 if (leftover)
14002 arm_block_move_unaligned_straight (dest, src, leftover, interleave_factor);
14005 /* Emit a block move when either the source or destination is unaligned (not
14006 aligned to a four-byte boundary). This may need further tuning depending on
14007 core type, optimize_size setting, etc. */
14009 static int
14010 arm_movmemqi_unaligned (rtx *operands)
14012 HOST_WIDE_INT length = INTVAL (operands[2]);
14014 if (optimize_size)
14016 bool src_aligned = MEM_ALIGN (operands[1]) >= BITS_PER_WORD;
14017 bool dst_aligned = MEM_ALIGN (operands[0]) >= BITS_PER_WORD;
14018 /* Inlined memcpy using ldr/str/ldrh/strh can be quite big: try to limit
14019 size of code if optimizing for size. We'll use ldm/stm if src_aligned
14020 or dst_aligned though: allow more interleaving in those cases since the
14021 resulting code can be smaller. */
14022 unsigned int interleave_factor = (src_aligned || dst_aligned) ? 2 : 1;
14023 HOST_WIDE_INT bytes_per_iter = (src_aligned || dst_aligned) ? 8 : 4;
14025 if (length > 12)
14026 arm_block_move_unaligned_loop (operands[0], operands[1], length,
14027 interleave_factor, bytes_per_iter);
14028 else
14029 arm_block_move_unaligned_straight (operands[0], operands[1], length,
14030 interleave_factor);
14032 else
14034 /* Note that the loop created by arm_block_move_unaligned_loop may be
14035 subject to loop unrolling, which makes tuning this condition a little
14036 redundant. */
14037 if (length > 32)
14038 arm_block_move_unaligned_loop (operands[0], operands[1], length, 4, 16);
14039 else
14040 arm_block_move_unaligned_straight (operands[0], operands[1], length, 4);
14043 return 1;
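/* For example, at -Os with neither operand word aligned, a 32-byte copy is
   expanded through arm_block_move_unaligned_loop with an interleave factor
   of 1 and 4 bytes per iteration, while copies of 12 bytes or fewer are
   expanded straight-line.  */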
14047 arm_gen_movmemqi (rtx *operands)
14049 HOST_WIDE_INT in_words_to_go, out_words_to_go, last_bytes;
14050 HOST_WIDE_INT srcoffset, dstoffset;
14051 int i;
14052 rtx src, dst, srcbase, dstbase;
14053 rtx part_bytes_reg = NULL;
14054 rtx mem;
14056 if (!CONST_INT_P (operands[2])
14057 || !CONST_INT_P (operands[3])
14058 || INTVAL (operands[2]) > 64)
14059 return 0;
14061 if (unaligned_access && (INTVAL (operands[3]) & 3) != 0)
14062 return arm_movmemqi_unaligned (operands);
14064 if (INTVAL (operands[3]) & 3)
14065 return 0;
14067 dstbase = operands[0];
14068 srcbase = operands[1];
14070 dst = copy_to_mode_reg (SImode, XEXP (dstbase, 0));
14071 src = copy_to_mode_reg (SImode, XEXP (srcbase, 0));
14073 in_words_to_go = ARM_NUM_INTS (INTVAL (operands[2]));
14074 out_words_to_go = INTVAL (operands[2]) / 4;
14075 last_bytes = INTVAL (operands[2]) & 3;
14076 dstoffset = srcoffset = 0;
14078 if (out_words_to_go != in_words_to_go && ((in_words_to_go - 1) & 3) != 0)
14079 part_bytes_reg = gen_rtx_REG (SImode, (in_words_to_go - 1) & 3);
14081 for (i = 0; in_words_to_go >= 2; i+=4)
14083 if (in_words_to_go > 4)
14084 emit_insn (arm_gen_load_multiple (arm_regs_in_sequence, 4, src,
14085 TRUE, srcbase, &srcoffset));
14086 else
14087 emit_insn (arm_gen_load_multiple (arm_regs_in_sequence, in_words_to_go,
14088 src, FALSE, srcbase,
14089 &srcoffset));
14091 if (out_words_to_go)
14093 if (out_words_to_go > 4)
14094 emit_insn (arm_gen_store_multiple (arm_regs_in_sequence, 4, dst,
14095 TRUE, dstbase, &dstoffset));
14096 else if (out_words_to_go != 1)
14097 emit_insn (arm_gen_store_multiple (arm_regs_in_sequence,
14098 out_words_to_go, dst,
14099 (last_bytes == 0
14100 ? FALSE : TRUE),
14101 dstbase, &dstoffset));
14102 else
14104 mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
14105 emit_move_insn (mem, gen_rtx_REG (SImode, R0_REGNUM));
14106 if (last_bytes != 0)
14108 emit_insn (gen_addsi3 (dst, dst, GEN_INT (4)));
14109 dstoffset += 4;
14114 in_words_to_go -= in_words_to_go < 4 ? in_words_to_go : 4;
14115 out_words_to_go -= out_words_to_go < 4 ? out_words_to_go : 4;
14118 /* OUT_WORDS_TO_GO will be zero here if there are byte stores to do. */
14119 if (out_words_to_go)
14121 rtx sreg;
14123 mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
14124 sreg = copy_to_reg (mem);
14126 mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
14127 emit_move_insn (mem, sreg);
14128 in_words_to_go--;
14130 gcc_assert (!in_words_to_go); /* Sanity check */
14133 if (in_words_to_go)
14135 gcc_assert (in_words_to_go > 0);
14137 mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
14138 part_bytes_reg = copy_to_mode_reg (SImode, mem);
14141 gcc_assert (!last_bytes || part_bytes_reg);
14143 if (BYTES_BIG_ENDIAN && last_bytes)
14145 rtx tmp = gen_reg_rtx (SImode);
14147 /* The bytes we want are in the top end of the word. */
14148 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg,
14149 GEN_INT (8 * (4 - last_bytes))));
14150 part_bytes_reg = tmp;
14152 while (last_bytes)
14154 mem = adjust_automodify_address (dstbase, QImode,
14155 plus_constant (Pmode, dst,
14156 last_bytes - 1),
14157 dstoffset + last_bytes - 1);
14158 emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
14160 if (--last_bytes)
14162 tmp = gen_reg_rtx (SImode);
14163 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (8)));
14164 part_bytes_reg = tmp;
14169 else
14171 if (last_bytes > 1)
14173 mem = adjust_automodify_address (dstbase, HImode, dst, dstoffset);
14174 emit_move_insn (mem, gen_lowpart (HImode, part_bytes_reg));
14175 last_bytes -= 2;
14176 if (last_bytes)
14178 rtx tmp = gen_reg_rtx (SImode);
14179 emit_insn (gen_addsi3 (dst, dst, const2_rtx));
14180 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (16)));
14181 part_bytes_reg = tmp;
14182 dstoffset += 2;
14186 if (last_bytes)
14188 mem = adjust_automodify_address (dstbase, QImode, dst, dstoffset);
14189 emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
14193 return 1;
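/* For example, a 10-byte copy with word-aligned operands loads three words,
   stores the first two (updating the destination pointer, since trailing
   bytes remain) and then writes the last two bytes from the third register
   with a halfword store.  */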
14196 /* Helper for gen_movmem_ldrd_strd. Increase the address of memory rtx
14197 by mode size. */
14198 inline static rtx
14199 next_consecutive_mem (rtx mem)
14201 machine_mode mode = GET_MODE (mem);
14202 HOST_WIDE_INT offset = GET_MODE_SIZE (mode);
14203 rtx addr = plus_constant (Pmode, XEXP (mem, 0), offset);
14205 return adjust_automodify_address (mem, mode, addr, offset);
14208 /* Copy using LDRD/STRD instructions whenever possible.
14209 Returns true upon success. */
14210 bool
14211 gen_movmem_ldrd_strd (rtx *operands)
14213 unsigned HOST_WIDE_INT len;
14214 HOST_WIDE_INT align;
14215 rtx src, dst, base;
14216 rtx reg0;
14217 bool src_aligned, dst_aligned;
14218 bool src_volatile, dst_volatile;
14220 gcc_assert (CONST_INT_P (operands[2]));
14221 gcc_assert (CONST_INT_P (operands[3]));
14223 len = UINTVAL (operands[2]);
14224 if (len > 64)
14225 return false;
14227 /* Maximum alignment we can assume for both src and dst buffers. */
14228 align = INTVAL (operands[3]);
14230 if ((!unaligned_access) && (len >= 4) && ((align & 3) != 0))
14231 return false;
14233 /* Place src and dst addresses in registers
14234 and update the corresponding mem rtx. */
14235 dst = operands[0];
14236 dst_volatile = MEM_VOLATILE_P (dst);
14237 dst_aligned = MEM_ALIGN (dst) >= BITS_PER_WORD;
14238 base = copy_to_mode_reg (SImode, XEXP (dst, 0));
14239 dst = adjust_automodify_address (dst, VOIDmode, base, 0);
14241 src = operands[1];
14242 src_volatile = MEM_VOLATILE_P (src);
14243 src_aligned = MEM_ALIGN (src) >= BITS_PER_WORD;
14244 base = copy_to_mode_reg (SImode, XEXP (src, 0));
14245 src = adjust_automodify_address (src, VOIDmode, base, 0);
14247 if (!unaligned_access && !(src_aligned && dst_aligned))
14248 return false;
14250 if (src_volatile || dst_volatile)
14251 return false;
14253 /* If we cannot generate any LDRD/STRD, try to generate LDM/STM. */
14254 if (!(dst_aligned || src_aligned))
14255 return arm_gen_movmemqi (operands);
14257 /* If either src or dst is unaligned we'll be accessing it as pairs
14258 of unaligned SImode accesses. Otherwise we can generate DImode
14259 ldrd/strd instructions. */
14260 src = adjust_address (src, src_aligned ? DImode : SImode, 0);
14261 dst = adjust_address (dst, dst_aligned ? DImode : SImode, 0);
14263 while (len >= 8)
14265 len -= 8;
14266 reg0 = gen_reg_rtx (DImode);
14267 rtx low_reg = NULL_RTX;
14268 rtx hi_reg = NULL_RTX;
14270 if (!src_aligned || !dst_aligned)
14272 low_reg = gen_lowpart (SImode, reg0);
14273 hi_reg = gen_highpart_mode (SImode, DImode, reg0);
14275 if (src_aligned)
14276 emit_move_insn (reg0, src);
14277 else
14279 emit_insn (gen_unaligned_loadsi (low_reg, src));
14280 src = next_consecutive_mem (src);
14281 emit_insn (gen_unaligned_loadsi (hi_reg, src));
14284 if (dst_aligned)
14285 emit_move_insn (dst, reg0);
14286 else
14288 emit_insn (gen_unaligned_storesi (dst, low_reg));
14289 dst = next_consecutive_mem (dst);
14290 emit_insn (gen_unaligned_storesi (dst, hi_reg));
14293 src = next_consecutive_mem (src);
14294 dst = next_consecutive_mem (dst);
14297 gcc_assert (len < 8);
14298 if (len >= 4)
14300 /* More than a word but less than a double-word to copy. Copy a word. */
14301 reg0 = gen_reg_rtx (SImode);
14302 src = adjust_address (src, SImode, 0);
14303 dst = adjust_address (dst, SImode, 0);
14304 if (src_aligned)
14305 emit_move_insn (reg0, src);
14306 else
14307 emit_insn (gen_unaligned_loadsi (reg0, src));
14309 if (dst_aligned)
14310 emit_move_insn (dst, reg0);
14311 else
14312 emit_insn (gen_unaligned_storesi (dst, reg0));
14314 src = next_consecutive_mem (src);
14315 dst = next_consecutive_mem (dst);
14316 len -= 4;
14319 if (len == 0)
14320 return true;
14322 /* Copy the remaining bytes. */
14323 if (len >= 2)
14325 dst = adjust_address (dst, HImode, 0);
14326 src = adjust_address (src, HImode, 0);
14327 reg0 = gen_reg_rtx (SImode);
14328 if (src_aligned)
14329 emit_insn (gen_zero_extendhisi2 (reg0, src));
14330 else
14331 emit_insn (gen_unaligned_loadhiu (reg0, src));
14333 if (dst_aligned)
14334 emit_insn (gen_movhi (dst, gen_lowpart(HImode, reg0)));
14335 else
14336 emit_insn (gen_unaligned_storehi (dst, gen_lowpart (HImode, reg0)));
14338 src = next_consecutive_mem (src);
14339 dst = next_consecutive_mem (dst);
14340 if (len == 2)
14341 return true;
14344 dst = adjust_address (dst, QImode, 0);
14345 src = adjust_address (src, QImode, 0);
14346 reg0 = gen_reg_rtx (QImode);
14347 emit_move_insn (reg0, src);
14348 emit_move_insn (dst, reg0);
14349 return true;
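/* For example, a 14-byte copy with both operands word aligned is split into
   one DImode (ldrd/strd) copy, one SImode word copy and one final halfword
   copy.  */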
14352 /* Select a dominance comparison mode if possible for a test of the general
14353 form (OP (COND_OR (X) (Y)) (const_int 0)). We support three forms.
14354 COND_OR == DOM_CC_X_AND_Y => (X && Y)
14355 COND_OR == DOM_CC_NX_OR_Y => ((! X) || Y)
14356 COND_OR == DOM_CC_X_OR_Y => (X || Y)
14357 In all cases OP will be either EQ or NE, but we don't need to know which
14358 here. If we are unable to support a dominance comparison we return
14359 CC mode. This will then fail to match for the RTL expressions that
14360 generate this call. */
14361 machine_mode
14362 arm_select_dominance_cc_mode (rtx x, rtx y, HOST_WIDE_INT cond_or)
14364 enum rtx_code cond1, cond2;
14365 int swapped = 0;
14367 /* Currently we will probably get the wrong result if the individual
14368 comparisons are not simple. This also ensures that it is safe to
14369 reverse a comparison if necessary. */
14370 if ((arm_select_cc_mode (cond1 = GET_CODE (x), XEXP (x, 0), XEXP (x, 1))
14371 != CCmode)
14372 || (arm_select_cc_mode (cond2 = GET_CODE (y), XEXP (y, 0), XEXP (y, 1))
14373 != CCmode))
14374 return CCmode;
14376 /* The if_then_else variant of this tests the second condition if the
14377 first passes, but is true if the first fails. Reverse the first
14378 condition to get a true "inclusive-or" expression. */
14379 if (cond_or == DOM_CC_NX_OR_Y)
14380 cond1 = reverse_condition (cond1);
14382 /* If the comparisons are not equal, and one doesn't dominate the other,
14383 then we can't do this. */
14384 if (cond1 != cond2
14385 && !comparison_dominates_p (cond1, cond2)
14386 && (swapped = 1, !comparison_dominates_p (cond2, cond1)))
14387 return CCmode;
14389 if (swapped)
14390 std::swap (cond1, cond2);
14392 switch (cond1)
14394 case EQ:
14395 if (cond_or == DOM_CC_X_AND_Y)
14396 return CC_DEQmode;
14398 switch (cond2)
14400 case EQ: return CC_DEQmode;
14401 case LE: return CC_DLEmode;
14402 case LEU: return CC_DLEUmode;
14403 case GE: return CC_DGEmode;
14404 case GEU: return CC_DGEUmode;
14405 default: gcc_unreachable ();
14408 case LT:
14409 if (cond_or == DOM_CC_X_AND_Y)
14410 return CC_DLTmode;
14412 switch (cond2)
14414 case LT:
14415 return CC_DLTmode;
14416 case LE:
14417 return CC_DLEmode;
14418 case NE:
14419 return CC_DNEmode;
14420 default:
14421 gcc_unreachable ();
14424 case GT:
14425 if (cond_or == DOM_CC_X_AND_Y)
14426 return CC_DGTmode;
14428 switch (cond2)
14430 case GT:
14431 return CC_DGTmode;
14432 case GE:
14433 return CC_DGEmode;
14434 case NE:
14435 return CC_DNEmode;
14436 default:
14437 gcc_unreachable ();
14440 case LTU:
14441 if (cond_or == DOM_CC_X_AND_Y)
14442 return CC_DLTUmode;
14444 switch (cond2)
14446 case LTU:
14447 return CC_DLTUmode;
14448 case LEU:
14449 return CC_DLEUmode;
14450 case NE:
14451 return CC_DNEmode;
14452 default:
14453 gcc_unreachable ();
14456 case GTU:
14457 if (cond_or == DOM_CC_X_AND_Y)
14458 return CC_DGTUmode;
14460 switch (cond2)
14462 case GTU:
14463 return CC_DGTUmode;
14464 case GEU:
14465 return CC_DGEUmode;
14466 case NE:
14467 return CC_DNEmode;
14468 default:
14469 gcc_unreachable ();
14472 /* The remaining cases only occur when both comparisons are the
14473 same. */
14474 case NE:
14475 gcc_assert (cond1 == cond2);
14476 return CC_DNEmode;
14478 case LE:
14479 gcc_assert (cond1 == cond2);
14480 return CC_DLEmode;
14482 case GE:
14483 gcc_assert (cond1 == cond2);
14484 return CC_DGEmode;
14486 case LEU:
14487 gcc_assert (cond1 == cond2);
14488 return CC_DLEUmode;
14490 case GEU:
14491 gcc_assert (cond1 == cond2);
14492 return CC_DGEUmode;
14494 default:
14495 gcc_unreachable ();
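/* For example, combining an EQ comparison with a GE comparison under
   DOM_CC_X_OR_Y yields CC_DGEmode, since a true EQ result implies a true
   GE result.  */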
14499 machine_mode
14500 arm_select_cc_mode (enum rtx_code op, rtx x, rtx y)
14502 /* All floating point compares return CCFP if it is an equality
14503 comparison, and CCFPE otherwise. */
14504 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
14506 switch (op)
14508 case EQ:
14509 case NE:
14510 case UNORDERED:
14511 case ORDERED:
14512 case UNLT:
14513 case UNLE:
14514 case UNGT:
14515 case UNGE:
14516 case UNEQ:
14517 case LTGT:
14518 return CCFPmode;
14520 case LT:
14521 case LE:
14522 case GT:
14523 case GE:
14524 return CCFPEmode;
14526 default:
14527 gcc_unreachable ();
14531 /* A compare with a shifted operand. Because of canonicalization, the
14532 comparison will have to be swapped when we emit the assembler. */
14533 if (GET_MODE (y) == SImode
14534 && (REG_P (y) || (GET_CODE (y) == SUBREG))
14535 && (GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
14536 || GET_CODE (x) == LSHIFTRT || GET_CODE (x) == ROTATE
14537 || GET_CODE (x) == ROTATERT))
14538 return CC_SWPmode;
14540 /* This operation is performed swapped, but since we only rely on the Z
14541 flag we don't need an additional mode. */
14542 if (GET_MODE (y) == SImode
14543 && (REG_P (y) || (GET_CODE (y) == SUBREG))
14544 && GET_CODE (x) == NEG
14545 && (op == EQ || op == NE))
14546 return CC_Zmode;
14548 /* This is a special case that is used by combine to allow a
14549 comparison of a shifted byte load to be split into a zero-extend
14550 followed by a comparison of the shifted integer (only valid for
14551 equalities and unsigned inequalities). */
14552 if (GET_MODE (x) == SImode
14553 && GET_CODE (x) == ASHIFT
14554 && CONST_INT_P (XEXP (x, 1)) && INTVAL (XEXP (x, 1)) == 24
14555 && GET_CODE (XEXP (x, 0)) == SUBREG
14556 && MEM_P (SUBREG_REG (XEXP (x, 0)))
14557 && GET_MODE (SUBREG_REG (XEXP (x, 0))) == QImode
14558 && (op == EQ || op == NE
14559 || op == GEU || op == GTU || op == LTU || op == LEU)
14560 && CONST_INT_P (y))
14561 return CC_Zmode;
14563 /* A construct for a conditional compare: if the false arm contains
14564 0, then both conditions must be true; otherwise either condition
14565 must be true. Not all conditions are possible, so CCmode is
14566 returned if it can't be done. */
14567 if (GET_CODE (x) == IF_THEN_ELSE
14568 && (XEXP (x, 2) == const0_rtx
14569 || XEXP (x, 2) == const1_rtx)
14570 && COMPARISON_P (XEXP (x, 0))
14571 && COMPARISON_P (XEXP (x, 1)))
14572 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
14573 INTVAL (XEXP (x, 2)));
14575 /* Alternate canonicalizations of the above. These are somewhat cleaner. */
14576 if (GET_CODE (x) == AND
14577 && (op == EQ || op == NE)
14578 && COMPARISON_P (XEXP (x, 0))
14579 && COMPARISON_P (XEXP (x, 1)))
14580 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
14581 DOM_CC_X_AND_Y);
14583 if (GET_CODE (x) == IOR
14584 && (op == EQ || op == NE)
14585 && COMPARISON_P (XEXP (x, 0))
14586 && COMPARISON_P (XEXP (x, 1)))
14587 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
14588 DOM_CC_X_OR_Y);
14590 /* An operation (on Thumb) where we want to test for a single bit.
14591 This is done by shifting that bit up into the top bit of a
14592 scratch register; we can then branch on the sign bit. */
14593 if (TARGET_THUMB1
14594 && GET_MODE (x) == SImode
14595 && (op == EQ || op == NE)
14596 && GET_CODE (x) == ZERO_EXTRACT
14597 && XEXP (x, 1) == const1_rtx)
14598 return CC_Nmode;
14600 /* An operation that sets the condition codes as a side-effect, the
14601 V flag is not set correctly, so we can only use comparisons where
14602 this doesn't matter. (For LT and GE we can use "mi" and "pl"
14603 instead.) */
14604 /* ??? Does the ZERO_EXTRACT case really apply to thumb2? */
14605 if (GET_MODE (x) == SImode
14606 && y == const0_rtx
14607 && (op == EQ || op == NE || op == LT || op == GE)
14608 && (GET_CODE (x) == PLUS || GET_CODE (x) == MINUS
14609 || GET_CODE (x) == AND || GET_CODE (x) == IOR
14610 || GET_CODE (x) == XOR || GET_CODE (x) == MULT
14611 || GET_CODE (x) == NOT || GET_CODE (x) == NEG
14612 || GET_CODE (x) == LSHIFTRT
14613 || GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
14614 || GET_CODE (x) == ROTATERT
14615 || (TARGET_32BIT && GET_CODE (x) == ZERO_EXTRACT)))
14616 return CC_NOOVmode;
14618 if (GET_MODE (x) == QImode && (op == EQ || op == NE))
14619 return CC_Zmode;
14621 if (GET_MODE (x) == SImode && (op == LTU || op == GEU)
14622 && GET_CODE (x) == PLUS
14623 && (rtx_equal_p (XEXP (x, 0), y) || rtx_equal_p (XEXP (x, 1), y)))
14624 return CC_Cmode;
14626 if (GET_MODE (x) == DImode || GET_MODE (y) == DImode)
14628 switch (op)
14630 case EQ:
14631 case NE:
14632 /* A DImode comparison against zero can be implemented by
14633 or'ing the two halves together. */
14634 if (y == const0_rtx)
14635 return CC_Zmode;
14637 /* We can do an equality test in three Thumb instructions. */
14638 if (!TARGET_32BIT)
14639 return CC_Zmode;
14641 /* FALLTHROUGH */
14643 case LTU:
14644 case LEU:
14645 case GTU:
14646 case GEU:
14647 /* DImode unsigned comparisons can be implemented by cmp +
14648 cmpeq without a scratch register. Not worth doing in
14649 Thumb-2. */
14650 if (TARGET_32BIT)
14651 return CC_CZmode;
14653 /* FALLTHROUGH */
14655 case LT:
14656 case LE:
14657 case GT:
14658 case GE:
14659 /* DImode signed and unsigned comparisons can be implemented
14660 by cmp + sbcs with a scratch register, but that does not
14661 set the Z flag - we must reverse GT/LE/GTU/LEU. */
14662 gcc_assert (op != EQ && op != NE);
14663 return CC_NCVmode;
14665 default:
14666 gcc_unreachable ();
14670 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_CC)
14671 return GET_MODE (x);
14673 return CCmode;
14676 /* X and Y are two things to compare using CODE. Emit the compare insn and
14677 return the rtx for register 0 in the proper mode. FP means this is a
14678 floating point compare: I don't think that it is needed on the arm. */
14680 arm_gen_compare_reg (enum rtx_code code, rtx x, rtx y, rtx scratch)
14682 machine_mode mode;
14683 rtx cc_reg;
14684 int dimode_comparison = GET_MODE (x) == DImode || GET_MODE (y) == DImode;
14686 /* We might have X as a constant, Y as a register because of the predicates
14687 used for cmpdi. If so, force X to a register here. */
14688 if (dimode_comparison && !REG_P (x))
14689 x = force_reg (DImode, x);
14691 mode = SELECT_CC_MODE (code, x, y);
14692 cc_reg = gen_rtx_REG (mode, CC_REGNUM);
14694 if (dimode_comparison
14695 && mode != CC_CZmode)
14697 rtx clobber, set;
14699 /* To compare two non-zero values for equality, XOR them and
14700 then compare against zero. Not used for ARM mode; there
14701 CC_CZmode is cheaper. */
14702 if (mode == CC_Zmode && y != const0_rtx)
14704 gcc_assert (!reload_completed);
14705 x = expand_binop (DImode, xor_optab, x, y, NULL_RTX, 0, OPTAB_WIDEN);
14706 y = const0_rtx;
14709 /* A scratch register is required. */
14710 if (reload_completed)
14711 gcc_assert (scratch != NULL && GET_MODE (scratch) == SImode);
14712 else
14713 scratch = gen_rtx_SCRATCH (SImode);
14715 clobber = gen_rtx_CLOBBER (VOIDmode, scratch);
14716 set = gen_rtx_SET (cc_reg, gen_rtx_COMPARE (mode, x, y));
14717 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, set, clobber)));
14719 else
14720 emit_set_insn (cc_reg, gen_rtx_COMPARE (mode, x, y));
14722 return cc_reg;
14725 /* Generate a sequence of insns that will generate the correct return
14726 address mask depending on the physical architecture that the program
14727 is running on. */
14729 arm_gen_return_addr_mask (void)
14731 rtx reg = gen_reg_rtx (Pmode);
14733 emit_insn (gen_return_addr_mask (reg));
14734 return reg;
14737 void
14738 arm_reload_in_hi (rtx *operands)
14740 rtx ref = operands[1];
14741 rtx base, scratch;
14742 HOST_WIDE_INT offset = 0;
14744 if (GET_CODE (ref) == SUBREG)
14746 offset = SUBREG_BYTE (ref);
14747 ref = SUBREG_REG (ref);
14750 if (REG_P (ref))
14752 /* We have a pseudo which has been spilt onto the stack; there
14753 are two cases here: the first where there is a simple
14754 stack-slot replacement and a second where the stack-slot is
14755 out of range, or is used as a subreg. */
14756 if (reg_equiv_mem (REGNO (ref)))
14758 ref = reg_equiv_mem (REGNO (ref));
14759 base = find_replacement (&XEXP (ref, 0));
14761 else
14762 /* The slot is out of range, or was dressed up in a SUBREG. */
14763 base = reg_equiv_address (REGNO (ref));
14765 /* PR 62554: If there is no equivalent memory location then just move
14766 the value as an SImode register move. This happens when the target
14767 architecture variant does not have an HImode register move. */
14768 if (base == NULL)
14770 gcc_assert (REG_P (operands[0]));
14771 emit_insn (gen_movsi (gen_rtx_SUBREG (SImode, operands[0], 0),
14772 gen_rtx_SUBREG (SImode, ref, 0)));
14773 return;
14776 else
14777 base = find_replacement (&XEXP (ref, 0));
14779 /* Handle the case where the address is too complex to be offset by 1. */
14780 if (GET_CODE (base) == MINUS
14781 || (GET_CODE (base) == PLUS && !CONST_INT_P (XEXP (base, 1))))
14783 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
14785 emit_set_insn (base_plus, base);
14786 base = base_plus;
14788 else if (GET_CODE (base) == PLUS)
14790 /* The addend must be CONST_INT, or we would have dealt with it above. */
14791 HOST_WIDE_INT hi, lo;
14793 offset += INTVAL (XEXP (base, 1));
14794 base = XEXP (base, 0);
14796 /* Rework the address into a legal sequence of insns. */
14797 /* Valid range for lo is -4095 -> 4095 */
14798 lo = (offset >= 0
14799 ? (offset & 0xfff)
14800 : -((-offset) & 0xfff));
14802 /* Corner case: if lo is the max offset then we would be out of range
14803 once we have added the additional 1 below, so bump the msb into the
14804 pre-loading insn(s). */
14805 if (lo == 4095)
14806 lo &= 0x7ff;
14808 hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
14809 ^ (HOST_WIDE_INT) 0x80000000)
14810 - (HOST_WIDE_INT) 0x80000000);
14812 gcc_assert (hi + lo == offset);
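/* For example, an offset of 4095 is split into hi == 2048 (added to the
   base by the addsi3 below) and lo == 2047, so that both lo and lo + 1 are
   still valid byte-load offsets.  */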
14814 if (hi != 0)
14816 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
14818 /* Get the base address; addsi3 knows how to handle constants
14819 that require more than one insn. */
14820 emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
14821 base = base_plus;
14822 offset = lo;
14826 /* Operands[2] may overlap operands[0] (though it won't overlap
14827 operands[1]); that's why we asked for a DImode reg -- so we can
14828 use the bit that does not overlap. */
14829 if (REGNO (operands[2]) == REGNO (operands[0]))
14830 scratch = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
14831 else
14832 scratch = gen_rtx_REG (SImode, REGNO (operands[2]));
14834 emit_insn (gen_zero_extendqisi2 (scratch,
14835 gen_rtx_MEM (QImode,
14836 plus_constant (Pmode, base,
14837 offset))));
14838 emit_insn (gen_zero_extendqisi2 (gen_rtx_SUBREG (SImode, operands[0], 0),
14839 gen_rtx_MEM (QImode,
14840 plus_constant (Pmode, base,
14841 offset + 1))));
14842 if (!BYTES_BIG_ENDIAN)
14843 emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
14844 gen_rtx_IOR (SImode,
14845 gen_rtx_ASHIFT
14846 (SImode,
14847 gen_rtx_SUBREG (SImode, operands[0], 0),
14848 GEN_INT (8)),
14849 scratch));
14850 else
14851 emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
14852 gen_rtx_IOR (SImode,
14853 gen_rtx_ASHIFT (SImode, scratch,
14854 GEN_INT (8)),
14855 gen_rtx_SUBREG (SImode, operands[0], 0)));
14858 /* Handle storing a half-word to memory during reload by synthesizing as two
14859 byte stores. Take care not to clobber the input values until after we
14860 have moved them somewhere safe. This code assumes that if the DImode
14861 scratch in operands[2] overlaps either the input value or output address
14862 in some way, then that value must die in this insn (we absolutely need
14863 two scratch registers for some corner cases). */
14864 void
14865 arm_reload_out_hi (rtx *operands)
14867 rtx ref = operands[0];
14868 rtx outval = operands[1];
14869 rtx base, scratch;
14870 HOST_WIDE_INT offset = 0;
14872 if (GET_CODE (ref) == SUBREG)
14874 offset = SUBREG_BYTE (ref);
14875 ref = SUBREG_REG (ref);
14878 if (REG_P (ref))
14880 /* We have a pseudo which has been spilt onto the stack; there
14881 are two cases here: the first where there is a simple
14882 stack-slot replacement and a second where the stack-slot is
14883 out of range, or is used as a subreg. */
14884 if (reg_equiv_mem (REGNO (ref)))
14886 ref = reg_equiv_mem (REGNO (ref));
14887 base = find_replacement (&XEXP (ref, 0));
14889 else
14890 /* The slot is out of range, or was dressed up in a SUBREG. */
14891 base = reg_equiv_address (REGNO (ref));
14893 /* PR 62254: If there is no equivalent memory location then just move
14894 the value as an SImode register move. This happens when the target
14895 architecture variant does not have an HImode register move. */
14896 if (base == NULL)
14898 gcc_assert (REG_P (outval) || SUBREG_P (outval));
14900 if (REG_P (outval))
14902 emit_insn (gen_movsi (gen_rtx_SUBREG (SImode, ref, 0),
14903 gen_rtx_SUBREG (SImode, outval, 0)));
14905 else /* SUBREG_P (outval) */
14907 if (GET_MODE (SUBREG_REG (outval)) == SImode)
14908 emit_insn (gen_movsi (gen_rtx_SUBREG (SImode, ref, 0),
14909 SUBREG_REG (outval)));
14910 else
14911 /* FIXME: Handle other cases ? */
14912 gcc_unreachable ();
14914 return;
14917 else
14918 base = find_replacement (&XEXP (ref, 0));
14920 scratch = gen_rtx_REG (SImode, REGNO (operands[2]));
14922 /* Handle the case where the address is too complex to be offset by 1. */
14923 if (GET_CODE (base) == MINUS
14924 || (GET_CODE (base) == PLUS && !CONST_INT_P (XEXP (base, 1))))
14926 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
14928 /* Be careful not to destroy OUTVAL. */
14929 if (reg_overlap_mentioned_p (base_plus, outval))
14931 /* Updating base_plus might destroy outval, see if we can
14932 swap the scratch and base_plus. */
14933 if (!reg_overlap_mentioned_p (scratch, outval))
14934 std::swap (scratch, base_plus);
14935 else
14937 rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));
14939 /* Be conservative and copy OUTVAL into the scratch now,
14940 this should only be necessary if outval is a subreg
14941 of something larger than a word. */
14942 /* XXX Might this clobber base? I can't see how it can,
14943 since scratch is known to overlap with OUTVAL, and
14944 must be wider than a word. */
14945 emit_insn (gen_movhi (scratch_hi, outval));
14946 outval = scratch_hi;
14950 emit_set_insn (base_plus, base);
14951 base = base_plus;
14953 else if (GET_CODE (base) == PLUS)
14955 /* The addend must be CONST_INT, or we would have dealt with it above. */
14956 HOST_WIDE_INT hi, lo;
14958 offset += INTVAL (XEXP (base, 1));
14959 base = XEXP (base, 0);
14961 /* Rework the address into a legal sequence of insns. */
14962 /* Valid range for lo is -4095 -> 4095 */
14963 lo = (offset >= 0
14964 ? (offset & 0xfff)
14965 : -((-offset) & 0xfff));
14967 /* Corner case: if lo is the max offset then we would be out of range
14968 once we have added the additional 1 below, so bump the msb into the
14969 pre-loading insn(s). */
14970 if (lo == 4095)
14971 lo &= 0x7ff;
14973 hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
14974 ^ (HOST_WIDE_INT) 0x80000000)
14975 - (HOST_WIDE_INT) 0x80000000);
14977 gcc_assert (hi + lo == offset);
14979 if (hi != 0)
14981 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
14983 /* Be careful not to destroy OUTVAL. */
14984 if (reg_overlap_mentioned_p (base_plus, outval))
14986 /* Updating base_plus might destroy outval, see if we
14987 can swap the scratch and base_plus. */
14988 if (!reg_overlap_mentioned_p (scratch, outval))
14989 std::swap (scratch, base_plus);
14990 else
14992 rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));
14994 /* Be conservative and copy outval into scratch now,
14995 this should only be necessary if outval is a
14996 subreg of something larger than a word. */
14997 /* XXX Might this clobber base? I can't see how it
14998 can, since scratch is known to overlap with
14999 outval. */
15000 emit_insn (gen_movhi (scratch_hi, outval));
15001 outval = scratch_hi;
15005 /* Get the base address; addsi3 knows how to handle constants
15006 that require more than one insn. */
15007 emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
15008 base = base_plus;
15009 offset = lo;
15013 if (BYTES_BIG_ENDIAN)
15015 emit_insn (gen_movqi (gen_rtx_MEM (QImode,
15016 plus_constant (Pmode, base,
15017 offset + 1)),
15018 gen_lowpart (QImode, outval)));
15019 emit_insn (gen_lshrsi3 (scratch,
15020 gen_rtx_SUBREG (SImode, outval, 0),
15021 GEN_INT (8)));
15022 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, base,
15023 offset)),
15024 gen_lowpart (QImode, scratch)));
15026 else
15028 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, base,
15029 offset)),
15030 gen_lowpart (QImode, outval)));
15031 emit_insn (gen_lshrsi3 (scratch,
15032 gen_rtx_SUBREG (SImode, outval, 0),
15033 GEN_INT (8)));
15034 emit_insn (gen_movqi (gen_rtx_MEM (QImode,
15035 plus_constant (Pmode, base,
15036 offset + 1)),
15037 gen_lowpart (QImode, scratch)));
15041 /* Return true if a type must be passed in memory. For AAPCS, small aggregates
15042 (padded to the size of a word) should be passed in a register. */
15044 static bool
15045 arm_must_pass_in_stack (machine_mode mode, const_tree type)
15047 if (TARGET_AAPCS_BASED)
15048 return must_pass_in_stack_var_size (mode, type);
15049 else
15050 return must_pass_in_stack_var_size_or_pad (mode, type);
15054 /* For use by FUNCTION_ARG_PADDING (MODE, TYPE).
15055 Return true if an argument passed on the stack should be padded upwards,
15056 i.e. if the least-significant byte has useful data.
15057 For legacy APCS ABIs we use the default. For AAPCS based ABIs small
15058 aggregate types are placed in the lowest memory address. */
15060 bool
15061 arm_pad_arg_upward (machine_mode mode ATTRIBUTE_UNUSED, const_tree type)
15063 if (!TARGET_AAPCS_BASED)
15064 return DEFAULT_FUNCTION_ARG_PADDING(mode, type) == upward;
15066 if (type && BYTES_BIG_ENDIAN && INTEGRAL_TYPE_P (type))
15067 return false;
15069 return true;
15073 /* Similarly, for use by BLOCK_REG_PADDING (MODE, TYPE, FIRST).
15074 Return !BYTES_BIG_ENDIAN if the least significant byte of the
15075 register has useful data, and return the opposite if the most
15076 significant byte does. */
15078 bool
15079 arm_pad_reg_upward (machine_mode mode,
15080 tree type, int first ATTRIBUTE_UNUSED)
15082 if (TARGET_AAPCS_BASED && BYTES_BIG_ENDIAN)
15084 /* For AAPCS, small aggregates, small fixed-point types,
15085 and small complex types are always padded upwards. */
15086 if (type)
15088 if ((AGGREGATE_TYPE_P (type)
15089 || TREE_CODE (type) == COMPLEX_TYPE
15090 || FIXED_POINT_TYPE_P (type))
15091 && int_size_in_bytes (type) <= 4)
15092 return true;
15094 else
15096 if ((COMPLEX_MODE_P (mode) || ALL_FIXED_POINT_MODE_P (mode))
15097 && GET_MODE_SIZE (mode) <= 4)
15098 return true;
15102 /* Otherwise, use default padding. */
15103 return !BYTES_BIG_ENDIAN;
15106 /* Returns true iff OFFSET is valid for use in an LDRD/STRD instruction,
15107 assuming that the address in the base register is word aligned. */
15108 bool
15109 offset_ok_for_ldrd_strd (HOST_WIDE_INT offset)
15111 HOST_WIDE_INT max_offset;
15113 /* Offset must be a multiple of 4 in Thumb mode. */
15114 if (TARGET_THUMB2 && ((offset & 3) != 0))
15115 return false;
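/* Thumb-2 LDRD/STRD takes an 8-bit immediate offset scaled by 4 (0..1020);
   ARM-state LDRD/STRD takes an unscaled 8-bit immediate (0..255).  */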
15117 if (TARGET_THUMB2)
15118 max_offset = 1020;
15119 else if (TARGET_ARM)
15120 max_offset = 255;
15121 else
15122 return false;
15124 return ((offset <= max_offset) && (offset >= -max_offset));
15127 /* Checks whether the operands are valid for use in an LDRD/STRD instruction.
15128 Assumes that RT, RT2, and RN are REG. This is guaranteed by the patterns.
15129 Assumes that the address in the base register RN is word aligned. Pattern
15130 guarantees that both memory accesses use the same base register,
15131 the offsets are constants within the range, and the gap between the offsets is 4.
15132 If reload is complete then check that the registers are legal. WBACK indicates whether
15133 address is updated. LOAD indicates whether memory access is load or store. */
15134 bool
15135 operands_ok_ldrd_strd (rtx rt, rtx rt2, rtx rn, HOST_WIDE_INT offset,
15136 bool wback, bool load)
15138 unsigned int t, t2, n;
15140 if (!reload_completed)
15141 return true;
15143 if (!offset_ok_for_ldrd_strd (offset))
15144 return false;
15146 t = REGNO (rt);
15147 t2 = REGNO (rt2);
15148 n = REGNO (rn);
15150 if ((TARGET_THUMB2)
15151 && ((wback && (n == t || n == t2))
15152 || (t == SP_REGNUM)
15153 || (t == PC_REGNUM)
15154 || (t2 == SP_REGNUM)
15155 || (t2 == PC_REGNUM)
15156 || (!load && (n == PC_REGNUM))
15157 || (load && (t == t2))
15158 /* Triggers Cortex-M3 LDRD errata. */
15159 || (!wback && load && fix_cm3_ldrd && (n == t))))
15160 return false;
15162 if ((TARGET_ARM)
15163 && ((wback && (n == t || n == t2))
15164 || (t2 == PC_REGNUM)
15165 || (t % 2 != 0) /* First destination register is not even. */
15166 || (t2 != t + 1)
15167 /* PC can be used as base register (for offset addressing only),
15168 but it is deprecated. */
15169 || (n == PC_REGNUM)))
15170 return false;
15172 return true;
15175 /* Helper for gen_operands_ldrd_strd. Returns true iff the memory
15176 operand MEM's address contains an immediate offset from the base
15177 register and has no side effects, in which case it sets BASE and
15178 OFFSET accordingly. */
15179 static bool
15180 mem_ok_for_ldrd_strd (rtx mem, rtx *base, rtx *offset)
15182 rtx addr;
15184 gcc_assert (base != NULL && offset != NULL);
15186 /* TODO: Handle more general memory operand patterns, such as
15187 PRE_DEC and PRE_INC. */
15189 if (side_effects_p (mem))
15190 return false;
15192 /* Can't deal with subregs. */
15193 if (GET_CODE (mem) == SUBREG)
15194 return false;
15196 gcc_assert (MEM_P (mem));
15198 *offset = const0_rtx;
15200 addr = XEXP (mem, 0);
15202 /* If addr isn't valid for DImode, then we can't handle it. */
15203 if (!arm_legitimate_address_p (DImode, addr,
15204 reload_in_progress || reload_completed))
15205 return false;
15207 if (REG_P (addr))
15209 *base = addr;
15210 return true;
15212 else if (GET_CODE (addr) == PLUS || GET_CODE (addr) == MINUS)
15214 *base = XEXP (addr, 0);
15215 *offset = XEXP (addr, 1);
15216 return (REG_P (*base) && CONST_INT_P (*offset));
15219 return false;
15222 /* Called from a peephole2 to replace two word-size accesses with a
15223 single LDRD/STRD instruction. Returns true iff we can generate a
15224 new instruction sequence. That is, both accesses use the same base
15225 register and the gap between constant offsets is 4. This function
15226 may reorder its operands to match ldrd/strd RTL templates.
15227 OPERANDS are the operands found by the peephole matcher;
15228 OPERANDS[0,1] are register operands, and OPERANDS[2,3] are the
15229 corresponding memory operands. LOAD indicates whether the access
15230 is load or store. CONST_STORE indicates a store of constant
15231 integer values held in OPERANDS[4,5] and assumes that the pattern
15232 is 4 insns long, for the purpose of checking dead registers.
15233 COMMUTE indicates that register operands may be reordered. */
15234 bool
15235 gen_operands_ldrd_strd (rtx *operands, bool load,
15236 bool const_store, bool commute)
15238 int nops = 2;
15239 HOST_WIDE_INT offsets[2], offset;
15240 rtx base = NULL_RTX;
15241 rtx cur_base, cur_offset, tmp;
15242 int i, gap;
15243 HARD_REG_SET regset;
15245 gcc_assert (!const_store || !load);
15246 /* Check that the memory references are immediate offsets from the
15247 same base register. Extract the base register, the destination
15248 registers, and the corresponding memory offsets. */
15249 for (i = 0; i < nops; i++)
15251 if (!mem_ok_for_ldrd_strd (operands[nops+i], &cur_base, &cur_offset))
15252 return false;
15254 if (i == 0)
15255 base = cur_base;
15256 else if (REGNO (base) != REGNO (cur_base))
15257 return false;
15259 offsets[i] = INTVAL (cur_offset);
15260 if (GET_CODE (operands[i]) == SUBREG)
15262 tmp = SUBREG_REG (operands[i]);
15263 gcc_assert (GET_MODE (operands[i]) == GET_MODE (tmp));
15264 operands[i] = tmp;
15268 /* Make sure there is no dependency between the individual loads. */
15269 if (load && REGNO (operands[0]) == REGNO (base))
15270 return false; /* RAW */
15272 if (load && REGNO (operands[0]) == REGNO (operands[1]))
15273 return false; /* WAW */
15275 /* If the same input register is used in both stores
15276 when storing different constants, try to find a free register.
15277 For example, the code
15278 mov r0, 0
15279 str r0, [r2]
15280 mov r0, 1
15281 str r0, [r2, #4]
15282 can be transformed into
15283 mov r1, 0
15284 mov r0, 1
15285 strd r1, r0, [r2]
15286 in Thumb mode assuming that r1 is free.
15287 For ARM mode do the same but only if the starting register
15288 can be made to be even. */
15289 if (const_store
15290 && REGNO (operands[0]) == REGNO (operands[1])
15291 && INTVAL (operands[4]) != INTVAL (operands[5]))
15293 if (TARGET_THUMB2)
15295 CLEAR_HARD_REG_SET (regset);
15296 tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
15297 if (tmp == NULL_RTX)
15298 return false;
15300 /* Use the new register in the first load to ensure that
15301 if the original input register is not dead after peephole,
15302 then it will have the correct constant value. */
15303 operands[0] = tmp;
15305 else if (TARGET_ARM)
15307 int regno = REGNO (operands[0]);
15308 if (!peep2_reg_dead_p (4, operands[0]))
15310 /* When the input register is even and is not dead after the
15311 pattern, it has to hold the second constant but we cannot
15312 form a legal STRD in ARM mode with this register as the second
15313 register. */
15314 if (regno % 2 == 0)
15315 return false;
15317 /* Is regno-1 free? */
15318 SET_HARD_REG_SET (regset);
15319 CLEAR_HARD_REG_BIT(regset, regno - 1);
15320 tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
15321 if (tmp == NULL_RTX)
15322 return false;
15324 operands[0] = tmp;
15326 else
15328 /* Find a DImode register. */
15329 CLEAR_HARD_REG_SET (regset);
15330 tmp = peep2_find_free_register (0, 4, "r", DImode, &regset);
15331 if (tmp != NULL_RTX)
15333 operands[0] = simplify_gen_subreg (SImode, tmp, DImode, 0);
15334 operands[1] = simplify_gen_subreg (SImode, tmp, DImode, 4);
15336 else
15338 /* Can we use the input register to form a DI register? */
15339 SET_HARD_REG_SET (regset);
15340 CLEAR_HARD_REG_BIT(regset,
15341 regno % 2 == 0 ? regno + 1 : regno - 1);
15342 tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
15343 if (tmp == NULL_RTX)
15344 return false;
15345 operands[regno % 2 == 1 ? 0 : 1] = tmp;
15349 gcc_assert (operands[0] != NULL_RTX);
15350 gcc_assert (operands[1] != NULL_RTX);
15351 gcc_assert (REGNO (operands[0]) % 2 == 0);
15352 gcc_assert (REGNO (operands[1]) == REGNO (operands[0]) + 1);
15356 /* Make sure the instructions are ordered with lower memory access first. */
15357 if (offsets[0] > offsets[1])
15359 gap = offsets[0] - offsets[1];
15360 offset = offsets[1];
15362 /* Swap the instructions such that lower memory is accessed first. */
15363 std::swap (operands[0], operands[1]);
15364 std::swap (operands[2], operands[3]);
15365 if (const_store)
15366 std::swap (operands[4], operands[5]);
15368 else
15370 gap = offsets[1] - offsets[0];
15371 offset = offsets[0];
15374 /* Make sure accesses are to consecutive memory locations. */
15375 if (gap != 4)
15376 return false;
15378 /* Make sure we generate legal instructions. */
15379 if (operands_ok_ldrd_strd (operands[0], operands[1], base, offset,
15380 false, load))
15381 return true;
15383 /* In Thumb state, where registers are almost unconstrained, there
15384 is little hope to fix it. */
15385 if (TARGET_THUMB2)
15386 return false;
15388 if (load && commute)
15390 /* Try reordering registers. */
15391 std::swap (operands[0], operands[1]);
15392 if (operands_ok_ldrd_strd (operands[0], operands[1], base, offset,
15393 false, load))
15394 return true;
15397 if (const_store)
15399 /* If input registers are dead after this pattern, they can be
15400 reordered or replaced by other registers that are free in the
15401 current pattern. */
15402 if (!peep2_reg_dead_p (4, operands[0])
15403 || !peep2_reg_dead_p (4, operands[1]))
15404 return false;
15406 /* Try to reorder the input registers. */
15407 /* For example, the code
15408 mov r0, 0
15409 mov r1, 1
15410 str r1, [r2]
15411 str r0, [r2, #4]
15412 can be transformed into
15413 mov r1, 0
15414 mov r0, 1
15415 strd r0, r1, [r2]
15417 if (operands_ok_ldrd_strd (operands[1], operands[0], base, offset,
15418 false, false))
15420 std::swap (operands[0], operands[1]);
15421 return true;
15424 /* Try to find a free DI register. */
15425 CLEAR_HARD_REG_SET (regset);
15426 add_to_hard_reg_set (&regset, SImode, REGNO (operands[0]));
15427 add_to_hard_reg_set (&regset, SImode, REGNO (operands[1]));
15428 while (true)
15430 tmp = peep2_find_free_register (0, 4, "r", DImode, &regset);
15431 if (tmp == NULL_RTX)
15432 return false;
15434 /* DREG must be an even-numbered register in DImode.
15435 Split it into SI registers. */
15436 operands[0] = simplify_gen_subreg (SImode, tmp, DImode, 0);
15437 operands[1] = simplify_gen_subreg (SImode, tmp, DImode, 4);
15438 gcc_assert (operands[0] != NULL_RTX);
15439 gcc_assert (operands[1] != NULL_RTX);
15440 gcc_assert (REGNO (operands[0]) % 2 == 0);
15441 gcc_assert (REGNO (operands[0]) + 1 == REGNO (operands[1]));
15443 return (operands_ok_ldrd_strd (operands[0], operands[1],
15444 base, offset,
15445 false, load));
15449 return false;
15455 /* Print a symbolic form of X to the debug file, F. */
15456 static void
15457 arm_print_value (FILE *f, rtx x)
15459 switch (GET_CODE (x))
15461 case CONST_INT:
15462 fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (x));
15463 return;
15465 case CONST_DOUBLE:
15466 fprintf (f, "<0x%lx,0x%lx>", (long)XWINT (x, 2), (long)XWINT (x, 3));
15467 return;
15469 case CONST_VECTOR:
15471 int i;
15473 fprintf (f, "<");
15474 for (i = 0; i < CONST_VECTOR_NUNITS (x); i++)
15476 fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (CONST_VECTOR_ELT (x, i)));
15477 if (i < (CONST_VECTOR_NUNITS (x) - 1))
15478 fputc (',', f);
15480 fprintf (f, ">");
15482 return;
15484 case CONST_STRING:
15485 fprintf (f, "\"%s\"", XSTR (x, 0));
15486 return;
15488 case SYMBOL_REF:
15489 fprintf (f, "`%s'", XSTR (x, 0));
15490 return;
15492 case LABEL_REF:
15493 fprintf (f, "L%d", INSN_UID (XEXP (x, 0)));
15494 return;
15496 case CONST:
15497 arm_print_value (f, XEXP (x, 0));
15498 return;
15500 case PLUS:
15501 arm_print_value (f, XEXP (x, 0));
15502 fprintf (f, "+");
15503 arm_print_value (f, XEXP (x, 1));
15504 return;
15506 case PC:
15507 fprintf (f, "pc");
15508 return;
15510 default:
15511 fprintf (f, "????");
15512 return;
15516 /* Routines for manipulation of the constant pool. */
15518 /* Arm instructions cannot load a large constant directly into a
15519 register; they have to come from a pc relative load. The constant
15520 must therefore be placed in the addressable range of the pc
15521 relative load. Depending on the precise pc relative load
15522 instruction the range is somewhere between 256 bytes and 4k. This
15523 means that we often have to dump a constant inside a function, and
15524 generate code to branch around it.
15526 It is important to minimize this, since the branches will slow
15527 things down and make the code larger.
15529 Normally we can hide the table after an existing unconditional
15530 branch so that there is no interruption of the flow, but in the
15531 worst case the code looks like this:
15533 ldr rn, L1
15535 b L2
15536 align
15537 L1: .long value
15541 ldr rn, L3
15543 b L4
15544 align
15545 L3: .long value
15549 We fix this by performing a scan after scheduling, which notices
15550 which instructions need to have their operands fetched from the
15551 constant table and builds the table.
15553 The algorithm starts by building a table of all the constants that
15554 need fixing up and all the natural barriers in the function (places
15555 where a constant table can be dropped without breaking the flow).
15556 For each fixup we note how far the pc-relative replacement will be
15557 able to reach and the offset of the instruction into the function.
15559 Having built the table we then group the fixes together to form
15560 tables that are as large as possible (subject to addressing
15561 constraints) and emit each table of constants after the last
15562 barrier that is within range of all the instructions in the group.
15563 If a group does not contain a barrier, then we forcibly create one
15564 by inserting a jump instruction into the flow. Once the table has
15565 been inserted, the insns are then modified to reference the
15566 relevant entry in the pool.
15568 Possible enhancements to the algorithm (not implemented) are:
15570 1) For some processors and object formats, there may be benefit in
15571 aligning the pools to the start of cache lines; this alignment
15572 would need to be taken into account when calculating addressability
15573 of a pool. */
15575 /* These typedefs are located at the start of this file, so that
15576 they can be used in the prototypes there. This comment is to
15577 remind readers of that fact so that the following structures
15578 can be understood more easily.
15580 typedef struct minipool_node Mnode;
15581 typedef struct minipool_fixup Mfix; */
15583 struct minipool_node
15585 /* Doubly linked chain of entries. */
15586 Mnode * next;
15587 Mnode * prev;
15588 /* The maximum offset into the code that this entry can be placed. While
15589 pushing fixes for forward references, all entries are sorted in order
15590 of increasing max_address. */
15591 HOST_WIDE_INT max_address;
15592 /* Similarly for an entry inserted for a backwards ref. */
15593 HOST_WIDE_INT min_address;
15594 /* The number of fixes referencing this entry. This can become zero
15595 if we "unpush" an entry. In this case we ignore the entry when we
15596 come to emit the code. */
15597 int refcount;
15598 /* The offset from the start of the minipool. */
15599 HOST_WIDE_INT offset;
15600 /* The value in the table. */
15601 rtx value;
15602 /* The mode of value. */
15603 machine_mode mode;
15604 /* The size of the value. With iWMMXt enabled
15605 sizes > 4 also imply an alignment of 8 bytes. */
15606 int fix_size;
15609 struct minipool_fixup
15611 Mfix * next;
15612 rtx_insn * insn;
15613 HOST_WIDE_INT address;
15614 rtx * loc;
15615 machine_mode mode;
15616 int fix_size;
15617 rtx value;
15618 Mnode * minipool;
15619 HOST_WIDE_INT forwards;
15620 HOST_WIDE_INT backwards;
15623 /* Fixes less than a word need padding out to a word boundary. */
15624 #define MINIPOOL_FIX_SIZE(mode) \
15625 (GET_MODE_SIZE ((mode)) >= 4 ? GET_MODE_SIZE ((mode)) : 4)
15627 static Mnode * minipool_vector_head;
15628 static Mnode * minipool_vector_tail;
15629 static rtx_code_label *minipool_vector_label;
15630 static int minipool_pad;
15632 /* The linked list of all minipool fixes required for this function. */
15633 Mfix * minipool_fix_head;
15634 Mfix * minipool_fix_tail;
15635 /* The fix entry for the current minipool, once it has been placed. */
15636 Mfix * minipool_barrier;
15638 #ifndef JUMP_TABLES_IN_TEXT_SECTION
15639 #define JUMP_TABLES_IN_TEXT_SECTION 0
15640 #endif
15642 static HOST_WIDE_INT
15643 get_jump_table_size (rtx_jump_table_data *insn)
15645 /* ADDR_VECs only take room if read-only data goes into the text
15646 section. */
15647 if (JUMP_TABLES_IN_TEXT_SECTION || readonly_data_section == text_section)
15649 rtx body = PATTERN (insn);
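/* For an ADDR_DIFF_VEC the vector of labels is operand 1; for an
   ADDR_VEC it is operand 0.  */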
15650 int elt = GET_CODE (body) == ADDR_DIFF_VEC ? 1 : 0;
15651 HOST_WIDE_INT size;
15652 HOST_WIDE_INT modesize;
15654 modesize = GET_MODE_SIZE (GET_MODE (body));
15655 size = modesize * XVECLEN (body, elt);
15656 switch (modesize)
15658 case 1:
15659 /* Round up size of TBB table to a halfword boundary. */
15660 size = (size + 1) & ~HOST_WIDE_INT_1;
15661 break;
15662 case 2:
15663 /* No padding necessary for TBH. */
15664 break;
15665 case 4:
15666 /* Add two bytes for alignment on Thumb. */
15667 if (TARGET_THUMB)
15668 size += 2;
15669 break;
15670 default:
15671 gcc_unreachable ();
15673 return size;
15676 return 0;
15679 /* Return the maximum amount of padding that will be inserted before
15680 label LABEL. */
15682 static HOST_WIDE_INT
15683 get_label_padding (rtx label)
15685 HOST_WIDE_INT align, min_insn_size;
15687 align = 1 << label_to_alignment (label);
15688 min_insn_size = TARGET_THUMB ? 2 : 4;
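/* Insn addresses are multiples of the minimum insn size, so the padding
   emitted before an aligned label is at most ALIGN minus that size.  */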
15689 return align > min_insn_size ? align - min_insn_size : 0;
15692 /* Move a minipool fix MP from its current location to before MAX_MP.
15693 If MAX_MP is NULL, then MP doesn't need moving, but the addressing
15694 constraints may need updating. */
15695 static Mnode *
15696 move_minipool_fix_forward_ref (Mnode *mp, Mnode *max_mp,
15697 HOST_WIDE_INT max_address)
15699 /* The code below assumes these are different. */
15700 gcc_assert (mp != max_mp);
15702 if (max_mp == NULL)
15704 if (max_address < mp->max_address)
15705 mp->max_address = max_address;
15707 else
15709 if (max_address > max_mp->max_address - mp->fix_size)
15710 mp->max_address = max_mp->max_address - mp->fix_size;
15711 else
15712 mp->max_address = max_address;
15714 /* Unlink MP from its current position. Since max_mp is non-null,
15715 mp->prev must be non-null. */
15716 mp->prev->next = mp->next;
15717 if (mp->next != NULL)
15718 mp->next->prev = mp->prev;
15719 else
15720 minipool_vector_tail = mp->prev;
15722 /* Re-insert it before MAX_MP. */
15723 mp->next = max_mp;
15724 mp->prev = max_mp->prev;
15725 max_mp->prev = mp;
15727 if (mp->prev != NULL)
15728 mp->prev->next = mp;
15729 else
15730 minipool_vector_head = mp;
15733 /* Save the new entry. */
15734 max_mp = mp;
15736 /* Scan over the preceding entries and adjust their addresses as
15737 required. */
15738 while (mp->prev != NULL
15739 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
15741 mp->prev->max_address = mp->max_address - mp->prev->fix_size;
15742 mp = mp->prev;
15745 return max_mp;
15748 /* Add a constant to the minipool for a forward reference. Returns the
15749 node added or NULL if the constant will not fit in this pool. */
15750 static Mnode *
15751 add_minipool_forward_ref (Mfix *fix)
15753 /* If set, max_mp is the first pool_entry that has a lower
15754 constraint than the one we are trying to add. */
15755 Mnode * max_mp = NULL;
15756 HOST_WIDE_INT max_address = fix->address + fix->forwards - minipool_pad;
15757 Mnode * mp;
15759 /* If the minipool starts before the end of FIX->INSN then this FIX
15760 cannot be placed into the current pool. Furthermore, adding the
15761 new constant pool entry may cause the pool to start FIX_SIZE bytes
15762 earlier. */
15763 if (minipool_vector_head &&
15764 (fix->address + get_attr_length (fix->insn)
15765 >= minipool_vector_head->max_address - fix->fix_size))
15766 return NULL;
15768 /* Scan the pool to see if a constant with the same value has
15769 already been added. While we are doing this, also note the
15770 location where we must insert the constant if it doesn't already
15771 exist. */
15772 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
15774 if (GET_CODE (fix->value) == GET_CODE (mp->value)
15775 && fix->mode == mp->mode
15776 && (!LABEL_P (fix->value)
15777 || (CODE_LABEL_NUMBER (fix->value)
15778 == CODE_LABEL_NUMBER (mp->value)))
15779 && rtx_equal_p (fix->value, mp->value))
15781 /* More than one fix references this entry. */
15782 mp->refcount++;
15783 return move_minipool_fix_forward_ref (mp, max_mp, max_address);
15786 /* Note the insertion point if necessary. */
15787 if (max_mp == NULL
15788 && mp->max_address > max_address)
15789 max_mp = mp;
15791 /* If we are inserting an 8-byte aligned quantity and
15792 we have not already found an insertion point, then
15793 make sure that all such 8-byte aligned quantities are
15794 placed at the start of the pool. */
15795 if (ARM_DOUBLEWORD_ALIGN
15796 && max_mp == NULL
15797 && fix->fix_size >= 8
15798 && mp->fix_size < 8)
15800 max_mp = mp;
15801 max_address = mp->max_address;
15805 /* The value is not currently in the minipool, so we need to create
15806 a new entry for it. If MAX_MP is NULL, the entry will be put on
15807 the end of the list since the placement is less constrained than
15808 any existing entry. Otherwise, we insert the new fix before
15809 MAX_MP and, if necessary, adjust the constraints on the other
15810 entries. */
15811 mp = XNEW (Mnode);
15812 mp->fix_size = fix->fix_size;
15813 mp->mode = fix->mode;
15814 mp->value = fix->value;
15815 mp->refcount = 1;
15816 /* Not yet required for a backwards ref. */
15817 mp->min_address = -65536;
15819 if (max_mp == NULL)
15821 mp->max_address = max_address;
15822 mp->next = NULL;
15823 mp->prev = minipool_vector_tail;
15825 if (mp->prev == NULL)
15827 minipool_vector_head = mp;
15828 minipool_vector_label = gen_label_rtx ();
15830 else
15831 mp->prev->next = mp;
15833 minipool_vector_tail = mp;
15835 else
15837 if (max_address > max_mp->max_address - mp->fix_size)
15838 mp->max_address = max_mp->max_address - mp->fix_size;
15839 else
15840 mp->max_address = max_address;
15842 mp->next = max_mp;
15843 mp->prev = max_mp->prev;
15844 max_mp->prev = mp;
15845 if (mp->prev != NULL)
15846 mp->prev->next = mp;
15847 else
15848 minipool_vector_head = mp;
15851 /* Save the new entry. */
15852 max_mp = mp;
15854 /* Scan over the preceding entries and adjust their addresses as
15855 required. */
15856 while (mp->prev != NULL
15857 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
15859 mp->prev->max_address = mp->max_address - mp->prev->fix_size;
15860 mp = mp->prev;
15863 return max_mp;
15866 static Mnode *
15867 move_minipool_fix_backward_ref (Mnode *mp, Mnode *min_mp,
15868 HOST_WIDE_INT min_address)
15870 HOST_WIDE_INT offset;
15872 /* The code below assumes these are different. */
15873 gcc_assert (mp != min_mp);
15875 if (min_mp == NULL)
15877 if (min_address > mp->min_address)
15878 mp->min_address = min_address;
15880 else
15882 /* We will adjust this below if it is too loose. */
15883 mp->min_address = min_address;
15885 /* Unlink MP from its current position. Since min_mp is non-null,
15886 mp->next must be non-null. */
15887 mp->next->prev = mp->prev;
15888 if (mp->prev != NULL)
15889 mp->prev->next = mp->next;
15890 else
15891 minipool_vector_head = mp->next;
15893 /* Reinsert it after MIN_MP. */
15894 mp->prev = min_mp;
15895 mp->next = min_mp->next;
15896 min_mp->next = mp;
15897 if (mp->next != NULL)
15898 mp->next->prev = mp;
15899 else
15900 minipool_vector_tail = mp;
15903 min_mp = mp;
15905 offset = 0;
15906 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
15908 mp->offset = offset;
15909 if (mp->refcount > 0)
15910 offset += mp->fix_size;
15912 if (mp->next && mp->next->min_address < mp->min_address + mp->fix_size)
15913 mp->next->min_address = mp->min_address + mp->fix_size;
15916 return min_mp;
15919 /* Add a constant to the minipool for a backward reference. Returns the
15920 node added or NULL if the constant will not fit in this pool.
15922 Note that the code for insertion for a backwards reference can be
15923 somewhat confusing because the calculated offsets for each fix do
15924 not take into account the size of the pool (which is still under
15925 construction). */
15926 static Mnode *
15927 add_minipool_backward_ref (Mfix *fix)
15929 /* If set, min_mp is the last pool_entry that has a lower constraint
15930 than the one we are trying to add. */
15931 Mnode *min_mp = NULL;
15932 /* This can be negative, since it is only a constraint. */
15933 HOST_WIDE_INT min_address = fix->address - fix->backwards;
15934 Mnode *mp;
15936 /* If we can't reach the current pool from this insn, or if we can't
15937 insert this entry at the end of the pool without pushing other
15938 fixes out of range, then we don't try. This ensures that we
15939 can't fail later on. */
15940 if (min_address >= minipool_barrier->address
15941 || (minipool_vector_tail->min_address + fix->fix_size
15942 >= minipool_barrier->address))
15943 return NULL;
15945 /* Scan the pool to see if a constant with the same value has
15946 already been added. While we are doing this, also note the
15947 location where we must insert the constant if it doesn't already
15948 exist. */
15949 for (mp = minipool_vector_tail; mp != NULL; mp = mp->prev)
15951 if (GET_CODE (fix->value) == GET_CODE (mp->value)
15952 && fix->mode == mp->mode
15953 && (!LABEL_P (fix->value)
15954 || (CODE_LABEL_NUMBER (fix->value)
15955 == CODE_LABEL_NUMBER (mp->value)))
15956 && rtx_equal_p (fix->value, mp->value)
15957 /* Check that there is enough slack to move this entry to the
15958 end of the table (this is conservative). */
15959 && (mp->max_address
15960 > (minipool_barrier->address
15961 + minipool_vector_tail->offset
15962 + minipool_vector_tail->fix_size)))
15964 mp->refcount++;
15965 return move_minipool_fix_backward_ref (mp, min_mp, min_address);
15968 if (min_mp != NULL)
15969 mp->min_address += fix->fix_size;
15970 else
15972 /* Note the insertion point if necessary. */
15973 if (mp->min_address < min_address)
15975 /* For now, we do not allow the insertion of 8-byte alignment
15976 requiring nodes anywhere but at the start of the pool. */
15977 if (ARM_DOUBLEWORD_ALIGN
15978 && fix->fix_size >= 8 && mp->fix_size < 8)
15979 return NULL;
15980 else
15981 min_mp = mp;
15983 else if (mp->max_address
15984 < minipool_barrier->address + mp->offset + fix->fix_size)
15986 /* Inserting before this entry would push the fix beyond
15987 its maximum address (which can happen if we have
15988 re-located a forwards fix); force the new fix to come
15989 after it. */
15990 if (ARM_DOUBLEWORD_ALIGN
15991 && fix->fix_size >= 8 && mp->fix_size < 8)
15992 return NULL;
15993 else
15995 min_mp = mp;
15996 min_address = mp->min_address + fix->fix_size;
15999 /* Do not insert a non-8-byte aligned quantity before 8-byte
16000 aligned quantities. */
16001 else if (ARM_DOUBLEWORD_ALIGN
16002 && fix->fix_size < 8
16003 && mp->fix_size >= 8)
16005 min_mp = mp;
16006 min_address = mp->min_address + fix->fix_size;
16011 /* We need to create a new entry. */
16012 mp = XNEW (Mnode);
16013 mp->fix_size = fix->fix_size;
16014 mp->mode = fix->mode;
16015 mp->value = fix->value;
16016 mp->refcount = 1;
16017 mp->max_address = minipool_barrier->address + 65536;
16019 mp->min_address = min_address;
16021 if (min_mp == NULL)
16023 mp->prev = NULL;
16024 mp->next = minipool_vector_head;
16026 if (mp->next == NULL)
16028 minipool_vector_tail = mp;
16029 minipool_vector_label = gen_label_rtx ();
16031 else
16032 mp->next->prev = mp;
16034 minipool_vector_head = mp;
16036 else
16038 mp->next = min_mp->next;
16039 mp->prev = min_mp;
16040 min_mp->next = mp;
16042 if (mp->next != NULL)
16043 mp->next->prev = mp;
16044 else
16045 minipool_vector_tail = mp;
16048 /* Save the new entry. */
16049 min_mp = mp;
16051 if (mp->prev)
16052 mp = mp->prev;
16053 else
16054 mp->offset = 0;
16056 /* Scan over the following entries and adjust their offsets. */
16057 while (mp->next != NULL)
16059 if (mp->next->min_address < mp->min_address + mp->fix_size)
16060 mp->next->min_address = mp->min_address + mp->fix_size;
16062 if (mp->refcount)
16063 mp->next->offset = mp->offset + mp->fix_size;
16064 else
16065 mp->next->offset = mp->offset;
16067 mp = mp->next;
16070 return min_mp;
16073 static void
16074 assign_minipool_offsets (Mfix *barrier)
16076 HOST_WIDE_INT offset = 0;
16077 Mnode *mp;
16079 minipool_barrier = barrier;
16081 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
16083 mp->offset = offset;
16085 if (mp->refcount > 0)
16086 offset += mp->fix_size;
16090 /* Output the literal table. */
16091 static void
16092 dump_minipool (rtx_insn *scan)
16094 Mnode * mp;
16095 Mnode * nmp;
16096 int align64 = 0;
16098 if (ARM_DOUBLEWORD_ALIGN)
16099 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
16100 if (mp->refcount > 0 && mp->fix_size >= 8)
16102 align64 = 1;
16103 break;
16106 if (dump_file)
16107 fprintf (dump_file,
16108 ";; Emitting minipool after insn %u; address %ld; align %d (bytes)\n",
16109 INSN_UID (scan), (unsigned long) minipool_barrier->address, align64 ? 8 : 4);
16111 scan = emit_label_after (gen_label_rtx (), scan);
16112 scan = emit_insn_after (align64 ? gen_align_8 () : gen_align_4 (), scan);
16113 scan = emit_label_after (minipool_vector_label, scan);
16115 for (mp = minipool_vector_head; mp != NULL; mp = nmp)
16117 if (mp->refcount > 0)
16119 if (dump_file)
16121 fprintf (dump_file,
16122 ";; Offset %u, min %ld, max %ld ",
16123 (unsigned) mp->offset, (unsigned long) mp->min_address,
16124 (unsigned long) mp->max_address);
16125 arm_print_value (dump_file, mp->value);
16126 fputc ('\n', dump_file);
16129 rtx val = copy_rtx (mp->value);
16131 switch (GET_MODE_SIZE (mp->mode))
16133 #ifdef HAVE_consttable_1
16134 case 1:
16135 scan = emit_insn_after (gen_consttable_1 (val), scan);
16136 break;
16138 #endif
16139 #ifdef HAVE_consttable_2
16140 case 2:
16141 scan = emit_insn_after (gen_consttable_2 (val), scan);
16142 break;
16144 #endif
16145 #ifdef HAVE_consttable_4
16146 case 4:
16147 scan = emit_insn_after (gen_consttable_4 (val), scan);
16148 break;
16150 #endif
16151 #ifdef HAVE_consttable_8
16152 case 8:
16153 scan = emit_insn_after (gen_consttable_8 (val), scan);
16154 break;
16156 #endif
16157 #ifdef HAVE_consttable_16
16158 case 16:
16159 scan = emit_insn_after (gen_consttable_16 (val), scan);
16160 break;
16162 #endif
16163 default:
16164 gcc_unreachable ();
16168 nmp = mp->next;
16169 free (mp);
16172 minipool_vector_head = minipool_vector_tail = NULL;
16173 scan = emit_insn_after (gen_consttable_end (), scan);
16174 scan = emit_barrier_after (scan);
16177 /* Return the cost of forcibly inserting a barrier after INSN. */
16178 static int
16179 arm_barrier_cost (rtx_insn *insn)
16181 /* Basing the location of the pool on the loop depth is preferable,
16182 but at the moment, the basic block information seems to be
16183 corrupted by this stage of the compilation. */
16184 int base_cost = 50;
16185 rtx_insn *next = next_nonnote_insn (insn);
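/* A location just before an existing label is presumably cheaper, since
   the flow of control is already broken there.  */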
16187 if (next != NULL && LABEL_P (next))
16188 base_cost -= 20;
16190 switch (GET_CODE (insn))
16192 case CODE_LABEL:
16193 /* It will always be better to place the table before the label, rather
16194 than after it. */
16195 return 50;
16197 case INSN:
16198 case CALL_INSN:
16199 return base_cost;
16201 case JUMP_INSN:
16202 return base_cost - 10;
16204 default:
16205 return base_cost + 10;
16209 /* Find the best place in the insn stream in the range
16210 (FIX->address,MAX_ADDRESS) to forcibly insert a minipool barrier.
16211 Create the barrier by inserting a jump and add a new fix entry for
16212 it. */
16213 static Mfix *
16214 create_fix_barrier (Mfix *fix, HOST_WIDE_INT max_address)
16216 HOST_WIDE_INT count = 0;
16217 rtx_barrier *barrier;
16218 rtx_insn *from = fix->insn;
16219 /* The instruction after which we will insert the jump. */
16220 rtx_insn *selected = NULL;
16221 int selected_cost;
16222 /* The address at which the jump instruction will be placed. */
16223 HOST_WIDE_INT selected_address;
16224 Mfix * new_fix;
16225 HOST_WIDE_INT max_count = max_address - fix->address;
16226 rtx_code_label *label = gen_label_rtx ();
16228 selected_cost = arm_barrier_cost (from);
16229 selected_address = fix->address;
16231 while (from && count < max_count)
16233 rtx_jump_table_data *tmp;
16234 int new_cost;
16236 /* This code shouldn't have been called if there was a natural barrier
16237 within range. */
16238 gcc_assert (!BARRIER_P (from));
16240 /* Count the length of this insn. This must stay in sync with the
16241 code that pushes minipool fixes. */
16242 if (LABEL_P (from))
16243 count += get_label_padding (from);
16244 else
16245 count += get_attr_length (from);
16247 /* If there is a jump table, add its length. */
16248 if (tablejump_p (from, NULL, &tmp))
16250 count += get_jump_table_size (tmp);
16252 /* Jump tables aren't in a basic block, so base the cost on
16253 the dispatch insn. If we select this location, we will
16254 still put the pool after the table. */
16255 new_cost = arm_barrier_cost (from);
16257 if (count < max_count
16258 && (!selected || new_cost <= selected_cost))
16260 selected = tmp;
16261 selected_cost = new_cost;
16262 selected_address = fix->address + count;
16265 /* Continue after the dispatch table. */
16266 from = NEXT_INSN (tmp);
16267 continue;
16270 new_cost = arm_barrier_cost (from);
16272 if (count < max_count
16273 && (!selected || new_cost <= selected_cost))
16275 selected = from;
16276 selected_cost = new_cost;
16277 selected_address = fix->address + count;
16280 from = NEXT_INSN (from);
16283 /* Make sure that we found a place to insert the jump. */
16284 gcc_assert (selected);
16286 /* Make sure we do not split a call and its corresponding
16287 CALL_ARG_LOCATION note. */
16288 if (CALL_P (selected))
16290 rtx_insn *next = NEXT_INSN (selected);
16291 if (next && NOTE_P (next)
16292 && NOTE_KIND (next) == NOTE_INSN_CALL_ARG_LOCATION)
16293 selected = next;
16296 /* Create a new JUMP_INSN that branches around a barrier. */
16297 from = emit_jump_insn_after (gen_jump (label), selected);
16298 JUMP_LABEL (from) = label;
16299 barrier = emit_barrier_after (from);
16300 emit_label_after (label, barrier);
16302 /* Create a minipool barrier entry for the new barrier. */
16303 new_fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* new_fix));
16304 new_fix->insn = barrier;
16305 new_fix->address = selected_address;
16306 new_fix->next = fix->next;
16307 fix->next = new_fix;
16309 return new_fix;
16312 /* Record that there is a natural barrier in the insn stream at
16313 ADDRESS. */
16314 static void
16315 push_minipool_barrier (rtx_insn *insn, HOST_WIDE_INT address)
16317 Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));
16319 fix->insn = insn;
16320 fix->address = address;
16322 fix->next = NULL;
16323 if (minipool_fix_head != NULL)
16324 minipool_fix_tail->next = fix;
16325 else
16326 minipool_fix_head = fix;
16328 minipool_fix_tail = fix;
16331 /* Record INSN, which will need fixing up to load a value from the
16332 minipool. ADDRESS is the offset of the insn since the start of the
16333 function; LOC is a pointer to the part of the insn which requires
16334 fixing; VALUE is the constant that must be loaded, which is of type
16335 MODE. */
16336 static void
16337 push_minipool_fix (rtx_insn *insn, HOST_WIDE_INT address, rtx *loc,
16338 machine_mode mode, rtx value)
16340 Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));
16342 fix->insn = insn;
16343 fix->address = address;
16344 fix->loc = loc;
16345 fix->mode = mode;
16346 fix->fix_size = MINIPOOL_FIX_SIZE (mode);
16347 fix->value = value;
16348 fix->forwards = get_attr_pool_range (insn);
16349 fix->backwards = get_attr_neg_pool_range (insn);
16350 fix->minipool = NULL;
16352 /* If an insn doesn't have a range defined for it, then it isn't
16353 expecting to be reworked by this code. Better to stop now than
16354 to generate duff assembly code. */
16355 gcc_assert (fix->forwards || fix->backwards);
16357 /* If an entry requires 8-byte alignment then assume all constant pools
16358 require 4 bytes of padding. Trying to do this later on a per-pool
16359 basis is awkward because existing pool entries have to be modified. */
16360 if (ARM_DOUBLEWORD_ALIGN && fix->fix_size >= 8)
16361 minipool_pad = 4;
16363 if (dump_file)
16365 fprintf (dump_file,
16366 ";; %smode fixup for i%d; addr %lu, range (%ld,%ld): ",
16367 GET_MODE_NAME (mode),
16368 INSN_UID (insn), (unsigned long) address,
16369 -1 * (long)fix->backwards, (long)fix->forwards);
16370 arm_print_value (dump_file, fix->value);
16371 fprintf (dump_file, "\n");
16374 /* Add it to the chain of fixes. */
16375 fix->next = NULL;
16377 if (minipool_fix_head != NULL)
16378 minipool_fix_tail->next = fix;
16379 else
16380 minipool_fix_head = fix;
16382 minipool_fix_tail = fix;
16385 /* Return maximum allowed cost of synthesizing a 64-bit constant VAL inline.
16386 Returns the number of insns needed, or 99 if we always want to synthesize
16387 the value. */
16389 arm_max_const_double_inline_cost ()
16391 /* Let the value get synthesized to avoid the use of literal pools. */
16392 if (arm_disable_literal_pool)
16393 return 99;
16395 return ((optimize_size || arm_ld_sched) ? 3 : 4);
16398 /* Return the cost of synthesizing a 64-bit constant VAL inline.
16399 Returns the number of insns needed, or 99 if we don't know how to
16400 do it. */
16402 arm_const_double_inline_cost (rtx val)
16404 rtx lowpart, highpart;
16405 machine_mode mode;
16407 mode = GET_MODE (val);
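/* A constant integer rtx carries VOIDmode; treat such a 64-bit constant
   as DImode here.  */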
16409 if (mode == VOIDmode)
16410 mode = DImode;
16412 gcc_assert (GET_MODE_SIZE (mode) == 8);
16414 lowpart = gen_lowpart (SImode, val);
16415 highpart = gen_highpart_mode (SImode, mode, val);
16417 gcc_assert (CONST_INT_P (lowpart));
16418 gcc_assert (CONST_INT_P (highpart));
16420 return (arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (lowpart),
16421 NULL_RTX, NULL_RTX, 0, 0)
16422 + arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (highpart),
16423 NULL_RTX, NULL_RTX, 0, 0));
16426 /* Cost of loading a SImode constant. */
16427 static inline int
16428 arm_const_inline_cost (enum rtx_code code, rtx val)
16430 return arm_gen_constant (code, SImode, NULL_RTX, INTVAL (val),
16431 NULL_RTX, NULL_RTX, 1, 0);
16434 /* Return true if it is worthwhile to split a 64-bit constant into two
16435 32-bit operations. This is the case if optimizing for size, or
16436 if we have load delay slots, or if one 32-bit part can be done with
16437 a single data operation. */
16438 bool
16439 arm_const_double_by_parts (rtx val)
16441 machine_mode mode = GET_MODE (val);
16442 rtx part;
16444 if (optimize_size || arm_ld_sched)
16445 return true;
16447 if (mode == VOIDmode)
16448 mode = DImode;
16450 part = gen_highpart_mode (SImode, mode, val);
16452 gcc_assert (CONST_INT_P (part));
16454 if (const_ok_for_arm (INTVAL (part))
16455 || const_ok_for_arm (~INTVAL (part)))
16456 return true;
16458 part = gen_lowpart (SImode, val);
16460 gcc_assert (CONST_INT_P (part));
16462 if (const_ok_for_arm (INTVAL (part))
16463 || const_ok_for_arm (~INTVAL (part)))
16464 return true;
16466 return false;
16469 /* Return true if it is possible to inline both the high and low parts
16470 of a 64-bit constant into 32-bit data processing instructions. */
16471 bool
16472 arm_const_double_by_immediates (rtx val)
16474 machine_mode mode = GET_MODE (val);
16475 rtx part;
16477 if (mode == VOIDmode)
16478 mode = DImode;
16480 part = gen_highpart_mode (SImode, mode, val);
16482 gcc_assert (CONST_INT_P (part));
16484 if (!const_ok_for_arm (INTVAL (part)))
16485 return false;
16487 part = gen_lowpart (SImode, val);
16489 gcc_assert (CONST_INT_P (part));
16491 if (!const_ok_for_arm (INTVAL (part)))
16492 return false;
16494 return true;
16497 /* Scan INSN and note any of its operands that need fixing.
16498 If DO_PUSHES is false we do not actually push any of the fixups
16499 needed. */
16500 static void
16501 note_invalid_constants (rtx_insn *insn, HOST_WIDE_INT address, int do_pushes)
16503 int opno;
16505 extract_constrain_insn (insn);
16507 if (recog_data.n_alternatives == 0)
16508 return;
16510 /* Fill in recog_op_alt with information about the constraints of
16511 this insn. */
16512 preprocess_constraints (insn);
16514 const operand_alternative *op_alt = which_op_alt ();
16515 for (opno = 0; opno < recog_data.n_operands; opno++)
16517 /* Things we need to fix can only occur in inputs. */
16518 if (recog_data.operand_type[opno] != OP_IN)
16519 continue;
16521 /* If this alternative is a memory reference, then any mention
16522 of constants in this alternative is really to fool reload
16523 into allowing us to accept one there. We need to fix them up
16524 now so that we output the right code. */
16525 if (op_alt[opno].memory_ok)
16527 rtx op = recog_data.operand[opno];
16529 if (CONSTANT_P (op))
16531 if (do_pushes)
16532 push_minipool_fix (insn, address, recog_data.operand_loc[opno],
16533 recog_data.operand_mode[opno], op);
16535 else if (MEM_P (op)
16536 && GET_CODE (XEXP (op, 0)) == SYMBOL_REF
16537 && CONSTANT_POOL_ADDRESS_P (XEXP (op, 0)))
16539 if (do_pushes)
16541 rtx cop = avoid_constant_pool_reference (op);
16543 /* Casting the address of something to a mode narrower
16544 than a word can cause avoid_constant_pool_reference()
16545 to return the pool reference itself. That's no good to
16546 us here. Let's just hope that we can use the
16547 constant pool value directly. */
16548 if (op == cop)
16549 cop = get_pool_constant (XEXP (op, 0));
16551 push_minipool_fix (insn, address,
16552 recog_data.operand_loc[opno],
16553 recog_data.operand_mode[opno], cop);
16560 return;
16563 /* This function computes the clear mask and PADDING_BITS_TO_CLEAR for structs
16564 and unions in the context of ARMv8-M Security Extensions. It is used as a
16565 helper function for both 'cmse_nonsecure_call' and 'cmse_nonsecure_entry'
16566 functions. The PADDING_BITS_TO_CLEAR pointer can be the base to either one
16567 or four masks, depending on whether it is being computed for a
16568 'cmse_nonsecure_entry' return value or a 'cmse_nonsecure_call' argument
16569 respectively. The tree for the type of the argument or a field within an
16570 argument is passed in ARG_TYPE, the current register this argument or field
16571 starts in is kept in the pointer REGNO and updated accordingly, the bit this
16572 argument or field starts at is passed in STARTING_BIT and the last used bit
16573 is kept in LAST_USED_BIT which is also updated accordingly. */
16575 static unsigned HOST_WIDE_INT
16576 comp_not_to_clear_mask_str_un (tree arg_type, int * regno,
16577 uint32_t * padding_bits_to_clear,
16578 unsigned starting_bit, int * last_used_bit)
16581 unsigned HOST_WIDE_INT not_to_clear_reg_mask = 0;
16583 if (TREE_CODE (arg_type) == RECORD_TYPE)
16585 unsigned current_bit = starting_bit;
16586 tree field;
16587 long int offset, size;
16590 field = TYPE_FIELDS (arg_type);
16591 while (field)
16593 /* The offset within a structure is always an offset from
16594 the start of that structure. Make sure we take that into account
16595 in the calculation of the register-based offset that we use here. */
16596 offset = starting_bit;
16597 offset += TREE_INT_CST_ELT (DECL_FIELD_BIT_OFFSET (field), 0);
16598 offset %= 32;
16600 /* This is the actual size of the field, for bitfields this is the
16601 bitfield width and not the container size. */
16602 size = TREE_INT_CST_ELT (DECL_SIZE (field), 0);
16604 if (*last_used_bit != offset)
16606 if (offset < *last_used_bit)
16608 /* This field's offset is before the 'last_used_bit', that
16609 means this field goes on the next register. So we need to
16610 pad the rest of the current register and increase the
16611 register number. */
16612 uint32_t mask;
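/* Set every bit from *last_used_bit upwards, i.e.
   ~((1 << *last_used_bit) - 1).  */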
16613 mask = ((uint32_t)-1) - ((uint32_t) 1 << *last_used_bit);
16614 mask++;
16616 padding_bits_to_clear[*regno] |= mask;
16617 not_to_clear_reg_mask |= HOST_WIDE_INT_1U << *regno;
16618 (*regno)++;
16620 else
16622 /* Otherwise we pad the bits between the last field's end and
16623 the start of the new field. */
16624 uint32_t mask;
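/* Set the bits from *last_used_bit up to, but not including, bit
   OFFSET.  */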
16626 mask = ((uint32_t)-1) >> (32 - offset);
16627 mask -= ((uint32_t) 1 << *last_used_bit) - 1;
16628 padding_bits_to_clear[*regno] |= mask;
16630 current_bit = offset;
16633 /* Calculate further padding bits for inner structs/unions too. */
16634 if (RECORD_OR_UNION_TYPE_P (TREE_TYPE (field)))
16636 *last_used_bit = current_bit;
16637 not_to_clear_reg_mask
16638 |= comp_not_to_clear_mask_str_un (TREE_TYPE (field), regno,
16639 padding_bits_to_clear, offset,
16640 last_used_bit);
16642 else
16644 /* Update 'current_bit' with this field's size. If the
16645 'current_bit' lies in a subsequent register, update 'regno' and
16646 reset 'current_bit' to point to the current bit in that new
16647 register. */
16648 current_bit += size;
16649 while (current_bit >= 32)
16651 current_bit-=32;
16652 not_to_clear_reg_mask |= HOST_WIDE_INT_1U << *regno;
16653 (*regno)++;
16655 *last_used_bit = current_bit;
16658 field = TREE_CHAIN (field);
16660 not_to_clear_reg_mask |= HOST_WIDE_INT_1U << *regno;
16662 else if (TREE_CODE (arg_type) == UNION_TYPE)
16664 tree field, field_t;
16665 int i, regno_t, field_size;
16666 int max_reg = -1;
16667 int max_bit = -1;
16668 uint32_t mask;
16669 uint32_t padding_bits_to_clear_res[NUM_ARG_REGS]
16670 = {-1, -1, -1, -1};
16672 /* To compute the padding bits in a union we only consider bits as
16673 padding bits if they are always either a padding bit or fall outside a
16674 field's size for all fields in the union. */
16675 field = TYPE_FIELDS (arg_type);
16676 while (field)
16678 uint32_t padding_bits_to_clear_t[NUM_ARG_REGS]
16679 = {0U, 0U, 0U, 0U};
16680 int last_used_bit_t = *last_used_bit;
16681 regno_t = *regno;
16682 field_t = TREE_TYPE (field);
16684 /* If the field's type is either a record or a union make sure to
16685 compute their padding bits too. */
16686 if (RECORD_OR_UNION_TYPE_P (field_t))
16687 not_to_clear_reg_mask
16688 |= comp_not_to_clear_mask_str_un (field_t, &regno_t,
16689 &padding_bits_to_clear_t[0],
16690 starting_bit, &last_used_bit_t);
16691 else
16693 field_size = TREE_INT_CST_ELT (DECL_SIZE (field), 0);
16694 regno_t = (field_size / 32) + *regno;
16695 last_used_bit_t = (starting_bit + field_size) % 32;
16698 for (i = *regno; i < regno_t; i++)
16700 /* For all but the last register used by this field only keep the
16701 padding bits that were padding bits in this field. */
16702 padding_bits_to_clear_res[i] &= padding_bits_to_clear_t[i];
16705 /* For the last register, keep all padding bits that were padding
16706 bits in this field and any padding bits that are still valid
16707 as padding bits but fall outside of this field's size. */
16708 mask = (((uint32_t) -1) - ((uint32_t) 1 << last_used_bit_t)) + 1;
16709 padding_bits_to_clear_res[regno_t]
16710 &= padding_bits_to_clear_t[regno_t] | mask;
16712 /* Update the maximum size of the fields in terms of registers used
16713 ('max_reg') and the 'last_used_bit' in said register. */
16714 if (max_reg < regno_t)
16716 max_reg = regno_t;
16717 max_bit = last_used_bit_t;
16719 else if (max_reg == regno_t && max_bit < last_used_bit_t)
16720 max_bit = last_used_bit_t;
16722 field = TREE_CHAIN (field);
16725 /* Update the current padding_bits_to_clear using the intersection of the
16726 padding bits of all the fields. */
16727 for (i=*regno; i < max_reg; i++)
16728 padding_bits_to_clear[i] |= padding_bits_to_clear_res[i];
16730 /* Do not keep trailing padding bits, we do not know yet whether this
16731 is the end of the argument. */
16732 mask = ((uint32_t) 1 << max_bit) - 1;
16733 padding_bits_to_clear[max_reg]
16734 |= padding_bits_to_clear_res[max_reg] & mask;
16736 *regno = max_reg;
16737 *last_used_bit = max_bit;
16739 else
16740 /* This function should only be used for structs and unions. */
16741 gcc_unreachable ();
16743 return not_to_clear_reg_mask;
16746 /* In the context of ARMv8-M Security Extensions, this function is used for both
16747 'cmse_nonsecure_call' and 'cmse_nonsecure_entry' functions to compute what
16748 registers are used when returning or passing arguments, which is then
16749 returned as a mask. It will also compute a mask to indicate padding/unused
16750 bits for each of these registers, and passes this through the
16751 PADDING_BITS_TO_CLEAR pointer. The tree of the argument type is passed in
16752 ARG_TYPE, the rtl representation of the argument is passed in ARG_RTX and
16753 the starting register used to pass this argument or return value is passed
16754 in REGNO. It makes use of 'comp_not_to_clear_mask_str_un' to compute these
16755 for struct and union types. */
16757 static unsigned HOST_WIDE_INT
16758 compute_not_to_clear_mask (tree arg_type, rtx arg_rtx, int regno,
16759 uint32_t * padding_bits_to_clear)
16762 int last_used_bit = 0;
16763 unsigned HOST_WIDE_INT not_to_clear_mask;
16765 if (RECORD_OR_UNION_TYPE_P (arg_type))
16767 not_to_clear_mask
16768 = comp_not_to_clear_mask_str_un (arg_type, &regno,
16769 padding_bits_to_clear, 0,
16770 &last_used_bit);
16773 /* If the 'last_used_bit' is not zero, that means we are still using a
16774 part of the last 'regno'. In such cases we must clear the trailing
16775 bits. Otherwise we are not using regno and we should mark it to be
16776 cleared. */
16777 if (last_used_bit != 0)
16778 padding_bits_to_clear[regno]
16779 |= ((uint32_t)-1) - ((uint32_t) 1 << last_used_bit) + 1;
16780 else
16781 not_to_clear_mask &= ~(HOST_WIDE_INT_1U << regno);
16783 else
16785 not_to_clear_mask = 0;
16786 /* We are not dealing with structs or unions. So these arguments may be
16787 passed in floating point registers too. In some cases a BLKmode is
16788 used when returning or passing arguments in multiple VFP registers. */
16789 if (GET_MODE (arg_rtx) == BLKmode)
16791 int i, arg_regs;
16792 rtx reg;
16794 /* This should really only occur when dealing with the hard-float
16795 ABI. */
16796 gcc_assert (TARGET_HARD_FLOAT_ABI);
16798 for (i = 0; i < XVECLEN (arg_rtx, 0); i++)
16800 reg = XEXP (XVECEXP (arg_rtx, 0, i), 0);
16801 gcc_assert (REG_P (reg));
16803 not_to_clear_mask |= HOST_WIDE_INT_1U << REGNO (reg);
16805 /* If we are dealing with DF mode, make sure we don't
16806 clear either of the registers it addresses. */
16807 arg_regs = ARM_NUM_REGS (GET_MODE (reg));
16808 if (arg_regs > 1)
16810 unsigned HOST_WIDE_INT mask;
16811 mask = HOST_WIDE_INT_1U << (REGNO (reg) + arg_regs);
16812 mask -= HOST_WIDE_INT_1U << REGNO (reg);
16813 not_to_clear_mask |= mask;
16817 else
16819 /* Otherwise we can rely on the MODE to determine how many registers
16820 are being used by this argument. */
16821 int arg_regs = ARM_NUM_REGS (GET_MODE (arg_rtx));
16822 not_to_clear_mask |= HOST_WIDE_INT_1U << REGNO (arg_rtx);
16823 if (arg_regs > 1)
16825 unsigned HOST_WIDE_INT
16826 mask = HOST_WIDE_INT_1U << (REGNO (arg_rtx) + arg_regs);
16827 mask -= HOST_WIDE_INT_1U << REGNO (arg_rtx);
16828 not_to_clear_mask |= mask;
16833 return not_to_clear_mask;
16836 /* Saves the callee-saved registers, clears the callee-saved registers and the
16837 caller-saved registers not used to pass arguments before a cmse_nonsecure_call,
16838 and restores the callee-saved registers afterwards. */
16840 static void
16841 cmse_nonsecure_call_clear_caller_saved (void)
16843 basic_block bb;
16845 FOR_EACH_BB_FN (bb, cfun)
16847 rtx_insn *insn;
16849 FOR_BB_INSNS (bb, insn)
16851 uint64_t to_clear_mask, float_mask;
16852 rtx_insn *seq;
16853 rtx pat, call, unspec, reg, cleared_reg, tmp;
16854 unsigned int regno, maxregno;
16855 rtx address;
16856 CUMULATIVE_ARGS args_so_far_v;
16857 cumulative_args_t args_so_far;
16858 tree arg_type, fntype;
16859 bool using_r4, first_param = true;
16860 function_args_iterator args_iter;
16861 uint32_t padding_bits_to_clear[4] = {0U, 0U, 0U, 0U};
16862 uint32_t * padding_bits_to_clear_ptr = &padding_bits_to_clear[0];
16864 if (!NONDEBUG_INSN_P (insn))
16865 continue;
16867 if (!CALL_P (insn))
16868 continue;
16870 pat = PATTERN (insn);
16871 gcc_assert (GET_CODE (pat) == PARALLEL && XVECLEN (pat, 0) > 0);
16872 call = XVECEXP (pat, 0, 0);
16874 /* Get the real call RTX if the insn sets a value, i.e. returns. */
16875 if (GET_CODE (call) == SET)
16876 call = SET_SRC (call);
16878 /* Check if it is a cmse_nonsecure_call. */
16879 unspec = XEXP (call, 0);
16880 if (GET_CODE (unspec) != UNSPEC
16881 || XINT (unspec, 1) != UNSPEC_NONSECURE_MEM)
16882 continue;
16884 /* Determine the caller-saved registers we need to clear. */
16885 to_clear_mask = (1LL << (NUM_ARG_REGS)) - 1;
16886 maxregno = NUM_ARG_REGS - 1;
16887 /* Only look at the caller-saved floating point registers in case of
16888 -mfloat-abi=hard. For -mfloat-abi=softfp we will be using the
16889 lazy store and loads which clear both caller- and callee-saved
16890 registers. */
16891 if (TARGET_HARD_FLOAT_ABI)
16893 float_mask = (1LL << (D7_VFP_REGNUM + 1)) - 1;
16894 float_mask &= ~((1LL << FIRST_VFP_REGNUM) - 1);
16895 to_clear_mask |= float_mask;
16896 maxregno = D7_VFP_REGNUM;
16899 /* Make sure the register used to hold the function address is not
16900 cleared. */
16901 address = RTVEC_ELT (XVEC (unspec, 0), 0);
16902 gcc_assert (MEM_P (address));
16903 gcc_assert (REG_P (XEXP (address, 0)));
16904 to_clear_mask &= ~(1LL << REGNO (XEXP (address, 0)));
16906 /* Set basic block of call insn so that df rescan is performed on
16907 insns inserted here. */
16908 set_block_for_insn (insn, bb);
16909 df_set_flags (DF_DEFER_INSN_RESCAN);
16910 start_sequence ();
16912 /* Make sure the scheduler doesn't schedule other insns beyond
16913 here. */
16914 emit_insn (gen_blockage ());
16916 /* Walk through all arguments and clear registers appropriately. */
16918 fntype = TREE_TYPE (MEM_EXPR (address));
16919 arm_init_cumulative_args (&args_so_far_v, fntype, NULL_RTX,
16920 NULL_TREE);
16921 args_so_far = pack_cumulative_args (&args_so_far_v);
16922 FOREACH_FUNCTION_ARGS (fntype, arg_type, args_iter)
16924 rtx arg_rtx;
16925 machine_mode arg_mode = TYPE_MODE (arg_type);
16927 if (VOID_TYPE_P (arg_type))
16928 continue;
16930 if (!first_param)
16931 arm_function_arg_advance (args_so_far, arg_mode, arg_type,
16932 true);
16934 arg_rtx = arm_function_arg (args_so_far, arg_mode, arg_type,
16935 true);
16936 gcc_assert (REG_P (arg_rtx));
16937 to_clear_mask
16938 &= ~compute_not_to_clear_mask (arg_type, arg_rtx,
16939 REGNO (arg_rtx),
16940 padding_bits_to_clear_ptr);
16942 first_param = false;
16945 /* Clear padding bits where needed. */
16946 cleared_reg = XEXP (address, 0);
16947 reg = gen_rtx_REG (SImode, IP_REGNUM);
16948 using_r4 = false;
16949 for (regno = R0_REGNUM; regno < NUM_ARG_REGS; regno++)
16951 if (padding_bits_to_clear[regno] == 0)
16952 continue;
16954 /* If this is a Thumb-1 target, copy the address of the function
16955 we are calling from 'r4' into 'ip' such that we can use r4 to
16956 clear the unused bits in the arguments. */
16957 if (TARGET_THUMB1 && !using_r4)
16959 using_r4 = true;
16960 reg = cleared_reg;
16961 emit_move_insn (gen_rtx_REG (SImode, IP_REGNUM),
16962 reg);
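/* Move the low 16 bits of the inverted padding mask into REG; the top
   16 bits are filled in below when any of them are set.  */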
16965 tmp = GEN_INT ((((~padding_bits_to_clear[regno]) << 16u) >> 16u));
16966 emit_move_insn (reg, tmp);
16967 /* Also fill the top half of the negated
16968 padding_bits_to_clear. */
16969 if (((~padding_bits_to_clear[regno]) >> 16) > 0)
16971 tmp = GEN_INT ((~padding_bits_to_clear[regno]) >> 16);
16972 emit_insn (gen_rtx_SET (gen_rtx_ZERO_EXTRACT (SImode, reg,
16973 GEN_INT (16),
16974 GEN_INT (16)),
16975 tmp));
16978 emit_insn (gen_andsi3 (gen_rtx_REG (SImode, regno),
16979 gen_rtx_REG (SImode, regno),
16980 reg));
16983 if (using_r4)
16984 emit_move_insn (cleared_reg,
16985 gen_rtx_REG (SImode, IP_REGNUM));
16987 /* We use right shift and left shift to clear the LSB of the address
16988 we jump to instead of using bic, to avoid having to use an extra
16989 register on Thumb-1. */
16990 tmp = gen_rtx_LSHIFTRT (SImode, cleared_reg, const1_rtx);
16991 emit_insn (gen_rtx_SET (cleared_reg, tmp));
16992 tmp = gen_rtx_ASHIFT (SImode, cleared_reg, const1_rtx);
16993 emit_insn (gen_rtx_SET (cleared_reg, tmp));
16995 /* Clear all registers that might leak before doing a non-secure
16996 call. */
16997 for (regno = R0_REGNUM; regno <= maxregno; regno++)
16999 if (!(to_clear_mask & (1LL << regno)))
17000 continue;
17002 /* If regno is an even vfp register and its successor is also to
17003 be cleared, use vmov. */
17004 if (IS_VFP_REGNUM (regno))
17006 if (TARGET_VFP_DOUBLE
17007 && VFP_REGNO_OK_FOR_DOUBLE (regno)
17008 && to_clear_mask & (1LL << (regno + 1)))
17009 emit_move_insn (gen_rtx_REG (DFmode, regno++),
17010 CONST0_RTX (DFmode));
17011 else
17012 emit_move_insn (gen_rtx_REG (SFmode, regno),
17013 CONST0_RTX (SFmode));
17015 else
17016 emit_move_insn (gen_rtx_REG (SImode, regno), cleared_reg);
17019 seq = get_insns ();
17020 end_sequence ();
17021 emit_insn_before (seq, insn);
17027 /* Rewrite move insn into subtract of 0 if the condition codes will
17028 be useful in the next conditional jump insn. */
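/* For instance (illustrative only): if a preceding insn copies r2 into r3
   and the branch then compares r2 (or r3) with zero, the copy is rewritten
   as the flag-setting "subs r3, r2, #0" and the compare is retargeted at
   r3, so the separate compare against zero can later be omitted.  */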
17030 static void
17031 thumb1_reorg (void)
17033 basic_block bb;
17035 FOR_EACH_BB_FN (bb, cfun)
17037 rtx dest, src;
17038 rtx cmp, op0, op1, set = NULL;
17039 rtx_insn *prev, *insn = BB_END (bb);
17040 bool insn_clobbered = false;
17042 while (insn != BB_HEAD (bb) && !NONDEBUG_INSN_P (insn))
17043 insn = PREV_INSN (insn);
17045 /* Find the last cbranchsi4_insn in basic block BB. */
17046 if (insn == BB_HEAD (bb)
17047 || INSN_CODE (insn) != CODE_FOR_cbranchsi4_insn)
17048 continue;
17050 /* Get the register with which we are comparing. */
17051 cmp = XEXP (SET_SRC (PATTERN (insn)), 0);
17052 op0 = XEXP (cmp, 0);
17053 op1 = XEXP (cmp, 1);
17055 /* Check that comparison is against ZERO. */
17056 if (!CONST_INT_P (op1) || INTVAL (op1) != 0)
17057 continue;
17059 /* Find the first flag setting insn before INSN in basic block BB. */
17060 gcc_assert (insn != BB_HEAD (bb));
17061 for (prev = PREV_INSN (insn);
17062 (!insn_clobbered
17063 && prev != BB_HEAD (bb)
17064 && (NOTE_P (prev)
17065 || DEBUG_INSN_P (prev)
17066 || ((set = single_set (prev)) != NULL
17067 && get_attr_conds (prev) == CONDS_NOCOND)));
17068 prev = PREV_INSN (prev))
17070 if (reg_set_p (op0, prev))
17071 insn_clobbered = true;
17074 /* Skip if op0 is clobbered by insn other than prev. */
17075 if (insn_clobbered)
17076 continue;
17078 if (!set)
17079 continue;
17081 dest = SET_DEST (set);
17082 src = SET_SRC (set);
17083 if (!low_register_operand (dest, SImode)
17084 || !low_register_operand (src, SImode))
17085 continue;
17087 /* Rewrite move into subtract of 0 if its operand is compared with ZERO
17088 in INSN. Both src and dest of the move insn are checked. */
17089 if (REGNO (op0) == REGNO (src) || REGNO (op0) == REGNO (dest))
17091 dest = copy_rtx (dest);
17092 src = copy_rtx (src);
17093 src = gen_rtx_MINUS (SImode, src, const0_rtx);
17094 PATTERN (prev) = gen_rtx_SET (dest, src);
17095 INSN_CODE (prev) = -1;
17096 /* Set test register in INSN to dest. */
17097 XEXP (cmp, 0) = copy_rtx (dest);
17098 INSN_CODE (insn) = -1;
17103 /* Convert instructions to their cc-clobbering variant if possible, since
17104 that allows us to use smaller encodings. */
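/* For example (illustrative only): when the condition flags are dead after
   the insn, the 32-bit "and.w r0, r0, r1" can be replaced by "ands r0, r1",
   which has a 16-bit encoding.  */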
17106 static void
17107 thumb2_reorg (void)
17109 basic_block bb;
17110 regset_head live;
17112 INIT_REG_SET (&live);
17114 /* We are freeing block_for_insn in the toplev to keep compatibility
17115 with old MDEP_REORGS that are not CFG based. Recompute it now. */
17116 compute_bb_for_insn ();
17117 df_analyze ();
17119 enum Convert_Action {SKIP, CONV, SWAP_CONV};
17121 FOR_EACH_BB_FN (bb, cfun)
17123 if ((current_tune->disparage_flag_setting_t16_encodings
17124 == tune_params::DISPARAGE_FLAGS_ALL)
17125 && optimize_bb_for_speed_p (bb))
17126 continue;
17128 rtx_insn *insn;
17129 Convert_Action action = SKIP;
17130 Convert_Action action_for_partial_flag_setting
17131 = ((current_tune->disparage_flag_setting_t16_encodings
17132 != tune_params::DISPARAGE_FLAGS_NEITHER)
17133 && optimize_bb_for_speed_p (bb))
17134 ? SKIP : CONV;
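/* Added note (describes the initializer above): insns that only partially
   write the flags (logical operations, shifts and moves) are converted only
   when the tuning does not disparage flag-setting 16-bit encodings, or when
   this block is not being optimized for speed.  */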
17136 COPY_REG_SET (&live, DF_LR_OUT (bb));
17137 df_simulate_initialize_backwards (bb, &live);
17138 FOR_BB_INSNS_REVERSE (bb, insn)
17140 if (NONJUMP_INSN_P (insn)
17141 && !REGNO_REG_SET_P (&live, CC_REGNUM)
17142 && GET_CODE (PATTERN (insn)) == SET)
17144 action = SKIP;
17145 rtx pat = PATTERN (insn);
17146 rtx dst = XEXP (pat, 0);
17147 rtx src = XEXP (pat, 1);
17148 rtx op0 = NULL_RTX, op1 = NULL_RTX;
17150 if (UNARY_P (src) || BINARY_P (src))
17151 op0 = XEXP (src, 0);
17153 if (BINARY_P (src))
17154 op1 = XEXP (src, 1);
17156 if (low_register_operand (dst, SImode))
17158 switch (GET_CODE (src))
17160 case PLUS:
17161 /* Adding two registers and storing the result
17162 in the first source is already a 16-bit
17163 operation. */
17164 if (rtx_equal_p (dst, op0)
17165 && register_operand (op1, SImode))
17166 break;
17168 if (low_register_operand (op0, SImode))
17170 /* ADDS <Rd>,<Rn>,<Rm> */
17171 if (low_register_operand (op1, SImode))
17172 action = CONV;
17173 /* ADDS <Rdn>,#<imm8> */
17174 /* SUBS <Rdn>,#<imm8> */
17175 else if (rtx_equal_p (dst, op0)
17176 && CONST_INT_P (op1)
17177 && IN_RANGE (INTVAL (op1), -255, 255))
17178 action = CONV;
17179 /* ADDS <Rd>,<Rn>,#<imm3> */
17180 /* SUBS <Rd>,<Rn>,#<imm3> */
17181 else if (CONST_INT_P (op1)
17182 && IN_RANGE (INTVAL (op1), -7, 7))
17183 action = CONV;
17185 /* ADCS <Rd>, <Rn> */
17186 else if (GET_CODE (XEXP (src, 0)) == PLUS
17187 && rtx_equal_p (XEXP (XEXP (src, 0), 0), dst)
17188 && low_register_operand (XEXP (XEXP (src, 0), 1),
17189 SImode)
17190 && COMPARISON_P (op1)
17191 && cc_register (XEXP (op1, 0), VOIDmode)
17192 && maybe_get_arm_condition_code (op1) == ARM_CS
17193 && XEXP (op1, 1) == const0_rtx)
17194 action = CONV;
17195 break;
17197 case MINUS:
17198 /* RSBS <Rd>,<Rn>,#0
17199 Not handled here: see NEG below. */
17200 /* SUBS <Rd>,<Rn>,#<imm3>
17201 SUBS <Rdn>,#<imm8>
17202 Not handled here: see PLUS above. */
17203 /* SUBS <Rd>,<Rn>,<Rm> */
17204 if (low_register_operand (op0, SImode)
17205 && low_register_operand (op1, SImode))
17206 action = CONV;
17207 break;
17209 case MULT:
17210 /* MULS <Rdm>,<Rn>,<Rdm>
17211 As an exception to the rule, this is only used
17212 when optimizing for size since MULS is slow on all
17213 known implementations. We do not even want to use
17214 MULS in cold code, if optimizing for speed, so we
17215 test the global flag here. */
17216 if (!optimize_size)
17217 break;
17218 /* Fall through. */
17219 case AND:
17220 case IOR:
17221 case XOR:
17222 /* ANDS <Rdn>,<Rm> */
17223 if (rtx_equal_p (dst, op0)
17224 && low_register_operand (op1, SImode))
17225 action = action_for_partial_flag_setting;
17226 else if (rtx_equal_p (dst, op1)
17227 && low_register_operand (op0, SImode))
17228 action = action_for_partial_flag_setting == SKIP
17229 ? SKIP : SWAP_CONV;
17230 break;
17232 case ASHIFTRT:
17233 case ASHIFT:
17234 case LSHIFTRT:
17235 /* ASRS <Rdn>,<Rm> */
17236 /* LSRS <Rdn>,<Rm> */
17237 /* LSLS <Rdn>,<Rm> */
17238 if (rtx_equal_p (dst, op0)
17239 && low_register_operand (op1, SImode))
17240 action = action_for_partial_flag_setting;
17241 /* ASRS <Rd>,<Rm>,#<imm5> */
17242 /* LSRS <Rd>,<Rm>,#<imm5> */
17243 /* LSLS <Rd>,<Rm>,#<imm5> */
17244 else if (low_register_operand (op0, SImode)
17245 && CONST_INT_P (op1)
17246 && IN_RANGE (INTVAL (op1), 0, 31))
17247 action = action_for_partial_flag_setting;
17248 break;
17250 case ROTATERT:
17251 /* RORS <Rdn>,<Rm> */
17252 if (rtx_equal_p (dst, op0)
17253 && low_register_operand (op1, SImode))
17254 action = action_for_partial_flag_setting;
17255 break;
17257 case NOT:
17258 /* MVNS <Rd>,<Rm> */
17259 if (low_register_operand (op0, SImode))
17260 action = action_for_partial_flag_setting;
17261 break;
17263 case NEG:
17264 /* NEGS <Rd>,<Rm> (a.k.a RSBS) */
17265 if (low_register_operand (op0, SImode))
17266 action = CONV;
17267 break;
17269 case CONST_INT:
17270 /* MOVS <Rd>,#<imm8> */
17271 if (CONST_INT_P (src)
17272 && IN_RANGE (INTVAL (src), 0, 255))
17273 action = action_for_partial_flag_setting;
17274 break;
17276 case REG:
17277 /* MOVS and MOV<c> with registers have different
17278 encodings, so are not relevant here. */
17279 break;
17281 default:
17282 break;
17286 if (action != SKIP)
17288 rtx ccreg = gen_rtx_REG (CCmode, CC_REGNUM);
17289 rtx clobber = gen_rtx_CLOBBER (VOIDmode, ccreg);
17290 rtvec vec;
17292 if (action == SWAP_CONV)
17294 src = copy_rtx (src);
17295 XEXP (src, 0) = op1;
17296 XEXP (src, 1) = op0;
17297 pat = gen_rtx_SET (dst, src);
17298 vec = gen_rtvec (2, pat, clobber);
17300 else /* action == CONV */
17301 vec = gen_rtvec (2, pat, clobber);
17303 PATTERN (insn) = gen_rtx_PARALLEL (VOIDmode, vec);
17304 INSN_CODE (insn) = -1;
17308 if (NONDEBUG_INSN_P (insn))
17309 df_simulate_one_insn_backwards (bb, insn, &live);
17313 CLEAR_REG_SET (&live);
17316 /* Gcc puts the pool in the wrong place for ARM, since we can only
17317 load addresses a limited distance around the pc. We do some
17318 special munging to move the constant pool values to the correct
17319 point in the code. */
17320 static void
17321 arm_reorg (void)
17323 rtx_insn *insn;
17324 HOST_WIDE_INT address = 0;
17325 Mfix * fix;
17327 if (use_cmse)
17328 cmse_nonsecure_call_clear_caller_saved ();
17329 if (TARGET_THUMB1)
17330 thumb1_reorg ();
17331 else if (TARGET_THUMB2)
17332 thumb2_reorg ();
17334 /* Ensure all insns that must be split have been split at this point.
17335 Otherwise, the pool placement code below may compute incorrect
17336 insn lengths. Note that when optimizing, all insns have already
17337 been split at this point. */
17338 if (!optimize)
17339 split_all_insns_noflow ();
17341 minipool_fix_head = minipool_fix_tail = NULL;
17343 /* The first insn must always be a note, or the code below won't
17344 scan it properly. */
17345 insn = get_insns ();
17346 gcc_assert (NOTE_P (insn));
17347 minipool_pad = 0;
17349 /* Scan all the insns and record the operands that will need fixing. */
17350 for (insn = next_nonnote_insn (insn); insn; insn = next_nonnote_insn (insn))
17352 if (BARRIER_P (insn))
17353 push_minipool_barrier (insn, address);
17354 else if (INSN_P (insn))
17356 rtx_jump_table_data *table;
17358 note_invalid_constants (insn, address, true);
17359 address += get_attr_length (insn);
17361 /* If the insn is a vector jump, add the size of the table
17362 and skip the table. */
17363 if (tablejump_p (insn, NULL, &table))
17365 address += get_jump_table_size (table);
17366 insn = table;
17369 else if (LABEL_P (insn))
17370 /* Add the worst-case padding due to alignment. We don't add
17371 the _current_ padding because the minipool insertions
17372 themselves might change it. */
17373 address += get_label_padding (insn);
17376 fix = minipool_fix_head;
17378 /* Now scan the fixups and perform the required changes. */
17379 while (fix)
17381 Mfix * ftmp;
17382 Mfix * fdel;
17383 Mfix * last_added_fix;
17384 Mfix * last_barrier = NULL;
17385 Mfix * this_fix;
17387 /* Skip any further barriers before the next fix. */
17388 while (fix && BARRIER_P (fix->insn))
17389 fix = fix->next;
17391 /* No more fixes. */
17392 if (fix == NULL)
17393 break;
17395 last_added_fix = NULL;
17397 for (ftmp = fix; ftmp; ftmp = ftmp->next)
17399 if (BARRIER_P (ftmp->insn))
17401 if (ftmp->address >= minipool_vector_head->max_address)
17402 break;
17404 last_barrier = ftmp;
17406 else if ((ftmp->minipool = add_minipool_forward_ref (ftmp)) == NULL)
17407 break;
17409 last_added_fix = ftmp; /* Keep track of the last fix added. */
17412 /* If we found a barrier, drop back to that; any fixes that we
17413 could have reached but come after the barrier will now go in
17414 the next mini-pool. */
17415 if (last_barrier != NULL)
17417 /* Reduce the refcount for those fixes that won't go into this
17418 pool after all. */
17419 for (fdel = last_barrier->next;
17420 fdel && fdel != ftmp;
17421 fdel = fdel->next)
17423 fdel->minipool->refcount--;
17424 fdel->minipool = NULL;
17427 ftmp = last_barrier;
17429 else
17431 /* ftmp is the first fix that we can't fit into this pool and
17432 there are no natural barriers that we could use. Insert a
17433 new barrier in the code somewhere between the previous
17434 fix and this one, and arrange to jump around it. */
17435 HOST_WIDE_INT max_address;
17437 /* The last item on the list of fixes must be a barrier, so
17438 we can never run off the end of the list of fixes without
17439 last_barrier being set. */
17440 gcc_assert (ftmp);
17442 max_address = minipool_vector_head->max_address;
17443 /* Check that there isn't another fix that is in range that
17444 we couldn't fit into this pool because the pool was
17445 already too large: we need to put the pool before such an
17446 instruction. The pool itself may come just after the
17447 fix because create_fix_barrier also allows space for a
17448 jump instruction. */
17449 if (ftmp->address < max_address)
17450 max_address = ftmp->address + 1;
17452 last_barrier = create_fix_barrier (last_added_fix, max_address);
17455 assign_minipool_offsets (last_barrier);
17457 while (ftmp)
17459 if (!BARRIER_P (ftmp->insn)
17460 && ((ftmp->minipool = add_minipool_backward_ref (ftmp))
17461 == NULL))
17462 break;
17464 ftmp = ftmp->next;
17467 /* Scan over the fixes we have identified for this pool, fixing them
17468 up and adding the constants to the pool itself. */
17469 for (this_fix = fix; this_fix && ftmp != this_fix;
17470 this_fix = this_fix->next)
17471 if (!BARRIER_P (this_fix->insn))
17473 rtx addr
17474 = plus_constant (Pmode,
17475 gen_rtx_LABEL_REF (VOIDmode,
17476 minipool_vector_label),
17477 this_fix->minipool->offset);
17478 *this_fix->loc = gen_rtx_MEM (this_fix->mode, addr);
17481 dump_minipool (last_barrier->insn);
17482 fix = ftmp;
17485 /* From now on we must synthesize any constants that we can't handle
17486 directly. This can happen if the RTL gets split during final
17487 instruction generation. */
17488 cfun->machine->after_arm_reorg = 1;
17490 /* Free the minipool memory. */
17491 obstack_free (&minipool_obstack, minipool_startobj);
17494 /* Routines to output assembly language. */
17496 /* Return string representation of passed in real value. */
17497 static const char *
17498 fp_const_from_val (REAL_VALUE_TYPE *r)
17500 if (!fp_consts_inited)
17501 init_fp_table ();
17503 gcc_assert (real_equal (r, &value_fp0));
17504 return "0";
17507 /* OPERANDS[0] is the entire list of insns that constitute pop,
17508 OPERANDS[1] is the base register, RETURN_PC is true iff return insn
17509 is in the list, UPDATE is true iff the list contains explicit
17510 update of base register. */
17511 void
17512 arm_output_multireg_pop (rtx *operands, bool return_pc, rtx cond, bool reverse,
17513 bool update)
17515 int i;
17516 char pattern[100];
17517 int offset;
17518 const char *conditional;
17519 int num_saves = XVECLEN (operands[0], 0);
17520 unsigned int regno;
17521 unsigned int regno_base = REGNO (operands[1]);
17522 bool interrupt_p = IS_INTERRUPT (arm_current_func_type ());
17524 offset = 0;
17525 offset += update ? 1 : 0;
17526 offset += return_pc ? 1 : 0;
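/* The leading elements of the PARALLEL describe the optional base register
   update and the return; the register loads start at index OFFSET.  */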
17528 /* Is the base register in the list? */
17529 for (i = offset; i < num_saves; i++)
17531 regno = REGNO (XEXP (XVECEXP (operands[0], 0, i), 0));
17532 /* If SP is in the list, then the base register must be SP. */
17533 gcc_assert ((regno != SP_REGNUM) || (regno_base == SP_REGNUM));
17534 /* If base register is in the list, there must be no explicit update. */
17535 if (regno == regno_base)
17536 gcc_assert (!update);
17539 conditional = reverse ? "%?%D0" : "%?%d0";
17540 /* Can't use POP if returning from an interrupt. */
17541 if ((regno_base == SP_REGNUM) && update && !(interrupt_p && return_pc))
17542 sprintf (pattern, "pop%s\t{", conditional);
17543 else
17545 /* Output ldmfd when the base register is SP, otherwise output ldmia.
17546 It's just a convention; their semantics are identical. */
17547 if (regno_base == SP_REGNUM)
17548 sprintf (pattern, "ldmfd%s\t", conditional);
17549 else if (update)
17550 sprintf (pattern, "ldmia%s\t", conditional);
17551 else
17552 sprintf (pattern, "ldm%s\t", conditional);
17554 strcat (pattern, reg_names[regno_base]);
17555 if (update)
17556 strcat (pattern, "!, {");
17557 else
17558 strcat (pattern, ", {");
17561 /* Output the first destination register. */
17562 strcat (pattern,
17563 reg_names[REGNO (XEXP (XVECEXP (operands[0], 0, offset), 0))]);
17565 /* Output the rest of the destination registers. */
17566 for (i = offset + 1; i < num_saves; i++)
17568 strcat (pattern, ", ");
17569 strcat (pattern,
17570 reg_names[REGNO (XEXP (XVECEXP (operands[0], 0, i), 0))]);
17573 strcat (pattern, "}");
17575 if (interrupt_p && return_pc)
17576 strcat (pattern, "^");
17578 output_asm_insn (pattern, &cond);
17582 /* Output the assembly for a store multiple. */
17584 const char *
17585 vfp_output_vstmd (rtx * operands)
17587 char pattern[100];
17588 int p;
17589 int base;
17590 int i;
17591 rtx addr_reg = REG_P (XEXP (operands[0], 0))
17592 ? XEXP (operands[0], 0)
17593 : XEXP (XEXP (operands[0], 0), 0);
17594 bool push_p = REGNO (addr_reg) == SP_REGNUM;
17596 if (push_p)
17597 strcpy (pattern, "vpush%?.64\t{%P1");
17598 else
17599 strcpy (pattern, "vstmdb%?.64\t%m0!, {%P1");
17601 p = strlen (pattern);
17603 gcc_assert (REG_P (operands[1]));
17605 base = (REGNO (operands[1]) - FIRST_VFP_REGNUM) / 2;
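/* Operand 1, the first D register of the block, is already named in the
   pattern via %P1; append the remaining D registers by number.  */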
17606 for (i = 1; i < XVECLEN (operands[2], 0); i++)
17608 p += sprintf (&pattern[p], ", d%d", base + i);
17610 strcpy (&pattern[p], "}");
17612 output_asm_insn (pattern, operands);
17613 return "";
17617 /* Emit RTL to save a block of VFP register pairs to the stack. Returns the
17618 number of bytes pushed. */
17620 static int
17621 vfp_emit_fstmd (int base_reg, int count)
17623 rtx par;
17624 rtx dwarf;
17625 rtx tmp, reg;
17626 int i;
17628 /* Work around the ARM10 VFPr1 bug. Data corruption can occur when exactly two
17629 register pairs are stored by a store multiple insn. We avoid this
17630 by pushing an extra pair. */
17631 if (count == 2 && !arm_arch6)
17633 if (base_reg == LAST_VFP_REGNUM - 3)
17634 base_reg -= 2;
17635 count++;
17638 /* FSTMD may not store more than 16 doubleword registers at once. Split
17639 larger stores into multiple parts (up to a maximum of two, in
17640 practice). */
17641 if (count > 16)
17643 int saved;
17644 /* NOTE: base_reg is an internal register number, so each D register
17645 counts as 2. */
17646 saved = vfp_emit_fstmd (base_reg + 32, count - 16);
17647 saved += vfp_emit_fstmd (base_reg, 16);
17648 return saved;
17651 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (count));
17652 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (count + 1));
17654 reg = gen_rtx_REG (DFmode, base_reg);
17655 base_reg += 2;
17657 XVECEXP (par, 0, 0)
17658 = gen_rtx_SET (gen_frame_mem
17659 (BLKmode,
17660 gen_rtx_PRE_MODIFY (Pmode,
17661 stack_pointer_rtx,
17662 plus_constant
17663 (Pmode, stack_pointer_rtx,
17664 - (count * 8)))
17666 gen_rtx_UNSPEC (BLKmode,
17667 gen_rtvec (1, reg),
17668 UNSPEC_PUSH_MULT));
17670 tmp = gen_rtx_SET (stack_pointer_rtx,
17671 plus_constant (Pmode, stack_pointer_rtx, -(count * 8)));
17672 RTX_FRAME_RELATED_P (tmp) = 1;
17673 XVECEXP (dwarf, 0, 0) = tmp;
17675 tmp = gen_rtx_SET (gen_frame_mem (DFmode, stack_pointer_rtx), reg);
17676 RTX_FRAME_RELATED_P (tmp) = 1;
17677 XVECEXP (dwarf, 0, 1) = tmp;
17679 for (i = 1; i < count; i++)
17681 reg = gen_rtx_REG (DFmode, base_reg);
17682 base_reg += 2;
17683 XVECEXP (par, 0, i) = gen_rtx_USE (VOIDmode, reg);
17685 tmp = gen_rtx_SET (gen_frame_mem (DFmode,
17686 plus_constant (Pmode,
17687 stack_pointer_rtx,
17688 i * 8)),
17689 reg);
17690 RTX_FRAME_RELATED_P (tmp) = 1;
17691 XVECEXP (dwarf, 0, i + 1) = tmp;
17694 par = emit_insn (par);
17695 add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
17696 RTX_FRAME_RELATED_P (par) = 1;
17698 return count * 8;
17701 /* Returns true if -mcmse has been passed and the function pointed to by 'addr'
17702 has the cmse_nonsecure_call attribute, and false otherwise. */
17704 bool
17705 detect_cmse_nonsecure_call (tree addr)
17707 if (!addr)
17708 return FALSE;
17710 tree fntype = TREE_TYPE (addr);
17711 if (use_cmse && lookup_attribute ("cmse_nonsecure_call",
17712 TYPE_ATTRIBUTES (fntype)))
17713 return TRUE;
17714 return FALSE;
17718 /* Emit a call instruction with pattern PAT. ADDR is the address of
17719 the call target. */
17721 void
17722 arm_emit_call_insn (rtx pat, rtx addr, bool sibcall)
17724 rtx insn;
17726 insn = emit_call_insn (pat);
17728 /* The PIC register is live on entry to VxWorks PIC PLT entries.
17729 If the call might use such an entry, add a use of the PIC register
17730 to the instruction's CALL_INSN_FUNCTION_USAGE. */
17731 if (TARGET_VXWORKS_RTP
17732 && flag_pic
17733 && !sibcall
17734 && GET_CODE (addr) == SYMBOL_REF
17735 && (SYMBOL_REF_DECL (addr)
17736 ? !targetm.binds_local_p (SYMBOL_REF_DECL (addr))
17737 : !SYMBOL_REF_LOCAL_P (addr)))
17739 require_pic_register ();
17740 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), cfun->machine->pic_reg);
17743 if (TARGET_AAPCS_BASED)
17745 /* For AAPCS, IP and CC can be clobbered by veneers inserted by the
17746 linker. We need to add an IP clobber to allow setting
17747 TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS to true. A CC clobber
17748 is not needed since it's a fixed register. */
17749 rtx *fusage = &CALL_INSN_FUNCTION_USAGE (insn);
17750 clobber_reg (fusage, gen_rtx_REG (word_mode, IP_REGNUM));
17754 /* Output a 'call' insn. */
17755 const char *
17756 output_call (rtx *operands)
17758 gcc_assert (!arm_arch5); /* Patterns should call blx <reg> directly. */
17760 /* Handle calls to lr using ip (which may be clobbered in subr anyway). */
17761 if (REGNO (operands[0]) == LR_REGNUM)
17763 operands[0] = gen_rtx_REG (SImode, IP_REGNUM);
17764 output_asm_insn ("mov%?\t%0, %|lr", operands);
17767 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
17769 if (TARGET_INTERWORK || arm_arch4t)
17770 output_asm_insn ("bx%?\t%0", operands);
17771 else
17772 output_asm_insn ("mov%?\t%|pc, %0", operands);
17774 return "";
17777 /* Output a move from arm registers to arm registers of a long double.
17778 OPERANDS[0] is the destination.
17779 OPERANDS[1] is the source. */
17780 const char *
17781 output_mov_long_double_arm_from_arm (rtx *operands)
17783 /* We have to be careful here because the two might overlap. */
17784 int dest_start = REGNO (operands[0]);
17785 int src_start = REGNO (operands[1]);
17786 rtx ops[2];
17787 int i;
17789 if (dest_start < src_start)
17791 for (i = 0; i < 3; i++)
17793 ops[0] = gen_rtx_REG (SImode, dest_start + i);
17794 ops[1] = gen_rtx_REG (SImode, src_start + i);
17795 output_asm_insn ("mov%?\t%0, %1", ops);
17798 else
17800 for (i = 2; i >= 0; i--)
17802 ops[0] = gen_rtx_REG (SImode, dest_start + i);
17803 ops[1] = gen_rtx_REG (SImode, src_start + i);
17804 output_asm_insn ("mov%?\t%0, %1", ops);
17808 return "";
17811 void
17812 arm_emit_movpair (rtx dest, rtx src)
17814 /* If the src is an immediate, simplify it. */
17815 if (CONST_INT_P (src))
17817 HOST_WIDE_INT val = INTVAL (src);
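/* Added note (describes the code below): emit the low 16 bits first
   (clearing the top half), then, when the high half is non-zero, insert it
   with a ZERO_EXTRACT store -- typically a movw/movt pair.  */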
17818 emit_set_insn (dest, GEN_INT (val & 0x0000ffff));
17819 if ((val >> 16) & 0x0000ffff)
17821 emit_set_insn (gen_rtx_ZERO_EXTRACT (SImode, dest, GEN_INT (16),
17822 GEN_INT (16)),
17823 GEN_INT ((val >> 16) & 0x0000ffff));
17824 rtx_insn *insn = get_last_insn ();
17825 set_unique_reg_note (insn, REG_EQUAL, copy_rtx (src));
17827 return;
17829 emit_set_insn (dest, gen_rtx_HIGH (SImode, src));
17830 emit_set_insn (dest, gen_rtx_LO_SUM (SImode, dest, src));
17831 rtx_insn *insn = get_last_insn ();
17832 set_unique_reg_note (insn, REG_EQUAL, copy_rtx (src));
17835 /* Output a move between double words. It must be REG<-MEM
17836 or MEM<-REG. */
17837 const char *
17838 output_move_double (rtx *operands, bool emit, int *count)
17840 enum rtx_code code0 = GET_CODE (operands[0]);
17841 enum rtx_code code1 = GET_CODE (operands[1]);
17842 rtx otherops[3];
17843 if (count)
17844 *count = 1;
17846 /* The only case when this might happen is when
17847 you are looking at the length of a DImode instruction
17848 that has an invalid constant in it. */
17849 if (code0 == REG && code1 != MEM)
17851 gcc_assert (!emit);
17852 *count = 2;
17853 return "";
17856 if (code0 == REG)
17858 unsigned int reg0 = REGNO (operands[0]);
17860 otherops[0] = gen_rtx_REG (SImode, 1 + reg0);
17862 gcc_assert (code1 == MEM); /* Constraints should ensure this. */
17864 switch (GET_CODE (XEXP (operands[1], 0)))
17866 case REG:
17868 if (emit)
17870 if (TARGET_LDRD
17871 && !(fix_cm3_ldrd && reg0 == REGNO(XEXP (operands[1], 0))))
17872 output_asm_insn ("ldrd%?\t%0, [%m1]", operands);
17873 else
17874 output_asm_insn ("ldmia%?\t%m1, %M0", operands);
17876 break;
17878 case PRE_INC:
17879 gcc_assert (TARGET_LDRD);
17880 if (emit)
17881 output_asm_insn ("ldrd%?\t%0, [%m1, #8]!", operands);
17882 break;
17884 case PRE_DEC:
17885 if (emit)
17887 if (TARGET_LDRD)
17888 output_asm_insn ("ldrd%?\t%0, [%m1, #-8]!", operands);
17889 else
17890 output_asm_insn ("ldmdb%?\t%m1!, %M0", operands);
17892 break;
17894 case POST_INC:
17895 if (emit)
17897 if (TARGET_LDRD)
17898 output_asm_insn ("ldrd%?\t%0, [%m1], #8", operands);
17899 else
17900 output_asm_insn ("ldmia%?\t%m1!, %M0", operands);
17902 break;
17904 case POST_DEC:
17905 gcc_assert (TARGET_LDRD);
17906 if (emit)
17907 output_asm_insn ("ldrd%?\t%0, [%m1], #-8", operands);
17908 break;
17910 case PRE_MODIFY:
17911 case POST_MODIFY:
17912 /* Auto-increment addressing modes should never have overlapping
17913 base and destination registers, and overlapping index registers
17914 are already prohibited, so this doesn't need to worry about
17915 fix_cm3_ldrd. */
17916 otherops[0] = operands[0];
17917 otherops[1] = XEXP (XEXP (XEXP (operands[1], 0), 1), 0);
17918 otherops[2] = XEXP (XEXP (XEXP (operands[1], 0), 1), 1);
17920 if (GET_CODE (XEXP (operands[1], 0)) == PRE_MODIFY)
17922 if (reg_overlap_mentioned_p (otherops[0], otherops[2]))
17924 /* Registers overlap so split out the increment. */
17925 if (emit)
17927 output_asm_insn ("add%?\t%1, %1, %2", otherops);
17928 output_asm_insn ("ldrd%?\t%0, [%1] @split", otherops);
17930 if (count)
17931 *count = 2;
17933 else
17935 /* Use a single insn if we can.
17936 FIXME: IWMMXT allows offsets larger than ldrd can
17937 handle, fix these up with a pair of ldr. */
17938 if (TARGET_THUMB2
17939 || !CONST_INT_P (otherops[2])
17940 || (INTVAL (otherops[2]) > -256
17941 && INTVAL (otherops[2]) < 256))
17943 if (emit)
17944 output_asm_insn ("ldrd%?\t%0, [%1, %2]!", otherops);
17946 else
17948 if (emit)
17950 output_asm_insn ("ldr%?\t%0, [%1, %2]!", otherops);
17951 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
17953 if (count)
17954 *count = 2;
17959 else
17961 /* Use a single insn if we can.
17962 FIXME: IWMMXT allows offsets larger than ldrd can handle,
17963 fix these up with a pair of ldr. */
17964 if (TARGET_THUMB2
17965 || !CONST_INT_P (otherops[2])
17966 || (INTVAL (otherops[2]) > -256
17967 && INTVAL (otherops[2]) < 256))
17969 if (emit)
17970 output_asm_insn ("ldrd%?\t%0, [%1], %2", otherops);
17972 else
17974 if (emit)
17976 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
17977 output_asm_insn ("ldr%?\t%0, [%1], %2", otherops);
17979 if (count)
17980 *count = 2;
17983 break;
17985 case LABEL_REF:
17986 case CONST:
17987 /* We might be able to use ldrd %0, %1 here. However, the range is
17988 different to ldr/adr, and it is broken on some ARMv7-M
17989 implementations. */
17990 /* Use the second register of the pair to avoid problematic
17991 overlap. */
17992 otherops[1] = operands[1];
17993 if (emit)
17994 output_asm_insn ("adr%?\t%0, %1", otherops);
17995 operands[1] = otherops[0];
17996 if (emit)
17998 if (TARGET_LDRD)
17999 output_asm_insn ("ldrd%?\t%0, [%1]", operands);
18000 else
18001 output_asm_insn ("ldmia%?\t%1, %M0", operands);
18004 if (count)
18005 *count = 2;
18006 break;
18008 /* ??? This needs checking for thumb2. */
18009 default:
18010 if (arm_add_operand (XEXP (XEXP (operands[1], 0), 1),
18011 GET_MODE (XEXP (XEXP (operands[1], 0), 1))))
18013 otherops[0] = operands[0];
18014 otherops[1] = XEXP (XEXP (operands[1], 0), 0);
18015 otherops[2] = XEXP (XEXP (operands[1], 0), 1);
18017 if (GET_CODE (XEXP (operands[1], 0)) == PLUS)
18019 if (CONST_INT_P (otherops[2]) && !TARGET_LDRD)
18021 switch ((int) INTVAL (otherops[2]))
18023 case -8:
18024 if (emit)
18025 output_asm_insn ("ldmdb%?\t%1, %M0", otherops);
18026 return "";
18027 case -4:
18028 if (TARGET_THUMB2)
18029 break;
18030 if (emit)
18031 output_asm_insn ("ldmda%?\t%1, %M0", otherops);
18032 return "";
18033 case 4:
18034 if (TARGET_THUMB2)
18035 break;
18036 if (emit)
18037 output_asm_insn ("ldmib%?\t%1, %M0", otherops);
18038 return "";
18041 otherops[0] = gen_rtx_REG(SImode, REGNO(operands[0]) + 1);
18042 operands[1] = otherops[0];
18043 if (TARGET_LDRD
18044 && (REG_P (otherops[2])
18045 || TARGET_THUMB2
18046 || (CONST_INT_P (otherops[2])
18047 && INTVAL (otherops[2]) > -256
18048 && INTVAL (otherops[2]) < 256)))
18050 if (reg_overlap_mentioned_p (operands[0],
18051 otherops[2]))
18053 /* Swap base and index registers over to
18054 avoid a conflict. */
18055 std::swap (otherops[1], otherops[2]);
18057 /* If both registers conflict, it will usually
18058 have been fixed by a splitter. */
18059 if (reg_overlap_mentioned_p (operands[0], otherops[2])
18060 || (fix_cm3_ldrd && reg0 == REGNO (otherops[1])))
18062 if (emit)
18064 output_asm_insn ("add%?\t%0, %1, %2", otherops);
18065 output_asm_insn ("ldrd%?\t%0, [%1]", operands);
18067 if (count)
18068 *count = 2;
18070 else
18072 otherops[0] = operands[0];
18073 if (emit)
18074 output_asm_insn ("ldrd%?\t%0, [%1, %2]", otherops);
18076 return "";
18079 if (CONST_INT_P (otherops[2]))
18081 if (emit)
18083 if (!(const_ok_for_arm (INTVAL (otherops[2]))))
18084 output_asm_insn ("sub%?\t%0, %1, #%n2", otherops);
18085 else
18086 output_asm_insn ("add%?\t%0, %1, %2", otherops);
18089 else
18091 if (emit)
18092 output_asm_insn ("add%?\t%0, %1, %2", otherops);
18095 else
18097 if (emit)
18098 output_asm_insn ("sub%?\t%0, %1, %2", otherops);
18101 if (count)
18102 *count = 2;
18104 if (TARGET_LDRD)
18105 return "ldrd%?\t%0, [%1]";
18107 return "ldmia%?\t%1, %M0";
18109 else
18111 otherops[1] = adjust_address (operands[1], SImode, 4);
18112 /* Take care of overlapping base/data reg. */
18113 if (reg_mentioned_p (operands[0], operands[1]))
18115 if (emit)
18117 output_asm_insn ("ldr%?\t%0, %1", otherops);
18118 output_asm_insn ("ldr%?\t%0, %1", operands);
18120 if (count)
18121 *count = 2;
18124 else
18126 if (emit)
18128 output_asm_insn ("ldr%?\t%0, %1", operands);
18129 output_asm_insn ("ldr%?\t%0, %1", otherops);
18131 if (count)
18132 *count = 2;
18137 else
18139 /* Constraints should ensure this. */
18140 gcc_assert (code0 == MEM && code1 == REG);
18141 gcc_assert ((REGNO (operands[1]) != IP_REGNUM)
18142 || (TARGET_ARM && TARGET_LDRD));
18144 switch (GET_CODE (XEXP (operands[0], 0)))
18146 case REG:
18147 if (emit)
18149 if (TARGET_LDRD)
18150 output_asm_insn ("strd%?\t%1, [%m0]", operands);
18151 else
18152 output_asm_insn ("stm%?\t%m0, %M1", operands);
18154 break;
18156 case PRE_INC:
18157 gcc_assert (TARGET_LDRD);
18158 if (emit)
18159 output_asm_insn ("strd%?\t%1, [%m0, #8]!", operands);
18160 break;
18162 case PRE_DEC:
18163 if (emit)
18165 if (TARGET_LDRD)
18166 output_asm_insn ("strd%?\t%1, [%m0, #-8]!", operands);
18167 else
18168 output_asm_insn ("stmdb%?\t%m0!, %M1", operands);
18170 break;
18172 case POST_INC:
18173 if (emit)
18175 if (TARGET_LDRD)
18176 output_asm_insn ("strd%?\t%1, [%m0], #8", operands);
18177 else
18178 output_asm_insn ("stm%?\t%m0!, %M1", operands);
18180 break;
18182 case POST_DEC:
18183 gcc_assert (TARGET_LDRD);
18184 if (emit)
18185 output_asm_insn ("strd%?\t%1, [%m0], #-8", operands);
18186 break;
18188 case PRE_MODIFY:
18189 case POST_MODIFY:
18190 otherops[0] = operands[1];
18191 otherops[1] = XEXP (XEXP (XEXP (operands[0], 0), 1), 0);
18192 otherops[2] = XEXP (XEXP (XEXP (operands[0], 0), 1), 1);
18194 /* IWMMXT allows offsets larger than ldrd can handle,
18195 fix these up with a pair of ldr. */
18196 if (!TARGET_THUMB2
18197 && CONST_INT_P (otherops[2])
18198 && (INTVAL(otherops[2]) <= -256
18199 || INTVAL(otherops[2]) >= 256))
18201 if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
18203 if (emit)
18205 output_asm_insn ("str%?\t%0, [%1, %2]!", otherops);
18206 output_asm_insn ("str%?\t%H0, [%1, #4]", otherops);
18208 if (count)
18209 *count = 2;
18211 else
18213 if (emit)
18215 output_asm_insn ("str%?\t%H0, [%1, #4]", otherops);
18216 output_asm_insn ("str%?\t%0, [%1], %2", otherops);
18218 if (count)
18219 *count = 2;
18222 else if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
18224 if (emit)
18225 output_asm_insn ("strd%?\t%0, [%1, %2]!", otherops);
18227 else
18229 if (emit)
18230 output_asm_insn ("strd%?\t%0, [%1], %2", otherops);
18232 break;
18234 case PLUS:
18235 otherops[2] = XEXP (XEXP (operands[0], 0), 1);
18236 if (CONST_INT_P (otherops[2]) && !TARGET_LDRD)
18238 switch ((int) INTVAL (XEXP (XEXP (operands[0], 0), 1)))
18240 case -8:
18241 if (emit)
18242 output_asm_insn ("stmdb%?\t%m0, %M1", operands);
18243 return "";
18245 case -4:
18246 if (TARGET_THUMB2)
18247 break;
18248 if (emit)
18249 output_asm_insn ("stmda%?\t%m0, %M1", operands);
18250 return "";
18252 case 4:
18253 if (TARGET_THUMB2)
18254 break;
18255 if (emit)
18256 output_asm_insn ("stmib%?\t%m0, %M1", operands);
18257 return "";
18260 if (TARGET_LDRD
18261 && (REG_P (otherops[2])
18262 || TARGET_THUMB2
18263 || (CONST_INT_P (otherops[2])
18264 && INTVAL (otherops[2]) > -256
18265 && INTVAL (otherops[2]) < 256)))
18267 otherops[0] = operands[1];
18268 otherops[1] = XEXP (XEXP (operands[0], 0), 0);
18269 if (emit)
18270 output_asm_insn ("strd%?\t%0, [%1, %2]", otherops);
18271 return "";
18273 /* Fall through */
18275 default:
18276 otherops[0] = adjust_address (operands[0], SImode, 4);
18277 otherops[1] = operands[1];
18278 if (emit)
18280 output_asm_insn ("str%?\t%1, %0", operands);
18281 output_asm_insn ("str%?\t%H1, %0", otherops);
18283 if (count)
18284 *count = 2;
18288 return "";
18291 /* Output a move, load or store for quad-word vectors in ARM registers. Only
18292 handles MEMs accepted by neon_vector_mem_operand with TYPE=1. */
18294 const char *
18295 output_move_quad (rtx *operands)
18297 if (REG_P (operands[0]))
18299 /* Load, or reg->reg move. */
18301 if (MEM_P (operands[1]))
18303 switch (GET_CODE (XEXP (operands[1], 0)))
18305 case REG:
18306 output_asm_insn ("ldmia%?\t%m1, %M0", operands);
18307 break;
18309 case LABEL_REF:
18310 case CONST:
18311 output_asm_insn ("adr%?\t%0, %1", operands);
18312 output_asm_insn ("ldmia%?\t%0, %M0", operands);
18313 break;
18315 default:
18316 gcc_unreachable ();
18319 else
18321 rtx ops[2];
18322 int dest, src, i;
18324 gcc_assert (REG_P (operands[1]));
18326 dest = REGNO (operands[0]);
18327 src = REGNO (operands[1]);
18329 /* This seems pretty dumb, but hopefully GCC won't try to do it
18330 very often. */
18331 if (dest < src)
18332 for (i = 0; i < 4; i++)
18334 ops[0] = gen_rtx_REG (SImode, dest + i);
18335 ops[1] = gen_rtx_REG (SImode, src + i);
18336 output_asm_insn ("mov%?\t%0, %1", ops);
18338 else
18339 for (i = 3; i >= 0; i--)
18341 ops[0] = gen_rtx_REG (SImode, dest + i);
18342 ops[1] = gen_rtx_REG (SImode, src + i);
18343 output_asm_insn ("mov%?\t%0, %1", ops);
18347 else
18349 gcc_assert (MEM_P (operands[0]));
18350 gcc_assert (REG_P (operands[1]));
18351 gcc_assert (!reg_overlap_mentioned_p (operands[1], operands[0]));
18353 switch (GET_CODE (XEXP (operands[0], 0)))
18355 case REG:
18356 output_asm_insn ("stm%?\t%m0, %M1", operands);
18357 break;
18359 default:
18360 gcc_unreachable ();
18364 return "";
18367 /* Output a VFP load or store instruction. */
18369 const char *
18370 output_move_vfp (rtx *operands)
18372 rtx reg, mem, addr, ops[2];
18373 int load = REG_P (operands[0]);
18374 int dp = GET_MODE_SIZE (GET_MODE (operands[0])) == 8;
18375 int sp = (!TARGET_VFP_FP16INST
18376 || GET_MODE_SIZE (GET_MODE (operands[0])) == 4);
18377 int integer_p = GET_MODE_CLASS (GET_MODE (operands[0])) == MODE_INT;
18378 const char *templ;
18379 char buff[50];
18380 machine_mode mode;
18382 reg = operands[!load];
18383 mem = operands[load];
18385 mode = GET_MODE (reg);
18387 gcc_assert (REG_P (reg));
18388 gcc_assert (IS_VFP_REGNUM (REGNO (reg)));
18389 gcc_assert ((mode == HFmode && TARGET_HARD_FLOAT)
18390 || mode == SFmode
18391 || mode == DFmode
18392 || mode == HImode
18393 || mode == SImode
18394 || mode == DImode
18395 || (TARGET_NEON && VALID_NEON_DREG_MODE (mode)));
18396 gcc_assert (MEM_P (mem));
18398 addr = XEXP (mem, 0);
18400 switch (GET_CODE (addr))
18402 case PRE_DEC:
18403 templ = "v%smdb%%?.%s\t%%0!, {%%%s1}%s";
18404 ops[0] = XEXP (addr, 0);
18405 ops[1] = reg;
18406 break;
18408 case POST_INC:
18409 templ = "v%smia%%?.%s\t%%0!, {%%%s1}%s";
18410 ops[0] = XEXP (addr, 0);
18411 ops[1] = reg;
18412 break;
18414 default:
18415 templ = "v%sr%%?.%s\t%%%s0, %%1%s";
18416 ops[0] = reg;
18417 ops[1] = mem;
18418 break;
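/* For instance (illustrative only), a DFmode load from a plain register
   address ends up as "vldr%?.64\t%P0, %1".  */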
18421 sprintf (buff, templ,
18422 load ? "ld" : "st",
18423 dp ? "64" : sp ? "32" : "16",
18424 dp ? "P" : "",
18425 integer_p ? "\t%@ int" : "");
18426 output_asm_insn (buff, ops);
18428 return "";
18431 /* Output a Neon double-word or quad-word load or store, or a load
18432 or store for larger structure modes.
18434 WARNING: The ordering of elements is weird in big-endian mode,
18435 because the EABI requires that vectors stored in memory appear
18436 as though they were stored by a VSTM instruction.
18437 GCC RTL defines element ordering based on in-memory order.
18438 This can be different from the architectural ordering of elements
18439 within a NEON register. The intrinsics defined in arm_neon.h use the
18440 NEON register element ordering, not the GCC RTL element ordering.
18442 For example, the in-memory ordering of a big-endian quadword
18443 vector with 16-bit elements when stored from register pair {d0,d1}
18444 will be (lowest address first, d0[N] is NEON register element N):
18446 [d0[3], d0[2], d0[1], d0[0], d1[7], d1[6], d1[5], d1[4]]
18448 When necessary, quadword registers (dN, dN+1) are moved to ARM
18449 registers from rN in the order:
18451 dN -> (rN+1, rN), dN+1 -> (rN+3, rN+2)
18453 So that STM/LDM can be used on vectors in ARM registers, and the
18454 same memory layout will result as if VSTM/VLDM were used.
18456 Instead of VSTM/VLDM we prefer to use VST1.64/VLD1.64 where
18457 possible, which allows use of appropriate alignment tags.
18458 Note that the choice of "64" is independent of the actual vector
18459 element size; this size simply ensures that the behavior is
18460 equivalent to VSTM/VLDM in both little-endian and big-endian mode.
18462 Due to limitations of those instructions, use of VST1.64/VLD1.64
18463 is not possible if:
18464 - the address contains PRE_DEC, or
18465 - the mode refers to more than 4 double-word registers
18467 In those cases, it would be possible to replace VSTM/VLDM by a
18468 sequence of instructions; this is not currently implemented since
18469 this is not certain to actually improve performance. */
18471 const char *
18472 output_move_neon (rtx *operands)
18474 rtx reg, mem, addr, ops[2];
18475 int regno, nregs, load = REG_P (operands[0]);
18476 const char *templ;
18477 char buff[50];
18478 machine_mode mode;
18480 reg = operands[!load];
18481 mem = operands[load];
18483 mode = GET_MODE (reg);
18485 gcc_assert (REG_P (reg));
18486 regno = REGNO (reg);
18487 nregs = HARD_REGNO_NREGS (regno, mode) / 2;
18488 gcc_assert (VFP_REGNO_OK_FOR_DOUBLE (regno)
18489 || NEON_REGNO_OK_FOR_QUAD (regno));
18490 gcc_assert (VALID_NEON_DREG_MODE (mode)
18491 || VALID_NEON_QREG_MODE (mode)
18492 || VALID_NEON_STRUCT_MODE (mode));
18493 gcc_assert (MEM_P (mem));
18495 addr = XEXP (mem, 0);
18497 /* Strip off const from addresses like (const (plus (...))). */
18498 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS)
18499 addr = XEXP (addr, 0);
18501 switch (GET_CODE (addr))
18503 case POST_INC:
18504 /* We have to use vldm / vstm for too-large modes. */
18505 if (nregs > 4)
18507 templ = "v%smia%%?\t%%0!, %%h1";
18508 ops[0] = XEXP (addr, 0);
18510 else
18512 templ = "v%s1.64\t%%h1, %%A0";
18513 ops[0] = mem;
18515 ops[1] = reg;
18516 break;
18518 case PRE_DEC:
18519 /* We have to use vldm / vstm in this case, since there is no
18520 pre-decrement form of the vld1 / vst1 instructions. */
18521 templ = "v%smdb%%?\t%%0!, %%h1";
18522 ops[0] = XEXP (addr, 0);
18523 ops[1] = reg;
18524 break;
18526 case POST_MODIFY:
18527 /* FIXME: Not currently enabled in neon_vector_mem_operand. */
18528 gcc_unreachable ();
18530 case REG:
18531 /* We have to use vldm / vstm for too-large modes. */
18532 if (nregs > 1)
18534 if (nregs > 4)
18535 templ = "v%smia%%?\t%%m0, %%h1";
18536 else
18537 templ = "v%s1.64\t%%h1, %%A0";
18539 ops[0] = mem;
18540 ops[1] = reg;
18541 break;
18543 /* Fall through. */
18544 case LABEL_REF:
18545 case PLUS:
18547 int i;
18548 int overlap = -1;
18549 for (i = 0; i < nregs; i++)
18551 /* We're only using DImode here because it's a convenient size. */
18552 ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * i);
18553 ops[1] = adjust_address (mem, DImode, 8 * i);
18554 if (reg_overlap_mentioned_p (ops[0], mem))
18556 gcc_assert (overlap == -1);
18557 overlap = i;
18559 else
18561 sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
18562 output_asm_insn (buff, ops);
18565 if (overlap != -1)
18567 ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * overlap);
18568 ops[1] = adjust_address (mem, SImode, 8 * overlap);
18569 sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
18570 output_asm_insn (buff, ops);
18573 return "";
18576 default:
18577 gcc_unreachable ();
18580 sprintf (buff, templ, load ? "ld" : "st");
18581 output_asm_insn (buff, ops);
18583 return "";
18586 /* Compute and return the length of neon_mov<mode>, where <mode> is
18587 one of VSTRUCT modes: EI, OI, CI or XI. */
18589 arm_attr_length_move_neon (rtx_insn *insn)
18591 rtx reg, mem, addr;
18592 int load;
18593 machine_mode mode;
18595 extract_insn_cached (insn);
18597 if (REG_P (recog_data.operand[0]) && REG_P (recog_data.operand[1]))
18599 mode = GET_MODE (recog_data.operand[0]);
18600 switch (mode)
18602 case EImode:
18603 case OImode:
18604 return 8;
18605 case CImode:
18606 return 12;
18607 case XImode:
18608 return 16;
18609 default:
18610 gcc_unreachable ();
18614 load = REG_P (recog_data.operand[0]);
18615 reg = recog_data.operand[!load];
18616 mem = recog_data.operand[load];
18618 gcc_assert (MEM_P (mem));
18620 mode = GET_MODE (reg);
18621 addr = XEXP (mem, 0);
18623 /* Strip off const from addresses like (const (plus (...))). */
18624 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS)
18625 addr = XEXP (addr, 0);
18627 if (GET_CODE (addr) == LABEL_REF || GET_CODE (addr) == PLUS)
18629 int insns = HARD_REGNO_NREGS (REGNO (reg), mode) / 2;
18630 return insns * 4;
18632 else
18633 return 4;
18636 /* Return nonzero if the offset in the address is an immediate. Otherwise,
18637 return zero. */
18640 arm_address_offset_is_imm (rtx_insn *insn)
18642 rtx mem, addr;
18644 extract_insn_cached (insn);
18646 if (REG_P (recog_data.operand[0]))
18647 return 0;
18649 mem = recog_data.operand[0];
18651 gcc_assert (MEM_P (mem));
18653 addr = XEXP (mem, 0);
18655 if (REG_P (addr)
18656 || (GET_CODE (addr) == PLUS
18657 && REG_P (XEXP (addr, 0))
18658 && CONST_INT_P (XEXP (addr, 1))))
18659 return 1;
18660 else
18661 return 0;
18664 /* Output an ADD r, s, #n where n may be too big for one instruction.
18665 If adding zero to one register, output nothing. */
18666 const char *
18667 output_add_immediate (rtx *operands)
18669 HOST_WIDE_INT n = INTVAL (operands[2]);
18671 if (n != 0 || REGNO (operands[0]) != REGNO (operands[1]))
18673 if (n < 0)
18674 output_multi_immediate (operands,
18675 "sub%?\t%0, %1, %2", "sub%?\t%0, %0, %2", 2,
18676 -n);
18677 else
18678 output_multi_immediate (operands,
18679 "add%?\t%0, %1, %2", "add%?\t%0, %0, %2", 2,
18683 return "";
18686 /* Output a multiple immediate operation.
18687 OPERANDS is the vector of operands referred to in the output patterns.
18688 INSTR1 is the output pattern to use for the first constant.
18689 INSTR2 is the output pattern to use for subsequent constants.
18690 IMMED_OP is the index of the constant slot in OPERANDS.
18691 N is the constant value. */
18692 static const char *
18693 output_multi_immediate (rtx *operands, const char *instr1, const char *instr2,
18694 int immed_op, HOST_WIDE_INT n)
18696 #if HOST_BITS_PER_WIDE_INT > 32
18697 n &= 0xffffffff;
18698 #endif
18700 if (n == 0)
18702 /* Quick and easy output. */
18703 operands[immed_op] = const0_rtx;
18704 output_asm_insn (instr1, operands);
18706 else
18708 int i;
18709 const char * instr = instr1;
18711 /* Note that n is never zero here (which would give no output). */
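/* For instance (illustrative only), n = 0x10804 is split into the chunks
   0x4 and 0x10800, each of which fits in an 8-bit field at an even bit
   position, so two instructions are emitted.  */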
18712 for (i = 0; i < 32; i += 2)
18714 if (n & (3 << i))
18716 operands[immed_op] = GEN_INT (n & (255 << i));
18717 output_asm_insn (instr, operands);
18718 instr = instr2;
18719 i += 6;
18724 return "";
18727 /* Return the name of a shifter operation. */
18728 static const char *
18729 arm_shift_nmem(enum rtx_code code)
18731 switch (code)
18733 case ASHIFT:
18734 return ARM_LSL_NAME;
18736 case ASHIFTRT:
18737 return "asr";
18739 case LSHIFTRT:
18740 return "lsr";
18742 case ROTATERT:
18743 return "ror";
18745 default:
18746 abort();
18750 /* Return the appropriate ARM instruction for the operation code.
18751 The returned result should not be overwritten. OP is the rtx of the
18752 operation. SHIFT_FIRST_ARG is TRUE if the first argument of the operator
18753 was shifted. */
18754 const char *
18755 arithmetic_instr (rtx op, int shift_first_arg)
18757 switch (GET_CODE (op))
18759 case PLUS:
18760 return "add";
18762 case MINUS:
18763 return shift_first_arg ? "rsb" : "sub";
18765 case IOR:
18766 return "orr";
18768 case XOR:
18769 return "eor";
18771 case AND:
18772 return "and";
18774 case ASHIFT:
18775 case ASHIFTRT:
18776 case LSHIFTRT:
18777 case ROTATERT:
18778 return arm_shift_nmem(GET_CODE(op));
18780 default:
18781 gcc_unreachable ();
18785 /* Ensure valid constant shifts and return the appropriate shift mnemonic
18786 for the operation code. The returned result should not be overwritten.
18787 OP is the rtx code of the shift.
18788 On exit, *AMOUNTP will be -1 if the shift is by a register, or the
18789 constant shift amount otherwise. */
18790 static const char *
18791 shift_op (rtx op, HOST_WIDE_INT *amountp)
18793 const char * mnem;
18794 enum rtx_code code = GET_CODE (op);
18796 switch (code)
18798 case ROTATE:
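/* There is no rotate-left instruction, so a left rotate by N is emitted
   as a rotate-right by (32 - N).  */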
18799 if (!CONST_INT_P (XEXP (op, 1)))
18801 output_operand_lossage ("invalid shift operand");
18802 return NULL;
18805 code = ROTATERT;
18806 *amountp = 32 - INTVAL (XEXP (op, 1));
18807 mnem = "ror";
18808 break;
18810 case ASHIFT:
18811 case ASHIFTRT:
18812 case LSHIFTRT:
18813 case ROTATERT:
18814 mnem = arm_shift_nmem(code);
18815 if (CONST_INT_P (XEXP (op, 1)))
18817 *amountp = INTVAL (XEXP (op, 1));
18819 else if (REG_P (XEXP (op, 1)))
18821 *amountp = -1;
18822 return mnem;
18824 else
18826 output_operand_lossage ("invalid shift operand");
18827 return NULL;
18829 break;
18831 case MULT:
18832 /* We never have to worry about the amount being other than a
18833 power of 2, since this case can never be reloaded from a reg. */
18834 if (!CONST_INT_P (XEXP (op, 1)))
18836 output_operand_lossage ("invalid shift operand");
18837 return NULL;
18840 *amountp = INTVAL (XEXP (op, 1)) & 0xFFFFFFFF;
18842 /* Amount must be a power of two. */
18843 if (*amountp & (*amountp - 1))
18845 output_operand_lossage ("invalid shift operand");
18846 return NULL;
18849 *amountp = exact_log2 (*amountp);
18850 gcc_assert (IN_RANGE (*amountp, 0, 31));
18851 return ARM_LSL_NAME;
18853 default:
18854 output_operand_lossage ("invalid shift operand");
18855 return NULL;
18858 /* This is not 100% correct, but follows from the desire to merge
18859 multiplication by a power of 2 with the recognizer for a
18860 shift. >=32 is not a valid shift for "lsl", so we must try and
18861 output a shift that produces the correct arithmetical result.
18862 Using lsr #32 is identical except for the fact that the carry bit
18863 is not set correctly if we set the flags; but we never use the
18864 carry bit from such an operation, so we can ignore that. */
18865 if (code == ROTATERT)
18866 /* Rotate is just modulo 32. */
18867 *amountp &= 31;
18868 else if (*amountp != (*amountp & 31))
18870 if (code == ASHIFT)
18871 mnem = "lsr";
18872 *amountp = 32;
18875 /* Shifts of 0 are no-ops. */
18876 if (*amountp == 0)
18877 return NULL;
18879 return mnem;
18882 /* Output a .ascii pseudo-op, keeping track of lengths. This is
18883 because /bin/as is horribly restrictive. The judgement about
18884 whether or not each character is 'printable' (and can be output as
18885 is) or not (and must be printed with an octal escape) must be made
18886 with reference to the *host* character set -- the situation is
18887 similar to that discussed in the comments above pp_c_char in
18888 c-pretty-print.c. */
18890 #define MAX_ASCII_LEN 51
18892 void
18893 output_ascii_pseudo_op (FILE *stream, const unsigned char *p, int len)
18895 int i;
18896 int len_so_far = 0;
18898 fputs ("\t.ascii\t\"", stream);
18900 for (i = 0; i < len; i++)
18902 int c = p[i];
18904 if (len_so_far >= MAX_ASCII_LEN)
18906 fputs ("\"\n\t.ascii\t\"", stream);
18907 len_so_far = 0;
18910 if (ISPRINT (c))
18912 if (c == '\\' || c == '\"')
18914 putc ('\\', stream);
18915 len_so_far++;
18917 putc (c, stream);
18918 len_so_far++;
18920 else
18922 fprintf (stream, "\\%03o", c);
18923 len_so_far += 4;
18927 fputs ("\"\n", stream);
18930 /* Whether a register is callee saved or not. This is necessary because high
18931 registers are marked as caller saved when optimizing for size on Thumb-1
18932 targets despite being callee saved, in order to avoid using them. */
18933 #define callee_saved_reg_p(reg) \
18934 (!call_used_regs[reg] \
18935 || (TARGET_THUMB1 && optimize_size \
18936 && reg >= FIRST_HI_REGNUM && reg <= LAST_HI_REGNUM))
18938 /* Compute the register save mask for registers 0 through 12
18939 inclusive. This code is used by arm_compute_save_reg_mask. */
18941 static unsigned long
18942 arm_compute_save_reg0_reg12_mask (void)
18944 unsigned long func_type = arm_current_func_type ();
18945 unsigned long save_reg_mask = 0;
18946 unsigned int reg;
18948 if (IS_INTERRUPT (func_type))
18950 unsigned int max_reg;
18951 /* Interrupt functions must not corrupt any registers,
18952 even call clobbered ones. If this is a leaf function
18953 we can just examine the registers used by the RTL, but
18954 otherwise we have to assume that whatever function is
18955 called might clobber anything, and so we have to save
18956 all the call-clobbered registers as well. */
18957 if (ARM_FUNC_TYPE (func_type) == ARM_FT_FIQ)
18958 /* FIQ handlers have registers r8 - r12 banked, so
18959 we only need to check r0 - r7. Normal ISRs only
18960 bank r14 and r15, so we must check up to r12.
18961 r13 is the stack pointer which is always preserved,
18962 so we do not need to consider it here. */
18963 max_reg = 7;
18964 else
18965 max_reg = 12;
18967 for (reg = 0; reg <= max_reg; reg++)
18968 if (df_regs_ever_live_p (reg)
18969 || (! crtl->is_leaf && call_used_regs[reg]))
18970 save_reg_mask |= (1 << reg);
18972 /* Also save the pic base register if necessary. */
18973 if (flag_pic
18974 && !TARGET_SINGLE_PIC_BASE
18975 && arm_pic_register != INVALID_REGNUM
18976 && crtl->uses_pic_offset_table)
18977 save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
18979 else if (IS_VOLATILE(func_type))
18981 /* For noreturn functions we historically omitted register saves
18982 altogether. However, this really messes up debugging. As a
18983 compromise, save just the frame pointers. Combined with the link
18984 register saved elsewhere this should be sufficient to get
18985 a backtrace. */
18986 if (frame_pointer_needed)
18987 save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;
18988 if (df_regs_ever_live_p (ARM_HARD_FRAME_POINTER_REGNUM))
18989 save_reg_mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;
18990 if (df_regs_ever_live_p (THUMB_HARD_FRAME_POINTER_REGNUM))
18991 save_reg_mask |= 1 << THUMB_HARD_FRAME_POINTER_REGNUM;
18993 else
18995 /* In the normal case we only need to save those registers
18996 which are call saved and which are used by this function. */
18997 for (reg = 0; reg <= 11; reg++)
18998 if (df_regs_ever_live_p (reg) && callee_saved_reg_p (reg))
18999 save_reg_mask |= (1 << reg);
19001 /* Handle the frame pointer as a special case. */
19002 if (frame_pointer_needed)
19003 save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;
19005 /* If we aren't loading the PIC register,
19006 don't stack it even though it may be live. */
19007 if (flag_pic
19008 && !TARGET_SINGLE_PIC_BASE
19009 && arm_pic_register != INVALID_REGNUM
19010 && (df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM)
19011 || crtl->uses_pic_offset_table))
19012 save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
19014 /* The prologue will copy SP into R0, so save it. */
19015 if (IS_STACKALIGN (func_type))
19016 save_reg_mask |= 1;
19019 /* Save registers so the exception handler can modify them. */
19020 if (crtl->calls_eh_return)
19022 unsigned int i;
19024 for (i = 0; ; i++)
19026 reg = EH_RETURN_DATA_REGNO (i);
19027 if (reg == INVALID_REGNUM)
19028 break;
19029 save_reg_mask |= 1 << reg;
19033 return save_reg_mask;
19036 /* Return true if r3 is live at the start of the function. */
19038 static bool
19039 arm_r3_live_at_start_p (void)
19041 /* Just look at cfg info, which is still close enough to correct at this
19042 point. This gives false positives for broken functions that might use
19043 uninitialized data that happens to be allocated in r3, but who cares? */
19044 return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun)), 3);
19047 /* Compute the number of bytes used to store the static chain register on the
19048 stack, above the stack frame. We need to know this accurately to get the
19049 alignment of the rest of the stack frame correct. */
19051 static int
19052 arm_compute_static_chain_stack_bytes (void)
19054 /* See the defining assertion in arm_expand_prologue. */
19055 if (IS_NESTED (arm_current_func_type ())
19056 && ((TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
19057 || (flag_stack_check == STATIC_BUILTIN_STACK_CHECK
19058 && !df_regs_ever_live_p (LR_REGNUM)))
19059 && arm_r3_live_at_start_p ()
19060 && crtl->args.pretend_args_size == 0)
19061 return 4;
19063 return 0;
19066 /* Compute a bit mask of which registers need to be
19067 saved on the stack for the current function.
19068 This is used by arm_get_frame_offsets, which may add extra registers. */
19070 static unsigned long
19071 arm_compute_save_reg_mask (void)
19073 unsigned int save_reg_mask = 0;
19074 unsigned long func_type = arm_current_func_type ();
19075 unsigned int reg;
19077 if (IS_NAKED (func_type))
19078 /* This should never really happen. */
19079 return 0;
19081 /* If we are creating a stack frame, then we must save the frame pointer,
19082 IP (which will hold the old stack pointer), LR and the PC. */
19083 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
19084 save_reg_mask |=
19085 (1 << ARM_HARD_FRAME_POINTER_REGNUM)
19086 | (1 << IP_REGNUM)
19087 | (1 << LR_REGNUM)
19088 | (1 << PC_REGNUM);
19090 save_reg_mask |= arm_compute_save_reg0_reg12_mask ();
19092 /* Decide if we need to save the link register.
19093 Interrupt routines have their own banked link register,
19094 so they never need to save it.
19095 Otherwise if we do not use the link register we do not need to save
19096 it. If we are pushing other registers onto the stack however, we
19097 can save an instruction in the epilogue by pushing the link register
19098 now and then popping it back into the PC. This incurs extra memory
19099 accesses though, so we only do it when optimizing for size, and only
19100 if we know that we will not need a fancy return sequence. */
19101 if (df_regs_ever_live_p (LR_REGNUM)
19102 || (save_reg_mask
19103 && optimize_size
19104 && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
19105 && !crtl->tail_call_emit
19106 && !crtl->calls_eh_return))
19107 save_reg_mask |= 1 << LR_REGNUM;
19109 if (cfun->machine->lr_save_eliminated)
19110 save_reg_mask &= ~ (1 << LR_REGNUM);
19112 if (TARGET_REALLY_IWMMXT
19113 && ((bit_count (save_reg_mask)
19114 + ARM_NUM_INTS (crtl->args.pretend_args_size +
19115 arm_compute_static_chain_stack_bytes())
19116 ) % 2) != 0)
19118 /* The total number of registers that are going to be pushed
19119 onto the stack is odd. We need to ensure that the stack
19120 is 64-bit aligned before we start to save iWMMXt registers,
19121 and also before we start to create locals. (A local variable
19122 might be a double or long long which we will load/store using
19123 an iWMMXt instruction). Therefore we need to push another
19124 ARM register, so that the stack will be 64-bit aligned. We
19125 try to avoid using the arg registers (r0 - r3) as they might be
19126 used to pass values in a tail call. */
19127 for (reg = 4; reg <= 12; reg++)
19128 if ((save_reg_mask & (1 << reg)) == 0)
19129 break;
19131 if (reg <= 12)
19132 save_reg_mask |= (1 << reg);
19133 else
19135 cfun->machine->sibcall_blocked = 1;
19136 save_reg_mask |= (1 << 3);
19140 /* We may need to push an additional register for use in initializing the
19141 PIC base register. */
19142 if (TARGET_THUMB2 && IS_NESTED (func_type) && flag_pic
19143 && (save_reg_mask & THUMB2_WORK_REGS) == 0)
19145 reg = thumb_find_work_register (1 << 4);
19146 if (!call_used_regs[reg])
19147 save_reg_mask |= (1 << reg);
19150 return save_reg_mask;
19153 /* Compute a bit mask of which registers need to be
19154 saved on the stack for the current function. */
19155 static unsigned long
19156 thumb1_compute_save_reg_mask (void)
19158 unsigned long mask;
19159 unsigned reg;
19161 mask = 0;
19162 for (reg = 0; reg < 12; reg ++)
19163 if (df_regs_ever_live_p (reg) && callee_saved_reg_p (reg))
19164 mask |= 1 << reg;
19166 /* Handle the frame pointer as a special case. */
19167 if (frame_pointer_needed)
19168 mask |= 1 << HARD_FRAME_POINTER_REGNUM;
19170 if (flag_pic
19171 && !TARGET_SINGLE_PIC_BASE
19172 && arm_pic_register != INVALID_REGNUM
19173 && crtl->uses_pic_offset_table)
19174 mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
19176 /* See if we might need r11 for calls to _interwork_r11_call_via_rN(). */
19177 if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
19178 mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;
19180 /* LR will also be pushed if any lo regs are pushed. */
19181 if (mask & 0xff || thumb_force_lr_save ())
19182 mask |= (1 << LR_REGNUM);
19184 /* Make sure we have a low work register if we need one.
19185 We will need one if we are going to push a high register,
19186 but we are not currently intending to push a low register. */
19187 if ((mask & 0xff) == 0
19188 && ((mask & 0x0f00) || TARGET_BACKTRACE))
19190 /* Use thumb_find_work_register to choose which register
19191 we will use. If the register is live then we will
19192 have to push it. Use LAST_LO_REGNUM as our fallback
19193 choice for the register to select. */
19194 reg = thumb_find_work_register (1 << LAST_LO_REGNUM);
19195 /* Make sure the register returned by thumb_find_work_register is
19196 not part of the return value. */
19197 if (reg * UNITS_PER_WORD <= (unsigned) arm_size_return_regs ())
19198 reg = LAST_LO_REGNUM;
19200 if (callee_saved_reg_p (reg))
19201 mask |= 1 << reg;
19204 /* The 504 below is 8 bytes less than 512 because there are two possible
19205 alignment words. We can't tell here if they will be present or not so we
19206 have to play it safe and assume that they are. */
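/* Illustrative arithmetic: a 4-byte interworking slot plus a 496-byte frame
   and 4 bytes of outgoing arguments totals exactly 504, so the test below
   fires and a low register is kept free for the stack decrement.  */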
19207 if ((CALLER_INTERWORKING_SLOT_SIZE +
19208 ROUND_UP_WORD (get_frame_size ()) +
19209 crtl->outgoing_args_size) >= 504)
19211 /* This is the same as the code in thumb1_expand_prologue() which
19212 determines which register to use for stack decrement. */
19213 for (reg = LAST_ARG_REGNUM + 1; reg <= LAST_LO_REGNUM; reg++)
19214 if (mask & (1 << reg))
19215 break;
19217 if (reg > LAST_LO_REGNUM)
19219 /* Make sure we have a register available for stack decrement. */
19220 mask |= 1 << LAST_LO_REGNUM;
19224 return mask;
19228 /* Return the number of bytes required to save VFP registers. */
19229 static int
19230 arm_get_vfp_saved_size (void)
19232 unsigned int regno;
19233 int count;
19234 int saved;
19236 saved = 0;
19237 /* Space for saved VFP registers. */
19238 if (TARGET_HARD_FLOAT)
19240 count = 0;
19241 for (regno = FIRST_VFP_REGNUM;
19242 regno < LAST_VFP_REGNUM;
19243 regno += 2)
19245 if ((!df_regs_ever_live_p (regno) || call_used_regs[regno])
19246 && (!df_regs_ever_live_p (regno + 1) || call_used_regs[regno + 1]))
19248 if (count > 0)
19250 /* Workaround ARM10 VFPr1 bug. */
19251 if (count == 2 && !arm_arch6)
19252 count++;
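/* Illustrative: a contiguous run of exactly two live D registers on a
   pre-arm_arch6 core is therefore accounted as three, reserving 24 rather
   than 16 bytes for that run.  */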
19253 saved += count * 8;
19255 count = 0;
19257 else
19258 count++;
19260 if (count > 0)
19262 if (count == 2 && !arm_arch6)
19263 count++;
19264 saved += count * 8;
19267 return saved;
19271 /* Generate a function exit sequence. If REALLY_RETURN is false, then do
19272 everything bar the final return instruction. If SIMPLE_RETURN is true,
19273 then do not output the epilogue, because it has already been emitted in RTL. */
19274 const char *
19275 output_return_instruction (rtx operand, bool really_return, bool reverse,
19276 bool simple_return)
19278 char conditional[10];
19279 char instr[100];
19280 unsigned reg;
19281 unsigned long live_regs_mask;
19282 unsigned long func_type;
19283 arm_stack_offsets *offsets;
19285 func_type = arm_current_func_type ();
19287 if (IS_NAKED (func_type))
19288 return "";
19290 if (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN)
19292 /* If this function was declared non-returning, and we have
19293 found a tail call, then we have to trust that the called
19294 function won't return. */
19295 if (really_return)
19297 rtx ops[2];
19299 /* Otherwise, trap an attempted return by aborting. */
19300 ops[0] = operand;
19301 ops[1] = gen_rtx_SYMBOL_REF (Pmode, NEED_PLT_RELOC ? "abort(PLT)"
19302 : "abort");
19303 assemble_external_libcall (ops[1]);
19304 output_asm_insn (reverse ? "bl%D0\t%a1" : "bl%d0\t%a1", ops);
19307 return "";
19310 gcc_assert (!cfun->calls_alloca || really_return);
19312 sprintf (conditional, "%%?%%%c0", reverse ? 'D' : 'd');
19314 cfun->machine->return_used_this_function = 1;
19316 offsets = arm_get_frame_offsets ();
19317 live_regs_mask = offsets->saved_regs_mask;
19319 if (!simple_return && live_regs_mask)
19321 const char * return_reg;
19323 /* If we do not have any special requirements for function exit
19324 (e.g. interworking) then we can load the return address
19325 directly into the PC. Otherwise we must load it into LR. */
19326 if (really_return
19327 && !IS_CMSE_ENTRY (func_type)
19328 && (IS_INTERRUPT (func_type) || !TARGET_INTERWORK))
19329 return_reg = reg_names[PC_REGNUM];
19330 else
19331 return_reg = reg_names[LR_REGNUM];
19333 if ((live_regs_mask & (1 << IP_REGNUM)) == (1 << IP_REGNUM))
19335 /* There are three possible reasons for the IP register
19336 being saved. 1) a stack frame was created, in which case
19337 IP contains the old stack pointer, or 2) an ISR routine
19338 corrupted it, or 3) it was saved to align the stack on
19339 iWMMXt. In case 1, restore IP into SP, otherwise just
19340 restore IP. */
19341 if (frame_pointer_needed)
19343 live_regs_mask &= ~ (1 << IP_REGNUM);
19344 live_regs_mask |= (1 << SP_REGNUM);
19346 else
19347 gcc_assert (IS_INTERRUPT (func_type) || TARGET_REALLY_IWMMXT);
19350 /* On some ARM architectures it is faster to use LDR rather than
19351 LDM to load a single register. On other architectures, the
19352 cost is the same. In 26 bit mode, or for exception handlers,
19353 we have to use LDM to load the PC so that the CPSR is also
19354 restored. */
19355 for (reg = 0; reg <= LAST_ARM_REGNUM; reg++)
19356 if (live_regs_mask == (1U << reg))
19357 break;
19359 if (reg <= LAST_ARM_REGNUM
19360 && (reg != LR_REGNUM
19361 || ! really_return
19362 || ! IS_INTERRUPT (func_type)))
19364 sprintf (instr, "ldr%s\t%%|%s, [%%|sp], #4", conditional,
19365 (reg == LR_REGNUM) ? return_reg : reg_names[reg]);
19367 else
19369 char *p;
19370 int first = 1;
19372 /* Generate the load multiple instruction to restore the
19373 registers. Note we can get here, even if
19374 frame_pointer_needed is true, but only if sp already
19375 points to the base of the saved core registers. */
19376 if (live_regs_mask & (1 << SP_REGNUM))
19378 unsigned HOST_WIDE_INT stack_adjust;
19380 stack_adjust = offsets->outgoing_args - offsets->saved_regs;
19381 gcc_assert (stack_adjust == 0 || stack_adjust == 4);
19383 if (stack_adjust && arm_arch5 && TARGET_ARM)
19384 sprintf (instr, "ldmib%s\t%%|sp, {", conditional);
19385 else
19387 /* If we can't use ldmib (SA110 bug),
19388 then try to pop r3 instead. */
19389 if (stack_adjust)
19390 live_regs_mask |= 1 << 3;
19392 sprintf (instr, "ldmfd%s\t%%|sp, {", conditional);
19395 /* For interrupt returns we have to use an LDM rather than
19396 a POP so that we can use the exception return variant. */
19397 else if (IS_INTERRUPT (func_type))
19398 sprintf (instr, "ldmfd%s\t%%|sp!, {", conditional);
19399 else
19400 sprintf (instr, "pop%s\t{", conditional);
19402 p = instr + strlen (instr);
19404 for (reg = 0; reg <= SP_REGNUM; reg++)
19405 if (live_regs_mask & (1 << reg))
19407 int l = strlen (reg_names[reg]);
19409 if (first)
19410 first = 0;
19411 else
19413 memcpy (p, ", ", 2);
19414 p += 2;
19417 memcpy (p, "%|", 2);
19418 memcpy (p + 2, reg_names[reg], l);
19419 p += l + 2;
19422 if (live_regs_mask & (1 << LR_REGNUM))
19424 sprintf (p, "%s%%|%s}", first ? "" : ", ", return_reg);
19425 /* If returning from an interrupt, restore the CPSR. */
19426 if (IS_INTERRUPT (func_type))
19427 strcat (p, "^");
19429 else
19430 strcpy (p, "}");
19433 output_asm_insn (instr, & operand);
19435 /* See if we need to generate an extra instruction to
19436 perform the actual function return. */
19437 if (really_return
19438 && func_type != ARM_FT_INTERWORKED
19439 && (live_regs_mask & (1 << LR_REGNUM)) != 0)
19441 /* The return has already been handled
19442 by loading the LR into the PC. */
19443 return "";
19447 if (really_return)
19449 switch ((int) ARM_FUNC_TYPE (func_type))
19451 case ARM_FT_ISR:
19452 case ARM_FT_FIQ:
19453 /* ??? This is wrong for unified assembly syntax. */
19454 sprintf (instr, "sub%ss\t%%|pc, %%|lr, #4", conditional);
19455 break;
19457 case ARM_FT_INTERWORKED:
19458 gcc_assert (arm_arch5 || arm_arch4t);
19459 sprintf (instr, "bx%s\t%%|lr", conditional);
19460 break;
19462 case ARM_FT_EXCEPTION:
19463 /* ??? This is wrong for unified assembly syntax. */
19464 sprintf (instr, "mov%ss\t%%|pc, %%|lr", conditional);
19465 break;
19467 default:
19468 if (IS_CMSE_ENTRY (func_type))
19470 /* Check if we have to clear the 'GE bits', which are only used if
19471 parallel add and subtraction instructions are available. */
19472 if (TARGET_INT_SIMD)
19473 snprintf (instr, sizeof (instr),
19474 "msr%s\tAPSR_nzcvqg, %%|lr", conditional);
19475 else
19476 snprintf (instr, sizeof (instr),
19477 "msr%s\tAPSR_nzcvq, %%|lr", conditional);
19479 output_asm_insn (instr, & operand);
19480 if (TARGET_HARD_FLOAT && !TARGET_THUMB1)
19482 /* Clear the cumulative exception-status bits (0-4,7) and the
19483 condition code bits (28-31) of the FPSCR. We need to
19484 remember to clear the first scratch register used (IP) and
19485 save and restore the second (r4). */
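/* Illustrative note: the constants below build the mask 0x0fffff60 in r4
   (movw #65376 = 0xff60 for the low half, movt #4095 = 0x0fff for the high
   half); ANDing it into the FPSCR copy clears bits 0-4, 7 and 28-31 while
   leaving all other bits untouched.  */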
19486 snprintf (instr, sizeof (instr), "push\t{%%|r4}");
19487 output_asm_insn (instr, & operand);
19488 snprintf (instr, sizeof (instr), "vmrs\t%%|ip, fpscr");
19489 output_asm_insn (instr, & operand);
19490 snprintf (instr, sizeof (instr), "movw\t%%|r4, #65376");
19491 output_asm_insn (instr, & operand);
19492 snprintf (instr, sizeof (instr), "movt\t%%|r4, #4095");
19493 output_asm_insn (instr, & operand);
19494 snprintf (instr, sizeof (instr), "and\t%%|ip, %%|r4");
19495 output_asm_insn (instr, & operand);
19496 snprintf (instr, sizeof (instr), "vmsr\tfpscr, %%|ip");
19497 output_asm_insn (instr, & operand);
19498 snprintf (instr, sizeof (instr), "pop\t{%%|r4}");
19499 output_asm_insn (instr, & operand);
19500 snprintf (instr, sizeof (instr), "mov\t%%|ip, %%|lr");
19501 output_asm_insn (instr, & operand);
19503 snprintf (instr, sizeof (instr), "bxns\t%%|lr");
19505 /* Use bx if it's available. */
19506 else if (arm_arch5 || arm_arch4t)
19507 sprintf (instr, "bx%s\t%%|lr", conditional);
19508 else
19509 sprintf (instr, "mov%s\t%%|pc, %%|lr", conditional);
19510 break;
19513 output_asm_insn (instr, & operand);
19516 return "";
19519 /* Output in FILE asm statements needed to declare the NAME of the function
19520 defined by its DECL node. */
19522 void
19523 arm_asm_declare_function_name (FILE *file, const char *name, tree decl)
19525 size_t cmse_name_len;
19526 char *cmse_name = 0;
19527 char cmse_prefix[] = "__acle_se_";
19529 /* When compiling with ARMv8-M Security Extensions enabled, we should print an
19530 extra function label for each function with the 'cmse_nonsecure_entry'
19531 attribute. This extra function label should be prepended with
19532 '__acle_se_', telling the linker that it needs to create secure gateway
19533 veneers for this function. */
19534 if (use_cmse && lookup_attribute ("cmse_nonsecure_entry",
19535 DECL_ATTRIBUTES (decl)))
19537 cmse_name_len = sizeof (cmse_prefix) + strlen (name);
19538 cmse_name = XALLOCAVEC (char, cmse_name_len);
19539 snprintf (cmse_name, cmse_name_len, "%s%s", cmse_prefix, name);
19540 targetm.asm_out.globalize_label (file, cmse_name);
19542 ARM_DECLARE_FUNCTION_NAME (file, cmse_name, decl);
19543 ASM_OUTPUT_TYPE_DIRECTIVE (file, cmse_name, "function");
19546 ARM_DECLARE_FUNCTION_NAME (file, name, decl);
19547 ASM_OUTPUT_TYPE_DIRECTIVE (file, name, "function");
19548 ASM_DECLARE_RESULT (file, DECL_RESULT (decl));
19549 ASM_OUTPUT_LABEL (file, name);
19551 if (cmse_name)
19552 ASM_OUTPUT_LABEL (file, cmse_name);
19554 ARM_OUTPUT_FN_UNWIND (file, TRUE);
19557 /* Write the function name into the code section, directly preceding
19558 the function prologue.
19560 Code will be output similar to this:
19562 .ascii "arm_poke_function_name", 0
19563 .align
19565 .word 0xff000000 + (t1 - t0)
19566 arm_poke_function_name
19567 mov ip, sp
19568 stmfd sp!, {fp, ip, lr, pc}
19569 sub fp, ip, #4
19571 When performing a stack backtrace, code can inspect the value
19572 of 'pc' stored at 'fp' + 0. If the trace function then looks
19573 at location pc - 12 and the top 8 bits are set, then we know
19574 that there is a function name embedded immediately preceding this
19575 location, and that its length is ((pc[-3]) & ~0xff000000).
19577 We assume that pc is declared as a pointer to an unsigned long.
19579 It is of no benefit to output the function name if we are assembling
19580 a leaf function. These function types will not contain a stack
19581 backtrace structure, therefore it is not possible to determine the
19582 function name. */
19583 void
19584 arm_poke_function_name (FILE *stream, const char *name)
19586 unsigned long alignlength;
19587 unsigned long length;
19588 rtx x;
19590 length = strlen (name) + 1;
19591 alignlength = ROUND_UP_WORD (length);
19593 ASM_OUTPUT_ASCII (stream, name, length);
19594 ASM_OUTPUT_ALIGN (stream, 2);
19595 x = GEN_INT ((unsigned HOST_WIDE_INT) 0xff000000 + alignlength);
19596 assemble_aligned_integer (UNITS_PER_WORD, x);
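/* A hedged sketch, not part of the compiler: given the saved pc fetched
   from the frame as an `unsigned long *', a backtracer could recover the
   embedded name roughly as follows, assuming the layout described above.
   marker is the word at pc - 12 and len the padded name length:

     unsigned long marker = pc[-3];
     if ((marker & 0xff000000) == 0xff000000)
       {
         unsigned long len = marker & ~0xff000000;
         const char *name = (const char *) pc - 12 - len;
       }
 */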
19599 /* Place some comments into the assembler stream
19600 describing the current function. */
19601 static void
19602 arm_output_function_prologue (FILE *f, HOST_WIDE_INT frame_size)
19604 unsigned long func_type;
19606 /* Sanity check. */
19607 gcc_assert (!arm_ccfsm_state && !arm_target_insn);
19609 func_type = arm_current_func_type ();
19611 switch ((int) ARM_FUNC_TYPE (func_type))
19613 default:
19614 case ARM_FT_NORMAL:
19615 break;
19616 case ARM_FT_INTERWORKED:
19617 asm_fprintf (f, "\t%@ Function supports interworking.\n");
19618 break;
19619 case ARM_FT_ISR:
19620 asm_fprintf (f, "\t%@ Interrupt Service Routine.\n");
19621 break;
19622 case ARM_FT_FIQ:
19623 asm_fprintf (f, "\t%@ Fast Interrupt Service Routine.\n");
19624 break;
19625 case ARM_FT_EXCEPTION:
19626 asm_fprintf (f, "\t%@ ARM Exception Handler.\n");
19627 break;
19630 if (IS_NAKED (func_type))
19631 asm_fprintf (f, "\t%@ Naked Function: prologue and epilogue provided by programmer.\n");
19633 if (IS_VOLATILE (func_type))
19634 asm_fprintf (f, "\t%@ Volatile: function does not return.\n");
19636 if (IS_NESTED (func_type))
19637 asm_fprintf (f, "\t%@ Nested: function declared inside another function.\n");
19638 if (IS_STACKALIGN (func_type))
19639 asm_fprintf (f, "\t%@ Stack Align: May be called with mis-aligned SP.\n");
19640 if (IS_CMSE_ENTRY (func_type))
19641 asm_fprintf (f, "\t%@ Non-secure entry function: called from non-secure code.\n");
19643 asm_fprintf (f, "\t%@ args = %d, pretend = %d, frame = %wd\n",
19644 crtl->args.size,
19645 crtl->args.pretend_args_size, frame_size);
19647 asm_fprintf (f, "\t%@ frame_needed = %d, uses_anonymous_args = %d\n",
19648 frame_pointer_needed,
19649 cfun->machine->uses_anonymous_args);
19651 if (cfun->machine->lr_save_eliminated)
19652 asm_fprintf (f, "\t%@ link register save eliminated.\n");
19654 if (crtl->calls_eh_return)
19655 asm_fprintf (f, "\t@ Calls __builtin_eh_return.\n");
19659 static void
19660 arm_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
19661 HOST_WIDE_INT frame_size ATTRIBUTE_UNUSED)
19663 arm_stack_offsets *offsets;
19665 if (TARGET_THUMB1)
19667 int regno;
19669 /* Emit any call-via-reg trampolines that are needed for v4t support
19670 of call_reg and call_value_reg type insns. */
19671 for (regno = 0; regno < LR_REGNUM; regno++)
19673 rtx label = cfun->machine->call_via[regno];
19675 if (label != NULL)
19677 switch_to_section (function_section (current_function_decl));
19678 targetm.asm_out.internal_label (asm_out_file, "L",
19679 CODE_LABEL_NUMBER (label));
19680 asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
19684 /* ??? Probably not safe to set this here, since it assumes that a
19685 function will be emitted as assembly immediately after we generate
19686 RTL for it. This does not happen for inline functions. */
19687 cfun->machine->return_used_this_function = 0;
19689 else /* TARGET_32BIT */
19691 /* We need to take into account any stack-frame rounding. */
19692 offsets = arm_get_frame_offsets ();
19694 gcc_assert (!use_return_insn (FALSE, NULL)
19695 || (cfun->machine->return_used_this_function != 0)
19696 || offsets->saved_regs == offsets->outgoing_args
19697 || frame_pointer_needed);
19701 /* Generate and emit a sequence of insns equivalent to PUSH, but using
19702 STR and STRD. If an even number of registers are being pushed, an
19703 STRD pattern is created for each register pair. If an
19704 odd number of registers are pushed, emit an initial STR followed by
19705 as many STRD instructions as are needed. This works best when the
19706 stack is initially 64-bit aligned (the normal case), since it
19707 ensures that each STRD is also 64-bit aligned. */
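/* For example (illustrative), pushing the five registers {r4, r5, r6, r7,
   lr} emits one str with writeback for r4 followed by strd r5, r6 and
   strd r7, lr, so each strd lands on a doubleword-aligned slot.  */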
19708 static void
19709 thumb2_emit_strd_push (unsigned long saved_regs_mask)
19711 int num_regs = 0;
19712 int i;
19713 int regno;
19714 rtx par = NULL_RTX;
19715 rtx dwarf = NULL_RTX;
19716 rtx tmp;
19717 bool first = true;
19719 num_regs = bit_count (saved_regs_mask);
19721 /* Must be at least one register to save, and can't save SP or PC. */
19722 gcc_assert (num_regs > 0 && num_regs <= 14);
19723 gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
19724 gcc_assert (!(saved_regs_mask & (1 << PC_REGNUM)));
19726 /* Create sequence for DWARF info. All the frame-related data for
19727 debugging is held in this wrapper. */
19728 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_regs + 1));
19730 /* Describe the stack adjustment. */
19731 tmp = gen_rtx_SET (stack_pointer_rtx,
19732 plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
19733 RTX_FRAME_RELATED_P (tmp) = 1;
19734 XVECEXP (dwarf, 0, 0) = tmp;
19736 /* Find the first register. */
19737 for (regno = 0; (saved_regs_mask & (1 << regno)) == 0; regno++)
19740 i = 0;
19742 /* If there's an odd number of registers to push, start off by
19743 pushing a single register. This ensures that subsequent strd
19744 operations are dword aligned (assuming that SP was originally
19745 64-bit aligned). */
19746 if ((num_regs & 1) != 0)
19748 rtx reg, mem, insn;
19750 reg = gen_rtx_REG (SImode, regno);
19751 if (num_regs == 1)
19752 mem = gen_frame_mem (Pmode, gen_rtx_PRE_DEC (Pmode,
19753 stack_pointer_rtx));
19754 else
19755 mem = gen_frame_mem (Pmode,
19756 gen_rtx_PRE_MODIFY
19757 (Pmode, stack_pointer_rtx,
19758 plus_constant (Pmode, stack_pointer_rtx,
19759 -4 * num_regs)));
19761 tmp = gen_rtx_SET (mem, reg);
19762 RTX_FRAME_RELATED_P (tmp) = 1;
19763 insn = emit_insn (tmp);
19764 RTX_FRAME_RELATED_P (insn) = 1;
19765 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
19766 tmp = gen_rtx_SET (gen_frame_mem (Pmode, stack_pointer_rtx), reg);
19767 RTX_FRAME_RELATED_P (tmp) = 1;
19768 i++;
19769 regno++;
19770 XVECEXP (dwarf, 0, i) = tmp;
19771 first = false;
19774 while (i < num_regs)
19775 if (saved_regs_mask & (1 << regno))
19777 rtx reg1, reg2, mem1, mem2;
19778 rtx tmp0, tmp1, tmp2;
19779 int regno2;
19781 /* Find the register to pair with this one. */
19782 for (regno2 = regno + 1; (saved_regs_mask & (1 << regno2)) == 0;
19783 regno2++)
19786 reg1 = gen_rtx_REG (SImode, regno);
19787 reg2 = gen_rtx_REG (SImode, regno2);
19789 if (first)
19791 rtx insn;
19793 first = false;
19794 mem1 = gen_frame_mem (Pmode, plus_constant (Pmode,
19795 stack_pointer_rtx,
19796 -4 * num_regs));
19797 mem2 = gen_frame_mem (Pmode, plus_constant (Pmode,
19798 stack_pointer_rtx,
19799 -4 * (num_regs - 1)));
19800 tmp0 = gen_rtx_SET (stack_pointer_rtx,
19801 plus_constant (Pmode, stack_pointer_rtx,
19802 -4 * (num_regs)));
19803 tmp1 = gen_rtx_SET (mem1, reg1);
19804 tmp2 = gen_rtx_SET (mem2, reg2);
19805 RTX_FRAME_RELATED_P (tmp0) = 1;
19806 RTX_FRAME_RELATED_P (tmp1) = 1;
19807 RTX_FRAME_RELATED_P (tmp2) = 1;
19808 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (3));
19809 XVECEXP (par, 0, 0) = tmp0;
19810 XVECEXP (par, 0, 1) = tmp1;
19811 XVECEXP (par, 0, 2) = tmp2;
19812 insn = emit_insn (par);
19813 RTX_FRAME_RELATED_P (insn) = 1;
19814 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
19816 else
19818 mem1 = gen_frame_mem (Pmode, plus_constant (Pmode,
19819 stack_pointer_rtx,
19820 4 * i));
19821 mem2 = gen_frame_mem (Pmode, plus_constant (Pmode,
19822 stack_pointer_rtx,
19823 4 * (i + 1)));
19824 tmp1 = gen_rtx_SET (mem1, reg1);
19825 tmp2 = gen_rtx_SET (mem2, reg2);
19826 RTX_FRAME_RELATED_P (tmp1) = 1;
19827 RTX_FRAME_RELATED_P (tmp2) = 1;
19828 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
19829 XVECEXP (par, 0, 0) = tmp1;
19830 XVECEXP (par, 0, 1) = tmp2;
19831 emit_insn (par);
19834 /* Create unwind information. This is an approximation. */
19835 tmp1 = gen_rtx_SET (gen_frame_mem (Pmode,
19836 plus_constant (Pmode,
19837 stack_pointer_rtx,
19838 4 * i)),
19839 reg1);
19840 tmp2 = gen_rtx_SET (gen_frame_mem (Pmode,
19841 plus_constant (Pmode,
19842 stack_pointer_rtx,
19843 4 * (i + 1))),
19844 reg2);
19846 RTX_FRAME_RELATED_P (tmp1) = 1;
19847 RTX_FRAME_RELATED_P (tmp2) = 1;
19848 XVECEXP (dwarf, 0, i + 1) = tmp1;
19849 XVECEXP (dwarf, 0, i + 2) = tmp2;
19850 i += 2;
19851 regno = regno2 + 1;
19853 else
19854 regno++;
19856 return;
19859 /* STRD in ARM mode requires consecutive registers. This function emits STRD
19860 whenever possible, otherwise it emits single-word stores. The first store
19861 also allocates stack space for all saved registers, using writeback with
19862 post-addressing mode. All other stores use offset addressing. If no STRD
19863 can be emitted, this function emits a sequence of single-word stores,
19864 and not an STM as before, because single-word stores provide more freedom
19865 in scheduling and can be turned into an STM by peephole optimizations. */
19866 static void
19867 arm_emit_strd_push (unsigned long saved_regs_mask)
19869 int num_regs = 0;
19870 int i, j, dwarf_index = 0;
19871 int offset = 0;
19872 rtx dwarf = NULL_RTX;
19873 rtx insn = NULL_RTX;
19874 rtx tmp, mem;
19876 /* TODO: More efficient code can be emitted by changing the
19877 layout, e.g., first push all pairs that can use STRD to keep the
19878 stack aligned, and then push all other registers. */
19879 for (i = 0; i <= LAST_ARM_REGNUM; i++)
19880 if (saved_regs_mask & (1 << i))
19881 num_regs++;
19883 gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
19884 gcc_assert (!(saved_regs_mask & (1 << PC_REGNUM)));
19885 gcc_assert (num_regs > 0);
19887 /* Create sequence for DWARF info. */
19888 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_regs + 1));
19890 /* For dwarf info, we generate explicit stack update. */
19891 tmp = gen_rtx_SET (stack_pointer_rtx,
19892 plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
19893 RTX_FRAME_RELATED_P (tmp) = 1;
19894 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
19896 /* Save registers. */
19897 offset = - 4 * num_regs;
19898 j = 0;
19899 while (j <= LAST_ARM_REGNUM)
19900 if (saved_regs_mask & (1 << j))
19902 if ((j % 2 == 0)
19903 && (saved_regs_mask & (1 << (j + 1))))
19905 /* Current register and next register form a register pair for
19906 which STRD can be generated. */
19907 if (offset < 0)
19909 /* Allocate stack space for all saved registers. */
19910 tmp = plus_constant (Pmode, stack_pointer_rtx, offset);
19911 tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
19912 mem = gen_frame_mem (DImode, tmp);
19913 offset = 0;
19915 else if (offset > 0)
19916 mem = gen_frame_mem (DImode,
19917 plus_constant (Pmode,
19918 stack_pointer_rtx,
19919 offset));
19920 else
19921 mem = gen_frame_mem (DImode, stack_pointer_rtx);
19923 tmp = gen_rtx_SET (mem, gen_rtx_REG (DImode, j));
19924 RTX_FRAME_RELATED_P (tmp) = 1;
19925 tmp = emit_insn (tmp);
19927 /* Record the first store insn. */
19928 if (dwarf_index == 1)
19929 insn = tmp;
19931 /* Generate dwarf info. */
19932 mem = gen_frame_mem (SImode,
19933 plus_constant (Pmode,
19934 stack_pointer_rtx,
19935 offset));
19936 tmp = gen_rtx_SET (mem, gen_rtx_REG (SImode, j));
19937 RTX_FRAME_RELATED_P (tmp) = 1;
19938 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
19940 mem = gen_frame_mem (SImode,
19941 plus_constant (Pmode,
19942 stack_pointer_rtx,
19943 offset + 4));
19944 tmp = gen_rtx_SET (mem, gen_rtx_REG (SImode, j + 1));
19945 RTX_FRAME_RELATED_P (tmp) = 1;
19946 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
19948 offset += 8;
19949 j += 2;
19951 else
19953 /* Emit a single word store. */
19954 if (offset < 0)
19956 /* Allocate stack space for all saved registers. */
19957 tmp = plus_constant (Pmode, stack_pointer_rtx, offset);
19958 tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
19959 mem = gen_frame_mem (SImode, tmp);
19960 offset = 0;
19962 else if (offset > 0)
19963 mem = gen_frame_mem (SImode,
19964 plus_constant (Pmode,
19965 stack_pointer_rtx,
19966 offset));
19967 else
19968 mem = gen_frame_mem (SImode, stack_pointer_rtx);
19970 tmp = gen_rtx_SET (mem, gen_rtx_REG (SImode, j));
19971 RTX_FRAME_RELATED_P (tmp) = 1;
19972 tmp = emit_insn (tmp);
19974 /* Record the first store insn. */
19975 if (dwarf_index == 1)
19976 insn = tmp;
19978 /* Generate dwarf info. */
19979 mem = gen_frame_mem (SImode,
19980 plus_constant(Pmode,
19981 stack_pointer_rtx,
19982 offset));
19983 tmp = gen_rtx_SET (mem, gen_rtx_REG (SImode, j));
19984 RTX_FRAME_RELATED_P (tmp) = 1;
19985 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
19987 offset += 4;
19988 j += 1;
19991 else
19992 j++;
19994 /* Attach dwarf info to the first insn we generate. */
19995 gcc_assert (insn != NULL_RTX);
19996 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
19997 RTX_FRAME_RELATED_P (insn) = 1;
20000 /* Generate and emit an insn that we will recognize as a push_multi.
20001 Unfortunately, since this insn does not reflect very well the actual
20002 semantics of the operation, we need to annotate the insn for the benefit
20003 of DWARF2 frame unwind information. DWARF_REGS_MASK is a subset of
20004 MASK for registers that should be annotated for DWARF2 frame unwind
20005 information. */
20006 static rtx
20007 emit_multi_reg_push (unsigned long mask, unsigned long dwarf_regs_mask)
20009 int num_regs = 0;
20010 int num_dwarf_regs = 0;
20011 int i, j;
20012 rtx par;
20013 rtx dwarf;
20014 int dwarf_par_index;
20015 rtx tmp, reg;
20017 /* We don't record the PC in the dwarf frame information. */
20018 dwarf_regs_mask &= ~(1 << PC_REGNUM);
20020 for (i = 0; i <= LAST_ARM_REGNUM; i++)
20022 if (mask & (1 << i))
20023 num_regs++;
20024 if (dwarf_regs_mask & (1 << i))
20025 num_dwarf_regs++;
20028 gcc_assert (num_regs && num_regs <= 16);
20029 gcc_assert ((dwarf_regs_mask & ~mask) == 0);
20031 /* For the body of the insn we are going to generate an UNSPEC in
20032 parallel with several USEs. This allows the insn to be recognized
20033 by the push_multi pattern in the arm.md file.
20035 The body of the insn looks something like this:
20037 (parallel [
20038 (set (mem:BLK (pre_modify:SI (reg:SI sp)
20039 (const_int:SI <num>)))
20040 (unspec:BLK [(reg:SI r4)] UNSPEC_PUSH_MULT))
20041 (use (reg:SI XX))
20042 (use (reg:SI YY))
20046 For the frame note however, we try to be more explicit and actually
20047 show each register being stored into the stack frame, plus a (single)
20048 decrement of the stack pointer. We do it this way in order to be
20049 friendly to the stack unwinding code, which only wants to see a single
20050 stack decrement per instruction. The RTL we generate for the note looks
20051 something like this:
20053 (sequence [
20054 (set (reg:SI sp) (plus:SI (reg:SI sp) (const_int -20)))
20055 (set (mem:SI (reg:SI sp)) (reg:SI r4))
20056 (set (mem:SI (plus:SI (reg:SI sp) (const_int 4))) (reg:SI XX))
20057 (set (mem:SI (plus:SI (reg:SI sp) (const_int 8))) (reg:SI YY))
20061 FIXME:: In an ideal world the PRE_MODIFY would not exist and
20062 instead we'd have a parallel expression detailing all
20063 the stores to the various memory addresses so that debug
20064 information is more up-to-date. Remember however while writing
20065 this to take care of the constraints with the push instruction.
20067 Note also that this has to be taken care of for the VFP registers.
20069 For more see PR43399. */
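/* For instance (illustrative), pushing {r4, r5, lr} gives a PARALLEL whose
   first SET stores r4 through a pre-modify of sp by -12 wrapped in
   UNSPEC_PUSH_MULT, with USEs for r5 and lr, while the attached note
   records sp -= 12 and individual stores of r4, r5 and lr at offsets 0, 4
   and 8.  */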
20071 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs));
20072 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_dwarf_regs + 1));
20073 dwarf_par_index = 1;
20075 for (i = 0; i <= LAST_ARM_REGNUM; i++)
20077 if (mask & (1 << i))
20079 reg = gen_rtx_REG (SImode, i);
20081 XVECEXP (par, 0, 0)
20082 = gen_rtx_SET (gen_frame_mem
20083 (BLKmode,
20084 gen_rtx_PRE_MODIFY (Pmode,
20085 stack_pointer_rtx,
20086 plus_constant
20087 (Pmode, stack_pointer_rtx,
20088 -4 * num_regs))
20090 gen_rtx_UNSPEC (BLKmode,
20091 gen_rtvec (1, reg),
20092 UNSPEC_PUSH_MULT));
20094 if (dwarf_regs_mask & (1 << i))
20096 tmp = gen_rtx_SET (gen_frame_mem (SImode, stack_pointer_rtx),
20097 reg);
20098 RTX_FRAME_RELATED_P (tmp) = 1;
20099 XVECEXP (dwarf, 0, dwarf_par_index++) = tmp;
20102 break;
20106 for (j = 1, i++; j < num_regs; i++)
20108 if (mask & (1 << i))
20110 reg = gen_rtx_REG (SImode, i);
20112 XVECEXP (par, 0, j) = gen_rtx_USE (VOIDmode, reg);
20114 if (dwarf_regs_mask & (1 << i))
20117 = gen_rtx_SET (gen_frame_mem
20118 (SImode,
20119 plus_constant (Pmode, stack_pointer_rtx,
20120 4 * j)),
20121 reg);
20122 RTX_FRAME_RELATED_P (tmp) = 1;
20123 XVECEXP (dwarf, 0, dwarf_par_index++) = tmp;
20126 j++;
20130 par = emit_insn (par);
20132 tmp = gen_rtx_SET (stack_pointer_rtx,
20133 plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
20134 RTX_FRAME_RELATED_P (tmp) = 1;
20135 XVECEXP (dwarf, 0, 0) = tmp;
20137 add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
20139 return par;
20142 /* Add a REG_CFA_ADJUST_CFA REG note to INSN.
20143 SIZE is the offset to be adjusted.
20144 DEST and SRC might be stack_pointer_rtx or hard_frame_pointer_rtx. */
20145 static void
20146 arm_add_cfa_adjust_cfa_note (rtx insn, int size, rtx dest, rtx src)
20148 rtx dwarf;
20150 RTX_FRAME_RELATED_P (insn) = 1;
20151 dwarf = gen_rtx_SET (dest, plus_constant (Pmode, src, size));
20152 add_reg_note (insn, REG_CFA_ADJUST_CFA, dwarf);
20155 /* Generate and emit an insn pattern that we will recognize as a pop_multi.
20156 SAVED_REGS_MASK shows which registers need to be restored.
20158 Unfortunately, since this insn does not reflect very well the actual
20159 semantics of the operation, we need to annotate the insn for the benefit
20160 of DWARF2 frame unwind information. */
20161 static void
20162 arm_emit_multi_reg_pop (unsigned long saved_regs_mask)
20164 int num_regs = 0;
20165 int i, j;
20166 rtx par;
20167 rtx dwarf = NULL_RTX;
20168 rtx tmp, reg;
20169 bool return_in_pc = saved_regs_mask & (1 << PC_REGNUM);
20170 int offset_adj;
20171 int emit_update;
20173 offset_adj = return_in_pc ? 1 : 0;
20174 for (i = 0; i <= LAST_ARM_REGNUM; i++)
20175 if (saved_regs_mask & (1 << i))
20176 num_regs++;
20178 gcc_assert (num_regs && num_regs <= 16);
20180 /* If SP is in the reglist, then we don't emit an SP update insn. */
20181 emit_update = (saved_regs_mask & (1 << SP_REGNUM)) ? 0 : 1;
20183 /* The parallel needs to hold num_regs SETs
20184 and one SET for the stack update. */
20185 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs + emit_update + offset_adj));
20187 if (return_in_pc)
20188 XVECEXP (par, 0, 0) = ret_rtx;
20190 if (emit_update)
20192 /* Increment the stack pointer, based on there being
20193 num_regs 4-byte registers to restore. */
20194 tmp = gen_rtx_SET (stack_pointer_rtx,
20195 plus_constant (Pmode,
20196 stack_pointer_rtx,
20197 4 * num_regs));
20198 RTX_FRAME_RELATED_P (tmp) = 1;
20199 XVECEXP (par, 0, offset_adj) = tmp;
20202 /* Now restore every reg, which may include PC. */
20203 for (j = 0, i = 0; j < num_regs; i++)
20204 if (saved_regs_mask & (1 << i))
20206 reg = gen_rtx_REG (SImode, i);
20207 if ((num_regs == 1) && emit_update && !return_in_pc)
20209 /* Emit single load with writeback. */
20210 tmp = gen_frame_mem (SImode,
20211 gen_rtx_POST_INC (Pmode,
20212 stack_pointer_rtx));
20213 tmp = emit_insn (gen_rtx_SET (reg, tmp));
20214 REG_NOTES (tmp) = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20215 return;
20218 tmp = gen_rtx_SET (reg,
20219 gen_frame_mem
20220 (SImode,
20221 plus_constant (Pmode, stack_pointer_rtx, 4 * j)));
20222 RTX_FRAME_RELATED_P (tmp) = 1;
20223 XVECEXP (par, 0, j + emit_update + offset_adj) = tmp;
20225 /* We need to maintain a sequence for DWARF info too. As dwarf info
20226 should not have PC, skip PC. */
20227 if (i != PC_REGNUM)
20228 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20230 j++;
20233 if (return_in_pc)
20234 par = emit_jump_insn (par);
20235 else
20236 par = emit_insn (par);
20238 REG_NOTES (par) = dwarf;
20239 if (!return_in_pc)
20240 arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD * num_regs,
20241 stack_pointer_rtx, stack_pointer_rtx);
20244 /* Generate and emit an insn pattern that we will recognize as a pop_multi
20245 of NUM_REGS consecutive VFP regs, starting at FIRST_REG.
20247 Unfortunately, since this insn does not reflect very well the actual
20248 semantics of the operation, we need to annotate the insn for the benefit
20249 of DWARF2 frame unwind information. */
20250 static void
20251 arm_emit_vfp_multi_reg_pop (int first_reg, int num_regs, rtx base_reg)
20253 int i, j;
20254 rtx par;
20255 rtx dwarf = NULL_RTX;
20256 rtx tmp, reg;
20258 gcc_assert (num_regs && num_regs <= 32);
20260 /* Workaround ARM10 VFPr1 bug. */
20261 if (num_regs == 2 && !arm_arch6)
20263 if (first_reg == 15)
20264 first_reg--;
20266 num_regs++;
20269 /* We can emit at most 16 D-registers in a single pop_multi instruction, and
20270 there could be up to 32 D-registers to restore.
20271 If there are more than 16 D-registers, make two recursive calls,
20272 each of which emits one pop_multi instruction. */
20273 if (num_regs > 16)
20275 arm_emit_vfp_multi_reg_pop (first_reg, 16, base_reg);
20276 arm_emit_vfp_multi_reg_pop (first_reg + 16, num_regs - 16, base_reg);
20277 return;
20280 /* The parallel needs to hold num_regs SETs
20281 and one SET for the stack update. */
20282 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs + 1));
20284 /* Increment the stack pointer, based on there being
20285 num_regs 8-byte registers to restore. */
20286 tmp = gen_rtx_SET (base_reg, plus_constant (Pmode, base_reg, 8 * num_regs));
20287 RTX_FRAME_RELATED_P (tmp) = 1;
20288 XVECEXP (par, 0, 0) = tmp;
20290 /* Now show every reg that will be restored, using a SET for each. */
20291 for (j = 0, i=first_reg; j < num_regs; i += 2)
20293 reg = gen_rtx_REG (DFmode, i);
20295 tmp = gen_rtx_SET (reg,
20296 gen_frame_mem
20297 (DFmode,
20298 plus_constant (Pmode, base_reg, 8 * j)));
20299 RTX_FRAME_RELATED_P (tmp) = 1;
20300 XVECEXP (par, 0, j + 1) = tmp;
20302 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20304 j++;
20307 par = emit_insn (par);
20308 REG_NOTES (par) = dwarf;
20310 /* Make sure the CFA doesn't leave with IP_REGNUM, to allow unwinding from FP. */
20311 if (REGNO (base_reg) == IP_REGNUM)
20313 RTX_FRAME_RELATED_P (par) = 1;
20314 add_reg_note (par, REG_CFA_DEF_CFA, hard_frame_pointer_rtx);
20316 else
20317 arm_add_cfa_adjust_cfa_note (par, 2 * UNITS_PER_WORD * num_regs,
20318 base_reg, base_reg);
20321 /* Generate and emit a pattern that will be recognized as an LDRD pattern. If an even
20322 number of registers is being popped, multiple LDRD patterns are created for
20323 all register pairs. If an odd number of registers is popped, the last register is
20324 loaded using an LDR pattern. */
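/* For example (illustrative), popping {r4, r5, pc}: r4 and r5 are restored
   with a single ldrd, the stack pointer is then adjusted, and pc is loaded
   last with an ldr that also performs the return.  */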
20325 static void
20326 thumb2_emit_ldrd_pop (unsigned long saved_regs_mask)
20328 int num_regs = 0;
20329 int i, j;
20330 rtx par = NULL_RTX;
20331 rtx dwarf = NULL_RTX;
20332 rtx tmp, reg, tmp1;
20333 bool return_in_pc = saved_regs_mask & (1 << PC_REGNUM);
20335 for (i = 0; i <= LAST_ARM_REGNUM; i++)
20336 if (saved_regs_mask & (1 << i))
20337 num_regs++;
20339 gcc_assert (num_regs && num_regs <= 16);
20341 /* We cannot generate ldrd for PC. Hence, reduce the count if PC is
20342 to be popped. So, if num_regs is even, now it will become odd,
20343 and we can generate pop with PC. If num_regs is odd, it will be
20344 even now, and ldr with return can be generated for PC. */
20345 if (return_in_pc)
20346 num_regs--;
20348 gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
20350 /* Var j iterates over all the registers to gather all the registers in
20351 saved_regs_mask. Var i gives the index of saved registers in the stack frame.
20352 A PARALLEL RTX of a register pair is created here, so that the pattern for
20353 LDRD can be matched. As PC is always the last register to be popped, and
20354 we have already decremented num_regs if PC is in the mask, we don't have
20355 to worry about PC in this loop. */
20356 for (i = 0, j = 0; i < (num_regs - (num_regs % 2)); j++)
20357 if (saved_regs_mask & (1 << j))
20359 /* Create RTX for memory load. */
20360 reg = gen_rtx_REG (SImode, j);
20361 tmp = gen_rtx_SET (reg,
20362 gen_frame_mem (SImode,
20363 plus_constant (Pmode,
20364 stack_pointer_rtx, 4 * i)));
20365 RTX_FRAME_RELATED_P (tmp) = 1;
20367 if (i % 2 == 0)
20369 /* When saved-register index (i) is even, the RTX to be emitted is
20370 yet to be created. Hence create it first. The LDRD pattern we
20371 are generating is :
20372 [ (SET (reg_t0) (MEM (PLUS (SP) (NUM))))
20373 (SET (reg_t1) (MEM (PLUS (SP) (NUM + 4)))) ]
20374 where target registers need not be consecutive. */
20375 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
20376 dwarf = NULL_RTX;
20379 /* The ith register is added to the PARALLEL RTX. If i is even, reg_i is
20380 added as the 0th element; if i is odd, it is added as the 1st element
20381 of the LDRD pattern shown above. */
20382 XVECEXP (par, 0, (i % 2)) = tmp;
20383 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20385 if ((i % 2) == 1)
20387 /* When saved-register index (i) is odd, RTXs for both the registers
20388 to be loaded have been generated in the LDRD pattern given above, and the
20389 pattern can be emitted now. */
20390 par = emit_insn (par);
20391 REG_NOTES (par) = dwarf;
20392 RTX_FRAME_RELATED_P (par) = 1;
20395 i++;
20398 /* If the number of registers pushed is odd AND return_in_pc is false, OR the
20399 number of registers is even AND return_in_pc is true, the last register is
20400 popped using LDR. It can be PC as well. Hence, adjust the stack first and
20401 then LDR with post increment. */
20403 /* Increment the stack pointer, based on there being
20404 num_regs 4-byte registers to restore. */
20405 tmp = gen_rtx_SET (stack_pointer_rtx,
20406 plus_constant (Pmode, stack_pointer_rtx, 4 * i));
20407 RTX_FRAME_RELATED_P (tmp) = 1;
20408 tmp = emit_insn (tmp);
20409 if (!return_in_pc)
20411 arm_add_cfa_adjust_cfa_note (tmp, UNITS_PER_WORD * i,
20412 stack_pointer_rtx, stack_pointer_rtx);
20415 dwarf = NULL_RTX;
20417 if (((num_regs % 2) == 1 && !return_in_pc)
20418 || ((num_regs % 2) == 0 && return_in_pc))
20420 /* Scan for the single register to be popped. Skip until the saved
20421 register is found. */
20422 for (; (saved_regs_mask & (1 << j)) == 0; j++);
20424 /* Gen LDR with post increment here. */
20425 tmp1 = gen_rtx_MEM (SImode,
20426 gen_rtx_POST_INC (SImode,
20427 stack_pointer_rtx));
20428 set_mem_alias_set (tmp1, get_frame_alias_set ());
20430 reg = gen_rtx_REG (SImode, j);
20431 tmp = gen_rtx_SET (reg, tmp1);
20432 RTX_FRAME_RELATED_P (tmp) = 1;
20433 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20435 if (return_in_pc)
20437 /* If return_in_pc, j must be PC_REGNUM. */
20438 gcc_assert (j == PC_REGNUM);
20439 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
20440 XVECEXP (par, 0, 0) = ret_rtx;
20441 XVECEXP (par, 0, 1) = tmp;
20442 par = emit_jump_insn (par);
20444 else
20446 par = emit_insn (tmp);
20447 REG_NOTES (par) = dwarf;
20448 arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD,
20449 stack_pointer_rtx, stack_pointer_rtx);
20453 else if ((num_regs % 2) == 1 && return_in_pc)
20455 /* There are 2 registers to be popped. So, generate the pattern
20456 pop_multiple_with_stack_update_and_return to pop in PC. */
20457 arm_emit_multi_reg_pop (saved_regs_mask & (~((1 << j) - 1)));
20460 return;
20463 /* LDRD in ARM mode needs consecutive registers as operands. This function
20464 emits LDRD whenever possible, otherwise it emits single-word loads. It uses
20465 offset addressing and then generates one separate stack update. This provides
20466 more scheduling freedom, compared to writeback on every load. However,
20467 if the function returns using load into PC directly
20468 (i.e., if PC is in SAVED_REGS_MASK), the stack needs to be updated
20469 before the last load. TODO: Add a peephole optimization to recognize
20470 the new epilogue sequence as an LDM instruction whenever possible. TODO: Add
20471 peephole optimization to merge the load at stack-offset zero
20472 with the stack update instruction using load with writeback
20473 in post-index addressing mode. */
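/* For example (illustrative), restoring {r4, r5, r6, r7} emits two ldrd
   instructions at stack offsets 0 and 8 followed by a single add of 16 to
   the stack pointer.  */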
20474 static void
20475 arm_emit_ldrd_pop (unsigned long saved_regs_mask)
20477 int j = 0;
20478 int offset = 0;
20479 rtx par = NULL_RTX;
20480 rtx dwarf = NULL_RTX;
20481 rtx tmp, mem;
20483 /* Restore saved registers. */
20484 gcc_assert (!((saved_regs_mask & (1 << SP_REGNUM))));
20485 j = 0;
20486 while (j <= LAST_ARM_REGNUM)
20487 if (saved_regs_mask & (1 << j))
20489 if ((j % 2) == 0
20490 && (saved_regs_mask & (1 << (j + 1)))
20491 && (j + 1) != PC_REGNUM)
20493 /* Current register and next register form a register pair for which
20494 LDRD can be generated. PC is always the last register popped, and
20495 we handle it separately. */
20496 if (offset > 0)
20497 mem = gen_frame_mem (DImode,
20498 plus_constant (Pmode,
20499 stack_pointer_rtx,
20500 offset));
20501 else
20502 mem = gen_frame_mem (DImode, stack_pointer_rtx);
20504 tmp = gen_rtx_SET (gen_rtx_REG (DImode, j), mem);
20505 tmp = emit_insn (tmp);
20506 RTX_FRAME_RELATED_P (tmp) = 1;
20508 /* Generate dwarf info. */
20510 dwarf = alloc_reg_note (REG_CFA_RESTORE,
20511 gen_rtx_REG (SImode, j),
20512 NULL_RTX);
20513 dwarf = alloc_reg_note (REG_CFA_RESTORE,
20514 gen_rtx_REG (SImode, j + 1),
20515 dwarf);
20517 REG_NOTES (tmp) = dwarf;
20519 offset += 8;
20520 j += 2;
20522 else if (j != PC_REGNUM)
20524 /* Emit a single word load. */
20525 if (offset > 0)
20526 mem = gen_frame_mem (SImode,
20527 plus_constant (Pmode,
20528 stack_pointer_rtx,
20529 offset));
20530 else
20531 mem = gen_frame_mem (SImode, stack_pointer_rtx);
20533 tmp = gen_rtx_SET (gen_rtx_REG (SImode, j), mem);
20534 tmp = emit_insn (tmp);
20535 RTX_FRAME_RELATED_P (tmp) = 1;
20537 /* Generate dwarf info. */
20538 REG_NOTES (tmp) = alloc_reg_note (REG_CFA_RESTORE,
20539 gen_rtx_REG (SImode, j),
20540 NULL_RTX);
20542 offset += 4;
20543 j += 1;
20545 else /* j == PC_REGNUM */
20546 j++;
20548 else
20549 j++;
20551 /* Update the stack. */
20552 if (offset > 0)
20554 tmp = gen_rtx_SET (stack_pointer_rtx,
20555 plus_constant (Pmode,
20556 stack_pointer_rtx,
20557 offset));
20558 tmp = emit_insn (tmp);
20559 arm_add_cfa_adjust_cfa_note (tmp, offset,
20560 stack_pointer_rtx, stack_pointer_rtx);
20561 offset = 0;
20564 if (saved_regs_mask & (1 << PC_REGNUM))
20566 /* Only PC is to be popped. */
20567 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
20568 XVECEXP (par, 0, 0) = ret_rtx;
20569 tmp = gen_rtx_SET (gen_rtx_REG (SImode, PC_REGNUM),
20570 gen_frame_mem (SImode,
20571 gen_rtx_POST_INC (SImode,
20572 stack_pointer_rtx)));
20573 RTX_FRAME_RELATED_P (tmp) = 1;
20574 XVECEXP (par, 0, 1) = tmp;
20575 par = emit_jump_insn (par);
20577 /* Generate dwarf info. */
20578 dwarf = alloc_reg_note (REG_CFA_RESTORE,
20579 gen_rtx_REG (SImode, PC_REGNUM),
20580 NULL_RTX);
20581 REG_NOTES (par) = dwarf;
20582 arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD,
20583 stack_pointer_rtx, stack_pointer_rtx);
20587 /* Calculate the size of the return value that is passed in registers. */
20588 static unsigned
20589 arm_size_return_regs (void)
20591 machine_mode mode;
20593 if (crtl->return_rtx != 0)
20594 mode = GET_MODE (crtl->return_rtx);
20595 else
20596 mode = DECL_MODE (DECL_RESULT (current_function_decl));
20598 return GET_MODE_SIZE (mode);
20601 /* Return true if the current function needs to save/restore LR. */
20602 static bool
20603 thumb_force_lr_save (void)
20605 return !cfun->machine->lr_save_eliminated
20606 && (!crtl->is_leaf
20607 || thumb_far_jump_used_p ()
20608 || df_regs_ever_live_p (LR_REGNUM));
20611 /* We do not know if r3 will be available, because
20612 there is an indirect tail call happening in this
20613 particular case. */
20614 static bool
20615 is_indirect_tailcall_p (rtx call)
20617 rtx pat = PATTERN (call);
20619 /* Indirect tail call. */
20620 pat = XVECEXP (pat, 0, 0);
20621 if (GET_CODE (pat) == SET)
20622 pat = SET_SRC (pat);
20624 pat = XEXP (XEXP (pat, 0), 0);
20625 return REG_P (pat);
20628 /* Return true if r3 is used by any of the tail call insns in the
20629 current function. */
20630 static bool
20631 any_sibcall_could_use_r3 (void)
20633 edge_iterator ei;
20634 edge e;
20636 if (!crtl->tail_call_emit)
20637 return false;
20638 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
20639 if (e->flags & EDGE_SIBCALL)
20641 rtx_insn *call = BB_END (e->src);
20642 if (!CALL_P (call))
20643 call = prev_nonnote_nondebug_insn (call);
20644 gcc_assert (CALL_P (call) && SIBLING_CALL_P (call));
20645 if (find_regno_fusage (call, USE, 3)
20646 || is_indirect_tailcall_p (call))
20647 return true;
20649 return false;
20653 /* Compute the distance from register FROM to register TO.
20654 These can be the arg pointer (26), the soft frame pointer (25),
20655 the stack pointer (13) or the hard frame pointer (11).
20656 In thumb mode r7 is used as the soft frame pointer, if needed.
20657 Typical stack layout looks like this:
20659 old stack pointer -> | |
20660 ----
20661 | | \
20662 | | saved arguments for
20663 | | vararg functions
20664 | | /
20666 hard FP & arg pointer -> | | \
20667 | | stack
20668 | | frame
20669 | | /
20671 | | \
20672 | | call saved
20673 | | registers
20674 soft frame pointer -> | | /
20676 | | \
20677 | | local
20678 | | variables
20679 locals base pointer -> | | /
20681 | | \
20682 | | outgoing
20683 | | arguments
20684 current stack pointer -> | | /
20687 For a given function some or all of these stack components
20688 may not be needed, giving rise to the possibility of
20689 eliminating some of the registers.
20691 The values returned by this function must reflect the behavior
20692 of arm_expand_prologue() and arm_compute_save_reg_mask().
20694 The sign of the number returned reflects the direction of stack
20695 growth, so the values are positive for all eliminations except
20696 from the soft frame pointer to the hard frame pointer.
20698 SFP may point just inside the local variables block to ensure correct
20699 alignment. */
20702 /* Calculate stack offsets. These are used to calculate register elimination
20703 offsets and in prologue/epilogue code. Also calculates which registers
20704 should be saved. */
20706 static arm_stack_offsets *
20707 arm_get_frame_offsets (void)
20709 struct arm_stack_offsets *offsets;
20710 unsigned long func_type;
20711 int saved;
20712 int core_saved;
20713 HOST_WIDE_INT frame_size;
20714 int i;
20716 offsets = &cfun->machine->stack_offsets;
20718 if (reload_completed)
20719 return offsets;
20721 /* Initially this is the size of the local variables. It will be translated
20722 into an offset once we have determined the size of preceding data. */
20723 frame_size = ROUND_UP_WORD (get_frame_size ());
20725 /* Space for variadic functions. */
20726 offsets->saved_args = crtl->args.pretend_args_size;
20728 /* In Thumb mode this is incorrect, but never used. */
20729 offsets->frame
20730 = (offsets->saved_args
20731 + arm_compute_static_chain_stack_bytes ()
20732 + (frame_pointer_needed ? 4 : 0));
20734 if (TARGET_32BIT)
20736 unsigned int regno;
20738 offsets->saved_regs_mask = arm_compute_save_reg_mask ();
20739 core_saved = bit_count (offsets->saved_regs_mask) * 4;
20740 saved = core_saved;
20742 /* We know that SP will be doubleword aligned on entry, and we must
20743 preserve that condition at any subroutine call. We also require the
20744 soft frame pointer to be doubleword aligned. */
20746 if (TARGET_REALLY_IWMMXT)
20748 /* Check for the call-saved iWMMXt registers. */
20749 for (regno = FIRST_IWMMXT_REGNUM;
20750 regno <= LAST_IWMMXT_REGNUM;
20751 regno++)
20752 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
20753 saved += 8;
20756 func_type = arm_current_func_type ();
20757 /* Space for saved VFP registers. */
20758 if (! IS_VOLATILE (func_type)
20759 && TARGET_HARD_FLOAT)
20760 saved += arm_get_vfp_saved_size ();
20762 else /* TARGET_THUMB1 */
20764 offsets->saved_regs_mask = thumb1_compute_save_reg_mask ();
20765 core_saved = bit_count (offsets->saved_regs_mask) * 4;
20766 saved = core_saved;
20767 if (TARGET_BACKTRACE)
20768 saved += 16;
20771 /* Saved registers include the stack frame. */
20772 offsets->saved_regs
20773 = offsets->saved_args + arm_compute_static_chain_stack_bytes () + saved;
20774 offsets->soft_frame = offsets->saved_regs + CALLER_INTERWORKING_SLOT_SIZE;
20776 /* A leaf function does not need any stack alignment if it has nothing
20777 on the stack. */
20778 if (crtl->is_leaf && frame_size == 0
20779 /* However if it calls alloca(), we have a dynamically allocated
20780 block of BIGGEST_ALIGNMENT on stack, so still do stack alignment. */
20781 && ! cfun->calls_alloca)
20783 offsets->outgoing_args = offsets->soft_frame;
20784 offsets->locals_base = offsets->soft_frame;
20785 return offsets;
20788 /* Ensure SFP has the correct alignment. */
20789 if (ARM_DOUBLEWORD_ALIGN
20790 && (offsets->soft_frame & 7))
20792 offsets->soft_frame += 4;
20793 /* Try to align stack by pushing an extra reg. Don't bother doing this
20794 when there is a stack frame as the alignment will be rolled into
20795 the normal stack adjustment. */
20796 if (frame_size + crtl->outgoing_args_size == 0)
20798 int reg = -1;
20800 /* Register r3 is caller-saved. Normally it does not need to be
20801 saved on entry by the prologue. However if we choose to save
20802 it for padding then we may confuse the compiler into thinking
20803 a prologue sequence is required when in fact it is not. This
20804 will occur when shrink-wrapping if r3 is used as a scratch
20805 register and there are no other callee-saved writes.
20807 This situation can be avoided when other callee-saved registers
20808 are available and r3 is not mandatory if we choose a callee-saved
20809 register for padding. */
20810 bool prefer_callee_reg_p = false;
20812 /* If it is safe to use r3, then do so. This sometimes
20813 generates better code on Thumb-2 by avoiding the need to
20814 use 32-bit push/pop instructions. */
20815 if (! any_sibcall_could_use_r3 ()
20816 && arm_size_return_regs () <= 12
20817 && (offsets->saved_regs_mask & (1 << 3)) == 0
20818 && (TARGET_THUMB2
20819 || !(TARGET_LDRD && current_tune->prefer_ldrd_strd)))
20821 reg = 3;
20822 if (!TARGET_THUMB2)
20823 prefer_callee_reg_p = true;
20825 if (reg == -1
20826 || prefer_callee_reg_p)
20828 for (i = 4; i <= (TARGET_THUMB1 ? LAST_LO_REGNUM : 11); i++)
20830 /* Avoid fixed registers; they may be changed at
20831 arbitrary times so it's unsafe to restore them
20832 during the epilogue. */
20833 if (!fixed_regs[i]
20834 && (offsets->saved_regs_mask & (1 << i)) == 0)
20836 reg = i;
20837 break;
20842 if (reg != -1)
20844 offsets->saved_regs += 4;
20845 offsets->saved_regs_mask |= (1 << reg);
20850 offsets->locals_base = offsets->soft_frame + frame_size;
20851 offsets->outgoing_args = (offsets->locals_base
20852 + crtl->outgoing_args_size);
20854 if (ARM_DOUBLEWORD_ALIGN)
20856 /* Ensure SP remains doubleword aligned. */
20857 if (offsets->outgoing_args & 7)
20858 offsets->outgoing_args += 4;
20859 gcc_assert (!(offsets->outgoing_args & 7));
20862 return offsets;
20866 /* Calculate the relative offsets for the different stack pointers. Positive
20867 offsets are in the direction of stack growth. */
20869 HOST_WIDE_INT
20870 arm_compute_initial_elimination_offset (unsigned int from, unsigned int to)
20872 arm_stack_offsets *offsets;
20874 offsets = arm_get_frame_offsets ();
20876 /* OK, now we have enough information to compute the distances.
20877 There must be an entry in these switch tables for each pair
20878 of registers in ELIMINABLE_REGS, even if some of the entries
20879 seem to be redundant or useless. */
20880 switch (from)
20882 case ARG_POINTER_REGNUM:
20883 switch (to)
20885 case THUMB_HARD_FRAME_POINTER_REGNUM:
20886 return 0;
20888 case FRAME_POINTER_REGNUM:
20889 /* This is the reverse of the soft frame pointer
20890 to hard frame pointer elimination below. */
20891 return offsets->soft_frame - offsets->saved_args;
20893 case ARM_HARD_FRAME_POINTER_REGNUM:
20894 /* This is only non-zero in the case where the static chain register
20895 is stored above the frame. */
20896 return offsets->frame - offsets->saved_args - 4;
20898 case STACK_POINTER_REGNUM:
20899 /* If nothing has been pushed on the stack at all
20900 then this will return -4. This *is* correct! */
20901 return offsets->outgoing_args - (offsets->saved_args + 4);
20903 default:
20904 gcc_unreachable ();
20906 gcc_unreachable ();
20908 case FRAME_POINTER_REGNUM:
20909 switch (to)
20911 case THUMB_HARD_FRAME_POINTER_REGNUM:
20912 return 0;
20914 case ARM_HARD_FRAME_POINTER_REGNUM:
20915 /* The hard frame pointer points to the top entry in the
20916 stack frame. The soft frame pointer to the bottom entry
20917 in the stack frame. If there is no stack frame at all,
20918 then they are identical. */
20920 return offsets->frame - offsets->soft_frame;
20922 case STACK_POINTER_REGNUM:
20923 return offsets->outgoing_args - offsets->soft_frame;
20925 default:
20926 gcc_unreachable ();
20928 gcc_unreachable ();
20930 default:
20931 /* You cannot eliminate from the stack pointer.
20932 In theory you could eliminate from the hard frame
20933 pointer to the stack pointer, but this will never
20934 happen, since if a stack frame is not needed the
20935 hard frame pointer will never be used. */
20936 gcc_unreachable ();
20940 /* Given FROM and TO register numbers, say whether this elimination is
20941 allowed. Frame pointer elimination is automatically handled.
20943 All eliminations are permissible. Note that ARG_POINTER_REGNUM and
20944 HARD_FRAME_POINTER_REGNUM are in fact the same thing. If we need a frame
20945 pointer, we must eliminate FRAME_POINTER_REGNUM into
20946 HARD_FRAME_POINTER_REGNUM and not into STACK_POINTER_REGNUM or
20947 ARG_POINTER_REGNUM. */
20949 bool
20950 arm_can_eliminate (const int from, const int to)
20952 return ((to == FRAME_POINTER_REGNUM && from == ARG_POINTER_REGNUM) ? false :
20953 (to == STACK_POINTER_REGNUM && frame_pointer_needed) ? false :
20954 (to == ARM_HARD_FRAME_POINTER_REGNUM && TARGET_THUMB) ? false :
20955 (to == THUMB_HARD_FRAME_POINTER_REGNUM && TARGET_ARM) ? false :
20956 true);
20959 /* Emit RTL to save coprocessor registers on function entry. Returns the
20960 number of bytes pushed. */
20962 static int
20963 arm_save_coproc_regs(void)
20965 int saved_size = 0;
20966 unsigned reg;
20967 unsigned start_reg;
20968 rtx insn;
20970 for (reg = LAST_IWMMXT_REGNUM; reg >= FIRST_IWMMXT_REGNUM; reg--)
20971 if (df_regs_ever_live_p (reg) && ! call_used_regs[reg])
20973 insn = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
20974 insn = gen_rtx_MEM (V2SImode, insn);
20975 insn = emit_set_insn (insn, gen_rtx_REG (V2SImode, reg));
20976 RTX_FRAME_RELATED_P (insn) = 1;
20977 saved_size += 8;
20980 if (TARGET_HARD_FLOAT)
20982 start_reg = FIRST_VFP_REGNUM;
20984 for (reg = FIRST_VFP_REGNUM; reg < LAST_VFP_REGNUM; reg += 2)
20986 if ((!df_regs_ever_live_p (reg) || call_used_regs[reg])
20987 && (!df_regs_ever_live_p (reg + 1) || call_used_regs[reg + 1]))
20989 if (start_reg != reg)
20990 saved_size += vfp_emit_fstmd (start_reg,
20991 (reg - start_reg) / 2);
20992 start_reg = reg + 2;
20995 if (start_reg != reg)
20996 saved_size += vfp_emit_fstmd (start_reg,
20997 (reg - start_reg) / 2);
20999 return saved_size;
21003 /* Set the Thumb frame pointer from the stack pointer. */
21005 static void
21006 thumb_set_frame_pointer (arm_stack_offsets *offsets)
21008 HOST_WIDE_INT amount;
21009 rtx insn, dwarf;
21011 amount = offsets->outgoing_args - offsets->locals_base;
21012 if (amount < 1024)
21013 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
21014 stack_pointer_rtx, GEN_INT (amount)));
21015 else
21017 emit_insn (gen_movsi (hard_frame_pointer_rtx, GEN_INT (amount)));
21018 /* Thumb-2 RTL patterns expect sp as the first input. Thumb-1
21019 expects the first two operands to be the same. */
21020 if (TARGET_THUMB2)
21022 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
21023 stack_pointer_rtx,
21024 hard_frame_pointer_rtx));
21026 else
21028 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
21029 hard_frame_pointer_rtx,
21030 stack_pointer_rtx));
21032 dwarf = gen_rtx_SET (hard_frame_pointer_rtx,
21033 plus_constant (Pmode, stack_pointer_rtx, amount));
21034 RTX_FRAME_RELATED_P (dwarf) = 1;
21035 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
21038 RTX_FRAME_RELATED_P (insn) = 1;
21041 struct scratch_reg {
21042 rtx reg;
21043 bool saved;
21046 /* Return a short-lived scratch register for use as a 2nd scratch register on
21047 function entry after the registers are saved in the prologue. This register
21048 must be released by means of release_scratch_register_on_entry. IP is not
21049 considered since it is always used as the 1st scratch register if available.
21051 REGNO1 is the index number of the 1st scratch register and LIVE_REGS is the
21052 mask of live registers. */
21054 static void
21055 get_scratch_register_on_entry (struct scratch_reg *sr, unsigned int regno1,
21056 unsigned long live_regs)
21058 int regno = -1;
21060 sr->saved = false;
21062 if (regno1 != LR_REGNUM && (live_regs & (1 << LR_REGNUM)) != 0)
21063 regno = LR_REGNUM;
21064 else
21066 unsigned int i;
21068 for (i = 4; i < 11; i++)
21069 if (regno1 != i && (live_regs & (1 << i)) != 0)
21071 regno = i;
21072 break;
21075 if (regno < 0)
21077 /* If IP is used as the 1st scratch register for a nested function,
21078 then either r3 wasn't available or is used to preserve IP. */
21079 if (regno1 == IP_REGNUM && IS_NESTED (arm_current_func_type ()))
21080 regno1 = 3;
21081 regno = (regno1 == 3 ? 2 : 3);
21082 sr->saved
21083 = REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun)),
21084 regno);
21088 sr->reg = gen_rtx_REG (SImode, regno);
21089 if (sr->saved)
21091 rtx addr = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
21092 rtx insn = emit_set_insn (gen_frame_mem (SImode, addr), sr->reg);
21093 rtx x = gen_rtx_SET (stack_pointer_rtx,
21094 plus_constant (Pmode, stack_pointer_rtx, -4));
21095 RTX_FRAME_RELATED_P (insn) = 1;
21096 add_reg_note (insn, REG_FRAME_RELATED_EXPR, x);
21100 /* Release a scratch register obtained from the preceding function. */
21102 static void
21103 release_scratch_register_on_entry (struct scratch_reg *sr)
21105 if (sr->saved)
21107 rtx addr = gen_rtx_POST_INC (Pmode, stack_pointer_rtx);
21108 rtx insn = emit_set_insn (sr->reg, gen_frame_mem (SImode, addr));
21109 rtx x = gen_rtx_SET (stack_pointer_rtx,
21110 plus_constant (Pmode, stack_pointer_rtx, 4));
21111 RTX_FRAME_RELATED_P (insn) = 1;
21112 add_reg_note (insn, REG_FRAME_RELATED_EXPR, x);
21116 #define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)
21118 #if PROBE_INTERVAL > 4096
21119 #error Cannot use indexed addressing mode for stack probing
21120 #endif
21122 /* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
21123 inclusive. These are offsets from the current stack pointer. REGNO1
21124 is the index number of the 1st scratch register and LIVE_REGS is the
21125 mask of live registers. */
21127 static void
21128 arm_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size,
21129 unsigned int regno1, unsigned long live_regs)
21131 rtx reg1 = gen_rtx_REG (Pmode, regno1);
21133 /* See if we have a constant small number of probes to generate. If so,
21134 that's the easy case. */
21135 if (size <= PROBE_INTERVAL)
21137 emit_move_insn (reg1, GEN_INT (first + PROBE_INTERVAL));
21138 emit_set_insn (reg1, gen_rtx_MINUS (Pmode, stack_pointer_rtx, reg1));
21139 emit_stack_probe (plus_constant (Pmode, reg1, PROBE_INTERVAL - size));
21142 /* The run-time loop is made up of 10 insns in the generic case while the
21143 compile-time loop is made up of 4+2*(n-2) insns for n # of intervals. */
21144 else if (size <= 5 * PROBE_INTERVAL)
21146 HOST_WIDE_INT i, rem;
21148 emit_move_insn (reg1, GEN_INT (first + PROBE_INTERVAL));
21149 emit_set_insn (reg1, gen_rtx_MINUS (Pmode, stack_pointer_rtx, reg1));
21150 emit_stack_probe (reg1);
21152 /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 2 until
21153 it exceeds SIZE. If only two probes are needed, this will not
21154 generate any code. Then probe at FIRST + SIZE. */
21155 for (i = 2 * PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
21157 emit_set_insn (reg1, plus_constant (Pmode, reg1, -PROBE_INTERVAL));
21158 emit_stack_probe (reg1);
21161 rem = size - (i - PROBE_INTERVAL);
21162 if (rem > 4095 || (TARGET_THUMB2 && rem > 255))
21164 emit_set_insn (reg1, plus_constant (Pmode, reg1, -PROBE_INTERVAL));
21165 emit_stack_probe (plus_constant (Pmode, reg1, PROBE_INTERVAL - rem));
21167 else
21168 emit_stack_probe (plus_constant (Pmode, reg1, -rem));
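 /* A worked example for this arm, assuming PROBE_INTERVAL is 4096: with
    FIRST == 0 and SIZE == 12288 the code above emits probes at SP - 4096,
    SP - 8192 and SP - 12288, so the final probe lands exactly at
    FIRST + SIZE (all values illustrative).  */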
21171 /* Otherwise, do the same as above, but in a loop. Note that we must be
21172 extra careful with variables wrapping around because we might be at
21173 the very top (or the very bottom) of the address space and we have
21174 to be able to handle this case properly; in particular, we use an
21175 equality test for the loop condition. */
21176 else
21178 HOST_WIDE_INT rounded_size;
21179 struct scratch_reg sr;
21181 get_scratch_register_on_entry (&sr, regno1, live_regs);
21183 emit_move_insn (reg1, GEN_INT (first));
21186 /* Step 1: round SIZE to the previous multiple of the interval. */
21188 rounded_size = size & -PROBE_INTERVAL;
21189 emit_move_insn (sr.reg, GEN_INT (rounded_size));
21192 /* Step 2: compute initial and final value of the loop counter. */
21194 /* TEST_ADDR = SP + FIRST. */
21195 emit_set_insn (reg1, gen_rtx_MINUS (Pmode, stack_pointer_rtx, reg1));
21197 /* LAST_ADDR = SP + FIRST + ROUNDED_SIZE. */
21198 emit_set_insn (sr.reg, gen_rtx_MINUS (Pmode, reg1, sr.reg));
21201 /* Step 3: the loop
21205 TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
21206 probe at TEST_ADDR
21208 while (TEST_ADDR != LAST_ADDR)
21210 probes at FIRST + N * PROBE_INTERVAL for values of N from 1
21211 until it is equal to ROUNDED_SIZE. */
21213 emit_insn (gen_probe_stack_range (reg1, reg1, sr.reg));
21216 /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
21217 that SIZE is equal to ROUNDED_SIZE. */
21219 if (size != rounded_size)
21221 HOST_WIDE_INT rem = size - rounded_size;
21223 if (rem > 4095 || (TARGET_THUMB2 && rem > 255))
21225 emit_set_insn (sr.reg,
21226 plus_constant (Pmode, sr.reg, -PROBE_INTERVAL));
21227 emit_stack_probe (plus_constant (Pmode, sr.reg,
21228 PROBE_INTERVAL - rem));
21230 else
21231 emit_stack_probe (plus_constant (Pmode, sr.reg, -rem));
21234 release_scratch_register_on_entry (&sr);
21237 /* Make sure nothing is scheduled before we are done. */
21238 emit_insn (gen_blockage ());
21241 /* Probe a range of stack addresses from REG1 to REG2 inclusive. These are
21242 absolute addresses. */
21244 const char *
21245 output_probe_stack_range (rtx reg1, rtx reg2)
21247 static int labelno = 0;
21248 char loop_lab[32];
21249 rtx xops[2];
21251 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno++);
21253 /* Loop. */
21254 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
21256 /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL. */
21257 xops[0] = reg1;
21258 xops[1] = GEN_INT (PROBE_INTERVAL);
21259 output_asm_insn ("sub\t%0, %0, %1", xops);
21261 /* Probe at TEST_ADDR. */
21262 output_asm_insn ("str\tr0, [%0, #0]", xops);
21264 /* Test if TEST_ADDR == LAST_ADDR. */
21265 xops[1] = reg2;
21266 output_asm_insn ("cmp\t%0, %1", xops);
21268 /* Branch. */
21269 fputs ("\tbne\t", asm_out_file);
21270 assemble_name_raw (asm_out_file, loop_lab);
21271 fputc ('\n', asm_out_file);
21273 return "";
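 /* With a 4096-byte PROBE_INTERVAL the loop emitted above therefore looks
    roughly like this (the label is generated, the register numbers are
    illustrative):

	.LPSRL0:
	sub	r4, r4, #4096
	str	r0, [r4, #0]
	cmp	r4, r5
	bne	.LPSRL0  */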
21276 /* Generate the prologue instructions for entry into an ARM or Thumb-2
21277 function. */
21278 void
21279 arm_expand_prologue (void)
21281 rtx amount;
21282 rtx insn;
21283 rtx ip_rtx;
21284 unsigned long live_regs_mask;
21285 unsigned long func_type;
21286 int fp_offset = 0;
21287 int saved_pretend_args = 0;
21288 int saved_regs = 0;
21289 unsigned HOST_WIDE_INT args_to_push;
21290 HOST_WIDE_INT size;
21291 arm_stack_offsets *offsets;
21292 bool clobber_ip;
21294 func_type = arm_current_func_type ();
21296 /* Naked functions don't have prologues. */
21297 if (IS_NAKED (func_type))
21299 if (flag_stack_usage_info)
21300 current_function_static_stack_size = 0;
21301 return;
21304 /* Make a copy of c_f_p_a_s as we may need to modify it locally. */
21305 args_to_push = crtl->args.pretend_args_size;
21307 /* Compute which registers we will have to save onto the stack. */
21308 offsets = arm_get_frame_offsets ();
21309 live_regs_mask = offsets->saved_regs_mask;
21311 ip_rtx = gen_rtx_REG (SImode, IP_REGNUM);
21313 if (IS_STACKALIGN (func_type))
21315 rtx r0, r1;
21317 /* Handle a word-aligned stack pointer. We generate the following:
21319 mov r0, sp
21320 bic r1, r0, #7
21321 mov sp, r1
21322 <save and restore r0 in normal prologue/epilogue>
21323 mov sp, r0
21324 bx lr
21326 The unwinder doesn't need to know about the stack realignment.
21327 Just tell it we saved SP in r0. */
21328 gcc_assert (TARGET_THUMB2 && !arm_arch_notm && args_to_push == 0);
21330 r0 = gen_rtx_REG (SImode, R0_REGNUM);
21331 r1 = gen_rtx_REG (SImode, R1_REGNUM);
21333 insn = emit_insn (gen_movsi (r0, stack_pointer_rtx));
21334 RTX_FRAME_RELATED_P (insn) = 1;
21335 add_reg_note (insn, REG_CFA_REGISTER, NULL);
21337 emit_insn (gen_andsi3 (r1, r0, GEN_INT (~(HOST_WIDE_INT)7)));
21339 /* ??? The CFA changes here, which may cause GDB to conclude that it
21340 has entered a different function. That said, the unwind info is
21341 correct, individually, before and after this instruction because
21342 we've described the save of SP, which will override the default
21343 handling of SP as restoring from the CFA. */
21344 emit_insn (gen_movsi (stack_pointer_rtx, r1));
21347 /* The static chain register is the same as the IP register. If it is
21348 clobbered when creating the frame, we need to save and restore it. */
21349 clobber_ip = IS_NESTED (func_type)
21350 && ((TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
21351 || (flag_stack_check == STATIC_BUILTIN_STACK_CHECK
21352 && !df_regs_ever_live_p (LR_REGNUM)
21353 && arm_r3_live_at_start_p ()));
21355 /* Find somewhere to store IP whilst the frame is being created.
21356 We try the following places in order:
21358 1. The last argument register r3 if it is available.
21359 2. A slot on the stack above the frame if there are no
21360 arguments to push onto the stack.
21361 3. Register r3 again, after pushing the argument registers
21362 onto the stack, if this is a varargs function.
21363 4. The last slot on the stack created for the arguments to
21364 push, if this isn't a varargs function.
21366 Note - we only need to tell the dwarf2 backend about the SP
21367 adjustment in the second variant; the static chain register
21368 doesn't need to be unwound, as it doesn't contain a value
21369 inherited from the caller. */
21370 if (clobber_ip)
21372 if (!arm_r3_live_at_start_p ())
21373 insn = emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
21374 else if (args_to_push == 0)
21376 rtx addr, dwarf;
21378 gcc_assert(arm_compute_static_chain_stack_bytes() == 4);
21379 saved_regs += 4;
21381 addr = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
21382 insn = emit_set_insn (gen_frame_mem (SImode, addr), ip_rtx);
21383 fp_offset = 4;
21385 /* Just tell the dwarf backend that we adjusted SP. */
21386 dwarf = gen_rtx_SET (stack_pointer_rtx,
21387 plus_constant (Pmode, stack_pointer_rtx,
21388 -fp_offset));
21389 RTX_FRAME_RELATED_P (insn) = 1;
21390 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
21392 else
21394 /* Store the args on the stack. */
21395 if (cfun->machine->uses_anonymous_args)
21397 insn = emit_multi_reg_push ((0xf0 >> (args_to_push / 4)) & 0xf,
21398 (0xf0 >> (args_to_push / 4)) & 0xf);
21399 emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
21400 saved_pretend_args = 1;
21402 else
21404 rtx addr, dwarf;
21406 if (args_to_push == 4)
21407 addr = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
21408 else
21409 addr = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx,
21410 plus_constant (Pmode,
21411 stack_pointer_rtx,
21412 -args_to_push));
21414 insn = emit_set_insn (gen_frame_mem (SImode, addr), ip_rtx);
21416 /* Just tell the dwarf backend that we adjusted SP. */
21417 dwarf = gen_rtx_SET (stack_pointer_rtx,
21418 plus_constant (Pmode, stack_pointer_rtx,
21419 -args_to_push));
21420 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
21423 RTX_FRAME_RELATED_P (insn) = 1;
21424 fp_offset = args_to_push;
21425 args_to_push = 0;
21429 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
21431 if (IS_INTERRUPT (func_type))
21433 /* Interrupt functions must not corrupt any registers.
21434 Creating a frame pointer, however, corrupts the IP
21435 register, so we must push it first. */
21436 emit_multi_reg_push (1 << IP_REGNUM, 1 << IP_REGNUM);
21438 /* Do not set RTX_FRAME_RELATED_P on this insn.
21439 The dwarf stack unwinding code only wants to see one
21440 stack decrement per function, and this is not it. If
21441 this instruction is labeled as being part of the frame
21442 creation sequence then dwarf2out_frame_debug_expr will
21443 die when it encounters the assignment of IP to FP
21444 later on, since the use of SP here establishes SP as
21445 the CFA register and not IP.
21447 Anyway this instruction is not really part of the stack
21448 frame creation although it is part of the prologue. */
21451 insn = emit_set_insn (ip_rtx,
21452 plus_constant (Pmode, stack_pointer_rtx,
21453 fp_offset));
21454 RTX_FRAME_RELATED_P (insn) = 1;
21457 if (args_to_push)
21459 /* Push the argument registers, or reserve space for them. */
21460 if (cfun->machine->uses_anonymous_args)
21461 insn = emit_multi_reg_push
21462 ((0xf0 >> (args_to_push / 4)) & 0xf,
21463 (0xf0 >> (args_to_push / 4)) & 0xf);
21464 else
21465 insn = emit_insn
21466 (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
21467 GEN_INT (- args_to_push)));
21468 RTX_FRAME_RELATED_P (insn) = 1;
21471 /* If this is an interrupt service routine, and the link register
21472 is going to be pushed, and we're not generating the extra
21473 push of IP (needed when a frame is needed and the frame layout is APCS),
21474 subtracting four from LR now will mean that the function return
21475 can be done with a single instruction. */
21476 if ((func_type == ARM_FT_ISR || func_type == ARM_FT_FIQ)
21477 && (live_regs_mask & (1 << LR_REGNUM)) != 0
21478 && !(frame_pointer_needed && TARGET_APCS_FRAME)
21479 && TARGET_ARM)
21481 rtx lr = gen_rtx_REG (SImode, LR_REGNUM);
21483 emit_set_insn (lr, plus_constant (SImode, lr, -4));
21486 if (live_regs_mask)
21488 unsigned long dwarf_regs_mask = live_regs_mask;
21490 saved_regs += bit_count (live_regs_mask) * 4;
21491 if (optimize_size && !frame_pointer_needed
21492 && saved_regs == offsets->saved_regs - offsets->saved_args)
21494 /* If no coprocessor registers are being pushed and we don't have
21495 to worry about a frame pointer then push extra registers to
21496 create the stack frame. This is done in a way that does not
21497 alter the frame layout, so is independent of the epilogue. */
21498 int n;
21499 int frame;
21500 n = 0;
21501 while (n < 8 && (live_regs_mask & (1 << n)) == 0)
21502 n++;
21503 frame = offsets->outgoing_args - (offsets->saved_args + saved_regs);
21504 if (frame && n * 4 >= frame)
21506 n = frame / 4;
21507 live_regs_mask |= (1 << n) - 1;
21508 saved_regs += frame;
21512 if (TARGET_LDRD
21513 && current_tune->prefer_ldrd_strd
21514 && !optimize_function_for_size_p (cfun))
21516 gcc_checking_assert (live_regs_mask == dwarf_regs_mask);
21517 if (TARGET_THUMB2)
21518 thumb2_emit_strd_push (live_regs_mask);
21519 else if (TARGET_ARM
21520 && !TARGET_APCS_FRAME
21521 && !IS_INTERRUPT (func_type))
21522 arm_emit_strd_push (live_regs_mask);
21523 else
21525 insn = emit_multi_reg_push (live_regs_mask, live_regs_mask);
21526 RTX_FRAME_RELATED_P (insn) = 1;
21529 else
21531 insn = emit_multi_reg_push (live_regs_mask, dwarf_regs_mask);
21532 RTX_FRAME_RELATED_P (insn) = 1;
21536 if (! IS_VOLATILE (func_type))
21537 saved_regs += arm_save_coproc_regs ();
21539 if (frame_pointer_needed && TARGET_ARM)
21541 /* Create the new frame pointer. */
21542 if (TARGET_APCS_FRAME)
21544 insn = GEN_INT (-(4 + args_to_push + fp_offset));
21545 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx, ip_rtx, insn));
21546 RTX_FRAME_RELATED_P (insn) = 1;
21548 else
21550 insn = GEN_INT (saved_regs - (4 + fp_offset));
21551 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
21552 stack_pointer_rtx, insn));
21553 RTX_FRAME_RELATED_P (insn) = 1;
21557 size = offsets->outgoing_args - offsets->saved_args;
21558 if (flag_stack_usage_info)
21559 current_function_static_stack_size = size;
21561 /* If this isn't an interrupt service routine and we have a frame, then do
21562 stack checking. We use IP as the first scratch register, except for the
21563 non-APCS nested functions if LR or r3 are available (see clobber_ip). */
21564 if (!IS_INTERRUPT (func_type)
21565 && flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
21567 unsigned int regno;
21569 if (!IS_NESTED (func_type) || clobber_ip)
21570 regno = IP_REGNUM;
21571 else if (df_regs_ever_live_p (LR_REGNUM))
21572 regno = LR_REGNUM;
21573 else
21574 regno = 3;
21576 if (crtl->is_leaf && !cfun->calls_alloca)
21578 if (size > PROBE_INTERVAL && size > STACK_CHECK_PROTECT)
21579 arm_emit_probe_stack_range (STACK_CHECK_PROTECT,
21580 size - STACK_CHECK_PROTECT,
21581 regno, live_regs_mask);
21583 else if (size > 0)
21584 arm_emit_probe_stack_range (STACK_CHECK_PROTECT, size,
21585 regno, live_regs_mask);
21588 /* Recover the static chain register. */
21589 if (clobber_ip)
21591 if (!arm_r3_live_at_start_p () || saved_pretend_args)
21592 insn = gen_rtx_REG (SImode, 3);
21593 else
21595 insn = plus_constant (Pmode, hard_frame_pointer_rtx, 4);
21596 insn = gen_frame_mem (SImode, insn);
21598 emit_set_insn (ip_rtx, insn);
21599 emit_insn (gen_force_register_use (ip_rtx));
21602 if (offsets->outgoing_args != offsets->saved_args + saved_regs)
21604 /* This add can produce multiple insns for a large constant, so we
21605 need to get tricky. */
21606 rtx_insn *last = get_last_insn ();
21608 amount = GEN_INT (offsets->saved_args + saved_regs
21609 - offsets->outgoing_args);
21611 insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
21612 amount));
21615 last = last ? NEXT_INSN (last) : get_insns ();
21616 RTX_FRAME_RELATED_P (last) = 1;
21618 while (last != insn);
21620 /* If the frame pointer is needed, emit a special barrier that
21621 will prevent the scheduler from moving stores to the frame
21622 before the stack adjustment. */
21623 if (frame_pointer_needed)
21624 insn = emit_insn (gen_stack_tie (stack_pointer_rtx,
21625 hard_frame_pointer_rtx));
21629 if (frame_pointer_needed && TARGET_THUMB2)
21630 thumb_set_frame_pointer (offsets);
21632 if (flag_pic && arm_pic_register != INVALID_REGNUM)
21634 unsigned long mask;
21636 mask = live_regs_mask;
21637 mask &= THUMB2_WORK_REGS;
21638 if (!IS_NESTED (func_type))
21639 mask |= (1 << IP_REGNUM);
21640 arm_load_pic_register (mask);
21643 /* If we are profiling, make sure no instructions are scheduled before
21644 the call to mcount. Similarly if the user has requested no
21645 scheduling in the prologue. Similarly if we want non-call exceptions
21646 using the EABI unwinder, to prevent faulting instructions from being
21647 swapped with a stack adjustment. */
21648 if (crtl->profile || !TARGET_SCHED_PROLOG
21649 || (arm_except_unwind_info (&global_options) == UI_TARGET
21650 && cfun->can_throw_non_call_exceptions))
21651 emit_insn (gen_blockage ());
21653 /* If the link register is being kept alive, with the return address in it,
21654 then make sure that it does not get reused by the ce2 pass. */
21655 if ((live_regs_mask & (1 << LR_REGNUM)) == 0)
21656 cfun->machine->lr_save_eliminated = 1;
21659 /* Print condition code to STREAM. Helper function for arm_print_operand. */
21660 static void
21661 arm_print_condition (FILE *stream)
21663 if (arm_ccfsm_state == 3 || arm_ccfsm_state == 4)
21665 /* Branch conversion is not implemented for Thumb-2. */
21666 if (TARGET_THUMB)
21668 output_operand_lossage ("predicated Thumb instruction");
21669 return;
21671 if (current_insn_predicate != NULL)
21673 output_operand_lossage
21674 ("predicated instruction in conditional sequence");
21675 return;
21678 fputs (arm_condition_codes[arm_current_cc], stream);
21680 else if (current_insn_predicate)
21682 enum arm_cond_code code;
21684 if (TARGET_THUMB1)
21686 output_operand_lossage ("predicated Thumb instruction");
21687 return;
21690 code = get_arm_condition_code (current_insn_predicate);
21691 fputs (arm_condition_codes[code], stream);
21696 /* Globally reserved letters: acln
21697 Punctuation letters currently used: @_|?().!#
21698 Lower case letters currently used: bcdefhimpqtvwxyz
21699 Upper case letters currently used: ABCDFGHJKLMNOPQRSTU
21700 Letters previously used, but now deprecated/obsolete: sVWXYZ.
21702 Note that the global reservation for 'c' is only for CONSTANT_ADDRESS_P.
21704 If CODE is 'd', then the X is a condition operand and the instruction
21705 should only be executed if the condition is true.
21706 If CODE is 'D', then the X is a condition operand and the instruction
21707 should only be executed if the condition is false: however, if the mode
21708 of the comparison is CCFPEmode, then always execute the instruction -- we
21709 do this because in these circumstances !GE does not necessarily imply LT;
21710 in these cases the instruction pattern will take care to make sure that
21711 an instruction containing %d will follow, thereby undoing the effects of
21712 doing this instruction unconditionally.
21713 If CODE is 'N' then X is a floating point operand that must be negated
21714 before output.
21715 If CODE is 'B' then output a bitwise inverted value of X (a const int).
21716 If X is a REG and CODE is `M', output a ldm/stm style multi-reg. */
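 /* Two worked examples of the codes above (operands illustrative):
    %B applied to (const_int 10) prints -11, the bitwise inverse, while
    %b applied to (const_int 2) prints #1, the log2 of the value.  */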
21717 static void
21718 arm_print_operand (FILE *stream, rtx x, int code)
21720 switch (code)
21722 case '@':
21723 fputs (ASM_COMMENT_START, stream);
21724 return;
21726 case '_':
21727 fputs (user_label_prefix, stream);
21728 return;
21730 case '|':
21731 fputs (REGISTER_PREFIX, stream);
21732 return;
21734 case '?':
21735 arm_print_condition (stream);
21736 return;
21738 case '.':
21739 /* The current condition code for a condition code setting instruction.
21740 Preceded by 's' in unified syntax, otherwise followed by 's'. */
21741 fputc('s', stream);
21742 arm_print_condition (stream);
21743 return;
21745 case '!':
21746 /* If the instruction is conditionally executed then print
21747 the current condition code, otherwise print 's'. */
21748 gcc_assert (TARGET_THUMB2);
21749 if (current_insn_predicate)
21750 arm_print_condition (stream);
21751 else
21752 fputc('s', stream);
21753 break;
21755 /* %# is a "break" sequence. It doesn't output anything, but is used to
21756 separate e.g. operand numbers from following text, if that text consists
21757 of further digits which we don't want to be part of the operand
21758 number. */
21759 case '#':
21760 return;
21762 case 'N':
21764 REAL_VALUE_TYPE r;
21765 r = real_value_negate (CONST_DOUBLE_REAL_VALUE (x));
21766 fprintf (stream, "%s", fp_const_from_val (&r));
21768 return;
21770 /* An integer or symbol address without a preceding # sign. */
21771 case 'c':
21772 switch (GET_CODE (x))
21774 case CONST_INT:
21775 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
21776 break;
21778 case SYMBOL_REF:
21779 output_addr_const (stream, x);
21780 break;
21782 case CONST:
21783 if (GET_CODE (XEXP (x, 0)) == PLUS
21784 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF)
21786 output_addr_const (stream, x);
21787 break;
21789 /* Fall through. */
21791 default:
21792 output_operand_lossage ("Unsupported operand for code '%c'", code);
21794 return;
21796 /* An integer that we want to print in HEX. */
21797 case 'x':
21798 switch (GET_CODE (x))
21800 case CONST_INT:
21801 fprintf (stream, "#" HOST_WIDE_INT_PRINT_HEX, INTVAL (x));
21802 break;
21804 default:
21805 output_operand_lossage ("Unsupported operand for code '%c'", code);
21807 return;
21809 case 'B':
21810 if (CONST_INT_P (x))
21812 HOST_WIDE_INT val;
21813 val = ARM_SIGN_EXTEND (~INTVAL (x));
21814 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, val);
21816 else
21818 putc ('~', stream);
21819 output_addr_const (stream, x);
21821 return;
21823 case 'b':
21824 /* Print the log2 of a CONST_INT. */
21826 HOST_WIDE_INT val;
21828 if (!CONST_INT_P (x)
21829 || (val = exact_log2 (INTVAL (x) & 0xffffffff)) < 0)
21830 output_operand_lossage ("Unsupported operand for code '%c'", code);
21831 else
21832 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, val);
21834 return;
21836 case 'L':
21837 /* The low 16 bits of an immediate constant. */
21838 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL(x) & 0xffff);
21839 return;
21841 case 'i':
21842 fprintf (stream, "%s", arithmetic_instr (x, 1));
21843 return;
21845 case 'I':
21846 fprintf (stream, "%s", arithmetic_instr (x, 0));
21847 return;
21849 case 'S':
21851 HOST_WIDE_INT val;
21852 const char *shift;
21854 shift = shift_op (x, &val);
21856 if (shift)
21858 fprintf (stream, ", %s ", shift);
21859 if (val == -1)
21860 arm_print_operand (stream, XEXP (x, 1), 0);
21861 else
21862 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, val);
21865 return;
21867 /* An explanation of the 'Q', 'R' and 'H' register operands:
21869 In a pair of registers containing a DI or DF value the 'Q'
21870 operand returns the register number of the register containing
21871 the least significant part of the value. The 'R' operand returns
21872 the register number of the register containing the most
21873 significant part of the value.
21875 The 'H' operand returns the higher of the two register numbers.
21876 On a run where WORDS_BIG_ENDIAN is true the 'H' operand is the
21877 same as the 'Q' operand, since the most significant part of the
21878 value is held in the lower number register. The reverse is true
21879 on systems where WORDS_BIG_ENDIAN is false.
21881 The purpose of these operands is to distinguish between cases
21882 where the endian-ness of the values is important (for example
21883 when they are added together), and cases where the endian-ness
21884 is irrelevant, but the order of register operations is important.
21885 For example when loading a value from memory into a register
21886 pair, the endian-ness does not matter. Provided that the value
21887 from the lower memory address is put into the lower numbered
21888 register, and the value from the higher address is put into the
21889 higher numbered register, the load will work regardless of whether
21890 the value being loaded is big-wordian or little-wordian. The
21891 order of the two register loads can matter however, if the address
21892 of the memory location is actually held in one of the registers
21893 being overwritten by the load.
21895 The 'Q' and 'R' constraints are also available for 64-bit
21896 constants. */
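 /* A concrete illustration (little-endian, so WORDS_BIG_ENDIAN is false;
    the register choice is hypothetical): for a DImode value held in
    r0/r1, %Q prints r0, %R prints r1 and %H also prints r1.  */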
21897 case 'Q':
21898 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
21900 rtx part = gen_lowpart (SImode, x);
21901 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, INTVAL (part));
21902 return;
21905 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
21907 output_operand_lossage ("invalid operand for code '%c'", code);
21908 return;
21911 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 1 : 0));
21912 return;
21914 case 'R':
21915 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
21917 machine_mode mode = GET_MODE (x);
21918 rtx part;
21920 if (mode == VOIDmode)
21921 mode = DImode;
21922 part = gen_highpart_mode (SImode, mode, x);
21923 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, INTVAL (part));
21924 return;
21927 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
21929 output_operand_lossage ("invalid operand for code '%c'", code);
21930 return;
21933 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 0 : 1));
21934 return;
21936 case 'H':
21937 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
21939 output_operand_lossage ("invalid operand for code '%c'", code);
21940 return;
21943 asm_fprintf (stream, "%r", REGNO (x) + 1);
21944 return;
21946 case 'J':
21947 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
21949 output_operand_lossage ("invalid operand for code '%c'", code);
21950 return;
21953 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 3 : 2));
21954 return;
21956 case 'K':
21957 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
21959 output_operand_lossage ("invalid operand for code '%c'", code);
21960 return;
21963 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 2 : 3));
21964 return;
21966 case 'm':
21967 asm_fprintf (stream, "%r",
21968 REG_P (XEXP (x, 0))
21969 ? REGNO (XEXP (x, 0)) : REGNO (XEXP (XEXP (x, 0), 0)));
21970 return;
21972 case 'M':
21973 asm_fprintf (stream, "{%r-%r}",
21974 REGNO (x),
21975 REGNO (x) + ARM_NUM_REGS (GET_MODE (x)) - 1);
21976 return;
21978 /* Like 'M', but writing doubleword vector registers, for use by Neon
21979 insns. */
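 /* E.g. an OImode operand (four doubleword registers) that starts at d2
    would be printed as {d2-d5} (registers illustrative).  */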
21980 case 'h':
21982 int regno = (REGNO (x) - FIRST_VFP_REGNUM) / 2;
21983 int numregs = ARM_NUM_REGS (GET_MODE (x)) / 2;
21984 if (numregs == 1)
21985 asm_fprintf (stream, "{d%d}", regno);
21986 else
21987 asm_fprintf (stream, "{d%d-d%d}", regno, regno + numregs - 1);
21989 return;
21991 case 'd':
21992 /* CONST_TRUE_RTX means always -- that's the default. */
21993 if (x == const_true_rtx)
21994 return;
21996 if (!COMPARISON_P (x))
21998 output_operand_lossage ("invalid operand for code '%c'", code);
21999 return;
22002 fputs (arm_condition_codes[get_arm_condition_code (x)],
22003 stream);
22004 return;
22006 case 'D':
22007 /* CONST_TRUE_RTX means not always -- i.e. never. We shouldn't ever
22008 want to do that. */
22009 if (x == const_true_rtx)
22011 output_operand_lossage ("instruction never executed");
22012 return;
22014 if (!COMPARISON_P (x))
22016 output_operand_lossage ("invalid operand for code '%c'", code);
22017 return;
22020 fputs (arm_condition_codes[ARM_INVERSE_CONDITION_CODE
22021 (get_arm_condition_code (x))],
22022 stream);
22023 return;
22025 case 's':
22026 case 'V':
22027 case 'W':
22028 case 'X':
22029 case 'Y':
22030 case 'Z':
22031 /* Former Maverick support, removed after GCC-4.7. */
22032 output_operand_lossage ("obsolete Maverick format code '%c'", code);
22033 return;
22035 case 'U':
22036 if (!REG_P (x)
22037 || REGNO (x) < FIRST_IWMMXT_GR_REGNUM
22038 || REGNO (x) > LAST_IWMMXT_GR_REGNUM)
22039 /* Bad value for wCG register number. */
22041 output_operand_lossage ("invalid operand for code '%c'", code);
22042 return;
22045 else
22046 fprintf (stream, "%d", REGNO (x) - FIRST_IWMMXT_GR_REGNUM);
22047 return;
22049 /* Print an iWMMXt control register name. */
22050 case 'w':
22051 if (!CONST_INT_P (x)
22052 || INTVAL (x) < 0
22053 || INTVAL (x) >= 16)
22054 /* Bad value for wC register number. */
22056 output_operand_lossage ("invalid operand for code '%c'", code);
22057 return;
22060 else
22062 static const char * wc_reg_names [16] =
22064 "wCID", "wCon", "wCSSF", "wCASF",
22065 "wC4", "wC5", "wC6", "wC7",
22066 "wCGR0", "wCGR1", "wCGR2", "wCGR3",
22067 "wC12", "wC13", "wC14", "wC15"
22070 fputs (wc_reg_names [INTVAL (x)], stream);
22072 return;
22074 /* Print the high single-precision register of a VFP double-precision
22075 register. */
22076 case 'p':
22078 machine_mode mode = GET_MODE (x);
22079 int regno;
22081 if (GET_MODE_SIZE (mode) != 8 || !REG_P (x))
22083 output_operand_lossage ("invalid operand for code '%c'", code);
22084 return;
22087 regno = REGNO (x);
22088 if (!VFP_REGNO_OK_FOR_DOUBLE (regno))
22090 output_operand_lossage ("invalid operand for code '%c'", code);
22091 return;
22094 fprintf (stream, "s%d", regno - FIRST_VFP_REGNUM + 1);
22096 return;
22098 /* Print a VFP/Neon double precision or quad precision register name. */
22099 case 'P':
22100 case 'q':
22102 machine_mode mode = GET_MODE (x);
22103 int is_quad = (code == 'q');
22104 int regno;
22106 if (GET_MODE_SIZE (mode) != (is_quad ? 16 : 8))
22108 output_operand_lossage ("invalid operand for code '%c'", code);
22109 return;
22112 if (!REG_P (x)
22113 || !IS_VFP_REGNUM (REGNO (x)))
22115 output_operand_lossage ("invalid operand for code '%c'", code);
22116 return;
22119 regno = REGNO (x);
22120 if ((is_quad && !NEON_REGNO_OK_FOR_QUAD (regno))
22121 || (!is_quad && !VFP_REGNO_OK_FOR_DOUBLE (regno)))
22123 output_operand_lossage ("invalid operand for code '%c'", code);
22124 return;
22127 fprintf (stream, "%c%d", is_quad ? 'q' : 'd',
22128 (regno - FIRST_VFP_REGNUM) >> (is_quad ? 2 : 1));
22130 return;
22132 /* These two codes print the low/high doubleword register of a Neon quad
22133 register, respectively. For pair-structure types, can also print
22134 low/high quadword registers. */
22135 case 'e':
22136 case 'f':
22138 machine_mode mode = GET_MODE (x);
22139 int regno;
22141 if ((GET_MODE_SIZE (mode) != 16
22142 && GET_MODE_SIZE (mode) != 32) || !REG_P (x))
22144 output_operand_lossage ("invalid operand for code '%c'", code);
22145 return;
22148 regno = REGNO (x);
22149 if (!NEON_REGNO_OK_FOR_QUAD (regno))
22151 output_operand_lossage ("invalid operand for code '%c'", code);
22152 return;
22155 if (GET_MODE_SIZE (mode) == 16)
22156 fprintf (stream, "d%d", ((regno - FIRST_VFP_REGNUM) >> 1)
22157 + (code == 'f' ? 1 : 0));
22158 else
22159 fprintf (stream, "q%d", ((regno - FIRST_VFP_REGNUM) >> 2)
22160 + (code == 'f' ? 1 : 0));
22162 return;
22164 /* Print a VFPv3 floating-point constant, represented as an integer
22165 index. */
22166 case 'G':
22168 int index = vfp3_const_double_index (x);
22169 gcc_assert (index != -1);
22170 fprintf (stream, "%d", index);
22172 return;
22174 /* Print bits representing opcode features for Neon.
22176 Bit 0 is 1 for signed, 0 for unsigned. Floats count as signed
22177 and polynomials as unsigned.
22179 Bit 1 is 1 for floats and polynomials, 0 for ordinary integers.
22181 Bit 2 is 1 for rounding functions, 0 otherwise. */
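 /* For instance (illustrative): a bits value of 3 denotes a signed float,
    so %T and %F both print 'f'; if bit 2 were also set, %O would add the
    'r' rounding suffix.  */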
22183 /* Identify the type as 's', 'u', 'p' or 'f'. */
22184 case 'T':
22186 HOST_WIDE_INT bits = INTVAL (x);
22187 fputc ("uspf"[bits & 3], stream);
22189 return;
22191 /* Likewise, but signed and unsigned integers are both 'i'. */
22192 case 'F':
22194 HOST_WIDE_INT bits = INTVAL (x);
22195 fputc ("iipf"[bits & 3], stream);
22197 return;
22199 /* As for 'T', but emit 'u' instead of 'p'. */
22200 case 't':
22202 HOST_WIDE_INT bits = INTVAL (x);
22203 fputc ("usuf"[bits & 3], stream);
22205 return;
22207 /* Bit 2: rounding (vs none). */
22208 case 'O':
22210 HOST_WIDE_INT bits = INTVAL (x);
22211 fputs ((bits & 4) != 0 ? "r" : "", stream);
22213 return;
22215 /* Memory operand for vld1/vst1 instruction. */
22216 case 'A':
22218 rtx addr;
22219 bool postinc = FALSE;
22220 rtx postinc_reg = NULL;
22221 unsigned align, memsize, align_bits;
22223 gcc_assert (MEM_P (x));
22224 addr = XEXP (x, 0);
22225 if (GET_CODE (addr) == POST_INC)
22227 postinc = 1;
22228 addr = XEXP (addr, 0);
22230 if (GET_CODE (addr) == POST_MODIFY)
22232 postinc_reg = XEXP( XEXP (addr, 1), 1);
22233 addr = XEXP (addr, 0);
22235 asm_fprintf (stream, "[%r", REGNO (addr));
22237 /* We know the alignment of this access, so we can emit a hint in the
22238 instruction (for some alignments) as an aid to the memory subsystem
22239 of the target. */
22240 align = MEM_ALIGN (x) >> 3;
22241 memsize = MEM_SIZE (x);
22243 /* Only certain alignment specifiers are supported by the hardware. */
22244 if (memsize == 32 && (align % 32) == 0)
22245 align_bits = 256;
22246 else if ((memsize == 16 || memsize == 32) && (align % 16) == 0)
22247 align_bits = 128;
22248 else if (memsize >= 8 && (align % 8) == 0)
22249 align_bits = 64;
22250 else
22251 align_bits = 0;
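 /* E.g. a 16-byte access whose MEM is known to be 16-byte aligned gets a
    ":128" hint, so the operand is printed as something like "[r0:128]"
    (the register is illustrative).  */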
22253 if (align_bits != 0)
22254 asm_fprintf (stream, ":%d", align_bits);
22256 asm_fprintf (stream, "]");
22258 if (postinc)
22259 fputs("!", stream);
22260 if (postinc_reg)
22261 asm_fprintf (stream, ", %r", REGNO (postinc_reg));
22263 return;
22265 case 'C':
22267 rtx addr;
22269 gcc_assert (MEM_P (x));
22270 addr = XEXP (x, 0);
22271 gcc_assert (REG_P (addr));
22272 asm_fprintf (stream, "[%r]", REGNO (addr));
22274 return;
22276 /* Translate an S register number into a D register number and element index. */
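 /* For example, s1 is printed as d0[1] and s2 as d1[0].  */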
22277 case 'y':
22279 machine_mode mode = GET_MODE (x);
22280 int regno;
22282 if (GET_MODE_SIZE (mode) != 4 || !REG_P (x))
22284 output_operand_lossage ("invalid operand for code '%c'", code);
22285 return;
22288 regno = REGNO (x);
22289 if (!VFP_REGNO_OK_FOR_SINGLE (regno))
22291 output_operand_lossage ("invalid operand for code '%c'", code);
22292 return;
22295 regno = regno - FIRST_VFP_REGNUM;
22296 fprintf (stream, "d%d[%d]", regno / 2, regno % 2);
22298 return;
22300 case 'v':
22301 gcc_assert (CONST_DOUBLE_P (x));
22302 int result;
22303 result = vfp3_const_double_for_fract_bits (x);
22304 if (result == 0)
22305 result = vfp3_const_double_for_bits (x);
22306 fprintf (stream, "#%d", result);
22307 return;
22309 /* Register specifier for vld1.16/vst1.16. Translate the S register
22310 number into a D register number and element index. */
22311 case 'z':
22313 machine_mode mode = GET_MODE (x);
22314 int regno;
22316 if (GET_MODE_SIZE (mode) != 2 || !REG_P (x))
22318 output_operand_lossage ("invalid operand for code '%c'", code);
22319 return;
22322 regno = REGNO (x);
22323 if (!VFP_REGNO_OK_FOR_SINGLE (regno))
22325 output_operand_lossage ("invalid operand for code '%c'", code);
22326 return;
22329 regno = regno - FIRST_VFP_REGNUM;
22330 fprintf (stream, "d%d[%d]", regno/2, ((regno % 2) ? 2 : 0));
22332 return;
22334 default:
22335 if (x == 0)
22337 output_operand_lossage ("missing operand");
22338 return;
22341 switch (GET_CODE (x))
22343 case REG:
22344 asm_fprintf (stream, "%r", REGNO (x));
22345 break;
22347 case MEM:
22348 output_address (GET_MODE (x), XEXP (x, 0));
22349 break;
22351 case CONST_DOUBLE:
22353 char fpstr[20];
22354 real_to_decimal (fpstr, CONST_DOUBLE_REAL_VALUE (x),
22355 sizeof (fpstr), 0, 1);
22356 fprintf (stream, "#%s", fpstr);
22358 break;
22360 default:
22361 gcc_assert (GET_CODE (x) != NEG);
22362 fputc ('#', stream);
22363 if (GET_CODE (x) == HIGH)
22365 fputs (":lower16:", stream);
22366 x = XEXP (x, 0);
22369 output_addr_const (stream, x);
22370 break;
22375 /* Target hook for printing a memory address. */
22376 static void
22377 arm_print_operand_address (FILE *stream, machine_mode mode, rtx x)
22379 if (TARGET_32BIT)
22381 int is_minus = GET_CODE (x) == MINUS;
22383 if (REG_P (x))
22384 asm_fprintf (stream, "[%r]", REGNO (x));
22385 else if (GET_CODE (x) == PLUS || is_minus)
22387 rtx base = XEXP (x, 0);
22388 rtx index = XEXP (x, 1);
22389 HOST_WIDE_INT offset = 0;
22390 if (!REG_P (base)
22391 || (REG_P (index) && REGNO (index) == SP_REGNUM))
22393 /* Ensure that BASE is a register. */
22394 /* (one of them must be). */
22395 /* Also ensure the SP is not used as an index register. */
22396 std::swap (base, index);
22398 switch (GET_CODE (index))
22400 case CONST_INT:
22401 offset = INTVAL (index);
22402 if (is_minus)
22403 offset = -offset;
22404 asm_fprintf (stream, "[%r, #%wd]",
22405 REGNO (base), offset);
22406 break;
22408 case REG:
22409 asm_fprintf (stream, "[%r, %s%r]",
22410 REGNO (base), is_minus ? "-" : "",
22411 REGNO (index));
22412 break;
22414 case MULT:
22415 case ASHIFTRT:
22416 case LSHIFTRT:
22417 case ASHIFT:
22418 case ROTATERT:
22420 asm_fprintf (stream, "[%r, %s%r",
22421 REGNO (base), is_minus ? "-" : "",
22422 REGNO (XEXP (index, 0)));
22423 arm_print_operand (stream, index, 'S');
22424 fputs ("]", stream);
22425 break;
22428 default:
22429 gcc_unreachable ();
22432 else if (GET_CODE (x) == PRE_INC || GET_CODE (x) == POST_INC
22433 || GET_CODE (x) == PRE_DEC || GET_CODE (x) == POST_DEC)
22435 gcc_assert (REG_P (XEXP (x, 0)));
22437 if (GET_CODE (x) == PRE_DEC || GET_CODE (x) == PRE_INC)
22438 asm_fprintf (stream, "[%r, #%s%d]!",
22439 REGNO (XEXP (x, 0)),
22440 GET_CODE (x) == PRE_DEC ? "-" : "",
22441 GET_MODE_SIZE (mode));
22442 else
22443 asm_fprintf (stream, "[%r], #%s%d",
22444 REGNO (XEXP (x, 0)),
22445 GET_CODE (x) == POST_DEC ? "-" : "",
22446 GET_MODE_SIZE (mode));
22448 else if (GET_CODE (x) == PRE_MODIFY)
22450 asm_fprintf (stream, "[%r, ", REGNO (XEXP (x, 0)));
22451 if (CONST_INT_P (XEXP (XEXP (x, 1), 1)))
22452 asm_fprintf (stream, "#%wd]!",
22453 INTVAL (XEXP (XEXP (x, 1), 1)));
22454 else
22455 asm_fprintf (stream, "%r]!",
22456 REGNO (XEXP (XEXP (x, 1), 1)));
22458 else if (GET_CODE (x) == POST_MODIFY)
22460 asm_fprintf (stream, "[%r], ", REGNO (XEXP (x, 0)));
22461 if (CONST_INT_P (XEXP (XEXP (x, 1), 1)))
22462 asm_fprintf (stream, "#%wd",
22463 INTVAL (XEXP (XEXP (x, 1), 1)));
22464 else
22465 asm_fprintf (stream, "%r",
22466 REGNO (XEXP (XEXP (x, 1), 1)));
22468 else output_addr_const (stream, x);
22470 else
22472 if (REG_P (x))
22473 asm_fprintf (stream, "[%r]", REGNO (x));
22474 else if (GET_CODE (x) == POST_INC)
22475 asm_fprintf (stream, "%r!", REGNO (XEXP (x, 0)));
22476 else if (GET_CODE (x) == PLUS)
22478 gcc_assert (REG_P (XEXP (x, 0)));
22479 if (CONST_INT_P (XEXP (x, 1)))
22480 asm_fprintf (stream, "[%r, #%wd]",
22481 REGNO (XEXP (x, 0)),
22482 INTVAL (XEXP (x, 1)));
22483 else
22484 asm_fprintf (stream, "[%r, %r]",
22485 REGNO (XEXP (x, 0)),
22486 REGNO (XEXP (x, 1)));
22488 else
22489 output_addr_const (stream, x);
22493 /* Target hook for indicating whether a punctuation character for
22494 TARGET_PRINT_OPERAND is valid. */
22495 static bool
22496 arm_print_operand_punct_valid_p (unsigned char code)
22498 return (code == '@' || code == '|' || code == '.'
22499 || code == '(' || code == ')' || code == '#'
22500 || (TARGET_32BIT && (code == '?'))
22501 || (TARGET_THUMB2 && (code == '!'))
22502 || (TARGET_THUMB && (code == '_')));
22505 /* Target hook for assembling integer objects. The ARM version needs to
22506 handle word-sized values specially. */
22507 static bool
22508 arm_assemble_integer (rtx x, unsigned int size, int aligned_p)
22510 machine_mode mode;
22512 if (size == UNITS_PER_WORD && aligned_p)
22514 fputs ("\t.word\t", asm_out_file);
22515 output_addr_const (asm_out_file, x);
22517 /* Mark symbols as position independent. We only do this in the
22518 .text segment, not in the .data segment. */
22519 if (NEED_GOT_RELOC && flag_pic && making_const_table &&
22520 (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF))
22522 /* See legitimize_pic_address for an explanation of the
22523 TARGET_VXWORKS_RTP check. */
22524 /* References to weak symbols cannot be resolved locally:
22525 they may be overridden by a non-weak definition at link
22526 time. */
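 /* For instance, with -fPIC a constant-pool reference to a symbol that may
    be overridden at link time is emitted roughly as ".word foo(GOT)",
    whereas a local, text-relative reference gets "(GOTOFF)" instead
    ("foo" is a hypothetical symbol).  */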
22527 if (!arm_pic_data_is_text_relative
22528 || (GET_CODE (x) == SYMBOL_REF
22529 && (!SYMBOL_REF_LOCAL_P (x)
22530 || (SYMBOL_REF_DECL (x)
22531 ? DECL_WEAK (SYMBOL_REF_DECL (x)) : 0))))
22532 fputs ("(GOT)", asm_out_file);
22533 else
22534 fputs ("(GOTOFF)", asm_out_file);
22536 fputc ('\n', asm_out_file);
22537 return true;
22540 mode = GET_MODE (x);
22542 if (arm_vector_mode_supported_p (mode))
22544 int i, units;
22546 gcc_assert (GET_CODE (x) == CONST_VECTOR);
22548 units = CONST_VECTOR_NUNITS (x);
22549 size = GET_MODE_UNIT_SIZE (mode);
22551 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
22552 for (i = 0; i < units; i++)
22554 rtx elt = CONST_VECTOR_ELT (x, i);
22555 assemble_integer
22556 (elt, size, i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT, 1);
22558 else
22559 for (i = 0; i < units; i++)
22561 rtx elt = CONST_VECTOR_ELT (x, i);
22562 assemble_real
22563 (*CONST_DOUBLE_REAL_VALUE (elt), GET_MODE_INNER (mode),
22564 i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT);
22567 return true;
22570 return default_assemble_integer (x, size, aligned_p);
22573 static void
22574 arm_elf_asm_cdtor (rtx symbol, int priority, bool is_ctor)
22576 section *s;
22578 if (!TARGET_AAPCS_BASED)
22580 (is_ctor ?
22581 default_named_section_asm_out_constructor
22582 : default_named_section_asm_out_destructor) (symbol, priority);
22583 return;
22586 /* Put these in the .init_array section, using a special relocation. */
22587 if (priority != DEFAULT_INIT_PRIORITY)
22589 char buf[18];
22590 sprintf (buf, "%s.%.5u",
22591 is_ctor ? ".init_array" : ".fini_array",
22592 priority);
22593 s = get_section (buf, SECTION_WRITE, NULL_TREE);
22595 else if (is_ctor)
22596 s = ctors_section;
22597 else
22598 s = dtors_section;
22600 switch_to_section (s);
22601 assemble_align (POINTER_SIZE);
22602 fputs ("\t.word\t", asm_out_file);
22603 output_addr_const (asm_out_file, symbol);
22604 fputs ("(target1)\n", asm_out_file);
22607 /* Add a function to the list of static constructors. */
22609 static void
22610 arm_elf_asm_constructor (rtx symbol, int priority)
22612 arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/true);
22615 /* Add a function to the list of static destructors. */
22617 static void
22618 arm_elf_asm_destructor (rtx symbol, int priority)
22620 arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/false);
22623 /* A finite state machine takes care of noticing whether or not instructions
22624 can be conditionally executed, and thus decrease execution time and code
22625 size by deleting branch instructions. The fsm is controlled by
22626 final_prescan_insn, and controls the actions of ASM_OUTPUT_OPCODE. */
22628 /* The states of the fsm controlling condition codes are:
22629 0: normal, do nothing special
22630 1: make ASM_OUTPUT_OPCODE not output this instruction
22631 2: make ASM_OUTPUT_OPCODE not output this instruction
22632 3: make instructions conditional
22633 4: make instructions conditional
22635 State transitions (state->state by whom under condition):
22636 0 -> 1 final_prescan_insn if the `target' is a label
22637 0 -> 2 final_prescan_insn if the `target' is an unconditional branch
22638 1 -> 3 ASM_OUTPUT_OPCODE after not having output the conditional branch
22639 2 -> 4 ASM_OUTPUT_OPCODE after not having output the conditional branch
22640 3 -> 0 (*targetm.asm_out.internal_label) if the `target' label is reached
22641 (the target label has CODE_LABEL_NUMBER equal to arm_target_label).
22642 4 -> 0 final_prescan_insn if the `target' unconditional branch is reached
22643 (the target insn is arm_target_insn).
22645 If the jump clobbers the conditions then we use states 2 and 4.
22647 A similar thing can be done with conditional return insns.
22649 XXX In case the `target' is an unconditional branch, this conditionalising
22650 of the instructions always reduces code size, but not always execution
22651 time. But then, I want to reduce the code size to somewhere near what
22652 /bin/cc produces. */
22654 /* In addition to this, state is maintained for Thumb-2 COND_EXEC
22655 instructions. When a COND_EXEC instruction is seen the subsequent
22656 instructions are scanned so that multiple conditional instructions can be
22657 combined into a single IT block. arm_condexec_count and arm_condexec_mask
22658 specify the length and true/false mask for the IT block. These will be
22659 decremented/zeroed by arm_asm_output_opcode as the insns are output. */
22661 /* Returns the index of the ARM condition code string in
22662 `arm_condition_codes', or ARM_NV if the comparison is invalid.
22663 COMPARISON should be an rtx like `(eq (...) (...))'. */
22665 enum arm_cond_code
22666 maybe_get_arm_condition_code (rtx comparison)
22668 machine_mode mode = GET_MODE (XEXP (comparison, 0));
22669 enum arm_cond_code code;
22670 enum rtx_code comp_code = GET_CODE (comparison);
22672 if (GET_MODE_CLASS (mode) != MODE_CC)
22673 mode = SELECT_CC_MODE (comp_code, XEXP (comparison, 0),
22674 XEXP (comparison, 1));
22676 switch (mode)
22678 case CC_DNEmode: code = ARM_NE; goto dominance;
22679 case CC_DEQmode: code = ARM_EQ; goto dominance;
22680 case CC_DGEmode: code = ARM_GE; goto dominance;
22681 case CC_DGTmode: code = ARM_GT; goto dominance;
22682 case CC_DLEmode: code = ARM_LE; goto dominance;
22683 case CC_DLTmode: code = ARM_LT; goto dominance;
22684 case CC_DGEUmode: code = ARM_CS; goto dominance;
22685 case CC_DGTUmode: code = ARM_HI; goto dominance;
22686 case CC_DLEUmode: code = ARM_LS; goto dominance;
22687 case CC_DLTUmode: code = ARM_CC;
22689 dominance:
22690 if (comp_code == EQ)
22691 return ARM_INVERSE_CONDITION_CODE (code);
22692 if (comp_code == NE)
22693 return code;
22694 return ARM_NV;
22696 case CC_NOOVmode:
22697 switch (comp_code)
22699 case NE: return ARM_NE;
22700 case EQ: return ARM_EQ;
22701 case GE: return ARM_PL;
22702 case LT: return ARM_MI;
22703 default: return ARM_NV;
22706 case CC_Zmode:
22707 switch (comp_code)
22709 case NE: return ARM_NE;
22710 case EQ: return ARM_EQ;
22711 default: return ARM_NV;
22714 case CC_Nmode:
22715 switch (comp_code)
22717 case NE: return ARM_MI;
22718 case EQ: return ARM_PL;
22719 default: return ARM_NV;
22722 case CCFPEmode:
22723 case CCFPmode:
22724 /* We can handle all cases except UNEQ and LTGT. */
22725 switch (comp_code)
22727 case GE: return ARM_GE;
22728 case GT: return ARM_GT;
22729 case LE: return ARM_LS;
22730 case LT: return ARM_MI;
22731 case NE: return ARM_NE;
22732 case EQ: return ARM_EQ;
22733 case ORDERED: return ARM_VC;
22734 case UNORDERED: return ARM_VS;
22735 case UNLT: return ARM_LT;
22736 case UNLE: return ARM_LE;
22737 case UNGT: return ARM_HI;
22738 case UNGE: return ARM_PL;
22739 /* UNEQ and LTGT do not have a representation. */
22740 case UNEQ: /* Fall through. */
22741 case LTGT: /* Fall through. */
22742 default: return ARM_NV;
22745 case CC_SWPmode:
22746 switch (comp_code)
22748 case NE: return ARM_NE;
22749 case EQ: return ARM_EQ;
22750 case GE: return ARM_LE;
22751 case GT: return ARM_LT;
22752 case LE: return ARM_GE;
22753 case LT: return ARM_GT;
22754 case GEU: return ARM_LS;
22755 case GTU: return ARM_CC;
22756 case LEU: return ARM_CS;
22757 case LTU: return ARM_HI;
22758 default: return ARM_NV;
22761 case CC_Cmode:
22762 switch (comp_code)
22764 case LTU: return ARM_CS;
22765 case GEU: return ARM_CC;
22766 case NE: return ARM_CS;
22767 case EQ: return ARM_CC;
22768 default: return ARM_NV;
22771 case CC_CZmode:
22772 switch (comp_code)
22774 case NE: return ARM_NE;
22775 case EQ: return ARM_EQ;
22776 case GEU: return ARM_CS;
22777 case GTU: return ARM_HI;
22778 case LEU: return ARM_LS;
22779 case LTU: return ARM_CC;
22780 default: return ARM_NV;
22783 case CC_NCVmode:
22784 switch (comp_code)
22786 case GE: return ARM_GE;
22787 case LT: return ARM_LT;
22788 case GEU: return ARM_CS;
22789 case LTU: return ARM_CC;
22790 default: return ARM_NV;
22793 case CC_Vmode:
22794 switch (comp_code)
22796 case NE: return ARM_VS;
22797 case EQ: return ARM_VC;
22798 default: return ARM_NV;
22801 case CCmode:
22802 switch (comp_code)
22804 case NE: return ARM_NE;
22805 case EQ: return ARM_EQ;
22806 case GE: return ARM_GE;
22807 case GT: return ARM_GT;
22808 case LE: return ARM_LE;
22809 case LT: return ARM_LT;
22810 case GEU: return ARM_CS;
22811 case GTU: return ARM_HI;
22812 case LEU: return ARM_LS;
22813 case LTU: return ARM_CC;
22814 default: return ARM_NV;
22817 default: gcc_unreachable ();
22821 /* Like maybe_get_arm_condition_code, but never return ARM_NV. */
22822 static enum arm_cond_code
22823 get_arm_condition_code (rtx comparison)
22825 enum arm_cond_code code = maybe_get_arm_condition_code (comparison);
22826 gcc_assert (code != ARM_NV);
22827 return code;
22830 /* Tell arm_asm_output_opcode to output IT blocks for conditionally executed
22831 instructions. */
22832 void
22833 thumb2_final_prescan_insn (rtx_insn *insn)
22835 rtx_insn *first_insn = insn;
22836 rtx body = PATTERN (insn);
22837 rtx predicate;
22838 enum arm_cond_code code;
22839 int n;
22840 int mask;
22841 int max;
22843 /* max_insns_skipped in the tune was already taken into account in the
22844 cost model of ifcvt pass when generating COND_EXEC insns. At this stage
22845 just emit the IT blocks as we can. It does not make sense to split
22846 the IT blocks. */
22847 max = MAX_INSN_PER_IT_BLOCK;
22849 /* Remove the previous insn from the count of insns to be output. */
22850 if (arm_condexec_count)
22851 arm_condexec_count--;
22853 /* Nothing to do if we are already inside a conditional block. */
22854 if (arm_condexec_count)
22855 return;
22857 if (GET_CODE (body) != COND_EXEC)
22858 return;
22860 /* Conditional jumps are implemented directly. */
22861 if (JUMP_P (insn))
22862 return;
22864 predicate = COND_EXEC_TEST (body);
22865 arm_current_cc = get_arm_condition_code (predicate);
22867 n = get_attr_ce_count (insn);
22868 arm_condexec_count = 1;
22869 arm_condexec_mask = (1 << n) - 1;
22870 arm_condexec_masklen = n;
22871 /* See if subsequent instructions can be combined into the same block. */
22872 for (;;)
22874 insn = next_nonnote_insn (insn);
22876 /* Jumping into the middle of an IT block is illegal, so a label or
22877 barrier terminates the block. */
22878 if (!NONJUMP_INSN_P (insn) && !JUMP_P (insn))
22879 break;
22881 body = PATTERN (insn);
22882 /* USE and CLOBBER aren't really insns, so just skip them. */
22883 if (GET_CODE (body) == USE
22884 || GET_CODE (body) == CLOBBER)
22885 continue;
22887 /* ??? Recognize conditional jumps, and combine them with IT blocks. */
22888 if (GET_CODE (body) != COND_EXEC)
22889 break;
22890 /* Maximum number of conditionally executed instructions in a block. */
22891 n = get_attr_ce_count (insn);
22892 if (arm_condexec_masklen + n > max)
22893 break;
22895 predicate = COND_EXEC_TEST (body);
22896 code = get_arm_condition_code (predicate);
22897 mask = (1 << n) - 1;
22898 if (arm_current_cc == code)
22899 arm_condexec_mask |= (mask << arm_condexec_masklen);
22900 else if (arm_current_cc != ARM_INVERSE_CONDITION_CODE(code))
22901 break;
22903 arm_condexec_count++;
22904 arm_condexec_masklen += n;
22906 /* A jump must be the last instruction in a conditional block. */
22907 if (JUMP_P (insn))
22908 break;
22910 /* Restore recog_data (getting the attributes of other insns can
22911 destroy this array, but final.c assumes that it remains intact
22912 across this call). */
22913 extract_constrain_insn_cached (first_insn);
22916 void
22917 arm_final_prescan_insn (rtx_insn *insn)
22919 /* BODY will hold the body of INSN. */
22920 rtx body = PATTERN (insn);
22922 /* This will be 1 if trying to repeat the trick, and things need to be
22923 reversed if it appears to fail. */
22924 int reverse = 0;
22926 /* If we start with a return insn, we only succeed if we find another one. */
22927 int seeking_return = 0;
22928 enum rtx_code return_code = UNKNOWN;
22930 /* START_INSN will hold the insn from where we start looking. This is the
22931 first insn after the following code_label if REVERSE is true. */
22932 rtx_insn *start_insn = insn;
22934 /* If in state 4, check if the target branch is reached, in order to
22935 change back to state 0. */
22936 if (arm_ccfsm_state == 4)
22938 if (insn == arm_target_insn)
22940 arm_target_insn = NULL;
22941 arm_ccfsm_state = 0;
22943 return;
22946 /* If in state 3, it is possible to repeat the trick, if this insn is an
22947 unconditional branch to a label, and immediately following this branch
22948 is the previous target label which is only used once, and the label this
22949 branch jumps to is not too far off. */
22950 if (arm_ccfsm_state == 3)
22952 if (simplejump_p (insn))
22954 start_insn = next_nonnote_insn (start_insn);
22955 if (BARRIER_P (start_insn))
22957 /* XXX Isn't this always a barrier? */
22958 start_insn = next_nonnote_insn (start_insn);
22960 if (LABEL_P (start_insn)
22961 && CODE_LABEL_NUMBER (start_insn) == arm_target_label
22962 && LABEL_NUSES (start_insn) == 1)
22963 reverse = TRUE;
22964 else
22965 return;
22967 else if (ANY_RETURN_P (body))
22969 start_insn = next_nonnote_insn (start_insn);
22970 if (BARRIER_P (start_insn))
22971 start_insn = next_nonnote_insn (start_insn);
22972 if (LABEL_P (start_insn)
22973 && CODE_LABEL_NUMBER (start_insn) == arm_target_label
22974 && LABEL_NUSES (start_insn) == 1)
22976 reverse = TRUE;
22977 seeking_return = 1;
22978 return_code = GET_CODE (body);
22980 else
22981 return;
22983 else
22984 return;
22987 gcc_assert (!arm_ccfsm_state || reverse);
22988 if (!JUMP_P (insn))
22989 return;
22991 /* This jump might be paralleled with a clobber of the condition codes;
22992 the jump should always come first. */
22993 if (GET_CODE (body) == PARALLEL && XVECLEN (body, 0) > 0)
22994 body = XVECEXP (body, 0, 0);
22996 if (reverse
22997 || (GET_CODE (body) == SET && GET_CODE (SET_DEST (body)) == PC
22998 && GET_CODE (SET_SRC (body)) == IF_THEN_ELSE))
23000 int insns_skipped;
23001 int fail = FALSE, succeed = FALSE;
23002 /* Flag which part of the IF_THEN_ELSE is the LABEL_REF. */
23003 int then_not_else = TRUE;
23004 rtx_insn *this_insn = start_insn;
23005 rtx label = 0;
23007 /* Register the insn jumped to. */
23008 if (reverse)
23010 if (!seeking_return)
23011 label = XEXP (SET_SRC (body), 0);
23013 else if (GET_CODE (XEXP (SET_SRC (body), 1)) == LABEL_REF)
23014 label = XEXP (XEXP (SET_SRC (body), 1), 0);
23015 else if (GET_CODE (XEXP (SET_SRC (body), 2)) == LABEL_REF)
23017 label = XEXP (XEXP (SET_SRC (body), 2), 0);
23018 then_not_else = FALSE;
23020 else if (ANY_RETURN_P (XEXP (SET_SRC (body), 1)))
23022 seeking_return = 1;
23023 return_code = GET_CODE (XEXP (SET_SRC (body), 1));
23025 else if (ANY_RETURN_P (XEXP (SET_SRC (body), 2)))
23027 seeking_return = 1;
23028 return_code = GET_CODE (XEXP (SET_SRC (body), 2));
23029 then_not_else = FALSE;
23031 else
23032 gcc_unreachable ();
23034 /* See how many insns this branch skips, and what kind of insns. If all
23035 insns are okay, and the label or unconditional branch to the same
23036 label is not too far away, succeed. */
23037 for (insns_skipped = 0;
23038 !fail && !succeed && insns_skipped++ < max_insns_skipped;)
23040 rtx scanbody;
23042 this_insn = next_nonnote_insn (this_insn);
23043 if (!this_insn)
23044 break;
23046 switch (GET_CODE (this_insn))
23048 case CODE_LABEL:
23049 /* Succeed if it is the target label, otherwise fail since
23050 control falls in from somewhere else. */
23051 if (this_insn == label)
23053 arm_ccfsm_state = 1;
23054 succeed = TRUE;
23056 else
23057 fail = TRUE;
23058 break;
23060 case BARRIER:
23061 /* Succeed if the following insn is the target label.
23062 Otherwise fail.
23063 If return insns are used then the last insn in a function
23064 will be a barrier. */
23065 this_insn = next_nonnote_insn (this_insn);
23066 if (this_insn && this_insn == label)
23068 arm_ccfsm_state = 1;
23069 succeed = TRUE;
23071 else
23072 fail = TRUE;
23073 break;
23075 case CALL_INSN:
23076 /* The AAPCS says that conditional calls should not be
23077 used since they make interworking inefficient (the
23078 linker can't transform BL<cond> into BLX). That's
23079 only a problem if the machine has BLX. */
23080 if (arm_arch5)
23082 fail = TRUE;
23083 break;
23086 /* Succeed if the following insn is the target label, or
23087 if the following two insns are a barrier and the
23088 target label. */
23089 this_insn = next_nonnote_insn (this_insn);
23090 if (this_insn && BARRIER_P (this_insn))
23091 this_insn = next_nonnote_insn (this_insn);
23093 if (this_insn && this_insn == label
23094 && insns_skipped < max_insns_skipped)
23096 arm_ccfsm_state = 1;
23097 succeed = TRUE;
23099 else
23100 fail = TRUE;
23101 break;
23103 case JUMP_INSN:
23104 /* If this is an unconditional branch to the same label, succeed.
23105 If it is to another label, do nothing. If it is conditional,
23106 fail. */
23107 /* XXX Probably, the tests for SET and the PC are
23108 unnecessary. */
23110 scanbody = PATTERN (this_insn);
23111 if (GET_CODE (scanbody) == SET
23112 && GET_CODE (SET_DEST (scanbody)) == PC)
23114 if (GET_CODE (SET_SRC (scanbody)) == LABEL_REF
23115 && XEXP (SET_SRC (scanbody), 0) == label && !reverse)
23117 arm_ccfsm_state = 2;
23118 succeed = TRUE;
23120 else if (GET_CODE (SET_SRC (scanbody)) == IF_THEN_ELSE)
23121 fail = TRUE;
23123 /* Fail if a conditional return is undesirable (e.g. on a
23124 StrongARM), but still allow this if optimizing for size. */
23125 else if (GET_CODE (scanbody) == return_code
23126 && !use_return_insn (TRUE, NULL)
23127 && !optimize_size)
23128 fail = TRUE;
23129 else if (GET_CODE (scanbody) == return_code)
23131 arm_ccfsm_state = 2;
23132 succeed = TRUE;
23134 else if (GET_CODE (scanbody) == PARALLEL)
23136 switch (get_attr_conds (this_insn))
23138 case CONDS_NOCOND:
23139 break;
23140 default:
23141 fail = TRUE;
23142 break;
23145 else
23146 fail = TRUE; /* Unrecognized jump (e.g. epilogue). */
23148 break;
23150 case INSN:
23151 /* Instructions using or affecting the condition codes make it
23152 fail. */
23153 scanbody = PATTERN (this_insn);
23154 if (!(GET_CODE (scanbody) == SET
23155 || GET_CODE (scanbody) == PARALLEL)
23156 || get_attr_conds (this_insn) != CONDS_NOCOND)
23157 fail = TRUE;
23158 break;
23160 default:
23161 break;
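/* If the branch can be converted, record how far conditional execution
   extends: either up to the target label (arm_target_label) or up to a
   particular insn (arm_target_insn).  */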
23164 if (succeed)
23166 if ((!seeking_return) && (arm_ccfsm_state == 1 || reverse))
23167 arm_target_label = CODE_LABEL_NUMBER (label);
23168 else
23170 gcc_assert (seeking_return || arm_ccfsm_state == 2);
23172 while (this_insn && GET_CODE (PATTERN (this_insn)) == USE)
23174 this_insn = next_nonnote_insn (this_insn);
23175 gcc_assert (!this_insn
23176 || (!BARRIER_P (this_insn)
23177 && !LABEL_P (this_insn)));
23179 if (!this_insn)
23181 /* Oh, dear! We ran off the end.  Give up. */
23182 extract_constrain_insn_cached (insn);
23183 arm_ccfsm_state = 0;
23184 arm_target_insn = NULL;
23185 return;
23187 arm_target_insn = this_insn;
23190 /* If REVERSE is true, ARM_CURRENT_CC needs to be inverted from
23191 what it was. */
23192 if (!reverse)
23193 arm_current_cc = get_arm_condition_code (XEXP (SET_SRC (body), 0));
23195 if (reverse || then_not_else)
23196 arm_current_cc = ARM_INVERSE_CONDITION_CODE (arm_current_cc);
23199 /* Restore recog_data (getting the attributes of other insns can
23200 destroy this array, but final.c assumes that it remains intact
23201 across this call). */
23202 extract_constrain_insn_cached (insn);
23206 /* Output IT instructions. */
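/* Emit the IT/ITT/ITE... prefix derived from arm_condexec_mask (one 't' or
   'e' letter per conditional instruction) before the first instruction of
   each new conditional block, then clear the mask.  */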
23207 void
23208 thumb2_asm_output_opcode (FILE * stream)
23210 char buff[5];
23211 int n;
23213 if (arm_condexec_mask)
23215 for (n = 0; n < arm_condexec_masklen; n++)
23216 buff[n] = (arm_condexec_mask & (1 << n)) ? 't' : 'e';
23217 buff[n] = 0;
23218 asm_fprintf(stream, "i%s\t%s\n\t", buff,
23219 arm_condition_codes[arm_current_cc]);
23220 arm_condexec_mask = 0;
23224 /* Returns true if REGNO is a valid register
23225 for holding a quantity of type MODE. */
23226 int
23227 arm_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
23229 if (GET_MODE_CLASS (mode) == MODE_CC)
23230 return (regno == CC_REGNUM
23231 || (TARGET_HARD_FLOAT
23232 && regno == VFPCC_REGNUM));
23234 if (regno == CC_REGNUM && GET_MODE_CLASS (mode) != MODE_CC)
23235 return false;
23237 if (TARGET_THUMB1)
23238 /* For the Thumb we only allow values bigger than SImode in
23239 registers 0 - 6, so that there is always a second low
23240 register available to hold the upper part of the value.
23241 We probably ought to ensure that the register is the
23242 start of an even numbered register pair. */
23243 return (ARM_NUM_REGS (mode) < 2) || (regno < LAST_LO_REGNUM);
23245 if (TARGET_HARD_FLOAT && IS_VFP_REGNUM (regno))
23247 if (mode == SFmode || mode == SImode)
23248 return VFP_REGNO_OK_FOR_SINGLE (regno);
23250 if (mode == DFmode)
23251 return VFP_REGNO_OK_FOR_DOUBLE (regno);
23253 if (mode == HFmode)
23254 return VFP_REGNO_OK_FOR_SINGLE (regno);
23256 /* VFP registers can hold HImode values. */
23257 if (mode == HImode)
23258 return VFP_REGNO_OK_FOR_SINGLE (regno);
23260 if (TARGET_NEON)
23261 return (VALID_NEON_DREG_MODE (mode) && VFP_REGNO_OK_FOR_DOUBLE (regno))
23262 || (VALID_NEON_QREG_MODE (mode)
23263 && NEON_REGNO_OK_FOR_QUAD (regno))
23264 || (mode == TImode && NEON_REGNO_OK_FOR_NREGS (regno, 2))
23265 || (mode == EImode && NEON_REGNO_OK_FOR_NREGS (regno, 3))
23266 || (mode == OImode && NEON_REGNO_OK_FOR_NREGS (regno, 4))
23267 || (mode == CImode && NEON_REGNO_OK_FOR_NREGS (regno, 6))
23268 || (mode == XImode && NEON_REGNO_OK_FOR_NREGS (regno, 8));
23270 return FALSE;
23273 if (TARGET_REALLY_IWMMXT)
23275 if (IS_IWMMXT_GR_REGNUM (regno))
23276 return mode == SImode;
23278 if (IS_IWMMXT_REGNUM (regno))
23279 return VALID_IWMMXT_REG_MODE (mode);
23282 /* We allow almost any value to be stored in the general registers.
23283 Restrict doubleword quantities to even register pairs in ARM state
23284 so that we can use ldrd. Do not allow very large Neon structure
23285 opaque modes in general registers; they would use too many. */
23286 if (regno <= LAST_ARM_REGNUM)
23288 if (ARM_NUM_REGS (mode) > 4)
23289 return FALSE;
23291 if (TARGET_THUMB2)
23292 return TRUE;
23294 return !(TARGET_LDRD && GET_MODE_SIZE (mode) > 4 && (regno & 1) != 0);
23297 if (regno == FRAME_POINTER_REGNUM
23298 || regno == ARG_POINTER_REGNUM)
23299 /* We only allow integers in the fake hard registers. */
23300 return GET_MODE_CLASS (mode) == MODE_INT;
23302 return FALSE;
23305 /* Implement MODES_TIEABLE_P. */
23307 bool
23308 arm_modes_tieable_p (machine_mode mode1, machine_mode mode2)
23310 if (GET_MODE_CLASS (mode1) == GET_MODE_CLASS (mode2))
23311 return true;
23313 /* We specifically want to allow elements of "structure" modes to
23314 be tieable to the structure. This more general condition allows
23315 other rarer situations too. */
23316 if (TARGET_NEON
23317 && (VALID_NEON_DREG_MODE (mode1)
23318 || VALID_NEON_QREG_MODE (mode1)
23319 || VALID_NEON_STRUCT_MODE (mode1))
23320 && (VALID_NEON_DREG_MODE (mode2)
23321 || VALID_NEON_QREG_MODE (mode2)
23322 || VALID_NEON_STRUCT_MODE (mode2)))
23323 return true;
23325 return false;
23328 /* For efficiency and historical reasons LO_REGS, HI_REGS and CC_REGS are
23329 not used in arm mode. */
23331 enum reg_class
23332 arm_regno_class (int regno)
23334 if (regno == PC_REGNUM)
23335 return NO_REGS;
23337 if (TARGET_THUMB1)
23339 if (regno == STACK_POINTER_REGNUM)
23340 return STACK_REG;
23341 if (regno == CC_REGNUM)
23342 return CC_REG;
23343 if (regno < 8)
23344 return LO_REGS;
23345 return HI_REGS;
23348 if (TARGET_THUMB2 && regno < 8)
23349 return LO_REGS;
23351 if ( regno <= LAST_ARM_REGNUM
23352 || regno == FRAME_POINTER_REGNUM
23353 || regno == ARG_POINTER_REGNUM)
23354 return TARGET_THUMB2 ? HI_REGS : GENERAL_REGS;
23356 if (regno == CC_REGNUM || regno == VFPCC_REGNUM)
23357 return TARGET_THUMB2 ? CC_REG : NO_REGS;
23359 if (IS_VFP_REGNUM (regno))
23361 if (regno <= D7_VFP_REGNUM)
23362 return VFP_D0_D7_REGS;
23363 else if (regno <= LAST_LO_VFP_REGNUM)
23364 return VFP_LO_REGS;
23365 else
23366 return VFP_HI_REGS;
23369 if (IS_IWMMXT_REGNUM (regno))
23370 return IWMMXT_REGS;
23372 if (IS_IWMMXT_GR_REGNUM (regno))
23373 return IWMMXT_GR_REGS;
23375 return NO_REGS;
23378 /* Handle a special case when computing the offset
23379 of an argument from the frame pointer. */
23380 int
23381 arm_debugger_arg_offset (int value, rtx addr)
23383 rtx_insn *insn;
23385 /* We are only interested if dbxout_parms() failed to compute the offset. */
23386 if (value != 0)
23387 return 0;
23389 /* We can only cope with the case where the address is held in a register. */
23390 if (!REG_P (addr))
23391 return 0;
23393 /* If we are using the frame pointer to point at the argument, then
23394 an offset of 0 is correct. */
23395 if (REGNO (addr) == (unsigned) HARD_FRAME_POINTER_REGNUM)
23396 return 0;
23398 /* If we are using the stack pointer to point at the
23399 argument, then an offset of 0 is correct. */
23400 /* ??? Check this is consistent with thumb2 frame layout. */
23401 if ((TARGET_THUMB || !frame_pointer_needed)
23402 && REGNO (addr) == SP_REGNUM)
23403 return 0;
23405 /* Oh dear. The argument is pointed to by a register rather
23406 than being held in a register, or being stored at a known
23407 offset from the frame pointer. Since GDB only understands
23408 those two kinds of argument we must translate the address
23409 held in the register into an offset from the frame pointer.
23410 We do this by searching through the insns for the function
23411 looking to see where this register gets its value. If the
23412 register is initialized from the frame pointer plus an offset
23413 then we are in luck and we can continue, otherwise we give up.
23415 This code is exercised by producing debugging information
23416 for a function with arguments like this:
23418 double func (double a, double b, int c, double d) {return d;}
23420 Without this code the stab for parameter 'd' will be set to
23421 an offset of 0 from the frame pointer, rather than 8. */
23423 /* The if() statement says:
23425 If the insn is a normal instruction
23426 and if the insn is setting the value in a register
23427 and if the register being set is the register holding the address of the argument
23428 and if the address is computed by an addition
23429 that involves adding to a register
23430 which is the frame pointer
23431 a constant integer
23433 then... */
23435 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
23437 if ( NONJUMP_INSN_P (insn)
23438 && GET_CODE (PATTERN (insn)) == SET
23439 && REGNO (XEXP (PATTERN (insn), 0)) == REGNO (addr)
23440 && GET_CODE (XEXP (PATTERN (insn), 1)) == PLUS
23441 && REG_P (XEXP (XEXP (PATTERN (insn), 1), 0))
23442 && REGNO (XEXP (XEXP (PATTERN (insn), 1), 0)) == (unsigned) HARD_FRAME_POINTER_REGNUM
23443 && CONST_INT_P (XEXP (XEXP (PATTERN (insn), 1), 1))
23446 value = INTVAL (XEXP (XEXP (PATTERN (insn), 1), 1));
23448 break;
23452 if (value == 0)
23454 debug_rtx (addr);
23455 warning (0, "unable to compute real location of stacked parameter");
23456 value = 8; /* XXX magic hack */
23459 return value;
23462 /* Implement TARGET_PROMOTED_TYPE. */
23464 static tree
23465 arm_promoted_type (const_tree t)
23467 if (SCALAR_FLOAT_TYPE_P (t)
23468 && TYPE_PRECISION (t) == 16
23469 && TYPE_MAIN_VARIANT (t) == arm_fp16_type_node)
23470 return float_type_node;
23471 return NULL_TREE;
23474 /* Implement TARGET_SCALAR_MODE_SUPPORTED_P.
23475 This simply adds HFmode as a supported mode; even though we don't
23476 implement arithmetic on this type directly, it's supported by
23477 optabs conversions, much the way the double-word arithmetic is
23478 special-cased in the default hook. */
23480 static bool
23481 arm_scalar_mode_supported_p (machine_mode mode)
23483 if (mode == HFmode)
23484 return (arm_fp16_format != ARM_FP16_FORMAT_NONE);
23485 else if (ALL_FIXED_POINT_MODE_P (mode))
23486 return true;
23487 else
23488 return default_scalar_mode_supported_p (mode);
23491 /* Set the value of FLT_EVAL_METHOD.
23492 ISO/IEC TS 18661-3 defines two values that we'd like to make use of:
23494 0: evaluate all operations and constants, whose semantic type has at
23495 most the range and precision of type float, to the range and
23496 precision of float; evaluate all other operations and constants to
23497 the range and precision of the semantic type;
23499 N, where _FloatN is a supported interchange floating type
23500 evaluate all operations and constants, whose semantic type has at
23501 most the range and precision of _FloatN type, to the range and
23502 precision of the _FloatN type; evaluate all other operations and
23503 constants to the range and precision of the semantic type;
23505 If we have the ARMv8.2-A extensions then we support _Float16 in native
23506 precision, so we should set this to 16. Otherwise, we support the type,
23507 but want to evaluate expressions in float precision, so set this to
23508 0. */
23510 static enum flt_eval_method
23511 arm_excess_precision (enum excess_precision_type type)
23513 switch (type)
23515 case EXCESS_PRECISION_TYPE_FAST:
23516 case EXCESS_PRECISION_TYPE_STANDARD:
23517 /* We can calculate either in 16-bit range and precision or
23518 32-bit range and precision. Make that decision based on whether
23519 we have native support for the ARMv8.2-A 16-bit floating-point
23520 instructions or not. */
23521 return (TARGET_VFP_FP16INST
23522 ? FLT_EVAL_METHOD_PROMOTE_TO_FLOAT16
23523 : FLT_EVAL_METHOD_PROMOTE_TO_FLOAT);
23524 case EXCESS_PRECISION_TYPE_IMPLICIT:
23525 return FLT_EVAL_METHOD_PROMOTE_TO_FLOAT16;
23526 default:
23527 gcc_unreachable ();
23529 return FLT_EVAL_METHOD_UNPREDICTABLE;
23533 /* Implement TARGET_FLOATN_MODE. Make very sure that we don't provide
23534 _Float16 if we are using anything other than ieee format for 16-bit
23535 floating point. Otherwise, punt to the default implementation. */
23536 static machine_mode
23537 arm_floatn_mode (int n, bool extended)
23539 if (!extended && n == 16)
23540 return arm_fp16_format == ARM_FP16_FORMAT_IEEE ? HFmode : VOIDmode;
23542 return default_floatn_mode (n, extended);
23546 /* Set up OPERANDS for a register copy from SRC to DEST, taking care
23547 not to early-clobber SRC registers in the process.
23549 We assume that the operands described by SRC and DEST represent a
23550 decomposed copy of OPERANDS[1] into OPERANDS[0]. COUNT is the
23551 number of components into which the copy has been decomposed. */
23552 void
23553 neon_disambiguate_copy (rtx *operands, rtx *dest, rtx *src, unsigned int count)
23555 unsigned int i;
23557 if (!reg_overlap_mentioned_p (operands[0], operands[1])
23558 || REGNO (operands[0]) < REGNO (operands[1]))
23560 for (i = 0; i < count; i++)
23562 operands[2 * i] = dest[i];
23563 operands[2 * i + 1] = src[i];
23566 else
23568 for (i = 0; i < count; i++)
23570 operands[2 * i] = dest[count - i - 1];
23571 operands[2 * i + 1] = src[count - i - 1];
23576 /* Split operands into moves from op[1] + op[2] into op[0]. */
23578 void
23579 neon_split_vcombine (rtx operands[3])
23581 unsigned int dest = REGNO (operands[0]);
23582 unsigned int src1 = REGNO (operands[1]);
23583 unsigned int src2 = REGNO (operands[2]);
23584 machine_mode halfmode = GET_MODE (operands[1]);
23585 unsigned int halfregs = HARD_REGNO_NREGS (src1, halfmode);
23586 rtx destlo, desthi;
23588 if (src1 == dest && src2 == dest + halfregs)
23590 /* No-op move. Can't split to nothing; emit something. */
23591 emit_note (NOTE_INSN_DELETED);
23592 return;
23595 /* Preserve register attributes for variable tracking. */
23596 destlo = gen_rtx_REG_offset (operands[0], halfmode, dest, 0);
23597 desthi = gen_rtx_REG_offset (operands[0], halfmode, dest + halfregs,
23598 GET_MODE_SIZE (halfmode));
23600 /* Special case of reversed high/low parts. Use VSWP. */
23601 if (src2 == dest && src1 == dest + halfregs)
23603 rtx x = gen_rtx_SET (destlo, operands[1]);
23604 rtx y = gen_rtx_SET (desthi, operands[2]);
23605 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, x, y)));
23606 return;
23609 if (!reg_overlap_mentioned_p (operands[2], destlo))
23611 /* Try to avoid unnecessary moves if part of the result
23612 is in the right place already. */
23613 if (src1 != dest)
23614 emit_move_insn (destlo, operands[1]);
23615 if (src2 != dest + halfregs)
23616 emit_move_insn (desthi, operands[2]);
23618 else
23620 if (src2 != dest + halfregs)
23621 emit_move_insn (desthi, operands[2]);
23622 if (src1 != dest)
23623 emit_move_insn (destlo, operands[1]);
23627 /* Return the number (counting from 0) of
23628 the least significant set bit in MASK. */
23630 inline static int
23631 number_of_first_bit_set (unsigned mask)
23633 return ctz_hwi (mask);
23636 /* Like emit_multi_reg_push, but allowing for a different set of
23637 registers to be described as saved. MASK is the set of registers
23638 to be saved; REAL_REGS is the set of registers to be described as
23639 saved. If REAL_REGS is 0, only describe the stack adjustment. */
23641 static rtx_insn *
23642 thumb1_emit_multi_reg_push (unsigned long mask, unsigned long real_regs)
23644 unsigned long regno;
23645 rtx par[10], tmp, reg;
23646 rtx_insn *insn;
23647 int i, j;
23649 /* Build the parallel of the registers actually being stored. */
23650 for (i = 0; mask; ++i, mask &= mask - 1)
23652 regno = ctz_hwi (mask);
23653 reg = gen_rtx_REG (SImode, regno);
23655 if (i == 0)
23656 tmp = gen_rtx_UNSPEC (BLKmode, gen_rtvec (1, reg), UNSPEC_PUSH_MULT);
23657 else
23658 tmp = gen_rtx_USE (VOIDmode, reg);
23660 par[i] = tmp;
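/* The first element of the PARALLEL combines the store of the first
   register with the pre-decrement of the stack pointer, so the whole
   PARALLEL represents a single store-multiple push instruction.  */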
23663 tmp = plus_constant (Pmode, stack_pointer_rtx, -4 * i);
23664 tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
23665 tmp = gen_frame_mem (BLKmode, tmp);
23666 tmp = gen_rtx_SET (tmp, par[0]);
23667 par[0] = tmp;
23669 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (i, par));
23670 insn = emit_insn (tmp);
23672 /* Always build the stack adjustment note for unwind info. */
23673 tmp = plus_constant (Pmode, stack_pointer_rtx, -4 * i);
23674 tmp = gen_rtx_SET (stack_pointer_rtx, tmp);
23675 par[0] = tmp;
23677 /* Build the parallel of the registers recorded as saved for unwind. */
23678 for (j = 0; real_regs; ++j, real_regs &= real_regs - 1)
23680 regno = ctz_hwi (real_regs);
23681 reg = gen_rtx_REG (SImode, regno);
23683 tmp = plus_constant (Pmode, stack_pointer_rtx, j * 4);
23684 tmp = gen_frame_mem (SImode, tmp);
23685 tmp = gen_rtx_SET (tmp, reg);
23686 RTX_FRAME_RELATED_P (tmp) = 1;
23687 par[j + 1] = tmp;
23690 if (j == 0)
23691 tmp = par[0];
23692 else
23694 RTX_FRAME_RELATED_P (par[0]) = 1;
23695 tmp = gen_rtx_SEQUENCE (VOIDmode, gen_rtvec_v (j + 1, par));
23698 add_reg_note (insn, REG_FRAME_RELATED_EXPR, tmp);
23700 return insn;
23703 /* Emit code to pop registers from the stack. F is the
23704 assembly file. MASK is the registers to pop. */
23705 static void
23706 thumb_pop (FILE *f, unsigned long mask)
23708 int regno;
23709 int lo_mask = mask & 0xFF;
23710 int pushed_words = 0;
23712 gcc_assert (mask);
23714 if (lo_mask == 0 && (mask & (1 << PC_REGNUM)))
23716 /* Special case. Do not generate a POP PC statement here, do it in
23717 thumb_exit(). */
23718 thumb_exit (f, -1);
23719 return;
23722 fprintf (f, "\tpop\t{");
23724 /* Look at the low registers first. */
23725 for (regno = 0; regno <= LAST_LO_REGNUM; regno++, lo_mask >>= 1)
23727 if (lo_mask & 1)
23729 asm_fprintf (f, "%r", regno);
23731 if ((lo_mask & ~1) != 0)
23732 fprintf (f, ", ");
23734 pushed_words++;
23738 if (mask & (1 << PC_REGNUM))
23740 /* Catch popping the PC. */
23741 if (TARGET_INTERWORK || TARGET_BACKTRACE || crtl->calls_eh_return
23742 || IS_CMSE_ENTRY (arm_current_func_type ()))
23744 /* The PC is never popped directly; instead
23745 it is popped into r3 and then BX is used. */
23746 fprintf (f, "}\n");
23748 thumb_exit (f, -1);
23750 return;
23752 else
23754 if (mask & 0xFF)
23755 fprintf (f, ", ");
23757 asm_fprintf (f, "%r", PC_REGNUM);
23761 fprintf (f, "}\n");
23764 /* Generate code to return from a thumb function.
23765 If 'reg_containing_return_addr' is -1, then the return address is
23766 actually on the stack, at the stack pointer. */
23767 static void
23768 thumb_exit (FILE *f, int reg_containing_return_addr)
23770 unsigned regs_available_for_popping;
23771 unsigned regs_to_pop;
23772 int pops_needed;
23773 unsigned available;
23774 unsigned required;
23775 machine_mode mode;
23776 int size;
23777 int restore_a4 = FALSE;
23779 /* Compute the registers we need to pop. */
23780 regs_to_pop = 0;
23781 pops_needed = 0;
23783 if (reg_containing_return_addr == -1)
23785 regs_to_pop |= 1 << LR_REGNUM;
23786 ++pops_needed;
23789 if (TARGET_BACKTRACE)
23791 /* Restore the (ARM) frame pointer and stack pointer. */
23792 regs_to_pop |= (1 << ARM_HARD_FRAME_POINTER_REGNUM) | (1 << SP_REGNUM);
23793 pops_needed += 2;
23796 /* If there is nothing to pop then just emit the BX instruction and
23797 return. */
23798 if (pops_needed == 0)
23800 if (crtl->calls_eh_return)
23801 asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);
23803 if (IS_CMSE_ENTRY (arm_current_func_type ()))
23805 asm_fprintf (f, "\tmsr\tAPSR_nzcvq, %r\n",
23806 reg_containing_return_addr);
23807 asm_fprintf (f, "\tbxns\t%r\n", reg_containing_return_addr);
23809 else
23810 asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
23811 return;
23813 /* Otherwise if we are not supporting interworking and we have not created
23814 a backtrace structure and the function was not entered in ARM mode then
23815 just pop the return address straight into the PC. */
23816 else if (!TARGET_INTERWORK
23817 && !TARGET_BACKTRACE
23818 && !is_called_in_ARM_mode (current_function_decl)
23819 && !crtl->calls_eh_return
23820 && !IS_CMSE_ENTRY (arm_current_func_type ()))
23822 asm_fprintf (f, "\tpop\t{%r}\n", PC_REGNUM);
23823 return;
23826 /* Find out how many of the (return) argument registers we can corrupt. */
23827 regs_available_for_popping = 0;
23829 /* If returning via __builtin_eh_return, the bottom three registers
23830 all contain information needed for the return. */
23831 if (crtl->calls_eh_return)
23832 size = 12;
23833 else
23835 /* We can deduce the registers used from the function's
23836 return value. This is more reliable than examining
23837 df_regs_ever_live_p () because that will be set if the register is
23838 ever used in the function, not just if the register is used
23839 to hold a return value. */
23841 if (crtl->return_rtx != 0)
23842 mode = GET_MODE (crtl->return_rtx);
23843 else
23844 mode = DECL_MODE (DECL_RESULT (current_function_decl));
23846 size = GET_MODE_SIZE (mode);
23848 if (size == 0)
23850 /* In a void function we can use any argument register.
23851 In a function that returns a structure on the stack
23852 we can use the second and third argument registers. */
23853 if (mode == VOIDmode)
23854 regs_available_for_popping =
23855 (1 << ARG_REGISTER (1))
23856 | (1 << ARG_REGISTER (2))
23857 | (1 << ARG_REGISTER (3));
23858 else
23859 regs_available_for_popping =
23860 (1 << ARG_REGISTER (2))
23861 | (1 << ARG_REGISTER (3));
23863 else if (size <= 4)
23864 regs_available_for_popping =
23865 (1 << ARG_REGISTER (2))
23866 | (1 << ARG_REGISTER (3));
23867 else if (size <= 8)
23868 regs_available_for_popping =
23869 (1 << ARG_REGISTER (3));
23872 /* Match registers to be popped with registers into which we pop them. */
23873 for (available = regs_available_for_popping,
23874 required = regs_to_pop;
23875 required != 0 && available != 0;
23876 available &= ~(available & - available),
23877 required &= ~(required & - required))
23878 -- pops_needed;
23880 /* If we have any popping registers left over, remove them. */
23881 if (available > 0)
23882 regs_available_for_popping &= ~available;
23884 /* Otherwise if we need another popping register we can use
23885 the fourth argument register. */
23886 else if (pops_needed)
23888 /* If we have not found any free argument registers and
23889 reg a4 contains the return address, we must move it. */
23890 if (regs_available_for_popping == 0
23891 && reg_containing_return_addr == LAST_ARG_REGNUM)
23893 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
23894 reg_containing_return_addr = LR_REGNUM;
23896 else if (size > 12)
23898 /* Register a4 is being used to hold part of the return value,
23899 but we have dire need of a free, low register. */
23900 restore_a4 = TRUE;
23902 asm_fprintf (f, "\tmov\t%r, %r\n",IP_REGNUM, LAST_ARG_REGNUM);
23905 if (reg_containing_return_addr != LAST_ARG_REGNUM)
23907 /* The fourth argument register is available. */
23908 regs_available_for_popping |= 1 << LAST_ARG_REGNUM;
23910 --pops_needed;
23914 /* Pop as many registers as we can. */
23915 thumb_pop (f, regs_available_for_popping);
23917 /* Process the registers we popped. */
23918 if (reg_containing_return_addr == -1)
23920 /* The return address was popped into the lowest numbered register. */
23921 regs_to_pop &= ~(1 << LR_REGNUM);
23923 reg_containing_return_addr =
23924 number_of_first_bit_set (regs_available_for_popping);
23926 /* Remove this register from the mask of available registers, so that
23927 the return address will not be corrupted by further pops. */
23928 regs_available_for_popping &= ~(1 << reg_containing_return_addr);
23931 /* If we popped other registers then handle them here. */
23932 if (regs_available_for_popping)
23934 int frame_pointer;
23936 /* Work out which register currently contains the frame pointer. */
23937 frame_pointer = number_of_first_bit_set (regs_available_for_popping);
23939 /* Move it into the correct place. */
23940 asm_fprintf (f, "\tmov\t%r, %r\n",
23941 ARM_HARD_FRAME_POINTER_REGNUM, frame_pointer);
23943 /* (Temporarily) remove it from the mask of popped registers. */
23944 regs_available_for_popping &= ~(1 << frame_pointer);
23945 regs_to_pop &= ~(1 << ARM_HARD_FRAME_POINTER_REGNUM);
23947 if (regs_available_for_popping)
23949 int stack_pointer;
23951 /* We popped the stack pointer as well,
23952 find the register that contains it. */
23953 stack_pointer = number_of_first_bit_set (regs_available_for_popping);
23955 /* Move it into the stack register. */
23956 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, stack_pointer);
23958 /* At this point we have popped all necessary registers, so
23959 do not worry about restoring regs_available_for_popping
23960 to its correct value:
23962 assert (pops_needed == 0)
23963 assert (regs_available_for_popping == (1 << frame_pointer))
23964 assert (regs_to_pop == (1 << STACK_POINTER)) */
23966 else
23968 /* Since we have just moved the popped value into the frame
23969 pointer, the popping register is available for reuse, and
23970 we know that we still have the stack pointer left to pop. */
23971 regs_available_for_popping |= (1 << frame_pointer);
23975 /* If we still have registers left on the stack, but we no longer have
23976 any registers into which we can pop them, then we must move the return
23977 address into the link register and make available the register that
23978 contained it. */
23979 if (regs_available_for_popping == 0 && pops_needed > 0)
23981 regs_available_for_popping |= 1 << reg_containing_return_addr;
23983 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM,
23984 reg_containing_return_addr);
23986 reg_containing_return_addr = LR_REGNUM;
23989 /* If we have registers left on the stack then pop some more.
23990 We know that at most we will want to pop FP and SP. */
23991 if (pops_needed > 0)
23993 int popped_into;
23994 int move_to;
23996 thumb_pop (f, regs_available_for_popping);
23998 /* We have popped either FP or SP.
23999 Move whichever one it is into the correct register. */
24000 popped_into = number_of_first_bit_set (regs_available_for_popping);
24001 move_to = number_of_first_bit_set (regs_to_pop);
24003 asm_fprintf (f, "\tmov\t%r, %r\n", move_to, popped_into);
24005 regs_to_pop &= ~(1 << move_to);
24007 --pops_needed;
24010 /* If we still have not popped everything then we must have only
24011 had one register available to us and we are now popping the SP. */
24012 if (pops_needed > 0)
24014 int popped_into;
24016 thumb_pop (f, regs_available_for_popping);
24018 popped_into = number_of_first_bit_set (regs_available_for_popping);
24020 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, popped_into);
24022 assert (regs_to_pop == (1 << STACK_POINTER))
24023 assert (pops_needed == 1)
24027 /* If necessary restore the a4 register. */
24028 if (restore_a4)
24030 if (reg_containing_return_addr != LR_REGNUM)
24032 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
24033 reg_containing_return_addr = LR_REGNUM;
24036 asm_fprintf (f, "\tmov\t%r, %r\n", LAST_ARG_REGNUM, IP_REGNUM);
24039 if (crtl->calls_eh_return)
24040 asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);
24042 /* Return to caller. */
24043 if (IS_CMSE_ENTRY (arm_current_func_type ()))
24045 /* This is for the cases where LR is not being used to contain the return
24046 address. It may therefore contain information that we might not want
24047 to leak, hence it must be cleared. The value in R0 will never be a
24048 secret at this point, so it is safe to use it, see the clearing code
24049 in 'cmse_nonsecure_entry_clear_before_return'. */
24050 if (reg_containing_return_addr != LR_REGNUM)
24051 asm_fprintf (f, "\tmov\tlr, r0\n");
24053 asm_fprintf (f, "\tmsr\tAPSR_nzcvq, %r\n", reg_containing_return_addr);
24054 asm_fprintf (f, "\tbxns\t%r\n", reg_containing_return_addr);
24056 else
24057 asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
24060 /* Scan INSN just before assembler is output for it.
24061 For Thumb-1, we track the status of the condition codes; this
24062 information is used in the cbranchsi4_insn pattern. */
24063 void
24064 thumb1_final_prescan_insn (rtx_insn *insn)
24066 if (flag_print_asm_name)
24067 asm_fprintf (asm_out_file, "%@ 0x%04x\n",
24068 INSN_ADDRESSES (INSN_UID (insn)));
24069 /* Don't overwrite the previous setter when we get to a cbranch. */
24070 if (INSN_CODE (insn) != CODE_FOR_cbranchsi4_insn)
24072 enum attr_conds conds;
24074 if (cfun->machine->thumb1_cc_insn)
24076 if (modified_in_p (cfun->machine->thumb1_cc_op0, insn)
24077 || modified_in_p (cfun->machine->thumb1_cc_op1, insn))
24078 CC_STATUS_INIT;
24080 conds = get_attr_conds (insn);
24081 if (conds == CONDS_SET)
24083 rtx set = single_set (insn);
24084 cfun->machine->thumb1_cc_insn = insn;
24085 cfun->machine->thumb1_cc_op0 = SET_DEST (set);
24086 cfun->machine->thumb1_cc_op1 = const0_rtx;
24087 cfun->machine->thumb1_cc_mode = CC_NOOVmode;
24088 if (INSN_CODE (insn) == CODE_FOR_thumb1_subsi3_insn)
24090 rtx src1 = XEXP (SET_SRC (set), 1);
24091 if (src1 == const0_rtx)
24092 cfun->machine->thumb1_cc_mode = CCmode;
24094 else if (REG_P (SET_DEST (set)) && REG_P (SET_SRC (set)))
24096 /* Record the src register operand instead of dest because
24097 cprop_hardreg pass propagates src. */
24098 cfun->machine->thumb1_cc_op0 = SET_SRC (set);
24101 else if (conds != CONDS_NOCOND)
24102 cfun->machine->thumb1_cc_insn = NULL_RTX;
24105 /* Check if unexpected far jump is used. */
24106 if (cfun->machine->lr_save_eliminated
24107 && get_attr_far_jump (insn) == FAR_JUMP_YES)
24108 internal_error("Unexpected thumb1 far jump");
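/* Return nonzero if VAL is an 8-bit value shifted left by 0 to 24 bits,
   i.e. a constant that Thumb-1 code can build with a move followed by a
   shift.  */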
24111 int
24112 thumb_shiftable_const (unsigned HOST_WIDE_INT val)
24114 unsigned HOST_WIDE_INT mask = 0xff;
24115 int i;
24117 val = val & (unsigned HOST_WIDE_INT)0xffffffffu;
24118 if (val == 0) /* XXX */
24119 return 0;
24121 for (i = 0; i < 25; i++)
24122 if ((val & (mask << i)) == val)
24123 return 1;
24125 return 0;
24128 /* Returns nonzero if the current function contains,
24129 or might contain a far jump. */
24130 static int
24131 thumb_far_jump_used_p (void)
24133 rtx_insn *insn;
24134 bool far_jump = false;
24135 unsigned int func_size = 0;
24137 /* If we have already decided that far jumps may be used,
24138 do not bother checking again, and always return true even if
24139 it turns out that they are not being used. Once we have made
24140 the decision that far jumps are present (and that hence the link
24141 register will be pushed onto the stack) we cannot go back on it. */
24142 if (cfun->machine->far_jump_used)
24143 return 1;
24145 /* If this function is not being called from the prologue/epilogue
24146 generation code then it must be being called from the
24147 INITIAL_ELIMINATION_OFFSET macro. */
24148 if (!(ARM_DOUBLEWORD_ALIGN || reload_completed))
24150 /* In this case we know that we are being asked about the elimination
24151 of the arg pointer register. If that register is not being used,
24152 then there are no arguments on the stack, and we do not have to
24153 worry that a far jump might force the prologue to push the link
24154 register, changing the stack offsets. In this case we can just
24155 return false, since the presence of far jumps in the function will
24156 not affect stack offsets.
24158 If the arg pointer is live (or if it was live, but has now been
24159 eliminated and so set to dead) then we do have to test to see if
24160 the function might contain a far jump. This test can lead to some
24161 false negatives, since before reload is completed, the length of
24162 branch instructions is not known, so gcc defaults to returning their
24163 longest length, which in turn sets the far jump attribute to true.
24165 A false negative will not result in bad code being generated, but it
24166 will result in a needless push and pop of the link register. We
24167 hope that this does not occur too often.
24169 If we need doubleword stack alignment this could affect the other
24170 elimination offsets so we can't risk getting it wrong. */
24171 if (df_regs_ever_live_p (ARG_POINTER_REGNUM))
24172 cfun->machine->arg_pointer_live = 1;
24173 else if (!cfun->machine->arg_pointer_live)
24174 return 0;
24177 /* We should not change far_jump_used during or after reload, as there is
24178 no chance to change stack frame layout. */
24179 if (reload_in_progress || reload_completed)
24180 return 0;
24182 /* Check to see if the function contains a branch
24183 insn with the far jump attribute set. */
24184 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
24186 if (JUMP_P (insn) && get_attr_far_jump (insn) == FAR_JUMP_YES)
24188 far_jump = true;
24190 func_size += get_attr_length (insn);
24193 /* Attribute far_jump will always be true for thumb1 before
24194 shorten_branch pass. So checking the far_jump attribute before
24195 shorten_branch is not very useful.
24197 The following heuristic tries to estimate more accurately whether a far
24198 jump may finally be used. The heuristic is very conservative, as there is
24199 no chance to roll back the decision not to use a far jump.
24201 Thumb1 long branch offset is -2048 to 2046. The worst case is each
24202 2-byte insn is associated with a 4 byte constant pool. Using
24203 function size 2048/3 as the threshold is conservative enough. */
24204 if (far_jump)
24206 if ((func_size * 3) >= 2048)
24208 /* Record the fact that we have decided that
24209 the function does use far jumps. */
24210 cfun->machine->far_jump_used = 1;
24211 return 1;
24215 return 0;
24218 /* Return nonzero if FUNC must be entered in ARM mode. */
24219 static bool
24220 is_called_in_ARM_mode (tree func)
24222 gcc_assert (TREE_CODE (func) == FUNCTION_DECL);
24224 /* Ignore the problem about functions whose address is taken. */
24225 if (TARGET_CALLEE_INTERWORKING && TREE_PUBLIC (func))
24226 return true;
24228 #ifdef ARM_PE
24229 return lookup_attribute ("interfacearm", DECL_ATTRIBUTES (func)) != NULL_TREE;
24230 #else
24231 return false;
24232 #endif
24235 /* Given the stack offsets and register mask in OFFSETS, decide how
24236 many additional registers to push instead of subtracting a constant
24237 from SP. For epilogues the principle is the same except we use pop.
24238 FOR_PROLOGUE indicates which we're generating. */
24239 static int
24240 thumb1_extra_regs_pushed (arm_stack_offsets *offsets, bool for_prologue)
24242 HOST_WIDE_INT amount;
24243 unsigned long live_regs_mask = offsets->saved_regs_mask;
24244 /* Extract a mask of the ones we can give to the Thumb's push/pop
24245 instruction. */
24246 unsigned long l_mask = live_regs_mask & (for_prologue ? 0x40ff : 0xff);
24247 /* Then count how many other high registers will need to be pushed. */
24248 unsigned long high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
24249 int n_free, reg_base, size;
24251 if (!for_prologue && frame_pointer_needed)
24252 amount = offsets->locals_base - offsets->saved_regs;
24253 else
24254 amount = offsets->outgoing_args - offsets->saved_regs;
24256 /* If the stack frame size is 512 exactly, we can save one load
24257 instruction, which should make this a win even when optimizing
24258 for speed. */
24259 if (!optimize_size && amount != 512)
24260 return 0;
24262 /* Can't do this if there are high registers to push. */
24263 if (high_regs_pushed != 0)
24264 return 0;
24266 /* Shouldn't do it in the prologue if no registers would normally
24267 be pushed at all. In the epilogue, also allow it if we'll have
24268 a pop insn for the PC. */
24269 if (l_mask == 0
24270 && (for_prologue
24271 || TARGET_BACKTRACE
24272 || (live_regs_mask & 1 << LR_REGNUM) == 0
24273 || TARGET_INTERWORK
24274 || crtl->args.pretend_args_size != 0))
24275 return 0;
24277 /* Don't do this if thumb_expand_prologue wants to emit instructions
24278 between the push and the stack frame allocation. */
24279 if (for_prologue
24280 && ((flag_pic && arm_pic_register != INVALID_REGNUM)
24281 || (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)))
24282 return 0;
24284 reg_base = 0;
24285 n_free = 0;
24286 if (!for_prologue)
24288 size = arm_size_return_regs ();
24289 reg_base = ARM_NUM_INTS (size);
24290 live_regs_mask >>= reg_base;
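/* Count how many low registers, starting at REG_BASE, are not live (and,
   for the epilogue, are call-clobbered) and are therefore free to receive
   extra pushed or popped values.  */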
24293 while (reg_base + n_free < 8 && !(live_regs_mask & 1)
24294 && (for_prologue || call_used_regs[reg_base + n_free]))
24296 live_regs_mask >>= 1;
24297 n_free++;
24300 if (n_free == 0)
24301 return 0;
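/* The stack adjustment is always a whole number of words.  */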
24302 gcc_assert (amount / 4 * 4 == amount);
24304 if (amount >= 512 && (amount - n_free * 4) < 512)
24305 return (amount - 508) / 4;
24306 if (amount <= n_free * 4)
24307 return amount / 4;
24308 return 0;
24311 /* The bits which aren't usefully expanded as rtl. */
24312 const char *
24313 thumb1_unexpanded_epilogue (void)
24315 arm_stack_offsets *offsets;
24316 int regno;
24317 unsigned long live_regs_mask = 0;
24318 int high_regs_pushed = 0;
24319 int extra_pop;
24320 int had_to_push_lr;
24321 int size;
24323 if (cfun->machine->return_used_this_function != 0)
24324 return "";
24326 if (IS_NAKED (arm_current_func_type ()))
24327 return "";
24329 offsets = arm_get_frame_offsets ();
24330 live_regs_mask = offsets->saved_regs_mask;
24331 high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
24333 /* We can deduce the registers used from the function's return value.
24334 This is more reliable than examining df_regs_ever_live_p () because that
24335 will be set if the register is ever used in the function, not just if
24336 the register is used to hold a return value. */
24337 size = arm_size_return_regs ();
24339 extra_pop = thumb1_extra_regs_pushed (offsets, false);
24340 if (extra_pop > 0)
24342 unsigned long extra_mask = (1 << extra_pop) - 1;
24343 live_regs_mask |= extra_mask << ARM_NUM_INTS (size);
24346 /* The prolog may have pushed some high registers to use as
24347 work registers. e.g. the testsuite file:
24348 gcc/testsuite/gcc/gcc.c-torture/execute/complex-2.c
24349 compiles to produce:
24350 push {r4, r5, r6, r7, lr}
24351 mov r7, r9
24352 mov r6, r8
24353 push {r6, r7}
24354 as part of the prolog. We have to undo that pushing here. */
24356 if (high_regs_pushed)
24358 unsigned long mask = live_regs_mask & 0xff;
24359 int next_hi_reg;
24361 /* The available low registers depend on the size of the value we are
24362 returning. */
24363 if (size <= 12)
24364 mask |= 1 << 3;
24365 if (size <= 8)
24366 mask |= 1 << 2;
24368 if (mask == 0)
24369 /* Oh dear! We have no low registers into which we can pop
24370 high registers! */
24371 internal_error
24372 ("no low registers available for popping high registers");
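/* Find the first high register that needs to be restored.  */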
24374 for (next_hi_reg = 8; next_hi_reg < 13; next_hi_reg++)
24375 if (live_regs_mask & (1 << next_hi_reg))
24376 break;
24378 while (high_regs_pushed)
24380 /* Find lo register(s) into which the high register(s) can
24381 be popped. */
24382 for (regno = 0; regno <= LAST_LO_REGNUM; regno++)
24384 if (mask & (1 << regno))
24385 high_regs_pushed--;
24386 if (high_regs_pushed == 0)
24387 break;
24390 mask &= (2 << regno) - 1; /* A noop if regno == 8 */
24392 /* Pop the values into the low register(s). */
24393 thumb_pop (asm_out_file, mask);
24395 /* Move the value(s) into the high registers. */
24396 for (regno = 0; regno <= LAST_LO_REGNUM; regno++)
24398 if (mask & (1 << regno))
24400 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", next_hi_reg,
24401 regno);
24403 for (next_hi_reg++; next_hi_reg < 13; next_hi_reg++)
24404 if (live_regs_mask & (1 << next_hi_reg))
24405 break;
24409 live_regs_mask &= ~0x0f00;
24412 had_to_push_lr = (live_regs_mask & (1 << LR_REGNUM)) != 0;
24413 live_regs_mask &= 0xff;
24415 if (crtl->args.pretend_args_size == 0 || TARGET_BACKTRACE)
24417 /* Pop the return address into the PC. */
24418 if (had_to_push_lr)
24419 live_regs_mask |= 1 << PC_REGNUM;
24421 /* Either no argument registers were pushed or a backtrace
24422 structure was created which includes an adjusted stack
24423 pointer, so just pop everything. */
24424 if (live_regs_mask)
24425 thumb_pop (asm_out_file, live_regs_mask);
24427 /* We have either just popped the return address into the
24428 PC or it was kept in LR for the entire function.
24429 Note that thumb_pop has already called thumb_exit if the
24430 PC was in the list. */
24431 if (!had_to_push_lr)
24432 thumb_exit (asm_out_file, LR_REGNUM);
24434 else
24436 /* Pop everything but the return address. */
24437 if (live_regs_mask)
24438 thumb_pop (asm_out_file, live_regs_mask);
24440 if (had_to_push_lr)
24442 if (size > 12)
24444 /* We have no free low regs, so save one. */
24445 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", IP_REGNUM,
24446 LAST_ARG_REGNUM);
24449 /* Get the return address into a temporary register. */
24450 thumb_pop (asm_out_file, 1 << LAST_ARG_REGNUM);
24452 if (size > 12)
24454 /* Move the return address to lr. */
24455 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LR_REGNUM,
24456 LAST_ARG_REGNUM);
24457 /* Restore the low register. */
24458 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LAST_ARG_REGNUM,
24459 IP_REGNUM);
24460 regno = LR_REGNUM;
24462 else
24463 regno = LAST_ARG_REGNUM;
24465 else
24466 regno = LR_REGNUM;
24468 /* Remove the argument registers that were pushed onto the stack. */
24469 asm_fprintf (asm_out_file, "\tadd\t%r, %r, #%d\n",
24470 SP_REGNUM, SP_REGNUM,
24471 crtl->args.pretend_args_size);
24473 thumb_exit (asm_out_file, regno);
24476 return "";
24479 /* Functions to save and restore machine-specific function data. */
24480 static struct machine_function *
24481 arm_init_machine_status (void)
24483 struct machine_function *machine;
24484 machine = ggc_cleared_alloc<machine_function> ();
24486 #if ARM_FT_UNKNOWN != 0
24487 machine->func_type = ARM_FT_UNKNOWN;
24488 #endif
24489 return machine;
24492 /* Return an RTX indicating where the return address to the
24493 calling function can be found. */
24494 rtx
24495 arm_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
24497 if (count != 0)
24498 return NULL_RTX;
24500 return get_hard_reg_initial_val (Pmode, LR_REGNUM);
24503 /* Do anything needed before RTL is emitted for each function. */
24504 void
24505 arm_init_expanders (void)
24507 /* Arrange to initialize and mark the machine per-function status. */
24508 init_machine_status = arm_init_machine_status;
24510 /* This is to stop the combine pass optimizing away the alignment
24511 adjustment of va_arg. */
24512 /* ??? It is claimed that this should not be necessary. */
24513 if (cfun)
24514 mark_reg_pointer (arg_pointer_rtx, PARM_BOUNDARY);
24517 /* Check that FUNC is called with a different mode. */
24519 bool
24520 arm_change_mode_p (tree func)
24522 if (TREE_CODE (func) != FUNCTION_DECL)
24523 return false;
24525 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (func);
24527 if (!callee_tree)
24528 callee_tree = target_option_default_node;
24530 struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);
24531 int flags = callee_opts->x_target_flags;
24533 return (TARGET_THUMB_P (flags) != TARGET_THUMB);
24536 /* Like arm_compute_initial_elimination_offset. Simpler because there
24537 isn't an ABI specified frame pointer for Thumb. Instead, we set it
24538 to point at the base of the local variables after static stack
24539 space for a function has been allocated. */
24541 HOST_WIDE_INT
24542 thumb_compute_initial_elimination_offset (unsigned int from, unsigned int to)
24544 arm_stack_offsets *offsets;
24546 offsets = arm_get_frame_offsets ();
24548 switch (from)
24550 case ARG_POINTER_REGNUM:
24551 switch (to)
24553 case STACK_POINTER_REGNUM:
24554 return offsets->outgoing_args - offsets->saved_args;
24556 case FRAME_POINTER_REGNUM:
24557 return offsets->soft_frame - offsets->saved_args;
24559 case ARM_HARD_FRAME_POINTER_REGNUM:
24560 return offsets->saved_regs - offsets->saved_args;
24562 case THUMB_HARD_FRAME_POINTER_REGNUM:
24563 return offsets->locals_base - offsets->saved_args;
24565 default:
24566 gcc_unreachable ();
24568 break;
24570 case FRAME_POINTER_REGNUM:
24571 switch (to)
24573 case STACK_POINTER_REGNUM:
24574 return offsets->outgoing_args - offsets->soft_frame;
24576 case ARM_HARD_FRAME_POINTER_REGNUM:
24577 return offsets->saved_regs - offsets->soft_frame;
24579 case THUMB_HARD_FRAME_POINTER_REGNUM:
24580 return offsets->locals_base - offsets->soft_frame;
24582 default:
24583 gcc_unreachable ();
24585 break;
24587 default:
24588 gcc_unreachable ();
24592 /* Generate the function's prologue. */
24594 void
24595 thumb1_expand_prologue (void)
24597 rtx_insn *insn;
24599 HOST_WIDE_INT amount;
24600 HOST_WIDE_INT size;
24601 arm_stack_offsets *offsets;
24602 unsigned long func_type;
24603 int regno;
24604 unsigned long live_regs_mask;
24605 unsigned long l_mask;
24606 unsigned high_regs_pushed = 0;
24607 bool lr_needs_saving;
24609 func_type = arm_current_func_type ();
24611 /* Naked functions don't have prologues. */
24612 if (IS_NAKED (func_type))
24614 if (flag_stack_usage_info)
24615 current_function_static_stack_size = 0;
24616 return;
24619 if (IS_INTERRUPT (func_type))
24621 error ("interrupt Service Routines cannot be coded in Thumb mode");
24622 return;
24625 if (is_called_in_ARM_mode (current_function_decl))
24626 emit_insn (gen_prologue_thumb1_interwork ());
24628 offsets = arm_get_frame_offsets ();
24629 live_regs_mask = offsets->saved_regs_mask;
24630 lr_needs_saving = live_regs_mask & (1 << LR_REGNUM);
24632 /* Extract a mask of the ones we can give to the Thumb's push instruction. */
24633 l_mask = live_regs_mask & 0x40ff;
24634 /* Then count how many other high registers will need to be pushed. */
24635 high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
24637 if (crtl->args.pretend_args_size)
24639 rtx x = GEN_INT (-crtl->args.pretend_args_size);
24641 if (cfun->machine->uses_anonymous_args)
24643 int num_pushes = ARM_NUM_INTS (crtl->args.pretend_args_size);
24644 unsigned long mask;
24646 mask = 1ul << (LAST_ARG_REGNUM + 1);
24647 mask -= 1ul << (LAST_ARG_REGNUM + 1 - num_pushes);
24649 insn = thumb1_emit_multi_reg_push (mask, 0);
24651 else
24653 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
24654 stack_pointer_rtx, x));
24656 RTX_FRAME_RELATED_P (insn) = 1;
24659 if (TARGET_BACKTRACE)
24661 HOST_WIDE_INT offset = 0;
24662 unsigned work_register;
24663 rtx work_reg, x, arm_hfp_rtx;
24665 /* We have been asked to create a stack backtrace structure.
24666 The code looks like this:
24668 0 .align 2
24669 0 func:
24670 0 sub SP, #16 Reserve space for 4 registers.
24671 2 push {R7} Push low registers.
24672 4 add R7, SP, #20 Get the stack pointer before the push.
24673 6 str R7, [SP, #8] Store the stack pointer
24674 (before reserving the space).
24675 8 mov R7, PC Get hold of the start of this code + 12.
24676 10 str R7, [SP, #16] Store it.
24677 12 mov R7, FP Get hold of the current frame pointer.
24678 14 str R7, [SP, #4] Store it.
24679 16 mov R7, LR Get hold of the current return address.
24680 18 str R7, [SP, #12] Store it.
24681 20 add R7, SP, #16 Point at the start of the
24682 backtrace structure.
24683 22 mov FP, R7 Put this value into the frame pointer. */
24685 work_register = thumb_find_work_register (live_regs_mask);
24686 work_reg = gen_rtx_REG (SImode, work_register);
24687 arm_hfp_rtx = gen_rtx_REG (SImode, ARM_HARD_FRAME_POINTER_REGNUM);
24689 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
24690 stack_pointer_rtx, GEN_INT (-16)));
24691 RTX_FRAME_RELATED_P (insn) = 1;
24693 if (l_mask)
24695 insn = thumb1_emit_multi_reg_push (l_mask, l_mask);
24696 RTX_FRAME_RELATED_P (insn) = 1;
24697 lr_needs_saving = false;
24699 offset = bit_count (l_mask) * UNITS_PER_WORD;
24702 x = GEN_INT (offset + 16 + crtl->args.pretend_args_size);
24703 emit_insn (gen_addsi3 (work_reg, stack_pointer_rtx, x));
24705 x = plus_constant (Pmode, stack_pointer_rtx, offset + 4);
24706 x = gen_frame_mem (SImode, x);
24707 emit_move_insn (x, work_reg);
24709 /* Make sure that the instruction fetching the PC is in the right place
24710 to calculate "start of backtrace creation code + 12". */
24711 /* ??? The stores using the common WORK_REG ought to be enough to
24712 prevent the scheduler from doing anything weird. Failing that
24713 we could always move all of the following into an UNSPEC_VOLATILE. */
24714 if (l_mask)
24716 x = gen_rtx_REG (SImode, PC_REGNUM);
24717 emit_move_insn (work_reg, x);
24719 x = plus_constant (Pmode, stack_pointer_rtx, offset + 12);
24720 x = gen_frame_mem (SImode, x);
24721 emit_move_insn (x, work_reg);
24723 emit_move_insn (work_reg, arm_hfp_rtx);
24725 x = plus_constant (Pmode, stack_pointer_rtx, offset);
24726 x = gen_frame_mem (SImode, x);
24727 emit_move_insn (x, work_reg);
24729 else
24731 emit_move_insn (work_reg, arm_hfp_rtx);
24733 x = plus_constant (Pmode, stack_pointer_rtx, offset);
24734 x = gen_frame_mem (SImode, x);
24735 emit_move_insn (x, work_reg);
24737 x = gen_rtx_REG (SImode, PC_REGNUM);
24738 emit_move_insn (work_reg, x);
24740 x = plus_constant (Pmode, stack_pointer_rtx, offset + 12);
24741 x = gen_frame_mem (SImode, x);
24742 emit_move_insn (x, work_reg);
24745 x = gen_rtx_REG (SImode, LR_REGNUM);
24746 emit_move_insn (work_reg, x);
24748 x = plus_constant (Pmode, stack_pointer_rtx, offset + 8);
24749 x = gen_frame_mem (SImode, x);
24750 emit_move_insn (x, work_reg);
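/* Finally, point the ARM frame pointer at the start of the backtrace
   structure that has just been built.  */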
24752 x = GEN_INT (offset + 12);
24753 emit_insn (gen_addsi3 (work_reg, stack_pointer_rtx, x));
24755 emit_move_insn (arm_hfp_rtx, work_reg);
24757 /* Optimization: If we are not pushing any low registers but we are going
24758 to push some high registers then delay our first push. This will just
24759 be a push of LR and we can combine it with the push of the first high
24760 register. */
24761 else if ((l_mask & 0xff) != 0
24762 || (high_regs_pushed == 0 && lr_needs_saving))
24764 unsigned long mask = l_mask;
24765 mask |= (1 << thumb1_extra_regs_pushed (offsets, true)) - 1;
24766 insn = thumb1_emit_multi_reg_push (mask, mask);
24767 RTX_FRAME_RELATED_P (insn) = 1;
24768 lr_needs_saving = false;
24771 if (high_regs_pushed)
24773 unsigned pushable_regs;
24774 unsigned next_hi_reg;
24775 unsigned arg_regs_num = TARGET_AAPCS_BASED ? crtl->args.info.aapcs_ncrn
24776 : crtl->args.info.nregs;
24777 unsigned arg_regs_mask = (1 << arg_regs_num) - 1;
24779 for (next_hi_reg = 12; next_hi_reg > LAST_LO_REGNUM; next_hi_reg--)
24780 if (live_regs_mask & (1 << next_hi_reg))
24781 break;
24783 /* Here we need to mask out registers used for passing arguments,
24784 even if they can be pushed. This is to avoid using them to stash the high
24785 registers; such stashing could clobber the arguments. */
24786 pushable_regs = l_mask & (~arg_regs_mask);
24787 if (lr_needs_saving)
24788 pushable_regs &= ~(1 << LR_REGNUM);
24790 if (pushable_regs == 0)
24791 pushable_regs = 1 << thumb_find_work_register (live_regs_mask);
24793 while (high_regs_pushed > 0)
24795 unsigned long real_regs_mask = 0;
24796 unsigned long push_mask = 0;
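/* Copy as many high registers as possible into free low registers so that
   they can be saved with a single Thumb-1 push instruction.  */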
24798 for (regno = LR_REGNUM; regno >= 0; regno --)
24800 if (pushable_regs & (1 << regno))
24802 emit_move_insn (gen_rtx_REG (SImode, regno),
24803 gen_rtx_REG (SImode, next_hi_reg));
24805 high_regs_pushed --;
24806 real_regs_mask |= (1 << next_hi_reg);
24807 push_mask |= (1 << regno);
24809 if (high_regs_pushed)
24811 for (next_hi_reg --; next_hi_reg > LAST_LO_REGNUM;
24812 next_hi_reg --)
24813 if (live_regs_mask & (1 << next_hi_reg))
24814 break;
24816 else
24817 break;
24821 /* If we had to find a work register and we have not yet
24822 saved the LR then add it to the list of regs to push. */
24823 if (lr_needs_saving)
24825 push_mask |= 1 << LR_REGNUM;
24826 real_regs_mask |= 1 << LR_REGNUM;
24827 lr_needs_saving = false;
24830 insn = thumb1_emit_multi_reg_push (push_mask, real_regs_mask);
24831 RTX_FRAME_RELATED_P (insn) = 1;
24835 /* Load the pic register before setting the frame pointer,
24836 so we can use r7 as a temporary work register. */
24837 if (flag_pic && arm_pic_register != INVALID_REGNUM)
24838 arm_load_pic_register (live_regs_mask);
24840 if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
24841 emit_move_insn (gen_rtx_REG (Pmode, ARM_HARD_FRAME_POINTER_REGNUM),
24842 stack_pointer_rtx);
24844 size = offsets->outgoing_args - offsets->saved_args;
24845 if (flag_stack_usage_info)
24846 current_function_static_stack_size = size;
24848 /* If we have a frame, then do stack checking. FIXME: not implemented. */
24849 if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK && size)
24850 sorry ("-fstack-check=specific for Thumb-1");
24852 amount = offsets->outgoing_args - offsets->saved_regs;
24853 amount -= 4 * thumb1_extra_regs_pushed (offsets, true);
24854 if (amount)
24856 if (amount < 512)
24858 insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
24859 GEN_INT (- amount)));
24860 RTX_FRAME_RELATED_P (insn) = 1;
24862 else
24864 rtx reg, dwarf;
24866 /* The stack decrement is too big for an immediate value in a single
24867 insn. In theory we could issue multiple subtracts, but after
24868 three of them it becomes more space efficient to place the full
24869 value in the constant pool and load into a register. (Also the
24870 ARM debugger really likes to see only one stack decrement per
24871 function). So instead we look for a scratch register into which
24872 we can load the decrement, and then we subtract this from the
24873 stack pointer. Unfortunately on the thumb the only available
24874 scratch registers are the argument registers, and we cannot use
24875 these as they may hold arguments to the function. Instead we
24876 attempt to locate a call preserved register which is used by this
24877 function. If we can find one, then we know that it will have
24878 been pushed at the start of the prologue and so we can corrupt
24879 it now. */
24880 for (regno = LAST_ARG_REGNUM + 1; regno <= LAST_LO_REGNUM; regno++)
24881 if (live_regs_mask & (1 << regno))
24882 break;
24884 gcc_assert(regno <= LAST_LO_REGNUM);
24886 reg = gen_rtx_REG (SImode, regno);
24888 emit_insn (gen_movsi (reg, GEN_INT (- amount)));
24890 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
24891 stack_pointer_rtx, reg));
24893 dwarf = gen_rtx_SET (stack_pointer_rtx,
24894 plus_constant (Pmode, stack_pointer_rtx,
24895 -amount));
24896 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
24897 RTX_FRAME_RELATED_P (insn) = 1;
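/* Illustrative sketch of the large-decrement path above, assuming a
   hypothetical 1024-byte adjustment with r4 live (and therefore already
   saved) in this function:

	ldr	r4, .Lpool	@ load -1024, e.g. from the literal pool
	add	sp, sp, r4

   The REG_FRAME_RELATED_EXPR note records the net "sp = sp - 1024"
   so the unwinder still sees a simple constant adjustment.  */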
24901 if (frame_pointer_needed)
24902 thumb_set_frame_pointer (offsets);
24904 /* If we are profiling, make sure no instructions are scheduled before
24905 the call to mcount. Similarly if the user has requested no
24906 scheduling in the prologue. Similarly if we want non-call exceptions
24907 using the EABI unwinder, to prevent faulting instructions from being
24908 swapped with a stack adjustment. */
24909 if (crtl->profile || !TARGET_SCHED_PROLOG
24910 || (arm_except_unwind_info (&global_options) == UI_TARGET
24911 && cfun->can_throw_non_call_exceptions))
24912 emit_insn (gen_blockage ());
24914 cfun->machine->lr_save_eliminated = !thumb_force_lr_save ();
24915 if (live_regs_mask & 0xff)
24916 cfun->machine->lr_save_eliminated = 0;
24919 /* Clear caller saved registers not used to pass return values and leaked
24920 condition flags before exiting a cmse_nonsecure_entry function. */
24922 void
24923 cmse_nonsecure_entry_clear_before_return (void)
24925 uint64_t to_clear_mask[2];
24926 uint32_t padding_bits_to_clear = 0;
24927 uint32_t * padding_bits_to_clear_ptr = &padding_bits_to_clear;
24928 int regno, maxregno = IP_REGNUM;
24929 tree result_type;
24930 rtx result_rtl;
24932 to_clear_mask[0] = (1ULL << (NUM_ARG_REGS)) - 1;
24933 to_clear_mask[0] |= (1ULL << IP_REGNUM);
24935 /* If we are not dealing with -mfloat-abi=soft we will need to clear VFP
24936 registers. We also check that TARGET_HARD_FLOAT and !TARGET_THUMB1 hold
24937 to make sure the instructions used to clear them are present. */
24938 if (TARGET_HARD_FLOAT && !TARGET_THUMB1)
24940 uint64_t float_mask = (1ULL << (D7_VFP_REGNUM + 1)) - 1;
24941 maxregno = LAST_VFP_REGNUM;
24943 float_mask &= ~((1ULL << FIRST_VFP_REGNUM) - 1);
24944 to_clear_mask[0] |= float_mask;
24946 float_mask = (1ULL << (maxregno - 63)) - 1;
24947 to_clear_mask[1] = float_mask;
24949 /* Make sure we don't clear the two scratch registers used to clear the
24950 relevant FPSCR bits in output_return_instruction. */
24951 emit_use (gen_rtx_REG (SImode, IP_REGNUM));
24952 to_clear_mask[0] &= ~(1ULL << IP_REGNUM);
24953 emit_use (gen_rtx_REG (SImode, 4));
24954 to_clear_mask[0] &= ~(1ULL << 4);
24957 /* If the user has defined registers to be caller saved, these are no longer
24958 restored by the function before returning and must thus be cleared for
24959 security purposes. */
24960 for (regno = NUM_ARG_REGS; regno < LAST_VFP_REGNUM; regno++)
24962 /* We do not touch registers that can be used to pass arguments as per
24963 the AAPCS, since these should never be made callee-saved by user
24964 options. */
24965 if (IN_RANGE (regno, FIRST_VFP_REGNUM, D7_VFP_REGNUM))
24966 continue;
24967 if (IN_RANGE (regno, IP_REGNUM, PC_REGNUM))
24968 continue;
24969 if (call_used_regs[regno])
24970 to_clear_mask[regno / 64] |= (1ULL << (regno % 64));
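/* Worked example of the indexing above: register numbers above 63 land
   in the second mask word, so a register numbered 70 sets bit
   70 % 64 == 6 of to_clear_mask[1], while r12 (IP_REGNUM, register 12)
   sets bit 12 of to_clear_mask[0].  */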
24973 /* Make sure we do not clear the registers used to return the result in. */
24974 result_type = TREE_TYPE (DECL_RESULT (current_function_decl));
24975 if (!VOID_TYPE_P (result_type))
24977 result_rtl = arm_function_value (result_type, current_function_decl, 0);
24979 /* No need to check that we return in registers, because we don't
24980 support returning on the stack yet. */
24981 to_clear_mask[0]
24982 &= ~compute_not_to_clear_mask (result_type, result_rtl, 0,
24983 padding_bits_to_clear_ptr);
24986 if (padding_bits_to_clear != 0)
24988 rtx reg_rtx;
24989 /* Padding bits to clear is not 0, so we know we are dealing with
24990 returning a composite type, which only uses r0. Let's make sure that
24991 r1-r3 are cleared too; we will use r1 as a scratch register. */
24992 gcc_assert ((to_clear_mask[0] & 0xe) == 0xe);
24994 reg_rtx = gen_rtx_REG (SImode, R1_REGNUM);
24996 /* Fill the lower half of the negated padding_bits_to_clear. */
24997 emit_move_insn (reg_rtx,
24998 GEN_INT ((((~padding_bits_to_clear) << 16u) >> 16u)));
25000 /* Also fill the top half of the negated padding_bits_to_clear. */
25001 if (((~padding_bits_to_clear) >> 16) > 0)
25002 emit_insn (gen_rtx_SET (gen_rtx_ZERO_EXTRACT (SImode, reg_rtx,
25003 GEN_INT (16),
25004 GEN_INT (16)),
25005 GEN_INT ((~padding_bits_to_clear) >> 16)));
25007 emit_insn (gen_andsi3 (gen_rtx_REG (SImode, R0_REGNUM),
25008 gen_rtx_REG (SImode, R0_REGNUM),
25009 reg_rtx));
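/* Worked example of the padding clearing above, for a hypothetical
   layout where the top byte of r0 is padding: padding_bits_to_clear is
   0xff000000, so r1 is loaded with ~0xff000000 == 0x00ffffff (lower
   half first, upper half via the 16-bit insert) and the final AND
   clears only the padding bits of r0.  */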
25012 for (regno = R0_REGNUM; regno <= maxregno; regno++)
25014 if (!(to_clear_mask[regno / 64] & (1ULL << (regno % 64))))
25015 continue;
25017 if (IS_VFP_REGNUM (regno))
25019 /* If regno is an even vfp register and its successor is also to
25020 be cleared, use vmov. */
25021 if (TARGET_VFP_DOUBLE
25022 && VFP_REGNO_OK_FOR_DOUBLE (regno)
25023 && to_clear_mask[regno / 64] & (1ULL << ((regno % 64) + 1)))
25025 emit_move_insn (gen_rtx_REG (DFmode, regno),
25026 CONST1_RTX (DFmode));
25027 emit_use (gen_rtx_REG (DFmode, regno));
25028 regno++;
25030 else
25032 emit_move_insn (gen_rtx_REG (SFmode, regno),
25033 CONST1_RTX (SFmode));
25034 emit_use (gen_rtx_REG (SFmode, regno));
25037 else
25039 if (TARGET_THUMB1)
25041 if (regno == R0_REGNUM)
25042 emit_move_insn (gen_rtx_REG (SImode, regno),
25043 const0_rtx);
25044 else
25045 /* R0 has either been cleared before (see the code above) or it
25046 holds a return value; either way it is not secret
25047 information. */
25048 emit_move_insn (gen_rtx_REG (SImode, regno),
25049 gen_rtx_REG (SImode, R0_REGNUM));
25050 emit_use (gen_rtx_REG (SImode, regno));
25052 else
25054 emit_move_insn (gen_rtx_REG (SImode, regno),
25055 gen_rtx_REG (SImode, LR_REGNUM));
25056 emit_use (gen_rtx_REG (SImode, regno));
25062 /* Generate the pattern *pop_multiple_with_stack_update_and_return if a
25063 single POP instruction can be generated. LR should be replaced by PC. All
25064 the checks required are already done by USE_RETURN_INSN (). Hence,
25065 all we really need to check here is whether a single register or
25066 multiple registers are to be popped on return. */
25067 void
25068 thumb2_expand_return (bool simple_return)
25070 int i, num_regs;
25071 unsigned long saved_regs_mask;
25072 arm_stack_offsets *offsets;
25074 offsets = arm_get_frame_offsets ();
25075 saved_regs_mask = offsets->saved_regs_mask;
25077 for (i = 0, num_regs = 0; i <= LAST_ARM_REGNUM; i++)
25078 if (saved_regs_mask & (1 << i))
25079 num_regs++;
25081 if (!simple_return && saved_regs_mask)
25083 /* TODO: Verify that this path is never taken for cmse_nonsecure_entry
25084 functions, or adapt the code to handle it according to the ACLE. This path
25085 should not be reachable for cmse_nonsecure_entry functions, though we
25086 prefer to assert it for now to ensure that future code changes do not
25087 silently change this behavior. */
25088 gcc_assert (!IS_CMSE_ENTRY (arm_current_func_type ()));
25089 if (num_regs == 1)
25091 rtx par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
25092 rtx reg = gen_rtx_REG (SImode, PC_REGNUM);
25093 rtx addr = gen_rtx_MEM (SImode,
25094 gen_rtx_POST_INC (SImode,
25095 stack_pointer_rtx));
25096 set_mem_alias_set (addr, get_frame_alias_set ());
25097 XVECEXP (par, 0, 0) = ret_rtx;
25098 XVECEXP (par, 0, 1) = gen_rtx_SET (reg, addr);
25099 RTX_FRAME_RELATED_P (XVECEXP (par, 0, 1)) = 1;
25100 emit_jump_insn (par);
25102 else
25104 saved_regs_mask &= ~ (1 << LR_REGNUM);
25105 saved_regs_mask |= (1 << PC_REGNUM);
25106 arm_emit_multi_reg_pop (saved_regs_mask);
25109 else
25111 if (IS_CMSE_ENTRY (arm_current_func_type ()))
25112 cmse_nonsecure_entry_clear_before_return ();
25113 emit_jump_insn (simple_return_rtx);
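/* Illustrative output for the two non-simple-return cases above; the
   register sets are hypothetical.  If only LR was saved, the
   single-register path emits roughly

	ldr	pc, [sp], #4

   otherwise LR is replaced by PC in the mask and a single pop both
   restores and returns, e.g.

	pop	{r4, r5, pc}  */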
25117 void
25118 thumb1_expand_epilogue (void)
25120 HOST_WIDE_INT amount;
25121 arm_stack_offsets *offsets;
25122 int regno;
25124 /* Naked functions don't have prologues. */
25125 if (IS_NAKED (arm_current_func_type ()))
25126 return;
25128 offsets = arm_get_frame_offsets ();
25129 amount = offsets->outgoing_args - offsets->saved_regs;
25131 if (frame_pointer_needed)
25133 emit_insn (gen_movsi (stack_pointer_rtx, hard_frame_pointer_rtx));
25134 amount = offsets->locals_base - offsets->saved_regs;
25136 amount -= 4 * thumb1_extra_regs_pushed (offsets, false);
25138 gcc_assert (amount >= 0);
25139 if (amount)
25141 emit_insn (gen_blockage ());
25143 if (amount < 512)
25144 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
25145 GEN_INT (amount)));
25146 else
25148 /* r3 is always free in the epilogue. */
25149 rtx reg = gen_rtx_REG (SImode, LAST_ARG_REGNUM);
25151 emit_insn (gen_movsi (reg, GEN_INT (amount)));
25152 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, reg));
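/* Illustrative sketch of the two cases above, with hypothetical frame
   sizes: a 64-byte frame is released with a single

	add	sp, sp, #64

   while a 1024-byte frame exceeds the immediate range, so the amount is
   loaded into r3 (e.g. from the literal pool) and added:

	ldr	r3, .Lpool
	add	sp, sp, r3  */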
25156 /* Emit a USE (stack_pointer_rtx), so that
25157 the stack adjustment will not be deleted. */
25158 emit_insn (gen_force_register_use (stack_pointer_rtx));
25160 if (crtl->profile || !TARGET_SCHED_PROLOG)
25161 emit_insn (gen_blockage ());
25163 /* Emit a clobber for each insn that will be restored in the epilogue,
25164 so that flow2 will get register lifetimes correct. */
25165 for (regno = 0; regno < 13; regno++)
25166 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
25167 emit_clobber (gen_rtx_REG (SImode, regno));
25169 if (! df_regs_ever_live_p (LR_REGNUM))
25170 emit_use (gen_rtx_REG (SImode, LR_REGNUM));
25172 /* Clear all caller-saved regs that are not used to return. */
25173 if (IS_CMSE_ENTRY (arm_current_func_type ()))
25174 cmse_nonsecure_entry_clear_before_return ();
25177 /* Epilogue code for APCS frame. */
25178 static void
25179 arm_expand_epilogue_apcs_frame (bool really_return)
25181 unsigned long func_type;
25182 unsigned long saved_regs_mask;
25183 int num_regs = 0;
25184 int i;
25185 int floats_from_frame = 0;
25186 arm_stack_offsets *offsets;
25188 gcc_assert (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM);
25189 func_type = arm_current_func_type ();
25191 /* Get frame offsets for ARM. */
25192 offsets = arm_get_frame_offsets ();
25193 saved_regs_mask = offsets->saved_regs_mask;
25195 /* Find the offset of the floating-point save area in the frame. */
25196 floats_from_frame
25197 = (offsets->saved_args
25198 + arm_compute_static_chain_stack_bytes ()
25199 - offsets->frame);
25201 /* Compute how many core registers are saved and how far away the floats are. */
25202 for (i = 0; i <= LAST_ARM_REGNUM; i++)
25203 if (saved_regs_mask & (1 << i))
25205 num_regs++;
25206 floats_from_frame += 4;
25209 if (TARGET_HARD_FLOAT)
25211 int start_reg;
25212 rtx ip_rtx = gen_rtx_REG (SImode, IP_REGNUM);
25214 /* The offset is from IP_REGNUM. */
25215 int saved_size = arm_get_vfp_saved_size ();
25216 if (saved_size > 0)
25218 rtx_insn *insn;
25219 floats_from_frame += saved_size;
25220 insn = emit_insn (gen_addsi3 (ip_rtx,
25221 hard_frame_pointer_rtx,
25222 GEN_INT (-floats_from_frame)));
25223 arm_add_cfa_adjust_cfa_note (insn, -floats_from_frame,
25224 ip_rtx, hard_frame_pointer_rtx);
25227 /* Generate VFP register multi-pop. */
25228 start_reg = FIRST_VFP_REGNUM;
25230 for (i = FIRST_VFP_REGNUM; i < LAST_VFP_REGNUM; i += 2)
25231 /* Look for a case where a reg does not need restoring. */
25232 if ((!df_regs_ever_live_p (i) || call_used_regs[i])
25233 && (!df_regs_ever_live_p (i + 1)
25234 || call_used_regs[i + 1]))
25236 if (start_reg != i)
25237 arm_emit_vfp_multi_reg_pop (start_reg,
25238 (i - start_reg) / 2,
25239 gen_rtx_REG (SImode,
25240 IP_REGNUM));
25241 start_reg = i + 2;
25244 /* Restore the remaining regs that we have discovered (or possibly
25245 even all of them, if the conditional in the for loop never
25246 fired). */
25247 if (start_reg != i)
25248 arm_emit_vfp_multi_reg_pop (start_reg,
25249 (i - start_reg) / 2,
25250 gen_rtx_REG (SImode, IP_REGNUM));
25253 if (TARGET_IWMMXT)
25255 /* The frame pointer is guaranteed to be non-double-word aligned, as
25256 it is set to double-word-aligned old_stack_pointer - 4. */
25257 rtx_insn *insn;
25258 int lrm_count = (num_regs % 2) ? (num_regs + 2) : (num_regs + 1);
25260 for (i = LAST_IWMMXT_REGNUM; i >= FIRST_IWMMXT_REGNUM; i--)
25261 if (df_regs_ever_live_p (i) && !call_used_regs[i])
25263 rtx addr = gen_frame_mem (V2SImode,
25264 plus_constant (Pmode, hard_frame_pointer_rtx,
25265 - lrm_count * 4));
25266 insn = emit_insn (gen_movsi (gen_rtx_REG (V2SImode, i), addr));
25267 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
25268 gen_rtx_REG (V2SImode, i),
25269 NULL_RTX);
25270 lrm_count += 2;
25274 /* saved_regs_mask should contain IP, which holds the old stack pointer
25275 at the time of activation creation. Since SP and IP are adjacent registers,
25276 we can restore the value directly into SP. */
25277 gcc_assert (saved_regs_mask & (1 << IP_REGNUM));
25278 saved_regs_mask &= ~(1 << IP_REGNUM);
25279 saved_regs_mask |= (1 << SP_REGNUM);
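/* Illustrative sketch of the substitution above; the exact register list
   depends on the function.  For a frame saved with something like
   "stmfd sp!, {fp, ip, lr, pc}" where IP held the old stack pointer,
   rewriting the mask to name SP in place of IP (and, just below, PC in
   place of LR when a plain return is possible) lets a single

	ldmfd	sp!, {fp, sp, pc}

   restore the old frame pointer, reload the old stack pointer from the
   slot where IP was stored, and return.  */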
25281 /* There are two registers left in saved_regs_mask - LR and PC. We
25282 only need to restore LR (the return address), but to
25283 save time we can load it directly into PC, unless we need a
25284 special function exit sequence, or we are not really returning. */
25285 if (really_return
25286 && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
25287 && !crtl->calls_eh_return)
25288 /* Delete LR from the register mask, so that LR on
25289 the stack is loaded into the PC in the register mask. */
25290 saved_regs_mask &= ~(1 << LR_REGNUM);
25291 else
25292 saved_regs_mask &= ~(1 << PC_REGNUM);
25294 num_regs = bit_count (saved_regs_mask);
25295 if ((offsets->outgoing_args != (1 + num_regs)) || cfun->calls_alloca)
25297 rtx_insn *insn;
25298 emit_insn (gen_blockage ());
25299 /* Unwind the stack to just below the saved registers. */
25300 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
25301 hard_frame_pointer_rtx,
25302 GEN_INT (- 4 * num_regs)));
25304 arm_add_cfa_adjust_cfa_note (insn, - 4 * num_regs,
25305 stack_pointer_rtx, hard_frame_pointer_rtx);
25308 arm_emit_multi_reg_pop (saved_regs_mask);
25310 if (IS_INTERRUPT (func_type))
25312 /* Interrupt handlers will have pushed the
25313 IP onto the stack, so restore it now. */
25314 rtx_insn *insn;
25315 rtx addr = gen_rtx_MEM (SImode,
25316 gen_rtx_POST_INC (SImode,
25317 stack_pointer_rtx));
25318 set_mem_alias_set (addr, get_frame_alias_set ());
25319 insn = emit_insn (gen_movsi (gen_rtx_REG (SImode, IP_REGNUM), addr));
25320 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
25321 gen_rtx_REG (SImode, IP_REGNUM),
25322 NULL_RTX);
25325 if (!really_return || (saved_regs_mask & (1 << PC_REGNUM)))
25326 return;
25328 if (crtl->calls_eh_return)
25329 emit_insn (gen_addsi3 (stack_pointer_rtx,
25330 stack_pointer_rtx,
25331 gen_rtx_REG (SImode, ARM_EH_STACKADJ_REGNUM)));
25333 if (IS_STACKALIGN (func_type))
25334 /* Restore the original stack pointer. Before prologue, the stack was
25335 realigned and the original stack pointer saved in r0. For details,
25336 see comment in arm_expand_prologue. */
25337 emit_insn (gen_movsi (stack_pointer_rtx, gen_rtx_REG (SImode, R0_REGNUM)));
25339 emit_jump_insn (simple_return_rtx);
25342 /* Generate RTL to represent an ARM epilogue. REALLY_RETURN is true if the
25343 function is not a sibcall. */
25344 void
25345 arm_expand_epilogue (bool really_return)
25347 unsigned long func_type;
25348 unsigned long saved_regs_mask;
25349 int num_regs = 0;
25350 int i;
25351 int amount;
25352 arm_stack_offsets *offsets;
25354 func_type = arm_current_func_type ();
25356 /* Naked functions don't have an epilogue. Hence, generate the return pattern
25357 and let output_return_instruction take care of instruction emission, if any. */
25358 if (IS_NAKED (func_type)
25359 || (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN))
25361 if (really_return)
25362 emit_jump_insn (simple_return_rtx);
25363 return;
25366 /* If we are throwing an exception, then we really must be doing a
25367 return, so we can't tail-call. */
25368 gcc_assert (!crtl->calls_eh_return || really_return);
25370 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
25372 arm_expand_epilogue_apcs_frame (really_return);
25373 return;
25376 /* Get frame offsets for ARM. */
25377 offsets = arm_get_frame_offsets ();
25378 saved_regs_mask = offsets->saved_regs_mask;
25379 num_regs = bit_count (saved_regs_mask);
25381 if (frame_pointer_needed)
25383 rtx_insn *insn;
25384 /* Restore stack pointer if necessary. */
25385 if (TARGET_ARM)
25387 /* In ARM mode, frame pointer points to first saved register.
25388 Restore stack pointer to last saved register. */
25389 amount = offsets->frame - offsets->saved_regs;
25391 /* Force out any pending memory operations that reference stacked data
25392 before stack de-allocation occurs. */
25393 emit_insn (gen_blockage ());
25394 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
25395 hard_frame_pointer_rtx,
25396 GEN_INT (amount)));
25397 arm_add_cfa_adjust_cfa_note (insn, amount,
25398 stack_pointer_rtx,
25399 hard_frame_pointer_rtx);
25401 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is not
25402 deleted. */
25403 emit_insn (gen_force_register_use (stack_pointer_rtx));
25405 else
25407 /* In Thumb-2 mode, the frame pointer points to the last saved
25408 register. */
25409 amount = offsets->locals_base - offsets->saved_regs;
25410 if (amount)
25412 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
25413 hard_frame_pointer_rtx,
25414 GEN_INT (amount)));
25415 arm_add_cfa_adjust_cfa_note (insn, amount,
25416 hard_frame_pointer_rtx,
25417 hard_frame_pointer_rtx);
25420 /* Force out any pending memory operations that reference stacked data
25421 before stack de-allocation occurs. */
25422 emit_insn (gen_blockage ());
25423 insn = emit_insn (gen_movsi (stack_pointer_rtx,
25424 hard_frame_pointer_rtx));
25425 arm_add_cfa_adjust_cfa_note (insn, 0,
25426 stack_pointer_rtx,
25427 hard_frame_pointer_rtx);
25428 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is not
25429 deleted. */
25430 emit_insn (gen_force_register_use (stack_pointer_rtx));
25433 else
25435 /* Pop off outgoing args and local frame to adjust stack pointer to
25436 last saved register. */
25437 amount = offsets->outgoing_args - offsets->saved_regs;
25438 if (amount)
25440 rtx_insn *tmp;
25441 /* Force out any pending memory operations that reference stacked data
25442 before stack de-allocation occurs. */
25443 emit_insn (gen_blockage ());
25444 tmp = emit_insn (gen_addsi3 (stack_pointer_rtx,
25445 stack_pointer_rtx,
25446 GEN_INT (amount)));
25447 arm_add_cfa_adjust_cfa_note (tmp, amount,
25448 stack_pointer_rtx, stack_pointer_rtx);
25449 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is
25450 not deleted. */
25451 emit_insn (gen_force_register_use (stack_pointer_rtx));
25455 if (TARGET_HARD_FLOAT)
25457 /* Generate VFP register multi-pop. */
25458 int end_reg = LAST_VFP_REGNUM + 1;
25460 /* Scan the registers in reverse order. We need to match
25461 any groupings made in the prologue and generate matching
25462 vldm operations. We need to match the groups because, unlike
25463 pop, vldm can only restore consecutive registers. */
25464 for (i = LAST_VFP_REGNUM - 1; i >= FIRST_VFP_REGNUM; i -= 2)
25465 /* Look for a case where a reg does not need restoring. */
25466 if ((!df_regs_ever_live_p (i) || call_used_regs[i])
25467 && (!df_regs_ever_live_p (i + 1)
25468 || call_used_regs[i + 1]))
25470 /* Restore the regs discovered so far (from reg+2 to
25471 end_reg). */
25472 if (end_reg > i + 2)
25473 arm_emit_vfp_multi_reg_pop (i + 2,
25474 (end_reg - (i + 2)) / 2,
25475 stack_pointer_rtx);
25476 end_reg = i;
25479 /* Restore the remaining regs that we have discovered (or possibly
25480 even all of them, if the conditional in the for loop never
25481 fired). */
25482 if (end_reg > i + 2)
25483 arm_emit_vfp_multi_reg_pop (i + 2,
25484 (end_reg - (i + 2)) / 2,
25485 stack_pointer_rtx);
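/* Illustrative example of the group matching above, for a hypothetical
   function that saved d8-d11 and d13-d14 (d12 being dead): the reverse
   scan emits

	vldm	sp!, {d13-d14}
	vldm	sp!, {d8-d11}

   matching the two vpush groups from the prologue in reverse stack
   order.  */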
25488 if (TARGET_IWMMXT)
25489 for (i = FIRST_IWMMXT_REGNUM; i <= LAST_IWMMXT_REGNUM; i++)
25490 if (df_regs_ever_live_p (i) && !call_used_regs[i])
25492 rtx_insn *insn;
25493 rtx addr = gen_rtx_MEM (V2SImode,
25494 gen_rtx_POST_INC (SImode,
25495 stack_pointer_rtx));
25496 set_mem_alias_set (addr, get_frame_alias_set ());
25497 insn = emit_insn (gen_movsi (gen_rtx_REG (V2SImode, i), addr));
25498 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
25499 gen_rtx_REG (V2SImode, i),
25500 NULL_RTX);
25501 arm_add_cfa_adjust_cfa_note (insn, UNITS_PER_WORD,
25502 stack_pointer_rtx, stack_pointer_rtx);
25505 if (saved_regs_mask)
25507 rtx insn;
25508 bool return_in_pc = false;
25510 if (ARM_FUNC_TYPE (func_type) != ARM_FT_INTERWORKED
25511 && (TARGET_ARM || ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL)
25512 && !IS_CMSE_ENTRY (func_type)
25513 && !IS_STACKALIGN (func_type)
25514 && really_return
25515 && crtl->args.pretend_args_size == 0
25516 && saved_regs_mask & (1 << LR_REGNUM)
25517 && !crtl->calls_eh_return)
25519 saved_regs_mask &= ~(1 << LR_REGNUM);
25520 saved_regs_mask |= (1 << PC_REGNUM);
25521 return_in_pc = true;
25524 if (num_regs == 1 && (!IS_INTERRUPT (func_type) || !return_in_pc))
25526 for (i = 0; i <= LAST_ARM_REGNUM; i++)
25527 if (saved_regs_mask & (1 << i))
25529 rtx addr = gen_rtx_MEM (SImode,
25530 gen_rtx_POST_INC (SImode,
25531 stack_pointer_rtx));
25532 set_mem_alias_set (addr, get_frame_alias_set ());
25534 if (i == PC_REGNUM)
25536 insn = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
25537 XVECEXP (insn, 0, 0) = ret_rtx;
25538 XVECEXP (insn, 0, 1) = gen_rtx_SET (gen_rtx_REG (SImode, i),
25539 addr);
25540 RTX_FRAME_RELATED_P (XVECEXP (insn, 0, 1)) = 1;
25541 insn = emit_jump_insn (insn);
25543 else
25545 insn = emit_insn (gen_movsi (gen_rtx_REG (SImode, i),
25546 addr));
25547 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
25548 gen_rtx_REG (SImode, i),
25549 NULL_RTX);
25550 arm_add_cfa_adjust_cfa_note (insn, UNITS_PER_WORD,
25551 stack_pointer_rtx,
25552 stack_pointer_rtx);
25556 else
25558 if (TARGET_LDRD
25559 && current_tune->prefer_ldrd_strd
25560 && !optimize_function_for_size_p (cfun))
25562 if (TARGET_THUMB2)
25563 thumb2_emit_ldrd_pop (saved_regs_mask);
25564 else if (TARGET_ARM && !IS_INTERRUPT (func_type))
25565 arm_emit_ldrd_pop (saved_regs_mask);
25566 else
25567 arm_emit_multi_reg_pop (saved_regs_mask);
25569 else
25570 arm_emit_multi_reg_pop (saved_regs_mask);
25573 if (return_in_pc)
25574 return;
25577 amount
25578 = crtl->args.pretend_args_size + arm_compute_static_chain_stack_bytes();
25579 if (amount)
25581 int i, j;
25582 rtx dwarf = NULL_RTX;
25583 rtx_insn *tmp =
25584 emit_insn (gen_addsi3 (stack_pointer_rtx,
25585 stack_pointer_rtx,
25586 GEN_INT (amount)));
25588 RTX_FRAME_RELATED_P (tmp) = 1;
25590 if (cfun->machine->uses_anonymous_args)
25592 /* Restore pretend args. See arm_expand_prologue for how the pretend
25593 args are saved on the stack. */
25594 int num_regs = crtl->args.pretend_args_size / 4;
25595 saved_regs_mask = (0xf0 >> num_regs) & 0xf;
25596 for (j = 0, i = 0; j < num_regs; i++)
25597 if (saved_regs_mask & (1 << i))
25599 rtx reg = gen_rtx_REG (SImode, i);
25600 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
25601 j++;
25603 REG_NOTES (tmp) = dwarf;
25605 arm_add_cfa_adjust_cfa_note (tmp, amount,
25606 stack_pointer_rtx, stack_pointer_rtx);
25609 /* Clear all caller-saved regs that are not used to return. */
25610 if (IS_CMSE_ENTRY (arm_current_func_type ()))
25612 /* CMSE_ENTRY always returns. */
25613 gcc_assert (really_return);
25614 cmse_nonsecure_entry_clear_before_return ();
25617 if (!really_return)
25618 return;
25620 if (crtl->calls_eh_return)
25621 emit_insn (gen_addsi3 (stack_pointer_rtx,
25622 stack_pointer_rtx,
25623 gen_rtx_REG (SImode, ARM_EH_STACKADJ_REGNUM)));
25625 if (IS_STACKALIGN (func_type))
25626 /* Restore the original stack pointer. Before prologue, the stack was
25627 realigned and the original stack pointer saved in r0. For details,
25628 see comment in arm_expand_prologue. */
25629 emit_insn (gen_movsi (stack_pointer_rtx, gen_rtx_REG (SImode, R0_REGNUM)));
25631 emit_jump_insn (simple_return_rtx);
25634 /* Implementation of insn prologue_thumb1_interwork. This is the first
25635 "instruction" of a function called in ARM mode. Swap to thumb mode. */
25637 const char *
25638 thumb1_output_interwork (void)
25640 const char * name;
25641 FILE *f = asm_out_file;
25643 gcc_assert (MEM_P (DECL_RTL (current_function_decl)));
25644 gcc_assert (GET_CODE (XEXP (DECL_RTL (current_function_decl), 0))
25645 == SYMBOL_REF);
25646 name = XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0);
25648 /* Generate code sequence to switch us into Thumb mode. */
25649 /* The .code 32 directive has already been emitted by
25650 ASM_DECLARE_FUNCTION_NAME. */
25651 asm_fprintf (f, "\torr\t%r, %r, #1\n", IP_REGNUM, PC_REGNUM);
25652 asm_fprintf (f, "\tbx\t%r\n", IP_REGNUM);
25654 /* Generate a label, so that the debugger will notice the
25655 change in instruction sets. This label is also used by
25656 the assembler to bypass the ARM code when this function
25657 is called from a Thumb encoded function elsewhere in the
25658 same file. Hence the definition of STUB_NAME here must
25659 agree with the definition in gas/config/tc-arm.c. */
25661 #define STUB_NAME ".real_start_of"
25663 fprintf (f, "\t.code\t16\n");
25664 #ifdef ARM_PE
25665 if (arm_dllexport_name_p (name))
25666 name = arm_strip_name_encoding (name);
25667 #endif
25668 asm_fprintf (f, "\t.globl %s%U%s\n", STUB_NAME, name);
25669 fprintf (f, "\t.thumb_func\n");
25670 asm_fprintf (f, "%s%U%s:\n", STUB_NAME, name);
25672 return "";
25675 /* Handle the case of a double word load into a low register from
25676 a computed memory address. The computed address may involve a
25677 register which is overwritten by the load. */
25678 const char *
25679 thumb_load_double_from_address (rtx *operands)
25681 rtx addr;
25682 rtx base;
25683 rtx offset;
25684 rtx arg1;
25685 rtx arg2;
25687 gcc_assert (REG_P (operands[0]));
25688 gcc_assert (MEM_P (operands[1]));
25690 /* Get the memory address. */
25691 addr = XEXP (operands[1], 0);
25693 /* Work out how the memory address is computed. */
25694 switch (GET_CODE (addr))
25696 case REG:
25697 operands[2] = adjust_address (operands[1], SImode, 4);
25699 if (REGNO (operands[0]) == REGNO (addr))
25701 output_asm_insn ("ldr\t%H0, %2", operands);
25702 output_asm_insn ("ldr\t%0, %1", operands);
25704 else
25706 output_asm_insn ("ldr\t%0, %1", operands);
25707 output_asm_insn ("ldr\t%H0, %2", operands);
25709 break;
25711 case CONST:
25712 /* Compute <address> + 4 for the high order load. */
25713 operands[2] = adjust_address (operands[1], SImode, 4);
25715 output_asm_insn ("ldr\t%0, %1", operands);
25716 output_asm_insn ("ldr\t%H0, %2", operands);
25717 break;
25719 case PLUS:
25720 arg1 = XEXP (addr, 0);
25721 arg2 = XEXP (addr, 1);
25723 if (CONSTANT_P (arg1))
25724 base = arg2, offset = arg1;
25725 else
25726 base = arg1, offset = arg2;
25728 gcc_assert (REG_P (base));
25730 /* Catch the case of <address> = <reg> + <reg> */
25731 if (REG_P (offset))
25733 int reg_offset = REGNO (offset);
25734 int reg_base = REGNO (base);
25735 int reg_dest = REGNO (operands[0]);
25737 /* Add the base and offset registers together into the
25738 higher destination register. */
25739 asm_fprintf (asm_out_file, "\tadd\t%r, %r, %r",
25740 reg_dest + 1, reg_base, reg_offset);
25742 /* Load the lower destination register from the address in
25743 the higher destination register. */
25744 asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #0]",
25745 reg_dest, reg_dest + 1);
25747 /* Load the higher destination register from its own address
25748 plus 4. */
25749 asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #4]",
25750 reg_dest + 1, reg_dest + 1);
25752 else
25754 /* Compute <address> + 4 for the high order load. */
25755 operands[2] = adjust_address (operands[1], SImode, 4);
25757 /* If the computed address is held in the low order register
25758 then load the high order register first, otherwise always
25759 load the low order register first. */
25760 if (REGNO (operands[0]) == REGNO (base))
25762 output_asm_insn ("ldr\t%H0, %2", operands);
25763 output_asm_insn ("ldr\t%0, %1", operands);
25765 else
25767 output_asm_insn ("ldr\t%0, %1", operands);
25768 output_asm_insn ("ldr\t%H0, %2", operands);
25771 break;
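/* Illustrative output for the register+register case above, with
   hypothetical registers: destination pair r0/r1, base r2, offset r3:

	add	r1, r2, r3
	ldr	r0, [r1, #0]
	ldr	r1, [r1, #4]

   The address is formed in the high half of the destination so that it
   is only overwritten by the final load.  */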
25773 case LABEL_REF:
25774 /* With no registers to worry about we can just load the value
25775 directly. */
25776 operands[2] = adjust_address (operands[1], SImode, 4);
25778 output_asm_insn ("ldr\t%H0, %2", operands);
25779 output_asm_insn ("ldr\t%0, %1", operands);
25780 break;
25782 default:
25783 gcc_unreachable ();
25786 return "";
25789 const char *
25790 thumb_output_move_mem_multiple (int n, rtx *operands)
25792 switch (n)
25794 case 2:
25795 if (REGNO (operands[4]) > REGNO (operands[5]))
25796 std::swap (operands[4], operands[5]);
25798 output_asm_insn ("ldmia\t%1!, {%4, %5}", operands);
25799 output_asm_insn ("stmia\t%0!, {%4, %5}", operands);
25800 break;
25802 case 3:
25803 if (REGNO (operands[4]) > REGNO (operands[5]))
25804 std::swap (operands[4], operands[5]);
25805 if (REGNO (operands[5]) > REGNO (operands[6]))
25806 std::swap (operands[5], operands[6]);
25807 if (REGNO (operands[4]) > REGNO (operands[5]))
25808 std::swap (operands[4], operands[5]);
25810 output_asm_insn ("ldmia\t%1!, {%4, %5, %6}", operands);
25811 output_asm_insn ("stmia\t%0!, {%4, %5, %6}", operands);
25812 break;
25814 default:
25815 gcc_unreachable ();
25818 return "";
25821 /* Output a call-via instruction for thumb state. */
25822 const char *
25823 thumb_call_via_reg (rtx reg)
25825 int regno = REGNO (reg);
25826 rtx *labelp;
25828 gcc_assert (regno < LR_REGNUM);
25830 /* If we are in the normal text section we can use a single instance
25831 per compilation unit. If we are doing function sections, then we need
25832 an entry per section, since we can't rely on reachability. */
25833 if (in_section == text_section)
25835 thumb_call_reg_needed = 1;
25837 if (thumb_call_via_label[regno] == NULL)
25838 thumb_call_via_label[regno] = gen_label_rtx ();
25839 labelp = thumb_call_via_label + regno;
25841 else
25843 if (cfun->machine->call_via[regno] == NULL)
25844 cfun->machine->call_via[regno] = gen_label_rtx ();
25845 labelp = cfun->machine->call_via + regno;
25848 output_asm_insn ("bl\t%a0", labelp);
25849 return "";
25852 /* Routines for generating rtl. */
25853 void
25854 thumb_expand_movmemqi (rtx *operands)
25856 rtx out = copy_to_mode_reg (SImode, XEXP (operands[0], 0));
25857 rtx in = copy_to_mode_reg (SImode, XEXP (operands[1], 0));
25858 HOST_WIDE_INT len = INTVAL (operands[2]);
25859 HOST_WIDE_INT offset = 0;
25861 while (len >= 12)
25863 emit_insn (gen_movmem12b (out, in, out, in));
25864 len -= 12;
25867 if (len >= 8)
25869 emit_insn (gen_movmem8b (out, in, out, in));
25870 len -= 8;
25873 if (len >= 4)
25875 rtx reg = gen_reg_rtx (SImode);
25876 emit_insn (gen_movsi (reg, gen_rtx_MEM (SImode, in)));
25877 emit_insn (gen_movsi (gen_rtx_MEM (SImode, out), reg));
25878 len -= 4;
25879 offset += 4;
25882 if (len >= 2)
25884 rtx reg = gen_reg_rtx (HImode);
25885 emit_insn (gen_movhi (reg, gen_rtx_MEM (HImode,
25886 plus_constant (Pmode, in,
25887 offset))));
25888 emit_insn (gen_movhi (gen_rtx_MEM (HImode, plus_constant (Pmode, out,
25889 offset)),
25890 reg));
25891 len -= 2;
25892 offset += 2;
25895 if (len)
25897 rtx reg = gen_reg_rtx (QImode);
25898 emit_insn (gen_movqi (reg, gen_rtx_MEM (QImode,
25899 plus_constant (Pmode, in,
25900 offset))));
25901 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, out,
25902 offset)),
25903 reg));
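/* Worked example of the decomposition above: a hypothetical 19-byte copy
   is lowered to one 12-byte block move (movmem12b), then a 4-byte word
   copy through a scratch register, then a halfword and a byte copy at
   increasing offsets for the remaining 3 bytes.  */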
25907 void
25908 thumb_reload_out_hi (rtx *operands)
25910 emit_insn (gen_thumb_movhi_clobber (operands[0], operands[1], operands[2]));
25913 /* Return the length of a function name prefix
25914 that starts with the character 'c'. */
25915 static int
25916 arm_get_strip_length (int c)
25918 switch (c)
25920 ARM_NAME_ENCODING_LENGTHS
25921 default: return 0;
25925 /* Return a pointer to a function's name with any
25926 and all prefix encodings stripped from it. */
25927 const char *
25928 arm_strip_name_encoding (const char *name)
25930 int skip;
25932 while ((skip = arm_get_strip_length (* name)))
25933 name += skip;
25935 return name;
25938 /* If there is a '*' anywhere in the name's prefix, then
25939 emit the stripped name verbatim, otherwise prepend an
25940 underscore if leading underscores are being used. */
25941 void
25942 arm_asm_output_labelref (FILE *stream, const char *name)
25944 int skip;
25945 int verbatim = 0;
25947 while ((skip = arm_get_strip_length (* name)))
25949 verbatim |= (*name == '*');
25950 name += skip;
25953 if (verbatim)
25954 fputs (name, stream);
25955 else
25956 asm_fprintf (stream, "%U%s", name);
25959 /* This function is used to emit an EABI tag and its associated value.
25960 We emit the numerical value of the tag in case the assembler does not
25961 support textual tags (e.g. gas prior to 2.20). If requested we include
25962 the tag name in a comment so that anyone reading the assembler output
25963 will know which tag is being set.
25965 This function is not static because arm-c.c needs it too. */
25967 void
25968 arm_emit_eabi_attribute (const char *name, int num, int val)
25970 asm_fprintf (asm_out_file, "\t.eabi_attribute %d, %d", num, val);
25971 if (flag_verbose_asm || flag_debug_asm)
25972 asm_fprintf (asm_out_file, "\t%s %s", ASM_COMMENT_START, name);
25973 asm_fprintf (asm_out_file, "\n");
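/* For example, arm_emit_eabi_attribute ("Tag_ABI_FP_rounding", 19, 1)
   produces

	.eabi_attribute 19, 1	@ Tag_ABI_FP_rounding

   where the trailing tag-name comment is only emitted under
   -fverbose-asm or -dA.  */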
25976 /* This function is used to print CPU tuning information as a comment
25977 in the assembler file. Pointers are not printed for now. */
25979 void
25980 arm_print_tune_info (void)
25982 asm_fprintf (asm_out_file, "\t" ASM_COMMENT_START ".tune parameters\n");
25983 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "constant_limit:\t%d\n",
25984 current_tune->constant_limit);
25985 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
25986 "max_insns_skipped:\t%d\n", current_tune->max_insns_skipped);
25987 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
25988 "prefetch.num_slots:\t%d\n", current_tune->prefetch.num_slots);
25989 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
25990 "prefetch.l1_cache_size:\t%d\n",
25991 current_tune->prefetch.l1_cache_size);
25992 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
25993 "prefetch.l1_cache_line_size:\t%d\n",
25994 current_tune->prefetch.l1_cache_line_size);
25995 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
25996 "prefer_constant_pool:\t%d\n",
25997 (int) current_tune->prefer_constant_pool);
25998 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
25999 "branch_cost:\t(s:speed, p:predictable)\n");
26000 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "\t\ts&p\tcost\n");
26001 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "\t\t00\t%d\n",
26002 current_tune->branch_cost (false, false));
26003 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "\t\t01\t%d\n",
26004 current_tune->branch_cost (false, true));
26005 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "\t\t10\t%d\n",
26006 current_tune->branch_cost (true, false));
26007 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "\t\t11\t%d\n",
26008 current_tune->branch_cost (true, true));
26009 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26010 "prefer_ldrd_strd:\t%d\n",
26011 (int) current_tune->prefer_ldrd_strd);
26012 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26013 "logical_op_non_short_circuit:\t[%d,%d]\n",
26014 (int) current_tune->logical_op_non_short_circuit_thumb,
26015 (int) current_tune->logical_op_non_short_circuit_arm);
26016 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26017 "prefer_neon_for_64bits:\t%d\n",
26018 (int) current_tune->prefer_neon_for_64bits);
26019 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26020 "disparage_flag_setting_t16_encodings:\t%d\n",
26021 (int) current_tune->disparage_flag_setting_t16_encodings);
26022 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26023 "string_ops_prefer_neon:\t%d\n",
26024 (int) current_tune->string_ops_prefer_neon);
26025 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26026 "max_insns_inline_memset:\t%d\n",
26027 current_tune->max_insns_inline_memset);
26028 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "fusible_ops:\t%u\n",
26029 current_tune->fusible_ops);
26030 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "sched_autopref:\t%d\n",
26031 (int) current_tune->sched_autopref);
26034 static void
26035 arm_file_start (void)
26037 int val;
26039 if (TARGET_BPABI)
26041 /* We don't have a specified CPU. Use the architecture to
26042 generate the tags.
26044 Note: it might be better to do this unconditionally, then the
26045 assembler would not need to know about all new CPU names as
26046 they are added. */
26047 if (!arm_active_target.core_name)
26049 /* armv7ve doesn't support any extensions. */
26050 if (strcmp (arm_active_target.arch_name, "armv7ve") == 0)
26052 /* Keep backward compatibility for assemblers
26053 which don't support armv7ve. */
26054 asm_fprintf (asm_out_file, "\t.arch armv7-a\n");
26055 asm_fprintf (asm_out_file, "\t.arch_extension virt\n");
26056 asm_fprintf (asm_out_file, "\t.arch_extension idiv\n");
26057 asm_fprintf (asm_out_file, "\t.arch_extension sec\n");
26058 asm_fprintf (asm_out_file, "\t.arch_extension mp\n");
26060 else
26062 const char* pos = strchr (arm_active_target.arch_name, '+');
26063 if (pos)
26065 char buf[32];
26066 gcc_assert (strlen (arm_active_target.arch_name)
26067 <= sizeof (buf) / sizeof (*pos));
26068 strncpy (buf, arm_active_target.arch_name,
26069 (pos - arm_active_target.arch_name) * sizeof (*pos));
26070 buf[pos - arm_active_target.arch_name] = '\0';
26071 asm_fprintf (asm_out_file, "\t.arch %s\n", buf);
26072 asm_fprintf (asm_out_file, "\t.arch_extension %s\n", pos + 1);
26074 else
26075 asm_fprintf (asm_out_file, "\t.arch %s\n",
26076 arm_active_target.arch_name);
26079 else if (strncmp (arm_active_target.core_name, "generic", 7) == 0)
26080 asm_fprintf (asm_out_file, "\t.arch %s\n",
26081 arm_active_target.core_name + 8);
26082 else
26084 const char* truncated_name
26085 = arm_rewrite_selected_cpu (arm_active_target.core_name);
26086 asm_fprintf (asm_out_file, "\t.cpu %s\n", truncated_name);
26089 if (print_tune_info)
26090 arm_print_tune_info ();
26092 if (! TARGET_SOFT_FLOAT)
26094 if (TARGET_HARD_FLOAT && TARGET_VFP_SINGLE)
26095 arm_emit_eabi_attribute ("Tag_ABI_HardFP_use", 27, 1);
26097 if (TARGET_HARD_FLOAT_ABI)
26098 arm_emit_eabi_attribute ("Tag_ABI_VFP_args", 28, 1);
26101 /* Some of these attributes only apply when the corresponding features
26102 are used. However we don't have any easy way of figuring this out.
26103 Conservatively record the setting that would have been used. */
26105 if (flag_rounding_math)
26106 arm_emit_eabi_attribute ("Tag_ABI_FP_rounding", 19, 1);
26108 if (!flag_unsafe_math_optimizations)
26110 arm_emit_eabi_attribute ("Tag_ABI_FP_denormal", 20, 1);
26111 arm_emit_eabi_attribute ("Tag_ABI_FP_exceptions", 21, 1);
26113 if (flag_signaling_nans)
26114 arm_emit_eabi_attribute ("Tag_ABI_FP_user_exceptions", 22, 1);
26116 arm_emit_eabi_attribute ("Tag_ABI_FP_number_model", 23,
26117 flag_finite_math_only ? 1 : 3);
26119 arm_emit_eabi_attribute ("Tag_ABI_align8_needed", 24, 1);
26120 arm_emit_eabi_attribute ("Tag_ABI_align8_preserved", 25, 1);
26121 arm_emit_eabi_attribute ("Tag_ABI_enum_size", 26,
26122 flag_short_enums ? 1 : 2);
26124 /* Tag_ABI_optimization_goals. */
26125 if (optimize_size)
26126 val = 4;
26127 else if (optimize >= 2)
26128 val = 2;
26129 else if (optimize)
26130 val = 1;
26131 else
26132 val = 6;
26133 arm_emit_eabi_attribute ("Tag_ABI_optimization_goals", 30, val);
26135 arm_emit_eabi_attribute ("Tag_CPU_unaligned_access", 34,
26136 unaligned_access);
26138 if (arm_fp16_format)
26139 arm_emit_eabi_attribute ("Tag_ABI_FP_16bit_format", 38,
26140 (int) arm_fp16_format);
26142 if (arm_lang_output_object_attributes_hook)
26143 arm_lang_output_object_attributes_hook();
26146 default_file_start ();
26149 static void
26150 arm_file_end (void)
26152 int regno;
26154 if (NEED_INDICATE_EXEC_STACK)
26155 /* Add .note.GNU-stack. */
26156 file_end_indicate_exec_stack ();
26158 if (! thumb_call_reg_needed)
26159 return;
26161 switch_to_section (text_section);
26162 asm_fprintf (asm_out_file, "\t.code 16\n");
26163 ASM_OUTPUT_ALIGN (asm_out_file, 1);
26165 for (regno = 0; regno < LR_REGNUM; regno++)
26167 rtx label = thumb_call_via_label[regno];
26169 if (label != 0)
26171 targetm.asm_out.internal_label (asm_out_file, "L",
26172 CODE_LABEL_NUMBER (label));
26173 asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
26178 #ifndef ARM_PE
26179 /* Symbols in the text segment can be accessed without indirecting via the
26180 constant pool; it may take an extra binary operation, but this is still
26181 faster than indirecting via memory. Don't do this when not optimizing,
26182 since we won't be calculating all of the offsets necessary to do this
26183 simplification. */
26185 static void
26186 arm_encode_section_info (tree decl, rtx rtl, int first)
26188 if (optimize > 0 && TREE_CONSTANT (decl))
26189 SYMBOL_REF_FLAG (XEXP (rtl, 0)) = 1;
26191 default_encode_section_info (decl, rtl, first);
26193 #endif /* !ARM_PE */
26195 static void
26196 arm_internal_label (FILE *stream, const char *prefix, unsigned long labelno)
26198 if (arm_ccfsm_state == 3 && (unsigned) arm_target_label == labelno
26199 && !strcmp (prefix, "L"))
26201 arm_ccfsm_state = 0;
26202 arm_target_insn = NULL;
26204 default_internal_label (stream, prefix, labelno);
26207 /* Output code to add DELTA to the first argument, and then jump
26208 to FUNCTION. Used for C++ multiple inheritance. */
26210 static void
26211 arm_thumb1_mi_thunk (FILE *file, tree, HOST_WIDE_INT delta,
26212 HOST_WIDE_INT, tree function)
26214 static int thunk_label = 0;
26215 char label[256];
26216 char labelpc[256];
26217 int mi_delta = delta;
26218 const char *const mi_op = mi_delta < 0 ? "sub" : "add";
26219 int shift = 0;
26220 int this_regno = (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function)
26221 ? 1 : 0);
26222 if (mi_delta < 0)
26223 mi_delta = - mi_delta;
26225 final_start_function (emit_barrier (), file, 1);
26227 if (TARGET_THUMB1)
26229 int labelno = thunk_label++;
26230 ASM_GENERATE_INTERNAL_LABEL (label, "LTHUMBFUNC", labelno);
26231 /* Thunks are entered in ARM mode when available. */
26232 if (TARGET_THUMB1_ONLY)
26234 /* push r3 so we can use it as a temporary. */
26235 /* TODO: Omit this save if r3 is not used. */
26236 fputs ("\tpush {r3}\n", file);
26237 fputs ("\tldr\tr3, ", file);
26239 else
26241 fputs ("\tldr\tr12, ", file);
26243 assemble_name (file, label);
26244 fputc ('\n', file);
26245 if (flag_pic)
26247 /* If we are generating PIC, the ldr instruction below loads
26248 "(target - 7) - .LTHUNKPCn" into r12. The pc reads as
26249 the address of the add + 8, so we have:
26251 r12 = (target - 7) - .LTHUNKPCn + (.LTHUNKPCn + 8)
26252 = target + 1.
26254 Note that we have "+ 1" because some versions of GNU ld
26255 don't set the low bit of the result for R_ARM_REL32
26256 relocations against thumb function symbols.
26257 On ARMv6M this is +4, not +8. */
26258 ASM_GENERATE_INTERNAL_LABEL (labelpc, "LTHUNKPC", labelno);
26259 assemble_name (file, labelpc);
26260 fputs (":\n", file);
26261 if (TARGET_THUMB1_ONLY)
26263 /* This is 2 insns after the start of the thunk, so we know it
26264 is 4-byte aligned. */
26265 fputs ("\tadd\tr3, pc, r3\n", file);
26266 fputs ("\tmov r12, r3\n", file);
26268 else
26269 fputs ("\tadd\tr12, pc, r12\n", file);
26271 else if (TARGET_THUMB1_ONLY)
26272 fputs ("\tmov r12, r3\n", file);
26274 if (TARGET_THUMB1_ONLY)
26276 if (mi_delta > 255)
26278 fputs ("\tldr\tr3, ", file);
26279 assemble_name (file, label);
26280 fputs ("+4\n", file);
26281 asm_fprintf (file, "\t%ss\t%r, %r, r3\n",
26282 mi_op, this_regno, this_regno);
26284 else if (mi_delta != 0)
26286 /* Thumb1 unified syntax requires s suffix in instruction name when
26287 one of the operands is immediate. */
26288 asm_fprintf (file, "\t%ss\t%r, %r, #%d\n",
26289 mi_op, this_regno, this_regno,
26290 mi_delta);
26293 else
26295 /* TODO: Use movw/movt for large constants when available. */
26296 while (mi_delta != 0)
26298 if ((mi_delta & (3 << shift)) == 0)
26299 shift += 2;
26300 else
26302 asm_fprintf (file, "\t%s\t%r, %r, #%d\n",
26303 mi_op, this_regno, this_regno,
26304 mi_delta & (0xff << shift));
26305 mi_delta &= ~(0xff << shift);
26306 shift += 8;
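/* Worked example of the loop above: a hypothetical delta of 0x12345 is
   added to the "this" register in 8-bit chunks at even rotations, each
   a valid ARM immediate:

	add	r0, r0, #69	@ 0x45
	add	r0, r0, #8960	@ 0x2300
	add	r0, r0, #65536	@ 0x10000

   ("sub" instead of "add" for a negative delta, and r1 instead of r0
   when the function returns an aggregate).  */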
26310 if (TARGET_THUMB1)
26312 if (TARGET_THUMB1_ONLY)
26313 fputs ("\tpop\t{r3}\n", file);
26315 fprintf (file, "\tbx\tr12\n");
26316 ASM_OUTPUT_ALIGN (file, 2);
26317 assemble_name (file, label);
26318 fputs (":\n", file);
26319 if (flag_pic)
26321 /* Output ".word .LTHUNKn-[3,7]-.LTHUNKPCn". */
26322 rtx tem = XEXP (DECL_RTL (function), 0);
26323 /* For TARGET_THUMB1_ONLY the thunk is in Thumb mode, so the PC
26324 pipeline offset is four rather than eight. Adjust the offset
26325 accordingly. */
26326 tem = plus_constant (GET_MODE (tem), tem,
26327 TARGET_THUMB1_ONLY ? -3 : -7);
26328 tem = gen_rtx_MINUS (GET_MODE (tem),
26329 tem,
26330 gen_rtx_SYMBOL_REF (Pmode,
26331 ggc_strdup (labelpc)));
26332 assemble_integer (tem, 4, BITS_PER_WORD, 1);
26334 else
26335 /* Output ".word .LTHUNKn". */
26336 assemble_integer (XEXP (DECL_RTL (function), 0), 4, BITS_PER_WORD, 1);
26338 if (TARGET_THUMB1_ONLY && mi_delta > 255)
26339 assemble_integer (GEN_INT(mi_delta), 4, BITS_PER_WORD, 1);
26341 else
26343 fputs ("\tb\t", file);
26344 assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
26345 if (NEED_PLT_RELOC)
26346 fputs ("(PLT)", file);
26347 fputc ('\n', file);
26350 final_end_function ();
26353 /* MI thunk handling for TARGET_32BIT. */
26355 static void
26356 arm32_output_mi_thunk (FILE *file, tree, HOST_WIDE_INT delta,
26357 HOST_WIDE_INT vcall_offset, tree function)
26359 /* On ARM, this_regno is R0 or R1 depending on
26360 whether the function returns an aggregate or not. */
26362 int this_regno = (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)),
26363 function)
26364 ? R1_REGNUM : R0_REGNUM);
26366 rtx temp = gen_rtx_REG (Pmode, IP_REGNUM);
26367 rtx this_rtx = gen_rtx_REG (Pmode, this_regno);
26368 reload_completed = 1;
26369 emit_note (NOTE_INSN_PROLOGUE_END);
26371 /* Add DELTA to THIS_RTX. */
26372 if (delta != 0)
26373 arm_split_constant (PLUS, Pmode, NULL_RTX,
26374 delta, this_rtx, this_rtx, false);
26376 /* Add *(*THIS_RTX + VCALL_OFFSET) to THIS_RTX. */
26377 if (vcall_offset != 0)
26379 /* Load *THIS_RTX. */
26380 emit_move_insn (temp, gen_rtx_MEM (Pmode, this_rtx));
26381 /* Compute *THIS_RTX + VCALL_OFFSET. */
26382 arm_split_constant (PLUS, Pmode, NULL_RTX, vcall_offset, temp, temp,
26383 false);
26384 /* Compute *(*THIS_RTX + VCALL_OFFSET). */
26385 emit_move_insn (temp, gen_rtx_MEM (Pmode, temp));
26386 emit_insn (gen_add3_insn (this_rtx, this_rtx, temp));
26389 /* Generate a tail call to the target function. */
26390 if (!TREE_USED (function))
26392 assemble_external (function);
26393 TREE_USED (function) = 1;
26395 rtx funexp = XEXP (DECL_RTL (function), 0);
26396 funexp = gen_rtx_MEM (FUNCTION_MODE, funexp);
26397 rtx_insn * insn = emit_call_insn (gen_sibcall (funexp, const0_rtx, NULL_RTX));
26398 SIBLING_CALL_P (insn) = 1;
26400 insn = get_insns ();
26401 shorten_branches (insn);
26402 final_start_function (insn, file, 1);
26403 final (insn, file, 1);
26404 final_end_function ();
26406 /* Stop pretending this is a post-reload pass. */
26407 reload_completed = 0;
26410 /* Output code to add DELTA to the first argument, and then jump
26411 to FUNCTION. Used for C++ multiple inheritance. */
26413 static void
26414 arm_output_mi_thunk (FILE *file, tree thunk, HOST_WIDE_INT delta,
26415 HOST_WIDE_INT vcall_offset, tree function)
26417 if (TARGET_32BIT)
26418 arm32_output_mi_thunk (file, thunk, delta, vcall_offset, function);
26419 else
26420 arm_thumb1_mi_thunk (file, thunk, delta, vcall_offset, function);
26424 arm_emit_vector_const (FILE *file, rtx x)
26426 int i;
26427 const char * pattern;
26429 gcc_assert (GET_CODE (x) == CONST_VECTOR);
26431 switch (GET_MODE (x))
26433 case V2SImode: pattern = "%08x"; break;
26434 case V4HImode: pattern = "%04x"; break;
26435 case V8QImode: pattern = "%02x"; break;
26436 default: gcc_unreachable ();
26439 fprintf (file, "0x");
26440 for (i = CONST_VECTOR_NUNITS (x); i--;)
26442 rtx element;
26444 element = CONST_VECTOR_ELT (x, i);
26445 fprintf (file, pattern, INTVAL (element));
26448 return 1;
26451 /* Emit a fp16 constant appropriately padded to occupy a 4-byte word.
26452 HFmode constant pool entries are actually loaded with ldr. */
26453 void
26454 arm_emit_fp16_const (rtx c)
26456 long bits;
26458 bits = real_to_target (NULL, CONST_DOUBLE_REAL_VALUE (c), HFmode);
26459 if (WORDS_BIG_ENDIAN)
26460 assemble_zeros (2);
26461 assemble_integer (GEN_INT (bits), 2, BITS_PER_WORD, 1);
26462 if (!WORDS_BIG_ENDIAN)
26463 assemble_zeros (2);
26466 const char *
26467 arm_output_load_gr (rtx *operands)
26469 rtx reg;
26470 rtx offset;
26471 rtx wcgr;
26472 rtx sum;
26474 if (!MEM_P (operands [1])
26475 || GET_CODE (sum = XEXP (operands [1], 0)) != PLUS
26476 || !REG_P (reg = XEXP (sum, 0))
26477 || !CONST_INT_P (offset = XEXP (sum, 1))
26478 || ((INTVAL (offset) < 1024) && (INTVAL (offset) > -1024)))
26479 return "wldrw%?\t%0, %1";
26481 /* Fix up an out-of-range load of a GR register. */
26482 output_asm_insn ("str%?\t%0, [sp, #-4]!\t@ Start of GR load expansion", & reg);
26483 wcgr = operands[0];
26484 operands[0] = reg;
26485 output_asm_insn ("ldr%?\t%0, %1", operands);
26487 operands[0] = wcgr;
26488 operands[1] = reg;
26489 output_asm_insn ("tmcr%?\t%0, %1", operands);
26490 output_asm_insn ("ldr%?\t%0, [sp], #4\t@ End of GR load expansion", & reg);
26492 return "";
26495 /* Worker function for TARGET_SETUP_INCOMING_VARARGS.
26497 On the ARM, PRETEND_SIZE is set in order to have the prologue push the last
26498 named arg and all anonymous args onto the stack.
26499 XXX I know the prologue shouldn't be pushing registers, but it is faster
26500 that way. */
26502 static void
26503 arm_setup_incoming_varargs (cumulative_args_t pcum_v,
26504 machine_mode mode,
26505 tree type,
26506 int *pretend_size,
26507 int second_time ATTRIBUTE_UNUSED)
26509 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
26510 int nregs;
26512 cfun->machine->uses_anonymous_args = 1;
26513 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
26515 nregs = pcum->aapcs_ncrn;
26516 if ((nregs & 1) && arm_needs_doubleword_align (mode, type))
26517 nregs++;
26519 else
26520 nregs = pcum->nregs;
26522 if (nregs < NUM_ARG_REGS)
26523 *pretend_size = (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
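/* Worked example, with a hypothetical argument list: if the named
   arguments of a variadic function consume three core registers,
   nregs == 3, so *pretend_size == (4 - 3) * UNITS_PER_WORD == 4 and
   the prologue pushes the remaining argument register (r3) so that the
   anonymous arguments are contiguous with any stack arguments.  */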
26526 /* We can't rely on the caller doing the proper promotion when
26527 using APCS or ATPCS. */
26529 static bool
26530 arm_promote_prototypes (const_tree t ATTRIBUTE_UNUSED)
26532 return !TARGET_AAPCS_BASED;
26535 static machine_mode
26536 arm_promote_function_mode (const_tree type ATTRIBUTE_UNUSED,
26537 machine_mode mode,
26538 int *punsignedp ATTRIBUTE_UNUSED,
26539 const_tree fntype ATTRIBUTE_UNUSED,
26540 int for_return ATTRIBUTE_UNUSED)
26542 if (GET_MODE_CLASS (mode) == MODE_INT
26543 && GET_MODE_SIZE (mode) < 4)
26544 return SImode;
26546 return mode;
26549 /* AAPCS based ABIs use short enums by default. */
26551 static bool
26552 arm_default_short_enums (void)
26554 return TARGET_AAPCS_BASED && arm_abi != ARM_ABI_AAPCS_LINUX;
26558 /* AAPCS requires that anonymous bitfields affect structure alignment. */
26560 static bool
26561 arm_align_anon_bitfield (void)
26563 return TARGET_AAPCS_BASED;
26567 /* The generic C++ ABI says 64-bit (long long). The EABI says 32-bit. */
26569 static tree
26570 arm_cxx_guard_type (void)
26572 return TARGET_AAPCS_BASED ? integer_type_node : long_long_integer_type_node;
26576 /* The EABI says test the least significant bit of a guard variable. */
26578 static bool
26579 arm_cxx_guard_mask_bit (void)
26581 return TARGET_AAPCS_BASED;
26585 /* The EABI specifies that all array cookies are 8 bytes long. */
26587 static tree
26588 arm_get_cookie_size (tree type)
26590 tree size;
26592 if (!TARGET_AAPCS_BASED)
26593 return default_cxx_get_cookie_size (type);
26595 size = build_int_cst (sizetype, 8);
26596 return size;
26600 /* The EABI says that array cookies should also contain the element size. */
26602 static bool
26603 arm_cookie_has_size (void)
26605 return TARGET_AAPCS_BASED;
26609 /* The EABI says constructors and destructors should return a pointer to
26610 the object constructed/destroyed. */
26612 static bool
26613 arm_cxx_cdtor_returns_this (void)
26615 return TARGET_AAPCS_BASED;
26618 /* The EABI says that an inline function may never be the key
26619 method. */
26621 static bool
26622 arm_cxx_key_method_may_be_inline (void)
26624 return !TARGET_AAPCS_BASED;
26627 static void
26628 arm_cxx_determine_class_data_visibility (tree decl)
26630 if (!TARGET_AAPCS_BASED
26631 || !TARGET_DLLIMPORT_DECL_ATTRIBUTES)
26632 return;
26634 /* In general, \S 3.2.5.5 of the ARM EABI requires that class data
26635 is exported. However, on systems without dynamic vague linkage,
26636 \S 3.2.5.6 says that COMDAT class data has hidden linkage. */
26637 if (!TARGET_ARM_DYNAMIC_VAGUE_LINKAGE_P && DECL_COMDAT (decl))
26638 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
26639 else
26640 DECL_VISIBILITY (decl) = VISIBILITY_DEFAULT;
26641 DECL_VISIBILITY_SPECIFIED (decl) = 1;
26644 static bool
26645 arm_cxx_class_data_always_comdat (void)
26647 /* \S 3.2.5.4 of the ARM C++ ABI says that class data only have
26648 vague linkage if the class has no key function. */
26649 return !TARGET_AAPCS_BASED;
26653 /* The EABI says __aeabi_atexit should be used to register static
26654 destructors. */
26656 static bool
26657 arm_cxx_use_aeabi_atexit (void)
26659 return TARGET_AAPCS_BASED;
26663 void
26664 arm_set_return_address (rtx source, rtx scratch)
26666 arm_stack_offsets *offsets;
26667 HOST_WIDE_INT delta;
26668 rtx addr;
26669 unsigned long saved_regs;
26671 offsets = arm_get_frame_offsets ();
26672 saved_regs = offsets->saved_regs_mask;
26674 if ((saved_regs & (1 << LR_REGNUM)) == 0)
26675 emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
26676 else
26678 if (frame_pointer_needed)
26679 addr = plus_constant (Pmode, hard_frame_pointer_rtx, -4);
26680 else
26682 /* LR will be the first saved register. */
26683 delta = offsets->outgoing_args - (offsets->frame + 4);
26686 if (delta >= 4096)
26688 emit_insn (gen_addsi3 (scratch, stack_pointer_rtx,
26689 GEN_INT (delta & ~4095)));
26690 addr = scratch;
26691 delta &= 4095;
26693 else
26694 addr = stack_pointer_rtx;
26696 addr = plus_constant (Pmode, addr, delta);
26698 /* The store needs to be marked as frame related in order to prevent
26699 DSE from deleting it as dead if it is based on fp. */
26700 rtx insn = emit_move_insn (gen_frame_mem (Pmode, addr), source);
26701 RTX_FRAME_RELATED_P (insn) = 1;
26702 add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (Pmode, LR_REGNUM));
26707 void
26708 thumb_set_return_address (rtx source, rtx scratch)
26710 arm_stack_offsets *offsets;
26711 HOST_WIDE_INT delta;
26712 HOST_WIDE_INT limit;
26713 int reg;
26714 rtx addr;
26715 unsigned long mask;
26717 emit_use (source);
26719 offsets = arm_get_frame_offsets ();
26720 mask = offsets->saved_regs_mask;
26721 if (mask & (1 << LR_REGNUM))
26723 limit = 1024;
26724 /* Find the saved regs. */
26725 if (frame_pointer_needed)
26727 delta = offsets->soft_frame - offsets->saved_args;
26728 reg = THUMB_HARD_FRAME_POINTER_REGNUM;
26729 if (TARGET_THUMB1)
26730 limit = 128;
26732 else
26734 delta = offsets->outgoing_args - offsets->saved_args;
26735 reg = SP_REGNUM;
26737 /* Allow for the stack frame. */
26738 if (TARGET_THUMB1 && TARGET_BACKTRACE)
26739 delta -= 16;
26740 /* The link register is always the first saved register. */
26741 delta -= 4;
26743 /* Construct the address. */
26744 addr = gen_rtx_REG (SImode, reg);
26745 if (delta > limit)
26747 emit_insn (gen_movsi (scratch, GEN_INT (delta)));
26748 emit_insn (gen_addsi3 (scratch, scratch, stack_pointer_rtx));
26749 addr = scratch;
26751 else
26752 addr = plus_constant (Pmode, addr, delta);
26754 /* The store needs to be marked as frame related in order to prevent
26755 DSE from deleting it as dead if it is based on fp. */
26756 rtx insn = emit_move_insn (gen_frame_mem (Pmode, addr), source);
26757 RTX_FRAME_RELATED_P (insn) = 1;
26758 add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (Pmode, LR_REGNUM));
26760 else
26761 emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
26764 /* Implements target hook vector_mode_supported_p. */
26765 bool
26766 arm_vector_mode_supported_p (machine_mode mode)
26768 /* Neon also supports V2SImode, etc. listed in the clause below. */
26769 if (TARGET_NEON && (mode == V2SFmode || mode == V4SImode || mode == V8HImode
26770 || mode == V4HFmode || mode == V16QImode || mode == V4SFmode
26771 || mode == V2DImode || mode == V8HFmode))
26772 return true;
26774 if ((TARGET_NEON || TARGET_IWMMXT)
26775 && ((mode == V2SImode)
26776 || (mode == V4HImode)
26777 || (mode == V8QImode)))
26778 return true;
26780 if (TARGET_INT_SIMD && (mode == V4UQQmode || mode == V4QQmode
26781 || mode == V2UHQmode || mode == V2HQmode || mode == V2UHAmode
26782 || mode == V2HAmode))
26783 return true;
26785 return false;
26788 /* Implements target hook array_mode_supported_p. */
26790 static bool
26791 arm_array_mode_supported_p (machine_mode mode,
26792 unsigned HOST_WIDE_INT nelems)
26794 if (TARGET_NEON
26795 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
26796 && (nelems >= 2 && nelems <= 4))
26797 return true;
26799 return false;
26802 /* Use the option -mvectorize-with-neon-double to override the use of quadword
26803 registers when autovectorizing for Neon, at least until multiple vector
26804 widths are supported properly by the middle-end. */
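/* For example, SImode arithmetic is normally vectorized using V4SImode
   (a quadword Q register); with -mvectorize-with-neon-double it is
   vectorized using V2SImode (a doubleword D register) instead.  */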
26806 static machine_mode
26807 arm_preferred_simd_mode (machine_mode mode)
26809 if (TARGET_NEON)
26810 switch (mode)
26812 case SFmode:
26813 return TARGET_NEON_VECTORIZE_DOUBLE ? V2SFmode : V4SFmode;
26814 case SImode:
26815 return TARGET_NEON_VECTORIZE_DOUBLE ? V2SImode : V4SImode;
26816 case HImode:
26817 return TARGET_NEON_VECTORIZE_DOUBLE ? V4HImode : V8HImode;
26818 case QImode:
26819 return TARGET_NEON_VECTORIZE_DOUBLE ? V8QImode : V16QImode;
26820 case DImode:
26821 if (!TARGET_NEON_VECTORIZE_DOUBLE)
26822 return V2DImode;
26823 break;
26825 default:;
26828 if (TARGET_REALLY_IWMMXT)
26829 switch (mode)
26831 case SImode:
26832 return V2SImode;
26833 case HImode:
26834 return V4HImode;
26835 case QImode:
26836 return V8QImode;
26838 default:;
26841 return word_mode;
26844 /* Implement TARGET_CLASS_LIKELY_SPILLED_P.
26846 We need to define this for LO_REGS on Thumb-1. Otherwise we can end up
26847 using r0-r4 for function arguments, r7 for the stack frame, and not have
26848 enough left over to do doubleword arithmetic. For Thumb-2 all the
26849 potentially problematic instructions accept high registers so this is not
26850 necessary. Care needs to be taken to avoid adding new Thumb-2 patterns
26851 that require many low registers. */
26852 static bool
26853 arm_class_likely_spilled_p (reg_class_t rclass)
26855 if ((TARGET_THUMB1 && rclass == LO_REGS)
26856 || rclass == CC_REG)
26857 return true;
26859 return false;
26862 /* Implements target hook small_register_classes_for_mode_p. */
26863 bool
26864 arm_small_register_classes_for_mode_p (machine_mode mode ATTRIBUTE_UNUSED)
26866 return TARGET_THUMB1;
26869 /* Implement TARGET_SHIFT_TRUNCATION_MASK. SImode shifts use normal
26870 ARM insns and therefore guarantee that the shift count is modulo 256.
26871 DImode shifts (those implemented by lib1funcs.S or by optabs.c)
26872 guarantee no particular behavior for out-of-range counts. */
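/* For example, an SImode shift whose count register holds 257 behaves as a
   shift by 1 (257 & 255), hence the mask of 255; DImode shifts give no such
   guarantee, hence the mask of 0.  */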
26874 static unsigned HOST_WIDE_INT
26875 arm_shift_truncation_mask (machine_mode mode)
26877 return mode == SImode ? 255 : 0;
26881 /* Map internal gcc register numbers to DWARF2 register numbers. */
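/* For instance, core registers r0-r15 keep their own numbers, s5 maps to
   64 + 5 = 69, and d16 (which has no single-precision alias) maps to
   256 + 16 = 272.  */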
26883 unsigned int
26884 arm_dbx_register_number (unsigned int regno)
26886 if (regno < 16)
26887 return regno;
26889 if (IS_VFP_REGNUM (regno))
26891 /* See comment in arm_dwarf_register_span. */
26892 if (VFP_REGNO_OK_FOR_SINGLE (regno))
26893 return 64 + regno - FIRST_VFP_REGNUM;
26894 else
26895 return 256 + (regno - FIRST_VFP_REGNUM) / 2;
26898 if (IS_IWMMXT_GR_REGNUM (regno))
26899 return 104 + regno - FIRST_IWMMXT_GR_REGNUM;
26901 if (IS_IWMMXT_REGNUM (regno))
26902 return 112 + regno - FIRST_IWMMXT_REGNUM;
26904 return DWARF_FRAME_REGISTERS;
26907 /* DWARF models VFPv3 registers as 32 64-bit registers.
26908 GCC models them as 64 32-bit registers, so we need to describe this to
26909 the DWARF generation code. Other registers can use the default. */
26910 static rtx
26911 arm_dwarf_register_span (rtx rtl)
26913 machine_mode mode;
26914 unsigned regno;
26915 rtx parts[16];
26916 int nregs;
26917 int i;
26919 regno = REGNO (rtl);
26920 if (!IS_VFP_REGNUM (regno))
26921 return NULL_RTX;
26923 /* XXX FIXME: The EABI defines two VFP register ranges:
26924 64-95: Legacy VFPv2 numbering for S0-S31 (obsolescent)
26925 256-287: D0-D31
26926 The recommended encoding for S0-S31 is a DW_OP_bit_piece of the
26927 corresponding D register. Until GDB supports this, we shall use the
26928 legacy encodings. We also use these encodings for D0-D15 for
26929 compatibility with older debuggers. */
26930 mode = GET_MODE (rtl);
26931 if (GET_MODE_SIZE (mode) < 8)
26932 return NULL_RTX;
26934 if (VFP_REGNO_OK_FOR_SINGLE (regno))
26936 nregs = GET_MODE_SIZE (mode) / 4;
26937 for (i = 0; i < nregs; i += 2)
26938 if (TARGET_BIG_END)
26940 parts[i] = gen_rtx_REG (SImode, regno + i + 1);
26941 parts[i + 1] = gen_rtx_REG (SImode, regno + i);
26943 else
26945 parts[i] = gen_rtx_REG (SImode, regno + i);
26946 parts[i + 1] = gen_rtx_REG (SImode, regno + i + 1);
26949 else
26951 nregs = GET_MODE_SIZE (mode) / 8;
26952 for (i = 0; i < nregs; i++)
26953 parts[i] = gen_rtx_REG (DImode, regno + i);
26956 return gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (nregs, parts));
26959 #if ARM_UNWIND_INFO
26960 /* Emit unwind directives for a store-multiple instruction or stack pointer
26961 push during alignment.
26962 These should only ever be generated by the function prologue code, so
26963 expect them to have a particular form.
26964 The store-multiple instruction sometimes pushes pc as the last register,
26965 although it should not be tracked in unwind information, or for -Os
26966 sometimes pushes some dummy registers before the first register that needs
26967 to be tracked in unwind information; such dummy registers are there just
26968 to avoid a separate stack adjustment, and will not be restored in the
26969 epilogue. */
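/* For example, a prologue "push {r4, r5, lr}" is annotated here as
   ".save {r4, r5, lr}", and any dummy registers pushed for -Os show up as an
   extra ".pad" directive rather than as saved registers.  */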
26971 static void
26972 arm_unwind_emit_sequence (FILE * asm_out_file, rtx p)
26974 int i;
26975 HOST_WIDE_INT offset;
26976 HOST_WIDE_INT nregs;
26977 int reg_size;
26978 unsigned reg;
26979 unsigned lastreg;
26980 unsigned padfirst = 0, padlast = 0;
26981 rtx e;
26983 e = XVECEXP (p, 0, 0);
26984 gcc_assert (GET_CODE (e) == SET);
26986 /* First insn will adjust the stack pointer. */
26987 gcc_assert (GET_CODE (e) == SET
26988 && REG_P (SET_DEST (e))
26989 && REGNO (SET_DEST (e)) == SP_REGNUM
26990 && GET_CODE (SET_SRC (e)) == PLUS);
26992 offset = -INTVAL (XEXP (SET_SRC (e), 1));
26993 nregs = XVECLEN (p, 0) - 1;
26994 gcc_assert (nregs);
26996 reg = REGNO (SET_SRC (XVECEXP (p, 0, 1)));
26997 if (reg < 16)
26999 /* For -Os dummy registers can be pushed at the beginning to
27000 avoid a separate stack pointer adjustment. */
27001 e = XVECEXP (p, 0, 1);
27002 e = XEXP (SET_DEST (e), 0);
27003 if (GET_CODE (e) == PLUS)
27004 padfirst = INTVAL (XEXP (e, 1));
27005 gcc_assert (padfirst == 0 || optimize_size);
27006 /* The function prologue may also push pc, but not annotate it as it is
27007 never restored. We turn this into a stack pointer adjustment. */
27008 e = XVECEXP (p, 0, nregs);
27009 e = XEXP (SET_DEST (e), 0);
27010 if (GET_CODE (e) == PLUS)
27011 padlast = offset - INTVAL (XEXP (e, 1)) - 4;
27012 else
27013 padlast = offset - 4;
27014 gcc_assert (padlast == 0 || padlast == 4);
27015 if (padlast == 4)
27016 fprintf (asm_out_file, "\t.pad #4\n");
27017 reg_size = 4;
27018 fprintf (asm_out_file, "\t.save {");
27020 else if (IS_VFP_REGNUM (reg))
27022 reg_size = 8;
27023 fprintf (asm_out_file, "\t.vsave {");
27025 else
27026 /* Unknown register type. */
27027 gcc_unreachable ();
27029 /* If the stack increment doesn't match the size of the saved registers,
27030 something has gone horribly wrong. */
27031 gcc_assert (offset == padfirst + nregs * reg_size + padlast);
27033 offset = padfirst;
27034 lastreg = 0;
27035 /* The remaining insns will describe the stores. */
27036 for (i = 1; i <= nregs; i++)
27038 /* Expect (set (mem <addr>) (reg)).
27039 Where <addr> is (reg:SP) or (plus (reg:SP) (const_int)). */
27040 e = XVECEXP (p, 0, i);
27041 gcc_assert (GET_CODE (e) == SET
27042 && MEM_P (SET_DEST (e))
27043 && REG_P (SET_SRC (e)));
27045 reg = REGNO (SET_SRC (e));
27046 gcc_assert (reg >= lastreg);
27048 if (i != 1)
27049 fprintf (asm_out_file, ", ");
27050 /* We can't use %r for VFP because we need to use the
27051 double-precision register names. */
27052 if (IS_VFP_REGNUM (reg))
27053 asm_fprintf (asm_out_file, "d%d", (reg - FIRST_VFP_REGNUM) / 2);
27054 else
27055 asm_fprintf (asm_out_file, "%r", reg);
27057 if (flag_checking)
27059 /* Check that the addresses are consecutive. */
27060 e = XEXP (SET_DEST (e), 0);
27061 if (GET_CODE (e) == PLUS)
27062 gcc_assert (REG_P (XEXP (e, 0))
27063 && REGNO (XEXP (e, 0)) == SP_REGNUM
27064 && CONST_INT_P (XEXP (e, 1))
27065 && offset == INTVAL (XEXP (e, 1)));
27066 else
27067 gcc_assert (i == 1
27068 && REG_P (e)
27069 && REGNO (e) == SP_REGNUM);
27070 offset += reg_size;
27073 fprintf (asm_out_file, "}\n");
27074 if (padfirst)
27075 fprintf (asm_out_file, "\t.pad #%d\n", padfirst);
27078 /* Emit unwind directives for a SET. */
27080 static void
27081 arm_unwind_emit_set (FILE * asm_out_file, rtx p)
27083 rtx e0;
27084 rtx e1;
27085 unsigned reg;
27087 e0 = XEXP (p, 0);
27088 e1 = XEXP (p, 1);
27089 switch (GET_CODE (e0))
27091 case MEM:
27092 /* Pushing a single register. */
27093 if (GET_CODE (XEXP (e0, 0)) != PRE_DEC
27094 || !REG_P (XEXP (XEXP (e0, 0), 0))
27095 || REGNO (XEXP (XEXP (e0, 0), 0)) != SP_REGNUM)
27096 abort ();
27098 asm_fprintf (asm_out_file, "\t.save ");
27099 if (IS_VFP_REGNUM (REGNO (e1)))
27100 asm_fprintf(asm_out_file, "{d%d}\n",
27101 (REGNO (e1) - FIRST_VFP_REGNUM) / 2);
27102 else
27103 asm_fprintf(asm_out_file, "{%r}\n", REGNO (e1));
27104 break;
27106 case REG:
27107 if (REGNO (e0) == SP_REGNUM)
27109 /* A stack increment. */
27110 if (GET_CODE (e1) != PLUS
27111 || !REG_P (XEXP (e1, 0))
27112 || REGNO (XEXP (e1, 0)) != SP_REGNUM
27113 || !CONST_INT_P (XEXP (e1, 1)))
27114 abort ();
27116 asm_fprintf (asm_out_file, "\t.pad #%wd\n",
27117 -INTVAL (XEXP (e1, 1)));
27119 else if (REGNO (e0) == HARD_FRAME_POINTER_REGNUM)
27121 HOST_WIDE_INT offset;
27123 if (GET_CODE (e1) == PLUS)
27125 if (!REG_P (XEXP (e1, 0))
27126 || !CONST_INT_P (XEXP (e1, 1)))
27127 abort ();
27128 reg = REGNO (XEXP (e1, 0));
27129 offset = INTVAL (XEXP (e1, 1));
27130 asm_fprintf (asm_out_file, "\t.setfp %r, %r, #%wd\n",
27131 HARD_FRAME_POINTER_REGNUM, reg,
27132 offset);
27134 else if (REG_P (e1))
27136 reg = REGNO (e1);
27137 asm_fprintf (asm_out_file, "\t.setfp %r, %r\n",
27138 HARD_FRAME_POINTER_REGNUM, reg);
27140 else
27141 abort ();
27143 else if (REG_P (e1) && REGNO (e1) == SP_REGNUM)
27145 /* Move from sp to reg. */
27146 asm_fprintf (asm_out_file, "\t.movsp %r\n", REGNO (e0));
27148 else if (GET_CODE (e1) == PLUS
27149 && REG_P (XEXP (e1, 0))
27150 && REGNO (XEXP (e1, 0)) == SP_REGNUM
27151 && CONST_INT_P (XEXP (e1, 1)))
27153 /* Set reg to offset from sp. */
27154 asm_fprintf (asm_out_file, "\t.movsp %r, #%d\n",
27155 REGNO (e0), (int)INTVAL(XEXP (e1, 1)));
27157 else
27158 abort ();
27159 break;
27161 default:
27162 abort ();
27167 /* Emit unwind directives for the given insn. */
27169 static void
27170 arm_unwind_emit (FILE * asm_out_file, rtx_insn *insn)
27172 rtx note, pat;
27173 bool handled_one = false;
27175 if (arm_except_unwind_info (&global_options) != UI_TARGET)
27176 return;
27178 if (!(flag_unwind_tables || crtl->uses_eh_lsda)
27179 && (TREE_NOTHROW (current_function_decl)
27180 || crtl->all_throwers_are_sibcalls))
27181 return;
27183 if (NOTE_P (insn) || !RTX_FRAME_RELATED_P (insn))
27184 return;
27186 for (note = REG_NOTES (insn); note ; note = XEXP (note, 1))
27188 switch (REG_NOTE_KIND (note))
27190 case REG_FRAME_RELATED_EXPR:
27191 pat = XEXP (note, 0);
27192 goto found;
27194 case REG_CFA_REGISTER:
27195 pat = XEXP (note, 0);
27196 if (pat == NULL)
27198 pat = PATTERN (insn);
27199 if (GET_CODE (pat) == PARALLEL)
27200 pat = XVECEXP (pat, 0, 0);
27203 /* Only emitted for IS_STACKALIGN re-alignment. */
27205 rtx dest, src;
27206 unsigned reg;
27208 src = SET_SRC (pat);
27209 dest = SET_DEST (pat);
27211 gcc_assert (src == stack_pointer_rtx);
27212 reg = REGNO (dest);
27213 asm_fprintf (asm_out_file, "\t.unwind_raw 0, 0x%x @ vsp = r%d\n",
27214 reg + 0x90, reg);
27216 handled_one = true;
27217 break;
27219 /* The INSN is generated in the epilogue. It is set as RTX_FRAME_RELATED_P
27220 to get correct dwarf information for shrink-wrapping. We should not
27221 emit unwind information for it because these notes are used either for
27222 pretend arguments or to adjust sp and restore registers from the
27223 stack. */
27224 case REG_CFA_DEF_CFA:
27225 case REG_CFA_ADJUST_CFA:
27226 case REG_CFA_RESTORE:
27227 return;
27229 case REG_CFA_EXPRESSION:
27230 case REG_CFA_OFFSET:
27231 /* ??? Only handling here what we actually emit. */
27232 gcc_unreachable ();
27234 default:
27235 break;
27238 if (handled_one)
27239 return;
27240 pat = PATTERN (insn);
27241 found:
27243 switch (GET_CODE (pat))
27245 case SET:
27246 arm_unwind_emit_set (asm_out_file, pat);
27247 break;
27249 case SEQUENCE:
27250 /* Store multiple. */
27251 arm_unwind_emit_sequence (asm_out_file, pat);
27252 break;
27254 default:
27255 abort();
27260 /* Output a reference from a function exception table to the type_info
27261 object X. The EABI specifies that the symbol should be relocated by
27262 an R_ARM_TARGET2 relocation. */
27264 static bool
27265 arm_output_ttype (rtx x)
27267 fputs ("\t.word\t", asm_out_file);
27268 output_addr_const (asm_out_file, x);
27269 /* Use special relocations for symbol references. */
27270 if (!CONST_INT_P (x))
27271 fputs ("(TARGET2)", asm_out_file);
27272 fputc ('\n', asm_out_file);
27274 return TRUE;
27277 /* Implement TARGET_ASM_EMIT_EXCEPT_PERSONALITY. */
27279 static void
27280 arm_asm_emit_except_personality (rtx personality)
27282 fputs ("\t.personality\t", asm_out_file);
27283 output_addr_const (asm_out_file, personality);
27284 fputc ('\n', asm_out_file);
27286 #endif /* ARM_UNWIND_INFO */
27288 /* Implement TARGET_ASM_INITIALIZE_SECTIONS. */
27290 static void
27291 arm_asm_init_sections (void)
27293 #if ARM_UNWIND_INFO
27294 exception_section = get_unnamed_section (0, output_section_asm_op,
27295 "\t.handlerdata");
27296 #endif /* ARM_UNWIND_INFO */
27298 #ifdef OBJECT_FORMAT_ELF
27299 if (target_pure_code)
27300 text_section->unnamed.data = "\t.section .text,\"0x20000006\",%progbits";
27301 #endif
27304 /* Output unwind directives for the start/end of a function. */
27306 void
27307 arm_output_fn_unwind (FILE * f, bool prologue)
27309 if (arm_except_unwind_info (&global_options) != UI_TARGET)
27310 return;
27312 if (prologue)
27313 fputs ("\t.fnstart\n", f);
27314 else
27316 /* If this function will never be unwound, then mark it as such.
27317 The same condition is used in arm_unwind_emit to suppress
27318 the frame annotations. */
27319 if (!(flag_unwind_tables || crtl->uses_eh_lsda)
27320 && (TREE_NOTHROW (current_function_decl)
27321 || crtl->all_throwers_are_sibcalls))
27322 fputs("\t.cantunwind\n", f);
27324 fputs ("\t.fnend\n", f);
27328 static bool
27329 arm_emit_tls_decoration (FILE *fp, rtx x)
27331 enum tls_reloc reloc;
27332 rtx val;
27334 val = XVECEXP (x, 0, 0);
27335 reloc = (enum tls_reloc) INTVAL (XVECEXP (x, 0, 1));
27337 output_addr_const (fp, val);
27339 switch (reloc)
27341 case TLS_GD32:
27342 fputs ("(tlsgd)", fp);
27343 break;
27344 case TLS_LDM32:
27345 fputs ("(tlsldm)", fp);
27346 break;
27347 case TLS_LDO32:
27348 fputs ("(tlsldo)", fp);
27349 break;
27350 case TLS_IE32:
27351 fputs ("(gottpoff)", fp);
27352 break;
27353 case TLS_LE32:
27354 fputs ("(tpoff)", fp);
27355 break;
27356 case TLS_DESCSEQ:
27357 fputs ("(tlsdesc)", fp);
27358 break;
27359 default:
27360 gcc_unreachable ();
27363 switch (reloc)
27365 case TLS_GD32:
27366 case TLS_LDM32:
27367 case TLS_IE32:
27368 case TLS_DESCSEQ:
27369 fputs (" + (. - ", fp);
27370 output_addr_const (fp, XVECEXP (x, 0, 2));
27371 /* For DESCSEQ the 3rd operand encodes thumbness, and is added. */
27372 fputs (reloc == TLS_DESCSEQ ? " + " : " - ", fp);
27373 output_addr_const (fp, XVECEXP (x, 0, 3));
27374 fputc (')', fp);
27375 break;
27376 default:
27377 break;
27380 return TRUE;
27383 /* ARM implementation of TARGET_ASM_OUTPUT_DWARF_DTPREL. */
27385 static void
27386 arm_output_dwarf_dtprel (FILE *file, int size, rtx x)
27388 gcc_assert (size == 4);
27389 fputs ("\t.word\t", file);
27390 output_addr_const (file, x);
27391 fputs ("(tlsldo)", file);
27394 /* Implement TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
27396 static bool
27397 arm_output_addr_const_extra (FILE *fp, rtx x)
27399 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
27400 return arm_emit_tls_decoration (fp, x);
27401 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_PIC_LABEL)
27403 char label[256];
27404 int labelno = INTVAL (XVECEXP (x, 0, 0));
27406 ASM_GENERATE_INTERNAL_LABEL (label, "LPIC", labelno);
27407 assemble_name_raw (fp, label);
27409 return TRUE;
27411 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_GOTSYM_OFF)
27413 assemble_name (fp, "_GLOBAL_OFFSET_TABLE_");
27414 if (GOT_PCREL)
27415 fputs ("+.", fp);
27416 fputs ("-(", fp);
27417 output_addr_const (fp, XVECEXP (x, 0, 0));
27418 fputc (')', fp);
27419 return TRUE;
27421 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_SYMBOL_OFFSET)
27423 output_addr_const (fp, XVECEXP (x, 0, 0));
27424 if (GOT_PCREL)
27425 fputs ("+.", fp);
27426 fputs ("-(", fp);
27427 output_addr_const (fp, XVECEXP (x, 0, 1));
27428 fputc (')', fp);
27429 return TRUE;
27431 else if (GET_CODE (x) == CONST_VECTOR)
27432 return arm_emit_vector_const (fp, x);
27434 return FALSE;
27437 /* Output assembly for a shift instruction.
27438 SET_FLAGS determines how the instruction modifies the condition codes.
27439 0 - Do not set condition codes.
27440 1 - Set condition codes.
27441 2 - Use smallest instruction. */
27442 const char *
27443 arm_output_shift(rtx * operands, int set_flags)
27445 char pattern[100];
27446 static const char flag_chars[3] = {'?', '.', '!'};
27447 const char *shift;
27448 HOST_WIDE_INT val;
27449 char c;
27451 c = flag_chars[set_flags];
27452 shift = shift_op(operands[3], &val);
27453 if (shift)
27455 if (val != -1)
27456 operands[2] = GEN_INT(val);
27457 sprintf (pattern, "%s%%%c\t%%0, %%1, %%2", shift, c);
27459 else
27460 sprintf (pattern, "mov%%%c\t%%0, %%1", c);
27462 output_asm_insn (pattern, operands);
27463 return "";
27466 /* Output assembly for a WMMX immediate shift instruction. */
27467 const char *
27468 arm_output_iwmmxt_shift_immediate (const char *insn_name, rtx *operands, bool wror_or_wsra)
27470 int shift = INTVAL (operands[2]);
27471 char templ[50];
27472 machine_mode opmode = GET_MODE (operands[0]);
27474 gcc_assert (shift >= 0);
27476 /* Handle the case where the shift value is greater than 63 (for the D
27477 qualifier), 31 (for the W qualifier) or 15 (for the H qualifier). */
27478 if (((opmode == V4HImode) && (shift > 15))
27479 || ((opmode == V2SImode) && (shift > 31))
27480 || ((opmode == DImode) && (shift > 63)))
27482 if (wror_or_wsra)
27484 sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, 32);
27485 output_asm_insn (templ, operands);
27486 if (opmode == DImode)
27488 sprintf (templ, "%s\t%%0, %%0, #%d", insn_name, 32);
27489 output_asm_insn (templ, operands);
27492 else
27494 /* The destination register will contain all zeros. */
27495 sprintf (templ, "wzero\t%%0");
27496 output_asm_insn (templ, operands);
27498 return "";
27501 if ((opmode == DImode) && (shift > 32))
27503 sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, 32);
27504 output_asm_insn (templ, operands);
27505 sprintf (templ, "%s\t%%0, %%0, #%d", insn_name, shift - 32);
27506 output_asm_insn (templ, operands);
27508 else
27510 sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, shift);
27511 output_asm_insn (templ, operands);
27513 return "";
27516 /* Output assembly for a WMMX tinsr instruction. */
27517 const char *
27518 arm_output_iwmmxt_tinsr (rtx *operands)
27520 int mask = INTVAL (operands[3]);
27521 int i;
27522 char templ[50];
27523 int units = mode_nunits[GET_MODE (operands[0])];
27524 gcc_assert ((mask & (mask - 1)) == 0);
27525 for (i = 0; i < units; ++i)
27527 if ((mask & 0x01) == 1)
27529 break;
27531 mask >>= 1;
27533 gcc_assert (i < units);
27535 switch (GET_MODE (operands[0]))
27537 case V8QImode:
27538 sprintf (templ, "tinsrb%%?\t%%0, %%2, #%d", i);
27539 break;
27540 case V4HImode:
27541 sprintf (templ, "tinsrh%%?\t%%0, %%2, #%d", i);
27542 break;
27543 case V2SImode:
27544 sprintf (templ, "tinsrw%%?\t%%0, %%2, #%d", i);
27545 break;
27546 default:
27547 gcc_unreachable ();
27548 break;
27550 output_asm_insn (templ, operands);
27552 return "";
27555 /* Output a Thumb-1 casesi dispatch sequence. */
27556 const char *
27557 thumb1_output_casesi (rtx *operands)
27559 rtx diff_vec = PATTERN (NEXT_INSN (as_a <rtx_insn *> (operands[0])));
27561 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
27563 switch (GET_MODE(diff_vec))
27565 case QImode:
27566 return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ?
27567 "bl\t%___gnu_thumb1_case_uqi" : "bl\t%___gnu_thumb1_case_sqi");
27568 case HImode:
27569 return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ?
27570 "bl\t%___gnu_thumb1_case_uhi" : "bl\t%___gnu_thumb1_case_shi");
27571 case SImode:
27572 return "bl\t%___gnu_thumb1_case_si";
27573 default:
27574 gcc_unreachable ();
27578 /* Output a Thumb-2 casesi instruction. */
27579 const char *
27580 thumb2_output_casesi (rtx *operands)
27582 rtx diff_vec = PATTERN (NEXT_INSN (as_a <rtx_insn *> (operands[2])));
27584 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
27586 output_asm_insn ("cmp\t%0, %1", operands);
27587 output_asm_insn ("bhi\t%l3", operands);
27588 switch (GET_MODE(diff_vec))
27590 case QImode:
27591 return "tbb\t[%|pc, %0]";
27592 case HImode:
27593 return "tbh\t[%|pc, %0, lsl #1]";
27594 case SImode:
27595 if (flag_pic)
27597 output_asm_insn ("adr\t%4, %l2", operands);
27598 output_asm_insn ("ldr\t%5, [%4, %0, lsl #2]", operands);
27599 output_asm_insn ("add\t%4, %4, %5", operands);
27600 return "bx\t%4";
27602 else
27604 output_asm_insn ("adr\t%4, %l2", operands);
27605 return "ldr\t%|pc, [%4, %0, lsl #2]";
27607 default:
27608 gcc_unreachable ();
27612 /* Implement TARGET_SCHED_ISSUE_RATE. Look up the issue rate in the
27613 per-core tuning structs. */
27614 static int
27615 arm_issue_rate (void)
27617 return current_tune->issue_rate;
27620 /* Return how many instructions the scheduler should look ahead to choose
27621 the best one. */
27622 static int
27623 arm_first_cycle_multipass_dfa_lookahead (void)
27625 int issue_rate = arm_issue_rate ();
27627 return issue_rate > 1 && !sched_fusion ? issue_rate : 0;
27630 /* Enable modeling of L2 auto-prefetcher. */
27631 static int
27632 arm_first_cycle_multipass_dfa_lookahead_guard (rtx_insn *insn, int ready_index)
27634 return autopref_multipass_dfa_lookahead_guard (insn, ready_index);
27637 const char *
27638 arm_mangle_type (const_tree type)
27640 /* The ARM ABI documents (10th October 2008) say that "__va_list"
27641 has to be mangled as if it is in the "std" namespace. */
27642 if (TARGET_AAPCS_BASED
27643 && lang_hooks.types_compatible_p (CONST_CAST_TREE (type), va_list_type))
27644 return "St9__va_list";
27646 /* Half-precision float. */
27647 if (TREE_CODE (type) == REAL_TYPE && TYPE_PRECISION (type) == 16)
27648 return "Dh";
27650 /* Try mangling as a Neon type; TYPE_NAME is non-NULL if this is a
27651 builtin type. */
27652 if (TYPE_NAME (type) != NULL)
27653 return arm_mangle_builtin_type (type);
27655 /* Use the default mangling. */
27656 return NULL;
27659 /* Order of allocation of core registers for Thumb: this allocation is
27660 written over the corresponding initial entries of the array
27661 initialized with REG_ALLOC_ORDER. We allocate all low registers
27662 first. Saving and restoring a low register is usually cheaper than
27663 using a call-clobbered high register. */
27665 static const int thumb_core_reg_alloc_order[] =
27667 3, 2, 1, 0, 4, 5, 6, 7,
27668 12, 14, 8, 9, 10, 11
27671 /* Adjust register allocation order when compiling for Thumb. */
27673 void
27674 arm_order_regs_for_local_alloc (void)
27676 const int arm_reg_alloc_order[] = REG_ALLOC_ORDER;
27677 memcpy(reg_alloc_order, arm_reg_alloc_order, sizeof (reg_alloc_order));
27678 if (TARGET_THUMB)
27679 memcpy (reg_alloc_order, thumb_core_reg_alloc_order,
27680 sizeof (thumb_core_reg_alloc_order));
27683 /* Implement TARGET_FRAME_POINTER_REQUIRED. */
27685 bool
27686 arm_frame_pointer_required (void)
27688 if (SUBTARGET_FRAME_POINTER_REQUIRED)
27689 return true;
27691 /* If the function receives nonlocal gotos, it needs to save the frame
27692 pointer in the nonlocal_goto_save_area object. */
27693 if (cfun->has_nonlocal_label)
27694 return true;
27696 /* The frame pointer is required for non-leaf APCS frames. */
27697 if (TARGET_ARM && TARGET_APCS_FRAME && !crtl->is_leaf)
27698 return true;
27700 /* If we are probing the stack in the prologue, we will have a faulting
27701 instruction prior to the stack adjustment and this requires a frame
27702 pointer if we want to catch the exception using the EABI unwinder. */
27703 if (!IS_INTERRUPT (arm_current_func_type ())
27704 && flag_stack_check == STATIC_BUILTIN_STACK_CHECK
27705 && arm_except_unwind_info (&global_options) == UI_TARGET
27706 && cfun->can_throw_non_call_exceptions)
27708 HOST_WIDE_INT size = get_frame_size ();
27710 /* That's irrelevant if there is no stack adjustment. */
27711 if (size <= 0)
27712 return false;
27714 /* That's relevant only if there is a stack probe. */
27715 if (crtl->is_leaf && !cfun->calls_alloca)
27717 /* We don't have the final size of the frame so adjust. */
27718 size += 32 * UNITS_PER_WORD;
27719 if (size > PROBE_INTERVAL && size > STACK_CHECK_PROTECT)
27720 return true;
27722 else
27723 return true;
27726 return false;
27729 /* Only Thumb-1 does not support conditional execution, so return true if
27730 the target is not Thumb-1. */
27731 static bool
27732 arm_have_conditional_execution (void)
27734 return !TARGET_THUMB1;
27737 /* The AAPCS sets the maximum alignment of a vector to 64 bits. */
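/* For example, a 128-bit Neon vector type is given 8-byte (64-bit) alignment
   under AAPCS rather than its natural 16-byte alignment.  */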
27738 static HOST_WIDE_INT
27739 arm_vector_alignment (const_tree type)
27741 HOST_WIDE_INT align = tree_to_shwi (TYPE_SIZE (type));
27743 if (TARGET_AAPCS_BASED)
27744 align = MIN (align, 64);
27746 return align;
27749 static unsigned int
27750 arm_autovectorize_vector_sizes (void)
27752 return TARGET_NEON_VECTORIZE_DOUBLE ? 0 : (16 | 8);
27755 static bool
27756 arm_vector_alignment_reachable (const_tree type, bool is_packed)
27758 /* Vectors which aren't in packed structures will not be less aligned than
27759 the natural alignment of their element type, so this is safe. */
27760 if (TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access)
27761 return !is_packed;
27763 return default_builtin_vector_alignment_reachable (type, is_packed);
27766 static bool
27767 arm_builtin_support_vector_misalignment (machine_mode mode,
27768 const_tree type, int misalignment,
27769 bool is_packed)
27771 if (TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access)
27773 HOST_WIDE_INT align = TYPE_ALIGN_UNIT (type);
27775 if (is_packed)
27776 return align == 1;
27778 /* If the misalignment is unknown, we should be able to handle the access
27779 so long as it is not to a member of a packed data structure. */
27780 if (misalignment == -1)
27781 return true;
27783 /* Return true if the misalignment is a multiple of the natural alignment
27784 of the vector's element type. This is probably always going to be
27785 true in practice, since we've already established that this isn't a
27786 packed access. */
27787 return ((misalignment % align) == 0);
27790 return default_builtin_support_vector_misalignment (mode, type, misalignment,
27791 is_packed);
27794 static void
27795 arm_conditional_register_usage (void)
27797 int regno;
27799 if (TARGET_THUMB1 && optimize_size)
27801 /* When optimizing for size on Thumb-1, it's better not
27802 to use the HI regs, because of the overhead of
27803 stacking them. */
27804 for (regno = FIRST_HI_REGNUM; regno <= LAST_HI_REGNUM; ++regno)
27805 fixed_regs[regno] = call_used_regs[regno] = 1;
27808 /* The link register can be clobbered by any branch insn,
27809 but we have no way to track that at present, so mark
27810 it as unavailable. */
27811 if (TARGET_THUMB1)
27812 fixed_regs[LR_REGNUM] = call_used_regs[LR_REGNUM] = 1;
27814 if (TARGET_32BIT && TARGET_HARD_FLOAT)
27816 /* VFPv3 registers are disabled when earlier VFP
27817 versions are selected due to the definition of
27818 LAST_VFP_REGNUM. */
27819 for (regno = FIRST_VFP_REGNUM;
27820 regno <= LAST_VFP_REGNUM; ++ regno)
27822 fixed_regs[regno] = 0;
27823 call_used_regs[regno] = regno < FIRST_VFP_REGNUM + 16
27824 || regno >= FIRST_VFP_REGNUM + 32;
27828 if (TARGET_REALLY_IWMMXT)
27830 regno = FIRST_IWMMXT_GR_REGNUM;
27831 /* The 2002/10/09 revision of the XScale ABI has wCG0
27832 and wCG1 as call-preserved registers. The 2002/11/21
27833 revision changed this so that all wCG registers are
27834 scratch registers. */
27835 for (regno = FIRST_IWMMXT_GR_REGNUM;
27836 regno <= LAST_IWMMXT_GR_REGNUM; ++ regno)
27837 fixed_regs[regno] = 0;
27838 /* The XScale ABI has wR0 - wR9 as scratch registers,
27839 the rest as call-preserved registers. */
27840 for (regno = FIRST_IWMMXT_REGNUM;
27841 regno <= LAST_IWMMXT_REGNUM; ++ regno)
27843 fixed_regs[regno] = 0;
27844 call_used_regs[regno] = regno < FIRST_IWMMXT_REGNUM + 10;
27848 if ((unsigned) PIC_OFFSET_TABLE_REGNUM != INVALID_REGNUM)
27850 fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
27851 call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
27853 else if (TARGET_APCS_STACK)
27855 fixed_regs[10] = 1;
27856 call_used_regs[10] = 1;
27858 /* -mcaller-super-interworking reserves r11 for calls to
27859 _interwork_r11_call_via_rN(). Making the register global
27860 is an easy way of ensuring that it remains valid for all
27861 calls. */
27862 if (TARGET_APCS_FRAME || TARGET_CALLER_INTERWORKING
27863 || TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME)
27865 fixed_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
27866 call_used_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
27867 if (TARGET_CALLER_INTERWORKING)
27868 global_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
27870 SUBTARGET_CONDITIONAL_REGISTER_USAGE
27873 static reg_class_t
27874 arm_preferred_rename_class (reg_class_t rclass)
27876 /* Thumb-2 instructions using LO_REGS may be smaller than instructions
27877 using GENERAL_REGS. During the register rename pass, we prefer LO_REGS
27878 so that code size can be reduced. */
27879 if (TARGET_THUMB2 && rclass == GENERAL_REGS)
27880 return LO_REGS;
27881 else
27882 return NO_REGS;
27885 /* Compute the attribute "length" of insn "*push_multi".
27886 This function MUST therefore be kept in sync with that insn pattern. */
27888 arm_attr_length_push_multi(rtx parallel_op, rtx first_op)
27890 int i, regno, hi_reg;
27891 int num_saves = XVECLEN (parallel_op, 0);
27893 /* ARM mode. */
27894 if (TARGET_ARM)
27895 return 4;
27896 /* Thumb1 mode. */
27897 if (TARGET_THUMB1)
27898 return 2;
27900 /* Thumb2 mode. */
27901 regno = REGNO (first_op);
27902 /* For PUSH/STM under Thumb-2 mode, we can use 16-bit encodings if the register
27903 list is 8-bit. Normally this means all registers in the list must be
27904 LO_REGS, that is (R0-R7). If any HI_REGS are used, then we must use 32-bit
27905 encodings. The one exception is PUSH, where LR in HI_REGS can be used
27906 with a 16-bit encoding. */
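/* For example, "push {r0-r7, lr}" still has a 16-bit encoding, whereas
   "push {r4, r8}" needs the 32-bit encoding because r8 is a high register.  */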
27907 hi_reg = (REGNO_REG_CLASS (regno) == HI_REGS) && (regno != LR_REGNUM);
27908 for (i = 1; i < num_saves && !hi_reg; i++)
27910 regno = REGNO (XEXP (XVECEXP (parallel_op, 0, i), 0));
27911 hi_reg |= (REGNO_REG_CLASS (regno) == HI_REGS) && (regno != LR_REGNUM);
27914 if (!hi_reg)
27915 return 2;
27916 return 4;
27919 /* Compute the attribute "length" of an insn. Currently, this function is used
27920 for "*load_multiple_with_writeback", "*pop_multiple_with_return" and
27921 "*pop_multiple_with_writeback_and_return". OPERANDS is the toplevel PARALLEL
27922 rtx, RETURN_PC is true if OPERANDS contains a return insn. WRITE_BACK_P is
27923 true if OPERANDS contains an insn which explicitly updates the base register. */
27926 arm_attr_length_pop_multi (rtx *operands, bool return_pc, bool write_back_p)
27928 /* ARM mode. */
27929 if (TARGET_ARM)
27930 return 4;
27931 /* Thumb1 mode. */
27932 if (TARGET_THUMB1)
27933 return 2;
27935 rtx parallel_op = operands[0];
27937 /* Initialize to the number of elements in the PARALLEL. */
27937 unsigned indx = XVECLEN (parallel_op, 0) - 1;
27939 /* Initialize the value to the base register number. */
27939 unsigned regno = REGNO (operands[1]);
27940 /* Skip the return and write back patterns.
27941 We only need the register pop pattern for later analysis. */
27942 unsigned first_indx = 0;
27943 first_indx += return_pc ? 1 : 0;
27944 first_indx += write_back_p ? 1 : 0;
27946 /* A pop operation can be done through LDM or POP. If the base register is SP
27947 and write back is used, then an LDM is an alias of POP. */
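/* For instance, "ldm sp!, {r4, r5, pc}" assembles to the same instruction
   as "pop {r4, r5, pc}".  */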
27948 bool pop_p = (regno == SP_REGNUM && write_back_p);
27949 bool ldm_p = !pop_p;
27951 /* Check base register for LDM. */
27952 if (ldm_p && REGNO_REG_CLASS (regno) == HI_REGS)
27953 return 4;
27955 /* Check each register in the list. */
27956 for (; indx >= first_indx; indx--)
27958 regno = REGNO (XEXP (XVECEXP (parallel_op, 0, indx), 0));
27959 /* For POP, PC in HI_REGS can be used with 16-bit encoding. See similar
27960 comment in arm_attr_length_push_multi. */
27961 if (REGNO_REG_CLASS (regno) == HI_REGS
27962 && (regno != PC_REGNUM || ldm_p))
27963 return 4;
27966 return 2;
27969 /* Compute the number of instructions emitted by output_move_double. */
27971 arm_count_output_move_double_insns (rtx *operands)
27973 int count;
27974 rtx ops[2];
27975 /* output_move_double may modify the operands array, so call it
27976 here on a copy of the array. */
27977 ops[0] = operands[0];
27978 ops[1] = operands[1];
27979 output_move_double (ops, false, &count);
27980 return count;
27984 vfp3_const_double_for_fract_bits (rtx operand)
27986 REAL_VALUE_TYPE r0;
27988 if (!CONST_DOUBLE_P (operand))
27989 return 0;
27991 r0 = *CONST_DOUBLE_REAL_VALUE (operand);
27992 if (exact_real_inverse (DFmode, &r0)
27993 && !REAL_VALUE_NEGATIVE (r0))
27995 if (exact_real_truncate (DFmode, &r0))
27997 HOST_WIDE_INT value = real_to_integer (&r0);
27998 value = value & 0xffffffff;
27999 if ((value != 0) && ( (value & (value - 1)) == 0))
28001 int ret = exact_log2 (value);
28002 gcc_assert (IN_RANGE (ret, 0, 31));
28003 return ret;
28007 return 0;
28010 /* If X is a CONST_DOUBLE with a value that is a power of 2 whose
28011 log2 is in [1, 32], return that log2. Otherwise return -1.
28012 This is used in the patterns for vcvt.s32.f32 floating-point to
28013 fixed-point conversions. */
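/* For example, 4.0 yields 2, while 3.0 (not a power of two) and 1.0 (log2
   outside [1, 32]) yield -1.  */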
28016 vfp3_const_double_for_bits (rtx x)
28018 const REAL_VALUE_TYPE *r;
28020 if (!CONST_DOUBLE_P (x))
28021 return -1;
28023 r = CONST_DOUBLE_REAL_VALUE (x);
28025 if (REAL_VALUE_NEGATIVE (*r)
28026 || REAL_VALUE_ISNAN (*r)
28027 || REAL_VALUE_ISINF (*r)
28028 || !real_isinteger (r, SFmode))
28029 return -1;
28031 HOST_WIDE_INT hwint = exact_log2 (real_to_integer (r));
28033 /* The exact_log2 above will have returned -1 if this is
28034 not an exact log2. */
28035 if (!IN_RANGE (hwint, 1, 32))
28036 return -1;
28038 return hwint;
28042 /* Emit a memory barrier around an atomic sequence according to MODEL. */
28044 static void
28045 arm_pre_atomic_barrier (enum memmodel model)
28047 if (need_atomic_barrier_p (model, true))
28048 emit_insn (gen_memory_barrier ());
28051 static void
28052 arm_post_atomic_barrier (enum memmodel model)
28054 if (need_atomic_barrier_p (model, false))
28055 emit_insn (gen_memory_barrier ());
28058 /* Emit the load-exclusive and store-exclusive instructions.
28059 Use acquire and release versions if necessary. */
28061 static void
28062 arm_emit_load_exclusive (machine_mode mode, rtx rval, rtx mem, bool acq)
28064 rtx (*gen) (rtx, rtx);
28066 if (acq)
28068 switch (mode)
28070 case QImode: gen = gen_arm_load_acquire_exclusiveqi; break;
28071 case HImode: gen = gen_arm_load_acquire_exclusivehi; break;
28072 case SImode: gen = gen_arm_load_acquire_exclusivesi; break;
28073 case DImode: gen = gen_arm_load_acquire_exclusivedi; break;
28074 default:
28075 gcc_unreachable ();
28078 else
28080 switch (mode)
28082 case QImode: gen = gen_arm_load_exclusiveqi; break;
28083 case HImode: gen = gen_arm_load_exclusivehi; break;
28084 case SImode: gen = gen_arm_load_exclusivesi; break;
28085 case DImode: gen = gen_arm_load_exclusivedi; break;
28086 default:
28087 gcc_unreachable ();
28091 emit_insn (gen (rval, mem));
28094 static void
28095 arm_emit_store_exclusive (machine_mode mode, rtx bval, rtx rval,
28096 rtx mem, bool rel)
28098 rtx (*gen) (rtx, rtx, rtx);
28100 if (rel)
28102 switch (mode)
28104 case QImode: gen = gen_arm_store_release_exclusiveqi; break;
28105 case HImode: gen = gen_arm_store_release_exclusivehi; break;
28106 case SImode: gen = gen_arm_store_release_exclusivesi; break;
28107 case DImode: gen = gen_arm_store_release_exclusivedi; break;
28108 default:
28109 gcc_unreachable ();
28112 else
28114 switch (mode)
28116 case QImode: gen = gen_arm_store_exclusiveqi; break;
28117 case HImode: gen = gen_arm_store_exclusivehi; break;
28118 case SImode: gen = gen_arm_store_exclusivesi; break;
28119 case DImode: gen = gen_arm_store_exclusivedi; break;
28120 default:
28121 gcc_unreachable ();
28125 emit_insn (gen (bval, rval, mem));
28128 /* Emit INSN as a jump instruction and mark it as unlikely to be taken. */
28130 static void
28131 emit_unlikely_jump (rtx insn)
28133 int very_unlikely = REG_BR_PROB_BASE / 100 - 1;
28135 rtx_insn *jump = emit_jump_insn (insn);
28136 add_int_reg_note (jump, REG_BR_PROB, very_unlikely);
28139 /* Expand a compare and swap pattern. */
28141 void
28142 arm_expand_compare_and_swap (rtx operands[])
28144 rtx bval, bdst, rval, mem, oldval, newval, is_weak, mod_s, mod_f, x;
28145 machine_mode mode;
28146 rtx (*gen) (rtx, rtx, rtx, rtx, rtx, rtx, rtx, rtx);
28148 bval = operands[0];
28149 rval = operands[1];
28150 mem = operands[2];
28151 oldval = operands[3];
28152 newval = operands[4];
28153 is_weak = operands[5];
28154 mod_s = operands[6];
28155 mod_f = operands[7];
28156 mode = GET_MODE (mem);
28158 /* Normally the succ memory model must be stronger than fail, but in the
28159 unlikely event of fail being ACQUIRE and succ being RELEASE we need to
28160 promote succ to ACQ_REL so that we don't lose the acquire semantics. */
28162 if (TARGET_HAVE_LDACQ
28163 && is_mm_acquire (memmodel_from_int (INTVAL (mod_f)))
28164 && is_mm_release (memmodel_from_int (INTVAL (mod_s))))
28165 mod_s = GEN_INT (MEMMODEL_ACQ_REL);
28167 switch (mode)
28169 case QImode:
28170 case HImode:
28171 /* For narrow modes, we're going to perform the comparison in SImode,
28172 so do the zero-extension now. */
28173 rval = gen_reg_rtx (SImode);
28174 oldval = convert_modes (SImode, mode, oldval, true);
28175 /* FALLTHRU */
28177 case SImode:
28178 /* Force the value into a register if needed. We waited until after
28179 the zero-extension above to do this properly. */
28180 if (!arm_add_operand (oldval, SImode))
28181 oldval = force_reg (SImode, oldval);
28182 break;
28184 case DImode:
28185 if (!cmpdi_operand (oldval, mode))
28186 oldval = force_reg (mode, oldval);
28187 break;
28189 default:
28190 gcc_unreachable ();
28193 switch (mode)
28195 case QImode: gen = gen_atomic_compare_and_swapqi_1; break;
28196 case HImode: gen = gen_atomic_compare_and_swaphi_1; break;
28197 case SImode: gen = gen_atomic_compare_and_swapsi_1; break;
28198 case DImode: gen = gen_atomic_compare_and_swapdi_1; break;
28199 default:
28200 gcc_unreachable ();
28203 bdst = TARGET_THUMB1 ? bval : gen_rtx_REG (CCmode, CC_REGNUM);
28204 emit_insn (gen (bdst, rval, mem, oldval, newval, is_weak, mod_s, mod_f));
28206 if (mode == QImode || mode == HImode)
28207 emit_move_insn (operands[1], gen_lowpart (mode, rval));
28209 /* In all cases, we arrange for success to be signaled by Z set.
28210 This arrangement allows for the boolean result to be used directly
28211 in a subsequent branch, post optimization. For Thumb-1 targets, the
28212 boolean negation of the result is also stored in bval because the Thumb-1
28213 backend lacks dependency tracking for the CC flag, as flag-setting is not
28214 represented at the RTL level. */
28215 if (TARGET_THUMB1)
28216 emit_insn (gen_cstoresi_eq0_thumb1 (bval, bdst));
28217 else
28219 x = gen_rtx_EQ (SImode, bdst, const0_rtx);
28220 emit_insn (gen_rtx_SET (bval, x));
28224 /* Split a compare and swap pattern. It is IMPLEMENTATION DEFINED whether
28225 another memory store between the load-exclusive and store-exclusive can
28226 reset the monitor from Exclusive to Open state. This means we must wait
28227 until after reload to split the pattern, lest we get a register spill in
28228 the middle of the atomic sequence. Success of the compare and swap is
28229 indicated by the Z flag being set for 32-bit targets and by neg_bval being zero
28230 for Thumb-1 targets (i.e. the negation of the boolean value returned by
28231 the atomic_compare_and_swapmode standard pattern in operand 0). */
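/* Roughly, for a strong SImode compare-and-swap on a 32-bit target this
   split emits a loop of the form

     loop:  ldrex   rval, [mem]
            cmp     rval, oldval
            bne     done
            strex   neg_bval, newval, [mem]
            cmp     neg_bval, #0
            bne     loop
     done:

   with barriers or acquire/release variants added as the memory model
   requires.  */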
28233 void
28234 arm_split_compare_and_swap (rtx operands[])
28236 rtx rval, mem, oldval, newval, neg_bval;
28237 machine_mode mode;
28238 enum memmodel mod_s, mod_f;
28239 bool is_weak;
28240 rtx_code_label *label1, *label2;
28241 rtx x, cond;
28243 rval = operands[1];
28244 mem = operands[2];
28245 oldval = operands[3];
28246 newval = operands[4];
28247 is_weak = (operands[5] != const0_rtx);
28248 mod_s = memmodel_from_int (INTVAL (operands[6]));
28249 mod_f = memmodel_from_int (INTVAL (operands[7]));
28250 neg_bval = TARGET_THUMB1 ? operands[0] : operands[8];
28251 mode = GET_MODE (mem);
28253 bool is_armv8_sync = arm_arch8 && is_mm_sync (mod_s);
28255 bool use_acquire = TARGET_HAVE_LDACQ
28256 && !(is_mm_relaxed (mod_s) || is_mm_consume (mod_s)
28257 || is_mm_release (mod_s));
28259 bool use_release = TARGET_HAVE_LDACQ
28260 && !(is_mm_relaxed (mod_s) || is_mm_consume (mod_s)
28261 || is_mm_acquire (mod_s));
28263 /* For ARMv8, the load-acquire is too weak for __sync memory orders. Instead,
28264 a full barrier is emitted after the store-release. */
28265 if (is_armv8_sync)
28266 use_acquire = false;
28268 /* Checks whether a barrier is needed and emits one accordingly. */
28269 if (!(use_acquire || use_release))
28270 arm_pre_atomic_barrier (mod_s);
28272 label1 = NULL;
28273 if (!is_weak)
28275 label1 = gen_label_rtx ();
28276 emit_label (label1);
28278 label2 = gen_label_rtx ();
28280 arm_emit_load_exclusive (mode, rval, mem, use_acquire);
28282 /* Z is set to 0 for 32-bit targets (resp. rval set to 1) if oldval != rval,
28283 as required to communicate with arm_expand_compare_and_swap. */
28284 if (TARGET_32BIT)
28286 cond = arm_gen_compare_reg (NE, rval, oldval, neg_bval);
28287 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
28288 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
28289 gen_rtx_LABEL_REF (Pmode, label2), pc_rtx);
28290 emit_unlikely_jump (gen_rtx_SET (pc_rtx, x));
28292 else
28294 emit_move_insn (neg_bval, const1_rtx);
28295 cond = gen_rtx_NE (VOIDmode, rval, oldval);
28296 if (thumb1_cmpneg_operand (oldval, SImode))
28297 emit_unlikely_jump (gen_cbranchsi4_scratch (neg_bval, rval, oldval,
28298 label2, cond));
28299 else
28300 emit_unlikely_jump (gen_cbranchsi4_insn (cond, rval, oldval, label2));
28303 arm_emit_store_exclusive (mode, neg_bval, mem, newval, use_release);
28305 /* Weak or strong, we want EQ to be true for success, so that we
28306 match the flags that we got from the compare above. */
28307 if (TARGET_32BIT)
28309 cond = gen_rtx_REG (CCmode, CC_REGNUM);
28310 x = gen_rtx_COMPARE (CCmode, neg_bval, const0_rtx);
28311 emit_insn (gen_rtx_SET (cond, x));
28314 if (!is_weak)
28316 /* Z is set to boolean value of !neg_bval, as required to communicate
28317 with arm_expand_compare_and_swap. */
28318 x = gen_rtx_NE (VOIDmode, neg_bval, const0_rtx);
28319 emit_unlikely_jump (gen_cbranchsi4 (x, neg_bval, const0_rtx, label1));
28322 if (!is_mm_relaxed (mod_f))
28323 emit_label (label2);
28325 /* Checks whether a barrier is needed and emits one accordingly. */
28326 if (is_armv8_sync
28327 || !(use_acquire || use_release))
28328 arm_post_atomic_barrier (mod_s);
28330 if (is_mm_relaxed (mod_f))
28331 emit_label (label2);
28334 /* Split an atomic operation pattern. The operation is given by CODE and is one
28335 of PLUS, MINUS, IOR, XOR, SET (for an exchange operation) or NOT (for a NAND
28336 operation). The operation is performed on the content at MEM and on VALUE
28337 following the memory model MODEL_RTX. The content at MEM before and after
28338 the operation is returned in OLD_OUT and NEW_OUT respectively while the
28339 success of the operation is returned in COND. Using a scratch register or
28340 an operand register for these determines what result is returned for that
28341 pattern. */
28343 void
28344 arm_split_atomic_op (enum rtx_code code, rtx old_out, rtx new_out, rtx mem,
28345 rtx value, rtx model_rtx, rtx cond)
28347 enum memmodel model = memmodel_from_int (INTVAL (model_rtx));
28348 machine_mode mode = GET_MODE (mem);
28349 machine_mode wmode = (mode == DImode ? DImode : SImode);
28350 rtx_code_label *label;
28351 bool all_low_regs, bind_old_new;
28352 rtx x;
28354 bool is_armv8_sync = arm_arch8 && is_mm_sync (model);
28356 bool use_acquire = TARGET_HAVE_LDACQ
28357 && !(is_mm_relaxed (model) || is_mm_consume (model)
28358 || is_mm_release (model));
28360 bool use_release = TARGET_HAVE_LDACQ
28361 && !(is_mm_relaxed (model) || is_mm_consume (model)
28362 || is_mm_acquire (model));
28364 /* For ARMv8, a load-acquire is too weak for __sync memory orders. Instead,
28365 a full barrier is emitted after the store-release. */
28366 if (is_armv8_sync)
28367 use_acquire = false;
28369 /* Checks whether a barrier is needed and emits one accordingly. */
28370 if (!(use_acquire || use_release))
28371 arm_pre_atomic_barrier (model);
28373 label = gen_label_rtx ();
28374 emit_label (label);
28376 if (new_out)
28377 new_out = gen_lowpart (wmode, new_out);
28378 if (old_out)
28379 old_out = gen_lowpart (wmode, old_out);
28380 else
28381 old_out = new_out;
28382 value = simplify_gen_subreg (wmode, value, mode, 0);
28384 arm_emit_load_exclusive (mode, old_out, mem, use_acquire);
28386 /* Does the operation require the destination and first operand to use the same
28387 register? This is decided by the register constraints of the relevant insn
28388 patterns in thumb1.md. */
28389 gcc_assert (!new_out || REG_P (new_out));
28390 all_low_regs = REG_P (value) && REGNO_REG_CLASS (REGNO (value)) == LO_REGS
28391 && new_out && REGNO_REG_CLASS (REGNO (new_out)) == LO_REGS
28392 && REGNO_REG_CLASS (REGNO (old_out)) == LO_REGS;
28393 bind_old_new =
28394 (TARGET_THUMB1
28395 && code != SET
28396 && code != MINUS
28397 && (code != PLUS || (!all_low_regs && !satisfies_constraint_L (value))));
28399 /* We want to return the old value while putting the result of the operation
28400 in the same register as the old value, so copy the old value over to the
28401 destination register and use that register for the operation. */
28402 if (old_out && bind_old_new)
28404 emit_move_insn (new_out, old_out);
28405 old_out = new_out;
28408 switch (code)
28410 case SET:
28411 new_out = value;
28412 break;
28414 case NOT:
28415 x = gen_rtx_AND (wmode, old_out, value);
28416 emit_insn (gen_rtx_SET (new_out, x));
28417 x = gen_rtx_NOT (wmode, new_out);
28418 emit_insn (gen_rtx_SET (new_out, x));
28419 break;
28421 case MINUS:
28422 if (CONST_INT_P (value))
28424 value = GEN_INT (-INTVAL (value));
28425 code = PLUS;
28427 /* FALLTHRU */
28429 case PLUS:
28430 if (mode == DImode)
28432 /* DImode plus/minus need to clobber flags. */
28433 /* The adddi3 and subdi3 patterns are incorrectly written so that
28434 they require matching operands, even when we could easily support
28435 three operands. Thankfully, this can be fixed up post-splitting,
28436 as the individual add+adc patterns do accept three operands and
28437 post-reload cprop can make these moves go away. */
28438 emit_move_insn (new_out, old_out);
28439 if (code == PLUS)
28440 x = gen_adddi3 (new_out, new_out, value);
28441 else
28442 x = gen_subdi3 (new_out, new_out, value);
28443 emit_insn (x);
28444 break;
28446 /* FALLTHRU */
28448 default:
28449 x = gen_rtx_fmt_ee (code, wmode, old_out, value);
28450 emit_insn (gen_rtx_SET (new_out, x));
28451 break;
28454 arm_emit_store_exclusive (mode, cond, mem, gen_lowpart (mode, new_out),
28455 use_release);
28457 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
28458 emit_unlikely_jump (gen_cbranchsi4 (x, cond, const0_rtx, label));
28460 /* Checks whether a barrier is needed and emits one accordingly. */
28461 if (is_armv8_sync
28462 || !(use_acquire || use_release))
28463 arm_post_atomic_barrier (model);
28466 #define MAX_VECT_LEN 16
28468 struct expand_vec_perm_d
28470 rtx target, op0, op1;
28471 unsigned char perm[MAX_VECT_LEN];
28472 machine_mode vmode;
28473 unsigned char nelt;
28474 bool one_vector_p;
28475 bool testing_p;
28478 /* Generate a variable permutation. */
28480 static void
28481 arm_expand_vec_perm_1 (rtx target, rtx op0, rtx op1, rtx sel)
28483 machine_mode vmode = GET_MODE (target);
28484 bool one_vector_p = rtx_equal_p (op0, op1);
28486 gcc_checking_assert (vmode == V8QImode || vmode == V16QImode);
28487 gcc_checking_assert (GET_MODE (op0) == vmode);
28488 gcc_checking_assert (GET_MODE (op1) == vmode);
28489 gcc_checking_assert (GET_MODE (sel) == vmode);
28490 gcc_checking_assert (TARGET_NEON);
28492 if (one_vector_p)
28494 if (vmode == V8QImode)
28495 emit_insn (gen_neon_vtbl1v8qi (target, op0, sel));
28496 else
28497 emit_insn (gen_neon_vtbl1v16qi (target, op0, sel));
28499 else
28501 rtx pair;
28503 if (vmode == V8QImode)
28505 pair = gen_reg_rtx (V16QImode);
28506 emit_insn (gen_neon_vcombinev8qi (pair, op0, op1));
28507 pair = gen_lowpart (TImode, pair);
28508 emit_insn (gen_neon_vtbl2v8qi (target, pair, sel));
28510 else
28512 pair = gen_reg_rtx (OImode);
28513 emit_insn (gen_neon_vcombinev16qi (pair, op0, op1));
28514 emit_insn (gen_neon_vtbl2v16qi (target, pair, sel));
28519 void
28520 arm_expand_vec_perm (rtx target, rtx op0, rtx op1, rtx sel)
28522 machine_mode vmode = GET_MODE (target);
28523 unsigned int i, nelt = GET_MODE_NUNITS (vmode);
28524 bool one_vector_p = rtx_equal_p (op0, op1);
28525 rtx rmask[MAX_VECT_LEN], mask;
28527 /* TODO: ARM's VTBL indexing is little-endian. In order to handle GCC's
28528 numbering of elements for big-endian, we must reverse the order. */
28529 gcc_checking_assert (!BYTES_BIG_ENDIAN);
28531 /* The VTBL instruction does not use a modulo index, so we must take care
28532 of that ourselves. */
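/* Out-of-range VTBL indexes select zero rather than wrapping, so the selector
   is masked down to the valid range first (e.g. with 7 for a single V8QI
   table, or 15 when two V8QI vectors are combined).  */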
28533 mask = GEN_INT (one_vector_p ? nelt - 1 : 2 * nelt - 1);
28534 for (i = 0; i < nelt; ++i)
28535 rmask[i] = mask;
28536 mask = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rmask));
28537 sel = expand_simple_binop (vmode, AND, sel, mask, NULL, 0, OPTAB_LIB_WIDEN);
28539 arm_expand_vec_perm_1 (target, op0, op1, sel);
28542 /* Map lane ordering between architectural lane order and GCC lane order,
28543 taking the ABI into account. See comment above output_move_neon for details. */
28545 static int
28546 neon_endian_lane_map (machine_mode mode, int lane)
28548 if (BYTES_BIG_ENDIAN)
28550 int nelems = GET_MODE_NUNITS (mode);
28551 /* Reverse lane order. */
28552 lane = (nelems - 1 - lane);
28553 /* Reverse D register order, to match ABI. */
28554 if (GET_MODE_SIZE (mode) == 16)
28555 lane = lane ^ (nelems / 2);
28557 return lane;
28560 /* Some permutations index into pairs of vectors; this is a helper function
28561 to map indexes into those pairs of vectors. */
28563 static int
28564 neon_pair_endian_lane_map (machine_mode mode, int lane)
28566 int nelem = GET_MODE_NUNITS (mode);
28567 if (BYTES_BIG_ENDIAN)
28568 lane =
28569 neon_endian_lane_map (mode, lane & (nelem - 1)) + (lane & nelem);
28570 return lane;
28573 /* Generate or test for an insn that supports a constant permutation. */
28575 /* Recognize patterns for the VUZP insns. */
28577 static bool
28578 arm_evpc_neon_vuzp (struct expand_vec_perm_d *d)
28580 unsigned int i, odd, mask, nelt = d->nelt;
28581 rtx out0, out1, in0, in1;
28582 rtx (*gen)(rtx, rtx, rtx, rtx);
28583 int first_elem;
28584 int swap_nelt;
28586 if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
28587 return false;
28589 /* arm_expand_vec_perm_const_1 () helpfully swaps the operands for the
28590 big-endian pattern on 64-bit vectors, so we correct for that. */
28591 swap_nelt = BYTES_BIG_ENDIAN && !d->one_vector_p
28592 && GET_MODE_SIZE (d->vmode) == 8 ? d->nelt : 0;
28594 first_elem = d->perm[neon_endian_lane_map (d->vmode, 0)] ^ swap_nelt;
28596 if (first_elem == neon_endian_lane_map (d->vmode, 0))
28597 odd = 0;
28598 else if (first_elem == neon_endian_lane_map (d->vmode, 1))
28599 odd = 1;
28600 else
28601 return false;
28602 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
28604 for (i = 0; i < nelt; i++)
28606 unsigned elt =
28607 (neon_pair_endian_lane_map (d->vmode, i) * 2 + odd) & mask;
28608 if ((d->perm[i] ^ swap_nelt) != neon_pair_endian_lane_map (d->vmode, elt))
28609 return false;
28612 /* Success! */
28613 if (d->testing_p)
28614 return true;
28616 switch (d->vmode)
28618 case V16QImode: gen = gen_neon_vuzpv16qi_internal; break;
28619 case V8QImode: gen = gen_neon_vuzpv8qi_internal; break;
28620 case V8HImode: gen = gen_neon_vuzpv8hi_internal; break;
28621 case V4HImode: gen = gen_neon_vuzpv4hi_internal; break;
28622 case V8HFmode: gen = gen_neon_vuzpv8hf_internal; break;
28623 case V4HFmode: gen = gen_neon_vuzpv4hf_internal; break;
28624 case V4SImode: gen = gen_neon_vuzpv4si_internal; break;
28625 case V2SImode: gen = gen_neon_vuzpv2si_internal; break;
28626 case V2SFmode: gen = gen_neon_vuzpv2sf_internal; break;
28627 case V4SFmode: gen = gen_neon_vuzpv4sf_internal; break;
28628 default:
28629 gcc_unreachable ();
28632 in0 = d->op0;
28633 in1 = d->op1;
28634 if (swap_nelt != 0)
28635 std::swap (in0, in1);
28637 out0 = d->target;
28638 out1 = gen_reg_rtx (d->vmode);
28639 if (odd)
28640 std::swap (out0, out1);
28642 emit_insn (gen (out0, in0, in1, out1));
28643 return true;
28646 /* Recognize patterns for the VZIP insns. */
28648 static bool
28649 arm_evpc_neon_vzip (struct expand_vec_perm_d *d)
28651 unsigned int i, high, mask, nelt = d->nelt;
28652 rtx out0, out1, in0, in1;
28653 rtx (*gen)(rtx, rtx, rtx, rtx);
28654 int first_elem;
28655 bool is_swapped;
28657 if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
28658 return false;
28660 is_swapped = BYTES_BIG_ENDIAN;
28662 first_elem = d->perm[neon_endian_lane_map (d->vmode, 0) ^ is_swapped];
28664 high = nelt / 2;
28665 if (first_elem == neon_endian_lane_map (d->vmode, high))
28667 else if (first_elem == neon_endian_lane_map (d->vmode, 0))
28668 high = 0;
28669 else
28670 return false;
28671 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
28673 for (i = 0; i < nelt / 2; i++)
28675 unsigned elt =
28676 neon_pair_endian_lane_map (d->vmode, i + high) & mask;
28677 if (d->perm[neon_pair_endian_lane_map (d->vmode, 2 * i + is_swapped)]
28678 != elt)
28679 return false;
28680 elt =
28681 neon_pair_endian_lane_map (d->vmode, i + nelt + high) & mask;
28682 if (d->perm[neon_pair_endian_lane_map (d->vmode, 2 * i + !is_swapped)]
28683 != elt)
28684 return false;
28687 /* Success! */
28688 if (d->testing_p)
28689 return true;
28691 switch (d->vmode)
28693 case V16QImode: gen = gen_neon_vzipv16qi_internal; break;
28694 case V8QImode: gen = gen_neon_vzipv8qi_internal; break;
28695 case V8HImode: gen = gen_neon_vzipv8hi_internal; break;
28696 case V4HImode: gen = gen_neon_vzipv4hi_internal; break;
28697 case V8HFmode: gen = gen_neon_vzipv8hf_internal; break;
28698 case V4HFmode: gen = gen_neon_vzipv4hf_internal; break;
28699 case V4SImode: gen = gen_neon_vzipv4si_internal; break;
28700 case V2SImode: gen = gen_neon_vzipv2si_internal; break;
28701 case V2SFmode: gen = gen_neon_vzipv2sf_internal; break;
28702 case V4SFmode: gen = gen_neon_vzipv4sf_internal; break;
28703 default:
28704 gcc_unreachable ();
28707 in0 = d->op0;
28708 in1 = d->op1;
28709 if (is_swapped)
28710 std::swap (in0, in1);
28712 out0 = d->target;
28713 out1 = gen_reg_rtx (d->vmode);
28714 if (high)
28715 std::swap (out0, out1);
28717 emit_insn (gen (out0, in0, in1, out1));
28718 return true;
28721 /* Recognize patterns for the VREV insns. */
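 /* For example, the V16QImode selector {1, 0, 3, 2, 5, 4, 7, 6, 9, 8,
    11, 10, 13, 12, 15, 14} (diff == 1) swaps the bytes within each
    halfword and is matched as a VREV16; for QImode elements, diff == 3
    and diff == 7 correspond to VREV32 and VREV64 respectively.  */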
28723 static bool
28724 arm_evpc_neon_vrev (struct expand_vec_perm_d *d)
28726 unsigned int i, j, diff, nelt = d->nelt;
28727 rtx (*gen)(rtx, rtx);
28729 if (!d->one_vector_p)
28730 return false;
28732 diff = d->perm[0];
28733 switch (diff)
28735 case 7:
28736 switch (d->vmode)
28738 case V16QImode: gen = gen_neon_vrev64v16qi; break;
28739 case V8QImode: gen = gen_neon_vrev64v8qi; break;
28740 default:
28741 return false;
28743 break;
28744 case 3:
28745 switch (d->vmode)
28747 case V16QImode: gen = gen_neon_vrev32v16qi; break;
28748 case V8QImode: gen = gen_neon_vrev32v8qi; break;
28749 case V8HImode: gen = gen_neon_vrev64v8hi; break;
28750 case V4HImode: gen = gen_neon_vrev64v4hi; break;
28751 case V8HFmode: gen = gen_neon_vrev64v8hf; break;
28752 case V4HFmode: gen = gen_neon_vrev64v4hf; break;
28753 default:
28754 return false;
28756 break;
28757 case 1:
28758 switch (d->vmode)
28760 case V16QImode: gen = gen_neon_vrev16v16qi; break;
28761 case V8QImode: gen = gen_neon_vrev16v8qi; break;
28762 case V8HImode: gen = gen_neon_vrev32v8hi; break;
28763 case V4HImode: gen = gen_neon_vrev32v4hi; break;
28764 case V4SImode: gen = gen_neon_vrev64v4si; break;
28765 case V2SImode: gen = gen_neon_vrev64v2si; break;
28766 case V4SFmode: gen = gen_neon_vrev64v4sf; break;
28767 case V2SFmode: gen = gen_neon_vrev64v2sf; break;
28768 default:
28769 return false;
28771 break;
28772 default:
28773 return false;
28776 for (i = 0; i < nelt ; i += diff + 1)
28777 for (j = 0; j <= diff; j += 1)
28779 /* This is guaranteed to be true as the value of diff
28780 is 7, 3 or 1 and we should have enough elements in the
28781 queue to generate this. Getting a vector mask with a
28782 value of diff other than these values implies that
28783 something is wrong by the time we get here. */
28784 gcc_assert (i + j < nelt);
28785 if (d->perm[i + j] != i + diff - j)
28786 return false;
28789 /* Success! */
28790 if (d->testing_p)
28791 return true;
28793 emit_insn (gen (d->target, d->op0));
28794 return true;
28797 /* Recognize patterns for the VTRN insns. */
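 /* For example, with two V8QImode operands the even "transpose" uses
    the selector {0, 8, 2, 10, 4, 12, 6, 14} and the odd one uses
    {1, 9, 3, 11, 5, 13, 7, 15} (little-endian lane numbering).  */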
28799 static bool
28800 arm_evpc_neon_vtrn (struct expand_vec_perm_d *d)
28802 unsigned int i, odd, mask, nelt = d->nelt;
28803 rtx out0, out1, in0, in1;
28804 rtx (*gen)(rtx, rtx, rtx, rtx);
28806 if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
28807 return false;
28809 /* Note that these are little-endian tests. Adjust for big-endian later. */
28810 if (d->perm[0] == 0)
28811 odd = 0;
28812 else if (d->perm[0] == 1)
28813 odd = 1;
28814 else
28815 return false;
28816 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
28818 for (i = 0; i < nelt; i += 2)
28820 if (d->perm[i] != i + odd)
28821 return false;
28822 if (d->perm[i + 1] != ((i + nelt + odd) & mask))
28823 return false;
28826 /* Success! */
28827 if (d->testing_p)
28828 return true;
28830 switch (d->vmode)
28832 case V16QImode: gen = gen_neon_vtrnv16qi_internal; break;
28833 case V8QImode: gen = gen_neon_vtrnv8qi_internal; break;
28834 case V8HImode: gen = gen_neon_vtrnv8hi_internal; break;
28835 case V4HImode: gen = gen_neon_vtrnv4hi_internal; break;
28836 case V8HFmode: gen = gen_neon_vtrnv8hf_internal; break;
28837 case V4HFmode: gen = gen_neon_vtrnv4hf_internal; break;
28838 case V4SImode: gen = gen_neon_vtrnv4si_internal; break;
28839 case V2SImode: gen = gen_neon_vtrnv2si_internal; break;
28840 case V2SFmode: gen = gen_neon_vtrnv2sf_internal; break;
28841 case V4SFmode: gen = gen_neon_vtrnv4sf_internal; break;
28842 default:
28843 gcc_unreachable ();
28846 in0 = d->op0;
28847 in1 = d->op1;
28848 if (BYTES_BIG_ENDIAN)
28850 std::swap (in0, in1);
28851 odd = !odd;
28854 out0 = d->target;
28855 out1 = gen_reg_rtx (d->vmode);
28856 if (odd)
28857 std::swap (out0, out1);
28859 emit_insn (gen (out0, in0, in1, out1));
28860 return true;
28863 /* Recognize patterns for the VEXT insns. */
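 /* For example, the two-operand V8QImode selector {3, 4, 5, 6, 7, 8,
    9, 10} extracts a contiguous window starting at element 3 of the
    concatenated input and is matched as a VEXT with offset 3.  */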
28865 static bool
28866 arm_evpc_neon_vext (struct expand_vec_perm_d *d)
28868 unsigned int i, nelt = d->nelt;
28869 rtx (*gen) (rtx, rtx, rtx, rtx);
28870 rtx offset;
28872 unsigned int location;
28874 unsigned int next = d->perm[0] + 1;
28876 /* TODO: Handle GCC's numbering of elements for big-endian. */
28877 if (BYTES_BIG_ENDIAN)
28878 return false;
28880 /* Check if the extracted indexes are increasing by one. */
28881 for (i = 1; i < nelt; next++, i++)
28883 /* If we hit the most significant element of the 2nd vector in
28884 the previous iteration, no need to test further. */
28885 if (next == 2 * nelt)
28886 return false;
28888 /* If we are operating on only one vector, it could be a
28889 rotation. If there are only two elements of size < 64, let
28890 arm_evpc_neon_vrev catch it. */
28891 if (d->one_vector_p && (next == nelt))
28893 if ((nelt == 2) && (d->vmode != V2DImode))
28894 return false;
28895 else
28896 next = 0;
28899 if (d->perm[i] != next)
28900 return false;
28903 location = d->perm[0];
28905 switch (d->vmode)
28907 case V16QImode: gen = gen_neon_vextv16qi; break;
28908 case V8QImode: gen = gen_neon_vextv8qi; break;
28909 case V4HImode: gen = gen_neon_vextv4hi; break;
28910 case V8HImode: gen = gen_neon_vextv8hi; break;
28911 case V2SImode: gen = gen_neon_vextv2si; break;
28912 case V4SImode: gen = gen_neon_vextv4si; break;
28913 case V4HFmode: gen = gen_neon_vextv4hf; break;
28914 case V8HFmode: gen = gen_neon_vextv8hf; break;
28915 case V2SFmode: gen = gen_neon_vextv2sf; break;
28916 case V4SFmode: gen = gen_neon_vextv4sf; break;
28917 case V2DImode: gen = gen_neon_vextv2di; break;
28918 default:
28919 return false;
28922 /* Success! */
28923 if (d->testing_p)
28924 return true;
28926 offset = GEN_INT (location);
28927 emit_insn (gen (d->target, d->op0, d->op1, offset));
28928 return true;
28931 /* The NEON VTBL instruction is a fully variable permutation that's even
28932 stronger than what we expose via VEC_PERM_EXPR. What it doesn't do
28933 is mask the index operand as VEC_PERM_EXPR requires. Therefore we
28934 can do slightly better by expanding this as a constant where we don't
28935 have to apply a mask. */
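 /* For example (little-endian only), a V8QImode selector such as
    {7, 6, 5, 4, 3, 2, 1, 0} is simply materialized as a constant
    vector in a register and used directly as the VTBL index operand,
    with no run-time masking of the indices.  */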
28937 static bool
28938 arm_evpc_neon_vtbl (struct expand_vec_perm_d *d)
28940 rtx rperm[MAX_VECT_LEN], sel;
28941 machine_mode vmode = d->vmode;
28942 unsigned int i, nelt = d->nelt;
28944 /* TODO: ARM's VTBL indexing is little-endian. In order to handle GCC's
28945 numbering of elements for big-endian, we must reverse the order. */
28946 if (BYTES_BIG_ENDIAN)
28947 return false;
28949 if (d->testing_p)
28950 return true;
28952 /* Generic code will try constant permutation twice: once with the
28953 original mode and again with the elements lowered to QImode.
28954 So wait and don't do the selector expansion ourselves. */
28955 if (vmode != V8QImode && vmode != V16QImode)
28956 return false;
28958 for (i = 0; i < nelt; ++i)
28959 rperm[i] = GEN_INT (d->perm[i]);
28960 sel = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rperm));
28961 sel = force_reg (vmode, sel);
28963 arm_expand_vec_perm_1 (d->target, d->op0, d->op1, sel);
28964 return true;
28967 static bool
28968 arm_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
28970 /* Check if the input mask matches vext before reordering the
28971 operands. */
28972 if (TARGET_NEON)
28973 if (arm_evpc_neon_vext (d))
28974 return true;
28976 /* The pattern matching functions above are written to look for a small
28977 number to begin the sequence (0, 1, N/2). If we begin with an index
28978 from the second operand, we can swap the operands. */
28979 if (d->perm[0] >= d->nelt)
28981 unsigned i, nelt = d->nelt;
28983 for (i = 0; i < nelt; ++i)
28984 d->perm[i] = (d->perm[i] + nelt) & (2 * nelt - 1);
28986 std::swap (d->op0, d->op1);
28989 if (TARGET_NEON)
28991 if (arm_evpc_neon_vuzp (d))
28992 return true;
28993 if (arm_evpc_neon_vzip (d))
28994 return true;
28995 if (arm_evpc_neon_vrev (d))
28996 return true;
28997 if (arm_evpc_neon_vtrn (d))
28998 return true;
28999 return arm_evpc_neon_vtbl (d);
29001 return false;
29004 /* Expand a vec_perm_const pattern. */
29006 bool
29007 arm_expand_vec_perm_const (rtx target, rtx op0, rtx op1, rtx sel)
29009 struct expand_vec_perm_d d;
29010 int i, nelt, which;
29012 d.target = target;
29013 d.op0 = op0;
29014 d.op1 = op1;
29016 d.vmode = GET_MODE (target);
29017 gcc_assert (VECTOR_MODE_P (d.vmode));
29018 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
29019 d.testing_p = false;
29021 for (i = which = 0; i < nelt; ++i)
29023 rtx e = XVECEXP (sel, 0, i);
29024 int ei = INTVAL (e) & (2 * nelt - 1);
29025 which |= (ei < nelt ? 1 : 2);
29026 d.perm[i] = ei;
29029 switch (which)
29031 default:
29032 gcc_unreachable();
29034 case 3:
29035 d.one_vector_p = false;
29036 if (!rtx_equal_p (op0, op1))
29037 break;
29039 /* The elements of PERM do not suggest that only the first operand
29040 is used, but both operands are identical. Allow easier matching
29041 of the permutation by folding the permutation into the single
29042 input vector. */
29043 /* FALLTHRU */
29044 case 2:
29045 for (i = 0; i < nelt; ++i)
29046 d.perm[i] &= nelt - 1;
29047 d.op0 = op1;
29048 d.one_vector_p = true;
29049 break;
29051 case 1:
29052 d.op1 = op0;
29053 d.one_vector_p = true;
29054 break;
29057 return arm_expand_vec_perm_const_1 (&d);
29060 /* Implement TARGET_VECTORIZE_VEC_PERM_CONST_OK. */
29062 static bool
29063 arm_vectorize_vec_perm_const_ok (machine_mode vmode,
29064 const unsigned char *sel)
29066 struct expand_vec_perm_d d;
29067 unsigned int i, nelt, which;
29068 bool ret;
29070 d.vmode = vmode;
29071 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
29072 d.testing_p = true;
29073 memcpy (d.perm, sel, nelt);
29075 /* Categorize the set of elements in the selector. */
29076 for (i = which = 0; i < nelt; ++i)
29078 unsigned char e = d.perm[i];
29079 gcc_assert (e < 2 * nelt);
29080 which |= (e < nelt ? 1 : 2);
29083 /* If all elements are from the second vector, fold them to the first. */
29084 if (which == 2)
29085 for (i = 0; i < nelt; ++i)
29086 d.perm[i] -= nelt;
29088 /* Check whether the mask can be applied to the vector type. */
29089 d.one_vector_p = (which != 3);
29091 d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
29092 d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
29093 if (!d.one_vector_p)
29094 d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);
29096 start_sequence ();
29097 ret = arm_expand_vec_perm_const_1 (&d);
29098 end_sequence ();
29100 return ret;
29103 bool
29104 arm_autoinc_modes_ok_p (machine_mode mode, enum arm_auto_incmodes code)
29106 /* If we are soft float and either have ldrd or the mode fits in a
29107 single word, then all auto increment forms are ok. */
29108 if (TARGET_SOFT_FLOAT && (TARGET_LDRD || GET_MODE_SIZE (mode) <= 4))
29109 return true;
29111 switch (code)
29113 /* Post increment and Pre Decrement are supported for all
29114 instruction forms except for vector forms. */
29115 case ARM_POST_INC:
29116 case ARM_PRE_DEC:
29117 if (VECTOR_MODE_P (mode))
29119 if (code != ARM_PRE_DEC)
29120 return true;
29121 else
29122 return false;
29125 return true;
29127 case ARM_POST_DEC:
29128 case ARM_PRE_INC:
29129 /* Without LDRD and mode size greater than
29130 word size, there is no point in auto-incrementing
29131 because ldm and stm will not have these forms. */
29132 if (!TARGET_LDRD && GET_MODE_SIZE (mode) > 4)
29133 return false;
29135 /* Vector and floating point modes do not support
29136 these auto increment forms. */
29137 if (FLOAT_MODE_P (mode) || VECTOR_MODE_P (mode))
29138 return false;
29140 return true;
29142 default:
29143 return false;
29147 return false;
29150 /* The default expansion of general 64-bit shifts in core-regs is suboptimal
29151 on ARM, since we know that shifts by negative amounts are no-ops.
29152 Additionally, the default expansion code is not available or suitable
29153 for post-reload insn splits (this can occur when the register allocator
29154 chooses not to do a shift in NEON).
29156 This function is used in both initial expand and post-reload splits, and
29157 handles all kinds of 64-bit shifts.
29159 Input requirements:
29160 - It is safe for the input and output to be the same register, but
29161 early-clobber rules apply for the shift amount and scratch registers.
29162 - Shift by register requires both scratch registers. In all other cases
29163 the scratch registers may be NULL.
29164 - Ashiftrt by a register also clobbers the CC register. */
29165 void
29166 arm_emit_coreregs_64bit_shift (enum rtx_code code, rtx out, rtx in,
29167 rtx amount, rtx scratch1, rtx scratch2)
29169 rtx out_high = gen_highpart (SImode, out);
29170 rtx out_low = gen_lowpart (SImode, out);
29171 rtx in_high = gen_highpart (SImode, in);
29172 rtx in_low = gen_lowpart (SImode, in);
29174 /* Terminology:
29175 in = the register pair containing the input value.
29176 out = the destination register pair.
29177 up = the high- or low-part of each pair.
29178 down = the opposite part to "up".
29179 In a shift, we can consider bits to shift from "up"-stream to
29180 "down"-stream, so in a left-shift "up" is the low-part and "down"
29181 is the high-part of each register pair. */
29183 rtx out_up = code == ASHIFT ? out_low : out_high;
29184 rtx out_down = code == ASHIFT ? out_high : out_low;
29185 rtx in_up = code == ASHIFT ? in_low : in_high;
29186 rtx in_down = code == ASHIFT ? in_high : in_low;
29188 gcc_assert (code == ASHIFT || code == ASHIFTRT || code == LSHIFTRT);
29189 gcc_assert (out
29190 && (REG_P (out) || GET_CODE (out) == SUBREG)
29191 && GET_MODE (out) == DImode);
29192 gcc_assert (in
29193 && (REG_P (in) || GET_CODE (in) == SUBREG)
29194 && GET_MODE (in) == DImode);
29195 gcc_assert (amount
29196 && (((REG_P (amount) || GET_CODE (amount) == SUBREG)
29197 && GET_MODE (amount) == SImode)
29198 || CONST_INT_P (amount)));
29199 gcc_assert (scratch1 == NULL
29200 || (GET_CODE (scratch1) == SCRATCH)
29201 || (GET_MODE (scratch1) == SImode
29202 && REG_P (scratch1)));
29203 gcc_assert (scratch2 == NULL
29204 || (GET_CODE (scratch2) == SCRATCH)
29205 || (GET_MODE (scratch2) == SImode
29206 && REG_P (scratch2)));
29207 gcc_assert (!REG_P (out) || !REG_P (amount)
29208 || !HARD_REGISTER_P (out)
29209 || (REGNO (out) != REGNO (amount)
29210 && REGNO (out) + 1 != REGNO (amount)));
29212 /* Macros to make following code more readable. */
29213 #define SUB_32(DEST,SRC) \
29214 gen_addsi3 ((DEST), (SRC), GEN_INT (-32))
29215 #define RSB_32(DEST,SRC) \
29216 gen_subsi3 ((DEST), GEN_INT (32), (SRC))
29217 #define SUB_S_32(DEST,SRC) \
29218 gen_addsi3_compare0 ((DEST), (SRC), \
29219 GEN_INT (-32))
29220 #define SET(DEST,SRC) \
29221 gen_rtx_SET ((DEST), (SRC))
29222 #define SHIFT(CODE,SRC,AMOUNT) \
29223 gen_rtx_fmt_ee ((CODE), SImode, (SRC), (AMOUNT))
29224 #define LSHIFT(CODE,SRC,AMOUNT) \
29225 gen_rtx_fmt_ee ((CODE) == ASHIFT ? ASHIFT : LSHIFTRT, \
29226 SImode, (SRC), (AMOUNT))
29227 #define REV_LSHIFT(CODE,SRC,AMOUNT) \
29228 gen_rtx_fmt_ee ((CODE) == ASHIFT ? LSHIFTRT : ASHIFT, \
29229 SImode, (SRC), (AMOUNT))
29230 #define ORR(A,B) \
29231 gen_rtx_IOR (SImode, (A), (B))
29232 #define BRANCH(COND,LABEL) \
29233 gen_arm_cond_branch ((LABEL), \
29234 gen_rtx_ ## COND (CCmode, cc_reg, \
29235 const0_rtx), \
29236 cc_reg)
29238 /* Shifts by register and shifts by constant are handled separately. */
29239 if (CONST_INT_P (amount))
29241 /* We have a shift-by-constant. */
29243 /* First, handle out-of-range shift amounts.
29244 In both cases we try to match the result an ARM instruction in a
29245 shift-by-register would give. This helps reduce execution
29246 differences between optimization levels, but it won't stop other
29247 parts of the compiler doing different things. This is "undefined
29248 behavior", in any case. */
29249 if (INTVAL (amount) <= 0)
29250 emit_insn (gen_movdi (out, in));
29251 else if (INTVAL (amount) >= 64)
29253 if (code == ASHIFTRT)
29255 rtx const31_rtx = GEN_INT (31);
29256 emit_insn (SET (out_down, SHIFT (code, in_up, const31_rtx)));
29257 emit_insn (SET (out_up, SHIFT (code, in_up, const31_rtx)));
29259 else
29260 emit_insn (gen_movdi (out, const0_rtx));
29263 /* Now handle valid shifts. */
29264 else if (INTVAL (amount) < 32)
29266 /* Shifts by a constant less than 32. */
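	  /* For example, a logical right shift by 10 is emitted as
	       out_low  = ((unsigned)in_low >> 10) | (in_high << 22);
	       out_high = (unsigned)in_high >> 10;  */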
29267 rtx reverse_amount = GEN_INT (32 - INTVAL (amount));
29269 /* Clearing the out register in DImode first avoids lots
29270 of spilling and results in less stack usage.
29271 Later this redundant insn is completely removed.
29272 Do that only if "in" and "out" are different registers. */
29273 if (REG_P (out) && REG_P (in) && REGNO (out) != REGNO (in))
29274 emit_insn (SET (out, const0_rtx));
29275 emit_insn (SET (out_down, LSHIFT (code, in_down, amount)));
29276 emit_insn (SET (out_down,
29277 ORR (REV_LSHIFT (code, in_up, reverse_amount),
29278 out_down)));
29279 emit_insn (SET (out_up, SHIFT (code, in_up, amount)));
29281 else
29283 /* Shifts by a constant greater than 31. */
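	  /* For example, a logical right shift by 40 is emitted as
	       out_low  = (unsigned)in_high >> 8;
	       out_high = 0;
	     while an arithmetic right shift by 40 instead sets
	       out_high = (signed)in_high >> 31;  */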
29284 rtx adj_amount = GEN_INT (INTVAL (amount) - 32);
29286 if (REG_P (out) && REG_P (in) && REGNO (out) != REGNO (in))
29287 emit_insn (SET (out, const0_rtx));
29288 emit_insn (SET (out_down, SHIFT (code, in_up, adj_amount)));
29289 if (code == ASHIFTRT)
29290 emit_insn (gen_ashrsi3 (out_up, in_up,
29291 GEN_INT (31)));
29292 else
29293 emit_insn (SET (out_up, const0_rtx));
29296 else
29298 /* We have a shift-by-register. */
29299 rtx cc_reg = gen_rtx_REG (CC_NOOVmode, CC_REGNUM);
29301 /* This alternative requires the scratch registers. */
29302 gcc_assert (scratch1 && REG_P (scratch1));
29303 gcc_assert (scratch2 && REG_P (scratch2));
29305 /* We will need the values "amount-32" and "32-amount" later.
29306 Swapping them around now allows the later code to be more general. */
29307 switch (code)
29309 case ASHIFT:
29310 emit_insn (SUB_32 (scratch1, amount));
29311 emit_insn (RSB_32 (scratch2, amount));
29312 break;
29313 case ASHIFTRT:
29314 emit_insn (RSB_32 (scratch1, amount));
29315 /* Also set CC = amount > 32. */
29316 emit_insn (SUB_S_32 (scratch2, amount));
29317 break;
29318 case LSHIFTRT:
29319 emit_insn (RSB_32 (scratch1, amount));
29320 emit_insn (SUB_32 (scratch2, amount));
29321 break;
29322 default:
29323 gcc_unreachable ();
29326 /* Emit code like this:
29328 arithmetic-left:
29329 out_down = in_down << amount;
29330 out_down = (in_up << (amount - 32)) | out_down;
29331 out_down = ((unsigned)in_up >> (32 - amount)) | out_down;
29332 out_up = in_up << amount;
29334 arithmetic-right:
29335 out_down = in_down >> amount;
29336 out_down = (in_up << (32 - amount)) | out_down;
29337 if (amount >= 32)
29338 out_down = ((signed)in_up >> (amount - 32)) | out_down;
29339 out_up = (signed)in_up >> amount;
29341 logical-right:
29342 out_down = in_down >> amount;
29343 out_down = (in_up << (32 - amount)) | out_down;
29344 if (amount >= 32)
29345 out_down = ((unsigned)in_up >> (amount - 32)) | out_down;
29346 out_up = (unsigned)in_up >> amount;
29348 The ARM and Thumb2 variants are the same but implemented slightly
29349 differently. If this were only called during expand we could just
29350 use the Thumb2 case and let combine do the right thing, but this
29351 can also be called from post-reload splitters. */
29353 emit_insn (SET (out_down, LSHIFT (code, in_down, amount)));
29355 if (!TARGET_THUMB2)
29357 /* Emit code for ARM mode. */
29358 emit_insn (SET (out_down,
29359 ORR (SHIFT (ASHIFT, in_up, scratch1), out_down)));
29360 if (code == ASHIFTRT)
29362 rtx_code_label *done_label = gen_label_rtx ();
29363 emit_jump_insn (BRANCH (LT, done_label));
29364 emit_insn (SET (out_down, ORR (SHIFT (ASHIFTRT, in_up, scratch2),
29365 out_down)));
29366 emit_label (done_label);
29368 else
29369 emit_insn (SET (out_down, ORR (SHIFT (LSHIFTRT, in_up, scratch2),
29370 out_down)));
29372 else
29374 /* Emit code for Thumb2 mode.
29375 Thumb2 can't do shift and or in one insn. */
29376 emit_insn (SET (scratch1, SHIFT (ASHIFT, in_up, scratch1)));
29377 emit_insn (gen_iorsi3 (out_down, out_down, scratch1));
29379 if (code == ASHIFTRT)
29381 rtx_code_label *done_label = gen_label_rtx ();
29382 emit_jump_insn (BRANCH (LT, done_label));
29383 emit_insn (SET (scratch2, SHIFT (ASHIFTRT, in_up, scratch2)));
29384 emit_insn (SET (out_down, ORR (out_down, scratch2)));
29385 emit_label (done_label);
29387 else
29389 emit_insn (SET (scratch2, SHIFT (LSHIFTRT, in_up, scratch2)));
29390 emit_insn (gen_iorsi3 (out_down, out_down, scratch2));
29394 emit_insn (SET (out_up, SHIFT (code, in_up, amount)));
29397 #undef SUB_32
29398 #undef RSB_32
29399 #undef SUB_S_32
29400 #undef SET
29401 #undef SHIFT
29402 #undef LSHIFT
29403 #undef REV_LSHIFT
29404 #undef ORR
29405 #undef BRANCH
29408 /* Returns true if the pattern is a valid symbolic address, which is either a
29409 symbol_ref or (symbol_ref + addend).
29411 According to the ARM ELF ABI, the initial addend of REL-type relocations
29412 processing MOVW and MOVT instructions is formed by interpreting the 16-bit
29413 literal field of the instruction as a 16-bit signed value in the range
29414 -32768 <= A < 32768. */
29416 bool
29417 arm_valid_symbolic_address_p (rtx addr)
29419 rtx xop0, xop1 = NULL_RTX;
29420 rtx tmp = addr;
29422 if (GET_CODE (tmp) == SYMBOL_REF || GET_CODE (tmp) == LABEL_REF)
29423 return true;
29425 /* (const (plus: symbol_ref const_int)) */
29426 if (GET_CODE (addr) == CONST)
29427 tmp = XEXP (addr, 0);
29429 if (GET_CODE (tmp) == PLUS)
29431 xop0 = XEXP (tmp, 0);
29432 xop1 = XEXP (tmp, 1);
29434 if (GET_CODE (xop0) == SYMBOL_REF && CONST_INT_P (xop1))
29435 return IN_RANGE (INTVAL (xop1), -0x8000, 0x7fff);
29438 return false;
29441 /* Returns true if this is a valid comparison operation and puts
29442 the operands into a form that is valid. */
29443 bool
29444 arm_validize_comparison (rtx *comparison, rtx * op1, rtx * op2)
29446 enum rtx_code code = GET_CODE (*comparison);
29447 int code_int;
29448 machine_mode mode = (GET_MODE (*op1) == VOIDmode)
29449 ? GET_MODE (*op2) : GET_MODE (*op1);
29451 gcc_assert (GET_MODE (*op1) != VOIDmode || GET_MODE (*op2) != VOIDmode);
29453 if (code == UNEQ || code == LTGT)
29454 return false;
29456 code_int = (int)code;
29457 arm_canonicalize_comparison (&code_int, op1, op2, 0);
29458 PUT_CODE (*comparison, (enum rtx_code)code_int);
29460 switch (mode)
29462 case SImode:
29463 if (!arm_add_operand (*op1, mode))
29464 *op1 = force_reg (mode, *op1);
29465 if (!arm_add_operand (*op2, mode))
29466 *op2 = force_reg (mode, *op2);
29467 return true;
29469 case DImode:
29470 if (!cmpdi_operand (*op1, mode))
29471 *op1 = force_reg (mode, *op1);
29472 if (!cmpdi_operand (*op2, mode))
29473 *op2 = force_reg (mode, *op2);
29474 return true;
29476 case HFmode:
29477 if (!TARGET_VFP_FP16INST)
29478 break;
29479 /* FP16 comparisons are done in SF mode. */
29480 mode = SFmode;
29481 *op1 = convert_to_mode (mode, *op1, 1);
29482 *op2 = convert_to_mode (mode, *op2, 1);
29483 /* Fall through. */
29484 case SFmode:
29485 case DFmode:
29486 if (!vfp_compare_operand (*op1, mode))
29487 *op1 = force_reg (mode, *op1);
29488 if (!vfp_compare_operand (*op2, mode))
29489 *op2 = force_reg (mode, *op2);
29490 return true;
29491 default:
29492 break;
29495 return false;
29499 /* Maximum number of instructions to set a block of memory. */
29500 static int
29501 arm_block_set_max_insns (void)
29503 if (optimize_function_for_size_p (cfun))
29504 return 4;
29505 else
29506 return current_tune->max_insns_inline_memset;
29509 /* Return TRUE if it's profitable to set a block of memory for the
29510 non-vectorized case. VAL is the value to set the memory
29511 with. LENGTH is the number of bytes to set. ALIGN is the
29512 alignment of the destination memory in bytes. UNALIGNED_P
29513 is TRUE if we can only set the memory with instructions
29514 meeting alignment requirements. USE_STRD_P is TRUE if we
29515 can use strd to set the memory. */
29516 static bool
29517 arm_block_set_non_vect_profit_p (rtx val,
29518 unsigned HOST_WIDE_INT length,
29519 unsigned HOST_WIDE_INT align,
29520 bool unaligned_p, bool use_strd_p)
29522 int num = 0;
29523 /* For a leftover of 0-7 bytes, we can set the memory block using
29524 strb/strh/str with the minimum number of instructions. */
29525 const int leftover[8] = {0, 1, 1, 2, 1, 2, 2, 3};
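   /* E.g. leftover[7] == 3 because 7 trailing bytes take one str (4
      bytes), one strh (2 bytes) and one strb (1 byte), while
      leftover[3] == 2 because 3 bytes take one strh plus one strb.  */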
29527 if (unaligned_p)
29529 num = arm_const_inline_cost (SET, val);
29530 num += length / align + length % align;
29532 else if (use_strd_p)
29534 num = arm_const_double_inline_cost (val);
29535 num += (length >> 3) + leftover[length & 7];
29537 else
29539 num = arm_const_inline_cost (SET, val);
29540 num += (length >> 2) + leftover[length & 3];
29543 /* We may be able to combine the last STRH/STRB pair into a single STR
29544 by shifting one byte back. */
29545 if (unaligned_access && length > 3 && (length & 3) == 3)
29546 num--;
29548 return (num <= arm_block_set_max_insns ());
29551 /* Return TRUE if it's profitable to set a block of memory for the
29552 vectorized case. LENGTH is the number of bytes to set.
29553 ALIGN is the alignment of destination memory in bytes.
29554 MODE is the vector mode used to set the memory. */
29555 static bool
29556 arm_block_set_vect_profit_p (unsigned HOST_WIDE_INT length,
29557 unsigned HOST_WIDE_INT align,
29558 machine_mode mode)
29560 int num;
29561 bool unaligned_p = ((align & 3) != 0);
29562 unsigned int nelt = GET_MODE_NUNITS (mode);
29564 /* Instruction loading constant value. */
29565 num = 1;
29566 /* Instructions storing the memory. */
29567 num += (length + nelt - 1) / nelt;
29568 /* Instructions adjusting the address expression. We only need to
29569 adjust the address expression if it's 4-byte aligned and the
29570 leftover bytes can only be stored by a misaligned store instruction. */
29571 if (!unaligned_p && (length & 3) != 0)
29572 num++;
29574 /* Store the first 16 bytes using vst1:v16qi for the aligned case. */
29575 if (!unaligned_p && mode == V16QImode)
29576 num--;
29578 return (num <= arm_block_set_max_insns ());
29581 /* Set a block of memory using vectorization instructions for the
29582 unaligned case. We fill the first LENGTH bytes of the memory
29583 area starting from DSTBASE with byte constant VALUE. ALIGN is
29584 the alignment requirement of memory. Return TRUE if succeeded. */
29585 static bool
29586 arm_block_set_unaligned_vect (rtx dstbase,
29587 unsigned HOST_WIDE_INT length,
29588 unsigned HOST_WIDE_INT value,
29589 unsigned HOST_WIDE_INT align)
29591 unsigned int i, j, nelt_v16, nelt_v8, nelt_mode;
29592 rtx dst, mem;
29593 rtx val_elt, val_vec, reg;
29594 rtx rval[MAX_VECT_LEN];
29595 rtx (*gen_func) (rtx, rtx);
29596 machine_mode mode;
29597 unsigned HOST_WIDE_INT v = value;
29598 unsigned int offset = 0;
29599 gcc_assert ((align & 0x3) != 0);
29600 nelt_v8 = GET_MODE_NUNITS (V8QImode);
29601 nelt_v16 = GET_MODE_NUNITS (V16QImode);
29602 if (length >= nelt_v16)
29604 mode = V16QImode;
29605 gen_func = gen_movmisalignv16qi;
29607 else
29609 mode = V8QImode;
29610 gen_func = gen_movmisalignv8qi;
29612 nelt_mode = GET_MODE_NUNITS (mode);
29613 gcc_assert (length >= nelt_mode);
29614 /* Skip if it isn't profitable. */
29615 if (!arm_block_set_vect_profit_p (length, align, mode))
29616 return false;
29618 dst = copy_addr_to_reg (XEXP (dstbase, 0));
29619 mem = adjust_automodify_address (dstbase, mode, dst, offset);
29621 v = sext_hwi (v, BITS_PER_WORD);
29622 val_elt = GEN_INT (v);
29623 for (j = 0; j < nelt_mode; j++)
29624 rval[j] = val_elt;
29626 reg = gen_reg_rtx (mode);
29627 val_vec = gen_rtx_CONST_VECTOR (mode, gen_rtvec_v (nelt_mode, rval));
29628 /* Emit instruction loading the constant value. */
29629 emit_move_insn (reg, val_vec);
29631 /* Handle nelt_mode bytes in a vector. */
29632 for (i = 0; (i + nelt_mode <= length); i += nelt_mode)
29634 emit_insn ((*gen_func) (mem, reg));
29635 if (i + 2 * nelt_mode <= length)
29637 emit_insn (gen_add2_insn (dst, GEN_INT (nelt_mode)));
29638 offset += nelt_mode;
29639 mem = adjust_automodify_address (dstbase, mode, dst, offset);
29643 /* If there are not less than nelt_v8 bytes leftover, we must be in
29644 V16QI mode. */
29645 gcc_assert ((i + nelt_v8) > length || mode == V16QImode);
29647 /* Handle (8, 16) bytes leftover. */
29648 if (i + nelt_v8 < length)
29650 emit_insn (gen_add2_insn (dst, GEN_INT (length - i)));
29651 offset += length - i;
29652 mem = adjust_automodify_address (dstbase, mode, dst, offset);
29654 /* We are shifting bytes back, set the alignment accordingly. */
29655 if ((length & 1) != 0 && align >= 2)
29656 set_mem_align (mem, BITS_PER_UNIT);
29658 emit_insn (gen_movmisalignv16qi (mem, reg));
29660 /* Handle (0, 8] bytes leftover. */
29661 else if (i < length && i + nelt_v8 >= length)
29663 if (mode == V16QImode)
29664 reg = gen_lowpart (V8QImode, reg);
29666 emit_insn (gen_add2_insn (dst, GEN_INT ((length - i)
29667 + (nelt_mode - nelt_v8))));
29668 offset += (length - i) + (nelt_mode - nelt_v8);
29669 mem = adjust_automodify_address (dstbase, V8QImode, dst, offset);
29671 /* We are shifting bytes back, set the alignment accordingly. */
29672 if ((length & 1) != 0 && align >= 2)
29673 set_mem_align (mem, BITS_PER_UNIT);
29675 emit_insn (gen_movmisalignv8qi (mem, reg));
29678 return true;
29681 /* Set a block of memory using vectorization instructions for the
29682 aligned case. We fill the first LENGTH bytes of the memory area
29683 starting from DSTBASE with byte constant VALUE. ALIGN is the
29684 alignment requirement of memory. Return TRUE if succeeded. */
29685 static bool
29686 arm_block_set_aligned_vect (rtx dstbase,
29687 unsigned HOST_WIDE_INT length,
29688 unsigned HOST_WIDE_INT value,
29689 unsigned HOST_WIDE_INT align)
29691 unsigned int i, j, nelt_v8, nelt_v16, nelt_mode;
29692 rtx dst, addr, mem;
29693 rtx val_elt, val_vec, reg;
29694 rtx rval[MAX_VECT_LEN];
29695 machine_mode mode;
29696 unsigned HOST_WIDE_INT v = value;
29697 unsigned int offset = 0;
29699 gcc_assert ((align & 0x3) == 0);
29700 nelt_v8 = GET_MODE_NUNITS (V8QImode);
29701 nelt_v16 = GET_MODE_NUNITS (V16QImode);
29702 if (length >= nelt_v16 && unaligned_access && !BYTES_BIG_ENDIAN)
29703 mode = V16QImode;
29704 else
29705 mode = V8QImode;
29707 nelt_mode = GET_MODE_NUNITS (mode);
29708 gcc_assert (length >= nelt_mode);
29709 /* Skip if it isn't profitable. */
29710 if (!arm_block_set_vect_profit_p (length, align, mode))
29711 return false;
29713 dst = copy_addr_to_reg (XEXP (dstbase, 0));
29715 v = sext_hwi (v, BITS_PER_WORD);
29716 val_elt = GEN_INT (v);
29717 for (j = 0; j < nelt_mode; j++)
29718 rval[j] = val_elt;
29720 reg = gen_reg_rtx (mode);
29721 val_vec = gen_rtx_CONST_VECTOR (mode, gen_rtvec_v (nelt_mode, rval));
29722 /* Emit instruction loading the constant value. */
29723 emit_move_insn (reg, val_vec);
29725 i = 0;
29726 /* Handle first 16 bytes specially using vst1:v16qi instruction. */
29727 if (mode == V16QImode)
29729 mem = adjust_automodify_address (dstbase, mode, dst, offset);
29730 emit_insn (gen_movmisalignv16qi (mem, reg));
29731 i += nelt_mode;
29732 /* Handle (8, 16) bytes leftover using vst1:v16qi again. */
29733 if (i + nelt_v8 < length && i + nelt_v16 > length)
29735 emit_insn (gen_add2_insn (dst, GEN_INT (length - nelt_mode)));
29736 offset += length - nelt_mode;
29737 mem = adjust_automodify_address (dstbase, mode, dst, offset);
29738 /* We are shifting bytes back, set the alignment accordingly. */
29739 if ((length & 0x3) == 0)
29740 set_mem_align (mem, BITS_PER_UNIT * 4);
29741 else if ((length & 0x1) == 0)
29742 set_mem_align (mem, BITS_PER_UNIT * 2);
29743 else
29744 set_mem_align (mem, BITS_PER_UNIT);
29746 emit_insn (gen_movmisalignv16qi (mem, reg));
29747 return true;
29749 /* Fall through for bytes leftover. */
29750 mode = V8QImode;
29751 nelt_mode = GET_MODE_NUNITS (mode);
29752 reg = gen_lowpart (V8QImode, reg);
29755 /* Handle 8 bytes in a vector. */
29756 for (; (i + nelt_mode <= length); i += nelt_mode)
29758 addr = plus_constant (Pmode, dst, i);
29759 mem = adjust_automodify_address (dstbase, mode, addr, offset + i);
29760 emit_move_insn (mem, reg);
29763 /* Handle single word leftover by shifting 4 bytes back. We can
29764 use aligned access for this case. */
29765 if (i + UNITS_PER_WORD == length)
29767 addr = plus_constant (Pmode, dst, i - UNITS_PER_WORD);
29768 offset += i - UNITS_PER_WORD;
29769 mem = adjust_automodify_address (dstbase, mode, addr, offset);
29770 /* We are shifting 4 bytes back, set the alignment accordingly. */
29771 if (align > UNITS_PER_WORD)
29772 set_mem_align (mem, BITS_PER_UNIT * UNITS_PER_WORD);
29774 emit_move_insn (mem, reg);
29776 /* Handle (0, 4), (4, 8) bytes leftover by shifting bytes back.
29777 We have to use unaligned access for this case. */
29778 else if (i < length)
29780 emit_insn (gen_add2_insn (dst, GEN_INT (length - nelt_mode)));
29781 offset += length - nelt_mode;
29782 mem = adjust_automodify_address (dstbase, mode, dst, offset);
29783 /* We are shifting bytes back, set the alignment accordingly. */
29784 if ((length & 1) == 0)
29785 set_mem_align (mem, BITS_PER_UNIT * 2);
29786 else
29787 set_mem_align (mem, BITS_PER_UNIT);
29789 emit_insn (gen_movmisalignv8qi (mem, reg));
29792 return true;
29795 /* Set a block of memory using plain strh/strb instructions, only
29796 using instructions allowed by ALIGN on the processor. We fill the
29797 first LENGTH bytes of the memory area starting from DSTBASE
29798 with byte constant VALUE. ALIGN is the alignment requirement
29799 of memory. */
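 /* For illustration, with ALIGN == 2, VALUE == 0xAB and LENGTH == 5,
    the constant is widened to 0xABAB and, if the profitability check
    passes, the expansion is two halfword stores at offsets 0 and 2
    followed by a byte store of 0xAB at offset 4.  */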
29800 static bool
29801 arm_block_set_unaligned_non_vect (rtx dstbase,
29802 unsigned HOST_WIDE_INT length,
29803 unsigned HOST_WIDE_INT value,
29804 unsigned HOST_WIDE_INT align)
29806 unsigned int i;
29807 rtx dst, addr, mem;
29808 rtx val_exp, val_reg, reg;
29809 machine_mode mode;
29810 HOST_WIDE_INT v = value;
29812 gcc_assert (align == 1 || align == 2);
29814 if (align == 2)
29815 v |= (value << BITS_PER_UNIT);
29817 v = sext_hwi (v, BITS_PER_WORD);
29818 val_exp = GEN_INT (v);
29819 /* Skip if it isn't profitable. */
29820 if (!arm_block_set_non_vect_profit_p (val_exp, length,
29821 align, true, false))
29822 return false;
29824 dst = copy_addr_to_reg (XEXP (dstbase, 0));
29825 mode = (align == 2 ? HImode : QImode);
29826 val_reg = force_reg (SImode, val_exp);
29827 reg = gen_lowpart (mode, val_reg);
29829 for (i = 0; (i + GET_MODE_SIZE (mode) <= length); i += GET_MODE_SIZE (mode))
29831 addr = plus_constant (Pmode, dst, i);
29832 mem = adjust_automodify_address (dstbase, mode, addr, i);
29833 emit_move_insn (mem, reg);
29836 /* Handle single byte leftover. */
29837 if (i + 1 == length)
29839 reg = gen_lowpart (QImode, val_reg);
29840 addr = plus_constant (Pmode, dst, i);
29841 mem = adjust_automodify_address (dstbase, QImode, addr, i);
29842 emit_move_insn (mem, reg);
29843 i++;
29846 gcc_assert (i == length);
29847 return true;
29850 /* Set a block of memory using plain strd/str/strh/strb instructions,
29851 to permit unaligned copies on processors which support unaligned
29852 semantics for those instructions. We fill the first LENGTH bytes
29853 of the memory area starting from DSTBASE with byte constant VALUE.
29854 ALIGN is the alignment requirement of memory. */
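 /* For illustration, a word-aligned 15-byte set of VALUE == 0xAB
    without strd widens the constant to 0xABABABAB, emits word stores
    at offsets 0, 4 and 8, and, when unaligned access is available,
    covers the remaining 3 bytes with a single overlapping unaligned
    str at offset 11 instead of an strh/strb pair.  */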
29855 static bool
29856 arm_block_set_aligned_non_vect (rtx dstbase,
29857 unsigned HOST_WIDE_INT length,
29858 unsigned HOST_WIDE_INT value,
29859 unsigned HOST_WIDE_INT align)
29861 unsigned int i;
29862 rtx dst, addr, mem;
29863 rtx val_exp, val_reg, reg;
29864 unsigned HOST_WIDE_INT v;
29865 bool use_strd_p;
29867 use_strd_p = (length >= 2 * UNITS_PER_WORD && (align & 3) == 0
29868 && TARGET_LDRD && current_tune->prefer_ldrd_strd);
29870 v = (value | (value << 8) | (value << 16) | (value << 24));
29871 if (length < UNITS_PER_WORD)
29872 v &= (0xFFFFFFFF >> (UNITS_PER_WORD - length) * BITS_PER_UNIT);
29874 if (use_strd_p)
29875 v |= (v << BITS_PER_WORD);
29876 else
29877 v = sext_hwi (v, BITS_PER_WORD);
29879 val_exp = GEN_INT (v);
29880 /* Skip if it isn't profitable. */
29881 if (!arm_block_set_non_vect_profit_p (val_exp, length,
29882 align, false, use_strd_p))
29884 if (!use_strd_p)
29885 return false;
29887 /* Try without strd. */
29888 v = (v >> BITS_PER_WORD);
29889 v = sext_hwi (v, BITS_PER_WORD);
29890 val_exp = GEN_INT (v);
29891 use_strd_p = false;
29892 if (!arm_block_set_non_vect_profit_p (val_exp, length,
29893 align, false, use_strd_p))
29894 return false;
29897 i = 0;
29898 dst = copy_addr_to_reg (XEXP (dstbase, 0));
29899 /* Handle double words using strd if possible. */
29900 if (use_strd_p)
29902 val_reg = force_reg (DImode, val_exp);
29903 reg = val_reg;
29904 for (; (i + 8 <= length); i += 8)
29906 addr = plus_constant (Pmode, dst, i);
29907 mem = adjust_automodify_address (dstbase, DImode, addr, i);
29908 emit_move_insn (mem, reg);
29911 else
29912 val_reg = force_reg (SImode, val_exp);
29914 /* Handle words. */
29915 reg = (use_strd_p ? gen_lowpart (SImode, val_reg) : val_reg);
29916 for (; (i + 4 <= length); i += 4)
29918 addr = plus_constant (Pmode, dst, i);
29919 mem = adjust_automodify_address (dstbase, SImode, addr, i);
29920 if ((align & 3) == 0)
29921 emit_move_insn (mem, reg);
29922 else
29923 emit_insn (gen_unaligned_storesi (mem, reg));
29926 /* Merge last pair of STRH and STRB into a STR if possible. */
29927 if (unaligned_access && i > 0 && (i + 3) == length)
29929 addr = plus_constant (Pmode, dst, i - 1);
29930 mem = adjust_automodify_address (dstbase, SImode, addr, i - 1);
29931 /* We are shifting one byte back, set the alignment accordingly. */
29932 if ((align & 1) == 0)
29933 set_mem_align (mem, BITS_PER_UNIT);
29935 /* Most likely this is an unaligned access, and we can't tell at
29936 compilation time. */
29937 emit_insn (gen_unaligned_storesi (mem, reg));
29938 return true;
29941 /* Handle half word leftover. */
29942 if (i + 2 <= length)
29944 reg = gen_lowpart (HImode, val_reg);
29945 addr = plus_constant (Pmode, dst, i);
29946 mem = adjust_automodify_address (dstbase, HImode, addr, i);
29947 if ((align & 1) == 0)
29948 emit_move_insn (mem, reg);
29949 else
29950 emit_insn (gen_unaligned_storehi (mem, reg));
29952 i += 2;
29955 /* Handle single byte leftover. */
29956 if (i + 1 == length)
29958 reg = gen_lowpart (QImode, val_reg);
29959 addr = plus_constant (Pmode, dst, i);
29960 mem = adjust_automodify_address (dstbase, QImode, addr, i);
29961 emit_move_insn (mem, reg);
29964 return true;
29967 /* Set a block of memory using vectorization instructions for both
29968 aligned and unaligned cases. We fill the first LENGTH bytes of
29969 the memory area starting from DSTBASE with byte constant VALUE.
29970 ALIGN is the alignment requirement of memory. */
29971 static bool
29972 arm_block_set_vect (rtx dstbase,
29973 unsigned HOST_WIDE_INT length,
29974 unsigned HOST_WIDE_INT value,
29975 unsigned HOST_WIDE_INT align)
29977 /* Check whether we need to use unaligned store instruction. */
29978 if (((align & 3) != 0 || (length & 3) != 0)
29979 /* Check whether unaligned store instruction is available. */
29980 && (!unaligned_access || BYTES_BIG_ENDIAN))
29981 return false;
29983 if ((align & 3) == 0)
29984 return arm_block_set_aligned_vect (dstbase, length, value, align);
29985 else
29986 return arm_block_set_unaligned_vect (dstbase, length, value, align);
29989 /* Expand string store operation. First we try to do that by using
29990 vectorization instructions, then try with ARM unaligned access and
29991 double-word store if profitable. OPERANDS[0] is the destination,
29992 OPERANDS[1] is the number of bytes, OPERANDS[2] is the value to
29993 initialize the memory, OPERANDS[3] is the known alignment of the
29994 destination. */
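 /* For example, a memset of 40 bytes with a constant byte value and a
    known alignment may be expanded inline here, while lengths above
    64 bytes make this return false so that the caller falls back to
    the generic expansion (typically a call to memset).  */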
29995 bool
29996 arm_gen_setmem (rtx *operands)
29998 rtx dstbase = operands[0];
29999 unsigned HOST_WIDE_INT length;
30000 unsigned HOST_WIDE_INT value;
30001 unsigned HOST_WIDE_INT align;
30003 if (!CONST_INT_P (operands[2]) || !CONST_INT_P (operands[1]))
30004 return false;
30006 length = UINTVAL (operands[1]);
30007 if (length > 64)
30008 return false;
30010 value = (UINTVAL (operands[2]) & 0xFF);
30011 align = UINTVAL (operands[3]);
30012 if (TARGET_NEON && length >= 8
30013 && current_tune->string_ops_prefer_neon
30014 && arm_block_set_vect (dstbase, length, value, align))
30015 return true;
30017 if (!unaligned_access && (align & 3) != 0)
30018 return arm_block_set_unaligned_non_vect (dstbase, length, value, align);
30020 return arm_block_set_aligned_non_vect (dstbase, length, value, align);
30024 static bool
30025 arm_macro_fusion_p (void)
30027 return current_tune->fusible_ops != tune_params::FUSE_NOTHING;
30030 /* Return true if the two back-to-back sets PREV_SET, CURR_SET are suitable
30031 for MOVW / MOVT macro fusion. */
30033 static bool
30034 arm_sets_movw_movt_fusible_p (rtx prev_set, rtx curr_set)
30036 /* We are trying to fuse
30037 movw imm / movt imm
30038 instructions as a group that gets scheduled together. */
30040 rtx set_dest = SET_DEST (curr_set);
30042 if (GET_MODE (set_dest) != SImode)
30043 return false;
30045 /* We are trying to match:
30046 prev (movw) == (set (reg r0) (const_int imm16))
30047 curr (movt) == (set (zero_extract (reg r0)
30048 (const_int 16)
30049 (const_int 16))
30050 (const_int imm16_1))
30052 prev (movw) == (set (reg r1)
30053 (high (symbol_ref ("SYM"))))
30054 curr (movt) == (set (reg r0)
30055 (lo_sum (reg r1)
30056 (symbol_ref ("SYM")))) */
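	/* In assembly terms the fusible pair is, for example:
	     movw	r0, #:lower16:SYM
	     movt	r0, #:upper16:SYM
	   or a pair of immediate moves building a 32-bit constant.  */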
30058 if (GET_CODE (set_dest) == ZERO_EXTRACT)
30060 if (CONST_INT_P (SET_SRC (curr_set))
30061 && CONST_INT_P (SET_SRC (prev_set))
30062 && REG_P (XEXP (set_dest, 0))
30063 && REG_P (SET_DEST (prev_set))
30064 && REGNO (XEXP (set_dest, 0)) == REGNO (SET_DEST (prev_set)))
30065 return true;
30068 else if (GET_CODE (SET_SRC (curr_set)) == LO_SUM
30069 && REG_P (SET_DEST (curr_set))
30070 && REG_P (SET_DEST (prev_set))
30071 && GET_CODE (SET_SRC (prev_set)) == HIGH
30072 && REGNO (SET_DEST (curr_set)) == REGNO (SET_DEST (prev_set)))
30073 return true;
30075 return false;
30078 static bool
30079 aarch_macro_fusion_pair_p (rtx_insn* prev, rtx_insn* curr)
30081 rtx prev_set = single_set (prev);
30082 rtx curr_set = single_set (curr);
30084 if (!prev_set
30085 || !curr_set)
30086 return false;
30088 if (any_condjump_p (curr))
30089 return false;
30091 if (!arm_macro_fusion_p ())
30092 return false;
30094 if (current_tune->fusible_ops & tune_params::FUSE_AES_AESMC
30095 && aarch_crypto_can_dual_issue (prev, curr))
30096 return true;
30098 if (current_tune->fusible_ops & tune_params::FUSE_MOVW_MOVT
30099 && arm_sets_movw_movt_fusible_p (prev_set, curr_set))
30100 return true;
30102 return false;
30105 /* Return true iff the instruction fusion described by OP is enabled. */
30106 bool
30107 arm_fusion_enabled_p (tune_params::fuse_ops op)
30109 return current_tune->fusible_ops & op;
30112 /* Implement TARGET_SCHED_CAN_SPECULATE_INSN. Return true if INSN can be
30113 scheduled for speculative execution. Reject the long-running division
30114 and square-root instructions. */
30116 static bool
30117 arm_sched_can_speculate_insn (rtx_insn *insn)
30119 switch (get_attr_type (insn))
30121 case TYPE_SDIV:
30122 case TYPE_UDIV:
30123 case TYPE_FDIVS:
30124 case TYPE_FDIVD:
30125 case TYPE_FSQRTS:
30126 case TYPE_FSQRTD:
30127 case TYPE_NEON_FP_SQRT_S:
30128 case TYPE_NEON_FP_SQRT_D:
30129 case TYPE_NEON_FP_SQRT_S_Q:
30130 case TYPE_NEON_FP_SQRT_D_Q:
30131 case TYPE_NEON_FP_DIV_S:
30132 case TYPE_NEON_FP_DIV_D:
30133 case TYPE_NEON_FP_DIV_S_Q:
30134 case TYPE_NEON_FP_DIV_D_Q:
30135 return false;
30136 default:
30137 return true;
30141 /* Implement the TARGET_ASAN_SHADOW_OFFSET hook. */
30143 static unsigned HOST_WIDE_INT
30144 arm_asan_shadow_offset (void)
30146 return HOST_WIDE_INT_1U << 29;
30150 /* This is a temporary fix for PR60655. Ideally we need
30151 to handle most of these cases in the generic part but
30152 currently we reject minus (..) (sym_ref). We try to
30153 ameliorate the case with minus (sym_ref1) (sym_ref2)
30154 where they are in the same section. */
30156 static bool
30157 arm_const_not_ok_for_debug_p (rtx p)
30159 tree decl_op0 = NULL;
30160 tree decl_op1 = NULL;
30162 if (GET_CODE (p) == MINUS)
30164 if (GET_CODE (XEXP (p, 1)) == SYMBOL_REF)
30166 decl_op1 = SYMBOL_REF_DECL (XEXP (p, 1));
30167 if (decl_op1
30168 && GET_CODE (XEXP (p, 0)) == SYMBOL_REF
30169 && (decl_op0 = SYMBOL_REF_DECL (XEXP (p, 0))))
30171 if ((VAR_P (decl_op1)
30172 || TREE_CODE (decl_op1) == CONST_DECL)
30173 && (VAR_P (decl_op0)
30174 || TREE_CODE (decl_op0) == CONST_DECL))
30175 return (get_variable_section (decl_op1, false)
30176 != get_variable_section (decl_op0, false));
30178 if (TREE_CODE (decl_op1) == LABEL_DECL
30179 && TREE_CODE (decl_op0) == LABEL_DECL)
30180 return (DECL_CONTEXT (decl_op1)
30181 != DECL_CONTEXT (decl_op0));
30184 return true;
30188 return false;
30191 /* Return TRUE if X is a reference to a value in a constant pool. */
30192 extern bool
30193 arm_is_constant_pool_ref (rtx x)
30195 return (MEM_P (x)
30196 && GET_CODE (XEXP (x, 0)) == SYMBOL_REF
30197 && CONSTANT_POOL_ADDRESS_P (XEXP (x, 0)));
30200 /* Remember the last target of arm_set_current_function. */
30201 static GTY(()) tree arm_previous_fndecl;
30203 /* Restore or save the TREE_TARGET_GLOBALS from or to NEW_TREE. */
30205 void
30206 save_restore_target_globals (tree new_tree)
30208 /* If we have a previous state, use it. */
30209 if (TREE_TARGET_GLOBALS (new_tree))
30210 restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
30211 else if (new_tree == target_option_default_node)
30212 restore_target_globals (&default_target_globals);
30213 else
30215 /* Call target_reinit and save the state for TARGET_GLOBALS. */
30216 TREE_TARGET_GLOBALS (new_tree) = save_target_globals_default_opts ();
30219 arm_option_params_internal ();
30222 /* Invalidate arm_previous_fndecl. */
30224 void
30225 arm_reset_previous_fndecl (void)
30227 arm_previous_fndecl = NULL_TREE;
30230 /* Establish appropriate back-end context for processing the function
30231 FNDECL. The argument might be NULL to indicate processing at top
30232 level, outside of any function scope. */
30234 static void
30235 arm_set_current_function (tree fndecl)
30237 if (!fndecl || fndecl == arm_previous_fndecl)
30238 return;
30240 tree old_tree = (arm_previous_fndecl
30241 ? DECL_FUNCTION_SPECIFIC_TARGET (arm_previous_fndecl)
30242 : NULL_TREE);
30244 tree new_tree = DECL_FUNCTION_SPECIFIC_TARGET (fndecl);
30246 /* If the current function has no attributes but the previous one did,
30247 use the default node. */
30248 if (! new_tree && old_tree)
30249 new_tree = target_option_default_node;
30251 /* If there is nothing to do, return. #pragma GCC reset or #pragma GCC pop to
30252 the default have been handled by save_restore_target_globals from
30253 arm_pragma_target_parse. */
30254 if (old_tree == new_tree)
30255 return;
30257 arm_previous_fndecl = fndecl;
30259 /* First set the target options. */
30260 cl_target_option_restore (&global_options, TREE_TARGET_OPTION (new_tree));
30262 save_restore_target_globals (new_tree);
30265 /* Implement TARGET_OPTION_PRINT. */
30267 static void
30268 arm_option_print (FILE *file, int indent, struct cl_target_option *ptr)
30270 int flags = ptr->x_target_flags;
30271 const char *fpu_name;
30273 fpu_name = (ptr->x_arm_fpu_index == TARGET_FPU_auto
30274 ? "auto" : all_fpus[ptr->x_arm_fpu_index].name);
30276 fprintf (file, "%*sselected arch %s\n", indent, "",
30277 TARGET_THUMB2_P (flags) ? "thumb2" :
30278 TARGET_THUMB_P (flags) ? "thumb1" :
30279 "arm");
30281 fprintf (file, "%*sselected fpu %s\n", indent, "", fpu_name);
30284 /* Hook to determine if one function can safely inline another. */
30286 static bool
30287 arm_can_inline_p (tree caller, tree callee)
30289 tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
30290 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);
30291 bool can_inline = true;
30293 struct cl_target_option *caller_opts
30294 = TREE_TARGET_OPTION (caller_tree ? caller_tree
30295 : target_option_default_node);
30297 struct cl_target_option *callee_opts
30298 = TREE_TARGET_OPTION (callee_tree ? callee_tree
30299 : target_option_default_node);
30301 if (callee_opts == caller_opts)
30302 return true;
30304 /* Callee's ISA features should be a subset of the caller's. */
30305 struct arm_build_target caller_target;
30306 struct arm_build_target callee_target;
30307 caller_target.isa = sbitmap_alloc (isa_num_bits);
30308 callee_target.isa = sbitmap_alloc (isa_num_bits);
30310 arm_configure_build_target (&caller_target, caller_opts, &global_options_set,
30311 false);
30312 arm_configure_build_target (&callee_target, callee_opts, &global_options_set,
30313 false);
30314 if (!bitmap_subset_p (callee_target.isa, caller_target.isa))
30315 can_inline = false;
30317 sbitmap_free (caller_target.isa);
30318 sbitmap_free (callee_target.isa);
30320 /* OK to inline between different modes.
30321 Functions with mode-specific instructions, e.g. using asm,
30322 must be explicitly protected with noinline. */
30323 return can_inline;
30326 /* Hook to fix a function's alignment affected by the target attribute. */
30328 static void
30329 arm_relayout_function (tree fndecl)
30331 if (DECL_USER_ALIGN (fndecl))
30332 return;
30334 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (fndecl);
30336 if (!callee_tree)
30337 callee_tree = target_option_default_node;
30339 struct cl_target_option *opts = TREE_TARGET_OPTION (callee_tree);
30340 SET_DECL_ALIGN
30341 (fndecl,
30342 FUNCTION_ALIGNMENT (FUNCTION_BOUNDARY_P (opts->x_target_flags)));
30345 /* Inner function to process the attribute((target(...))), take an argument and
30346 set the current options from the argument. If we have a list, recursively
30347 go over the list. */
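 /* For example (illustrative declaration; any valid -mfpu name works):
      __attribute__((target("thumb,fpu=vfpv3-d16"))) int foo (int x);
    Each comma-separated token is handled by one pass of the loop
    below.  */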
30349 static bool
30350 arm_valid_target_attribute_rec (tree args, struct gcc_options *opts)
30352 if (TREE_CODE (args) == TREE_LIST)
30354 bool ret = true;
30356 for (; args; args = TREE_CHAIN (args))
30357 if (TREE_VALUE (args)
30358 && !arm_valid_target_attribute_rec (TREE_VALUE (args), opts))
30359 ret = false;
30360 return ret;
30363 else if (TREE_CODE (args) != STRING_CST)
30365 error ("attribute %<target%> argument not a string");
30366 return false;
30369 char *argstr = ASTRDUP (TREE_STRING_POINTER (args));
30370 char *q;
30372 while ((q = strtok (argstr, ",")) != NULL)
30374 while (ISSPACE (*q)) ++q;
30376 argstr = NULL;
30377 if (!strncmp (q, "thumb", 5))
30378 opts->x_target_flags |= MASK_THUMB;
30380 else if (!strncmp (q, "arm", 3))
30381 opts->x_target_flags &= ~MASK_THUMB;
30383 else if (!strncmp (q, "fpu=", 4))
30385 int fpu_index;
30386 if (! opt_enum_arg_to_value (OPT_mfpu_, q+4,
30387 &fpu_index, CL_TARGET))
30389 error ("invalid fpu for attribute(target(\"%s\"))", q);
30390 return false;
30392 if (fpu_index == TARGET_FPU_auto)
30394 /* This doesn't really make sense until we support
30395 general dynamic selection of the architecture and all
30396 sub-features. */
30397 sorry ("auto fpu selection not currently permitted here");
30398 return false;
30400 opts->x_arm_fpu_index = (enum fpu_type) fpu_index;
30402 else
30404 error ("attribute(target(\"%s\")) is unknown", q);
30405 return false;
30409 return true;
30412 /* Return a TARGET_OPTION_NODE tree of the target options listed or NULL. */
30414 tree
30415 arm_valid_target_attribute_tree (tree args, struct gcc_options *opts,
30416 struct gcc_options *opts_set)
30418 struct cl_target_option cl_opts;
30420 if (!arm_valid_target_attribute_rec (args, opts))
30421 return NULL_TREE;
30423 cl_target_option_save (&cl_opts, opts);
30424 arm_configure_build_target (&arm_active_target, &cl_opts, opts_set, false);
30425 arm_option_check_internal (opts);
30426 /* Do any overrides, such as global options arch=xxx. */
30427 arm_option_override_internal (opts, opts_set);
30429 return build_target_option_node (opts);
30432 static void
30433 add_attribute (const char * mode, tree *attributes)
30435 size_t len = strlen (mode);
30436 tree value = build_string (len, mode);
30438 TREE_TYPE (value) = build_array_type (char_type_node,
30439 build_index_type (size_int (len)));
30441 *attributes = tree_cons (get_identifier ("target"),
30442 build_tree_list (NULL_TREE, value),
30443 *attributes);
30446 /* For testing. Insert thumb or arm modes alternately on functions. */
30448 static void
30449 arm_insert_attributes (tree fndecl, tree * attributes)
30451 const char *mode;
30453 if (! TARGET_FLIP_THUMB)
30454 return;
30456 if (TREE_CODE (fndecl) != FUNCTION_DECL || DECL_EXTERNAL(fndecl)
30457 || DECL_BUILT_IN (fndecl) || DECL_ARTIFICIAL (fndecl))
30458 return;
30460 /* Nested definitions must inherit mode. */
30461 if (current_function_decl)
30463 mode = TARGET_THUMB ? "thumb" : "arm";
30464 add_attribute (mode, attributes);
30465 return;
30468 /* If there is already a setting don't change it. */
30469 if (lookup_attribute ("target", *attributes) != NULL)
30470 return;
30472 mode = thumb_flipper ? "thumb" : "arm";
30473 add_attribute (mode, attributes);
30475 thumb_flipper = !thumb_flipper;
30478 /* Hook to validate attribute((target("string"))). */
30480 static bool
30481 arm_valid_target_attribute_p (tree fndecl, tree ARG_UNUSED (name),
30482 tree args, int ARG_UNUSED (flags))
30484 bool ret = true;
30485 struct gcc_options func_options;
30486 tree cur_tree, new_optimize;
30487 gcc_assert ((fndecl != NULL_TREE) && (args != NULL_TREE));
30489 /* Get the optimization options of the current function. */
30490 tree func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);
30492 /* If the function changed the optimization levels as well as setting target
30493 options, start with the optimizations specified. */
30494 if (!func_optimize)
30495 func_optimize = optimization_default_node;
30497 /* Init func_options. */
30498 memset (&func_options, 0, sizeof (func_options));
30499 init_options_struct (&func_options, NULL);
30500 lang_hooks.init_options_struct (&func_options);
30502 /* Initialize func_options to the defaults. */
30503 cl_optimization_restore (&func_options,
30504 TREE_OPTIMIZATION (func_optimize));
30506 cl_target_option_restore (&func_options,
30507 TREE_TARGET_OPTION (target_option_default_node));
30509 /* Set func_options flags with new target mode. */
30510 cur_tree = arm_valid_target_attribute_tree (args, &func_options,
30511 &global_options_set);
30513 if (cur_tree == NULL_TREE)
30514 ret = false;
30516 new_optimize = build_optimization_node (&func_options);
30518 DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = cur_tree;
30520 DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize;
30522 finalize_options_struct (&func_options);
30524 return ret;
30527 /* Match an ISA feature bitmap to a named FPU. We always use the
30528 first entry that exactly matches the feature set, so that we
30529 effectively canonicalize the FPU name for the assembler. */
30530 static const char*
30531 arm_identify_fpu_from_isa (sbitmap isa)
30533 auto_sbitmap fpubits (isa_num_bits);
30534 auto_sbitmap cand_fpubits (isa_num_bits);
30536 bitmap_and (fpubits, isa, isa_all_fpubits);
30538 /* If there are no ISA feature bits relating to the FPU, we must be
30539 doing soft-float. */
30540 if (bitmap_empty_p (fpubits))
30541 return "softvfp";
30543 for (unsigned int i = 0; i < ARRAY_SIZE (all_fpus); i++)
30545 arm_initialize_isa (cand_fpubits, all_fpus[i].isa_bits);
30546 if (bitmap_equal_p (fpubits, cand_fpubits))
30547 return all_fpus[i].name;
30549 /* We must find an entry, or things have gone wrong. */
30550 gcc_unreachable ();
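 /* Output the assembler directives that introduce the function NAME.
    For instance, a Thumb-2 soft-float function is typically preceded by
	.syntax unified
	.thumb
	.thumb_func
	.fpu softvfp
    (illustrative; the exact directives depend on the target flags).  */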
30553 void
30554 arm_declare_function_name (FILE *stream, const char *name, tree decl)
30557 fprintf (stream, "\t.syntax unified\n");
30559 if (TARGET_THUMB)
30561 if (is_called_in_ARM_mode (decl)
30562 || (TARGET_THUMB1 && !TARGET_THUMB1_ONLY
30563 && cfun->is_thunk))
30564 fprintf (stream, "\t.code 32\n");
30565 else if (TARGET_THUMB1)
30566 fprintf (stream, "\t.code\t16\n\t.thumb_func\n");
30567 else
30568 fprintf (stream, "\t.thumb\n\t.thumb_func\n");
30570 else
30571 fprintf (stream, "\t.arm\n");
30573 asm_fprintf (asm_out_file, "\t.fpu %s\n",
30574 (TARGET_SOFT_FLOAT
30575 ? "softvfp"
30576 : arm_identify_fpu_from_isa (arm_active_target.isa)));
30578 if (TARGET_POKE_FUNCTION_NAME)
30579 arm_poke_function_name (stream, (const char *) name);
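/* As a sketch of the output, a Thumb-2 function on a hard-float target
   might be preceded by (the exact .fpu name depends on the active target):

	.syntax unified
	.thumb
	.thumb_func
	.fpu vfpv3-d16

   whereas a soft-float target gets ".fpu softvfp" instead. */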
30582 /* If MEM is in the form of [base+offset], extract the two parts
30583 of the address into BASE and OFFSET; otherwise return false
30584 after clearing BASE and OFFSET. */
30586 static bool
30587 extract_base_offset_in_addr (rtx mem, rtx *base, rtx *offset)
30589 rtx addr;
30591 gcc_assert (MEM_P (mem));
30593 addr = XEXP (mem, 0);
30595 /* Strip off const from addresses like (const (addr)). */
30596 if (GET_CODE (addr) == CONST)
30597 addr = XEXP (addr, 0);
30599 if (GET_CODE (addr) == REG)
30601 *base = addr;
30602 *offset = const0_rtx;
30603 return true;
30606 if (GET_CODE (addr) == PLUS
30607 && GET_CODE (XEXP (addr, 0)) == REG
30608 && CONST_INT_P (XEXP (addr, 1)))
30610 *base = XEXP (addr, 0);
30611 *offset = XEXP (addr, 1);
30612 return true;
30615 *base = NULL_RTX;
30616 *offset = NULL_RTX;
30618 return false;
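/* For example, a memory operand of the form

     (mem:SI (plus:SI (reg:SI 4) (const_int 8)))

   yields *BASE = (reg:SI 4) and *OFFSET = (const_int 8), while a bare
   (mem:SI (reg:SI 4)) yields *OFFSET = const0_rtx. */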
30621 /* If INSN is a load or store whose address is in the form of [base+offset],
30622 extract the two parts into BASE and OFFSET. IS_LOAD is set
30623 to TRUE if it's a load. Return TRUE if INSN is such an instruction,
30624 otherwise return FALSE. */
30626 static bool
30627 fusion_load_store (rtx_insn *insn, rtx *base, rtx *offset, bool *is_load)
30629 rtx x, dest, src;
30631 gcc_assert (INSN_P (insn));
30632 x = PATTERN (insn);
30633 if (GET_CODE (x) != SET)
30634 return false;
30636 src = SET_SRC (x);
30637 dest = SET_DEST (x);
30638 if (GET_CODE (src) == REG && GET_CODE (dest) == MEM)
30640 *is_load = false;
30641 extract_base_offset_in_addr (dest, base, offset);
30643 else if (GET_CODE (src) == MEM && GET_CODE (dest) == REG)
30645 *is_load = true;
30646 extract_base_offset_in_addr (src, base, offset);
30648 else
30649 return false;
30651 return (*base != NULL_RTX && *offset != NULL_RTX);
30654 /* Implement the TARGET_SCHED_FUSION_PRIORITY hook.
30656 Currently we only support fusing ldr or str instructions, so FUSION_PRI
30657 and PRI are only calculated for these instructions. For other instructions,
30658 FUSION_PRI and PRI are simply set to MAX_PRI. In the future, other kinds
30659 of instruction fusion can be supported by returning different priorities.
30661 It's important that irrelevant instructions get the largest FUSION_PRI. */
30663 static void
30664 arm_sched_fusion_priority (rtx_insn *insn, int max_pri,
30665 int *fusion_pri, int *pri)
30667 int tmp, off_val;
30668 bool is_load;
30669 rtx base, offset;
30671 gcc_assert (INSN_P (insn));
30673 tmp = max_pri - 1;
30674 if (!fusion_load_store (insn, &base, &offset, &is_load))
30676 *pri = tmp;
30677 *fusion_pri = tmp;
30678 return;
30681 /* Load goes first. */
30682 if (is_load)
30683 *fusion_pri = tmp - 1;
30684 else
30685 *fusion_pri = tmp - 2;
30687 tmp /= 2;
30689 /* INSN with smaller base register goes first. */
30690 tmp -= ((REGNO (base) & 0xff) << 20);
30692 /* INSN with smaller offset goes first. */
30693 off_val = (int)(INTVAL (offset));
30694 if (off_val >= 0)
30695 tmp -= (off_val & 0xfffff);
30696 else
30697 tmp += ((- off_val) & 0xfffff);
30699 *pri = tmp;
30700 return;
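/* Illustrative consequence of the above: two loads from the same base
   register with offsets 0 and 4 get the same FUSION_PRI but a larger PRI
   for the smaller offset, so the scheduler tends to keep them adjacent and
   in offset order, making them candidates for later combination into a
   single ldrd or ldm. */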
30704 /* Construct and return a PARALLEL RTX vector with elements numbering the
30705 lanes of either the high (HIGH == TRUE) or low (HIGH == FALSE) half of
30706 the vector - from the perspective of the architecture. This does not
30707 line up with GCC's perspective on lane numbers, so we end up with
30708 different masks depending on our target endianness. The diagram
30709 below may help. We must draw the distinction when building masks
30710 which select one half of the vector. An instruction selecting
30711 architectural low-lanes for a big-endian target must be described using
30712 a mask selecting GCC high-lanes.
30714 Big-Endian Little-Endian
30716 GCC 0 1 2 3 3 2 1 0
30717 | x | x | x | x | | x | x | x | x |
30718 Architecture 3 2 1 0 3 2 1 0
30720 Low Mask: { 2, 3 } { 0, 1 }
30721 High Mask: { 0, 1 } { 2, 3 }
30725 arm_simd_vect_par_cnst_half (machine_mode mode, bool high)
30727 int nunits = GET_MODE_NUNITS (mode);
30728 rtvec v = rtvec_alloc (nunits / 2);
30729 int high_base = nunits / 2;
30730 int low_base = 0;
30731 int base;
30732 rtx t1;
30733 int i;
30735 if (BYTES_BIG_ENDIAN)
30736 base = high ? low_base : high_base;
30737 else
30738 base = high ? high_base : low_base;
30740 for (i = 0; i < nunits / 2; i++)
30741 RTVEC_ELT (v, i) = GEN_INT (base + i);
30743 t1 = gen_rtx_PARALLEL (mode, v);
30744 return t1;
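/* For instance, for V4SImode with HIGH == true this returns
   (parallel [(const_int 2) (const_int 3)]) on a little-endian target and
   (parallel [(const_int 0) (const_int 1)]) on a big-endian target,
   matching the "High Mask" row of the diagram above. */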
30747 /* Check OP for validity as a PARALLEL RTX vector with elements
30748 numbering the lanes of either the high (HIGH == TRUE) or low lanes,
30749 from the perspective of the architecture. See the diagram above
30750 arm_simd_vect_par_cnst_half for more details. */
30752 bool
30753 arm_simd_check_vect_par_cnst_half_p (rtx op, machine_mode mode,
30754 bool high)
30756 rtx ideal = arm_simd_vect_par_cnst_half (mode, high);
30757 HOST_WIDE_INT count_op = XVECLEN (op, 0);
30758 HOST_WIDE_INT count_ideal = XVECLEN (ideal, 0);
30759 int i = 0;
30761 if (!VECTOR_MODE_P (mode))
30762 return false;
30764 if (count_op != count_ideal)
30765 return false;
30767 for (i = 0; i < count_ideal; i++)
30769 rtx elt_op = XVECEXP (op, 0, i);
30770 rtx elt_ideal = XVECEXP (ideal, 0, i);
30772 if (!CONST_INT_P (elt_op)
30773 || INTVAL (elt_ideal) != INTVAL (elt_op))
30774 return false;
30776 return true;
30779 /* Can output mi_thunk for all cases except for non-zero vcall_offset
30780 in Thumb1. */
30781 static bool
30782 arm_can_output_mi_thunk (const_tree, HOST_WIDE_INT, HOST_WIDE_INT vcall_offset,
30783 const_tree)
30785 /* For now, we punt and do not handle this for TARGET_THUMB1. */
30786 if (vcall_offset && TARGET_THUMB1)
30787 return false;
30789 /* Otherwise ok. */
30790 return true;
30793 /* Generate RTL for a conditional branch with rtx comparison CODE in
30794 mode CC_MODE. The destination of the unlikely conditional branch
30795 is LABEL_REF. */
30797 void
30798 arm_gen_unlikely_cbranch (enum rtx_code code, machine_mode cc_mode,
30799 rtx label_ref)
30801 rtx x;
30802 x = gen_rtx_fmt_ee (code, VOIDmode,
30803 gen_rtx_REG (cc_mode, CC_REGNUM),
30804 const0_rtx);
30806 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
30807 gen_rtx_LABEL_REF (VOIDmode, label_ref),
30808 pc_rtx);
30809 emit_unlikely_jump (gen_rtx_SET (pc_rtx, x));
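/* A rough sketch of the RTL emitted for code NE in mode CCmode:

     (set (pc) (if_then_else (ne (reg:CC CC_REGNUM) (const_int 0))
			     (label_ref <label>)
			     (pc)))

   with emit_unlikely_jump attaching a branch-probability note that marks
   the branch as very unlikely. */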
30812 /* Implement the TARGET_ASM_ELF_FLAGS_NUMERIC hook.
30814 For pure-code sections there is no letter code for this attribute, so
30815 output all the section flags numerically when this is needed. */
30817 static bool
30818 arm_asm_elf_flags_numeric (unsigned int flags, unsigned int *num)
30821 if (flags & SECTION_ARM_PURECODE)
30823 *num = 0x20000000;
30825 if (!(flags & SECTION_DEBUG))
30826 *num |= 0x2;
30827 if (flags & SECTION_EXCLUDE)
30828 *num |= 0x80000000;
30829 if (flags & SECTION_WRITE)
30830 *num |= 0x1;
30831 if (flags & SECTION_CODE)
30832 *num |= 0x4;
30833 if (flags & SECTION_MERGE)
30834 *num |= 0x10;
30835 if (flags & SECTION_STRINGS)
30836 *num |= 0x20;
30837 if (flags & SECTION_TLS)
30838 *num |= 0x400;
30839 if (HAVE_COMDAT_GROUP && (flags & SECTION_LINKONCE))
30840 *num |= 0x200;
30842 return true;
30845 return false;
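/* Worked example: a non-debug, executable pure-code section (SECTION_CODE
   set, SECTION_DEBUG clear) is emitted with flags
   0x20000000 | 0x2 | 0x4 = 0x20000006 instead of the usual letter codes. */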
30848 /* Implement the TARGET_ASM_FUNCTION_SECTION hook.
30850 If pure-code is passed as an option, make sure all functions are in
30851 sections that have the SHF_ARM_PURECODE attribute. */
30853 static section *
30854 arm_function_section (tree decl, enum node_frequency freq,
30855 bool startup, bool exit)
30857 const char * section_name;
30858 section * sec;
30860 if (!decl || TREE_CODE (decl) != FUNCTION_DECL)
30861 return default_function_section (decl, freq, startup, exit);
30863 if (!target_pure_code)
30864 return default_function_section (decl, freq, startup, exit);
30867 section_name = DECL_SECTION_NAME (decl);
30869 /* If a function is not in a named section then it falls under the 'default'
30870 text section, also known as '.text'. We can preserve previous behavior as
30871 the default text section already has the SHF_ARM_PURECODE section
30872 attribute. */
30873 if (!section_name)
30875 section *default_sec = default_function_section (decl, freq, startup,
30876 exit);
30878 /* If default_sec is not null, then it must be a special section such as
30879 .text.startup. We set the pure-code attribute and return the
30880 same section to preserve existing behavior. */
30881 if (default_sec)
30882 default_sec->common.flags |= SECTION_ARM_PURECODE;
30883 return default_sec;
30886 /* Otherwise look whether a section has already been created with
30887 'section_name'. */
30888 sec = get_named_section (decl, section_name, 0);
30889 if (!sec)
30890 /* If that is not the case, passing NULL as the section's name to
30891 'get_named_section' will create a section with the declaration's
30892 section name. */
30893 sec = get_named_section (decl, NULL, 0);
30895 /* Set the SHF_ARM_PURECODE attribute. */
30896 sec->common.flags |= SECTION_ARM_PURECODE;
30898 return sec;
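/* For example (illustrative), with -mpure-code a function placed with
   __attribute__ ((section (".foo"))) ends up in a ".foo" section whose
   flags include SECTION_ARM_PURECODE, so it is emitted with the
   SHF_ARM_PURECODE ELF flag. */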
30901 /* Implements the TARGET_SECTION_FLAGS hook.
30903 If DECL is a function declaration and pure-code is passed as an option
30904 then add the SHF_ARM_PURECODE attribute to the section flags. NAME is the
30905 section's name and RELOC indicates whether the declaration's initializer may
30906 contain runtime relocations. */
30908 static unsigned int
30909 arm_elf_section_type_flags (tree decl, const char *name, int reloc)
30911 unsigned int flags = default_section_type_flags (decl, name, reloc);
30913 if (decl && TREE_CODE (decl) == FUNCTION_DECL && target_pure_code)
30914 flags |= SECTION_ARM_PURECODE;
30916 return flags;
30919 /* Generate call to __aeabi_[mode]divmod (op0, op1). */
30921 static void
30922 arm_expand_divmod_libfunc (rtx libfunc, machine_mode mode,
30923 rtx op0, rtx op1,
30924 rtx *quot_p, rtx *rem_p)
30926 if (mode == SImode)
30927 gcc_assert (!TARGET_IDIV);
30929 machine_mode libval_mode = smallest_mode_for_size (2 * GET_MODE_BITSIZE (mode),
30930 MODE_INT);
30932 rtx libval = emit_library_call_value (libfunc, NULL_RTX, LCT_CONST,
30933 libval_mode, 2,
30934 op0, GET_MODE (op0),
30935 op1, GET_MODE (op1));
30937 rtx quotient = simplify_gen_subreg (mode, libval, libval_mode, 0);
30938 rtx remainder = simplify_gen_subreg (mode, libval, libval_mode,
30939 GET_MODE_SIZE (mode));
30941 gcc_assert (quotient);
30942 gcc_assert (remainder);
30944 *quot_p = quotient;
30945 *rem_p = remainder;
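/* For SImode this typically amounts to a call to __aeabi_idivmod or
   __aeabi_uidivmod returning a DImode value, from which the quotient is
   extracted at subreg byte offset 0 and the remainder at offset
   GET_MODE_SIZE (SImode); e.g. dividing 7 by 3 yields 2 and 1 in the two
   halves. */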
30948 /* This function checks for the availability of the coprocessor builtin passed
30949 in BUILTIN for the current target. Returns true if it is available and
30950 false otherwise. If a BUILTIN is passed for which this function has not
30951 been implemented, it aborts through gcc_unreachable. */
30953 bool
30954 arm_coproc_builtin_available (enum unspecv builtin)
30956 /* None of these builtins are available in Thumb mode if the target only
30957 supports Thumb-1. */
30958 if (TARGET_THUMB1)
30959 return false;
30961 switch (builtin)
30963 case VUNSPEC_CDP:
30964 case VUNSPEC_LDC:
30965 case VUNSPEC_LDCL:
30966 case VUNSPEC_STC:
30967 case VUNSPEC_STCL:
30968 case VUNSPEC_MCR:
30969 case VUNSPEC_MRC:
30970 if (arm_arch4)
30971 return true;
30972 break;
30973 case VUNSPEC_CDP2:
30974 case VUNSPEC_LDC2:
30975 case VUNSPEC_LDC2L:
30976 case VUNSPEC_STC2:
30977 case VUNSPEC_STC2L:
30978 case VUNSPEC_MCR2:
30979 case VUNSPEC_MRC2:
30980 /* Only present in ARMv5*, ARMv6 (but not ARMv6-M), ARMv7* and
30981 ARMv8-{A,M}. */
30982 if (arm_arch5)
30983 return true;
30984 break;
30985 case VUNSPEC_MCRR:
30986 case VUNSPEC_MRRC:
30987 /* Only present in ARMv5TE, ARMv6 (but not ARMv6-M), ARMv7* and
30988 ARMv8-{A,M}. */
30989 if (arm_arch6 || arm_arch5te)
30990 return true;
30991 break;
30992 case VUNSPEC_MCRR2:
30993 case VUNSPEC_MRRC2:
30994 if (arm_arch6)
30995 return true;
30996 break;
30997 default:
30998 gcc_unreachable ();
31000 return false;
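/* To summarise the checks above: the plain CDP/LDC/STC/MCR/MRC builtins
   require ARMv4 or later, their *2 variants require ARMv5 or later,
   MCRR/MRRC require ARMv5TE or ARMv6, MCRR2/MRRC2 require ARMv6, and none
   of them are available when targeting Thumb-1 only. */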
31003 /* This function returns true if OP is a valid memory operand for the ldc and
31004 stc coprocessor instructions and false otherwise. */
31006 bool
31007 arm_coproc_ldc_stc_legitimate_address (rtx op)
31009 HOST_WIDE_INT range;
31010 /* Has to be a memory operand. */
31011 if (!MEM_P (op))
31012 return false;
31014 op = XEXP (op, 0);
31016 /* We accept registers. */
31017 if (REG_P (op))
31018 return true;
31020 switch (GET_CODE (op))
31022 case PLUS:
31024 /* Or registers with an offset. */
31025 if (!REG_P (XEXP (op, 0)))
31026 return false;
31028 op = XEXP (op, 1);
31030 /* The offset must be an immediate though. */
31031 if (!CONST_INT_P (op))
31032 return false;
31034 range = INTVAL (op);
31036 /* Within the range of [-1020,1020]. */
31037 if (!IN_RANGE (range, -1020, 1020))
31038 return false;
31040 /* And a multiple of 4. */
31041 return (range % 4) == 0;
31043 case PRE_INC:
31044 case POST_INC:
31045 case PRE_DEC:
31046 case POST_DEC:
31047 return REG_P (XEXP (op, 0));
31048 default:
31049 gcc_unreachable ();
31051 return false;
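/* Some illustrative cases for the predicate above: [r0], [r0, #8] and
   [r0, #-1020] are accepted; [r0, #6] is rejected because the offset is
   not a multiple of 4, and [r0, #1024] is rejected because it lies
   outside [-1020, 1020]. */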
31053 #include "gt-arm.h"